diff --git a/archivebox/config.py b/archivebox/config.py index 985bc409..9c9cd795 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -48,18 +48,21 @@ CHROME_BINARY = os.getenv('CHROME_BINARY', None) REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) OUTPUT_DIR = os.path.abspath(os.getenv('OUTPUT_DIR', os.path.join(REPO_DIR, 'output'))) -ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive') -SOURCES_DIR = os.path.join(OUTPUT_DIR, 'sources') - -PYTHON_PATH = os.path.join(REPO_DIR, 'archivebox') -TEMPLATES_DIR = os.path.join(PYTHON_PATH, 'templates') # ****************************************************************************** # ********************** Do not edit below this point ************************** # ****************************************************************************** -CHROME_SANDBOX = os.getenv('CHROME_SANDBOX', 'True' ).lower() == 'true' +ARCHIVE_DIR_NAME = 'archive' +SOURCES_DIR_NAME = 'sources' +ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME) +SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME) +PYTHON_PATH = os.path.join(REPO_DIR, 'archivebox') +TEMPLATES_DIR = os.path.join(PYTHON_PATH, 'templates') + + +CHROME_SANDBOX = os.getenv('CHROME_SANDBOX', 'True').lower() == 'true' USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM USE_WGET = FETCH_WGET or FETCH_WGET_REQUISITES or FETCH_WARC diff --git a/archivebox/index.py b/archivebox/index.py index 9cb0f689..9126919c 100644 --- a/archivebox/index.py +++ b/archivebox/index.py @@ -164,7 +164,7 @@ def parse_json_link_index(out_dir): def write_html_link_index(out_dir, link): check_link_structure(link) - with open(os.path.join(TEMPLATES_DIR, 'link_index_fancy.html'), 'r', encoding='utf-8') as f: + with open(os.path.join(TEMPLATES_DIR, 'link_index.html'), 'r', encoding='utf-8') as f: link_html = f.read() path = os.path.join(out_dir, 'index.html') diff --git a/archivebox/templates/index_row.html b/archivebox/templates/index_row.html index 3458f945..a6e7da2b 100644 --- a/archivebox/templates/index_row.html +++ b/archivebox/templates/index_row.html @@ -1,16 +1,16 @@ $bookmarked_date - - + + - + $title $tags - 🖼 - 📜 - 📄 + 🖼 + 📜 + 📄 🏛 $url diff --git a/archivebox/templates/link_index.html b/archivebox/templates/link_index.html index 3b58a09a..43833cf1 100644 --- a/archivebox/templates/link_index.html +++ b/archivebox/templates/link_index.html @@ -2,61 +2,318 @@ $title + +
-

+

+ + Archive Icon + + + ▾ + $title
$base_url

-
-
- Tags: $tags
- Type: $type
-
- Bookmarked:
- $bookmarked_date
- Archived:
- $updated_date
+ -
- - + + + + + + diff --git a/archivebox/templates/link_index_fancy.html b/archivebox/templates/link_index_fancy.html deleted file mode 100644 index 43833cf1..00000000 --- a/archivebox/templates/link_index_fancy.html +++ /dev/null @@ -1,319 +0,0 @@ - - - - $title - - - - -
-

- - Archive Icon - - - ▾ - - $title
- - $base_url - -

-
- - - - - - - - - diff --git a/archivebox/util.py b/archivebox/util.py index facfc4a0..fb803732 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -36,6 +36,7 @@ from config import ( FETCH_GIT, FETCH_MEDIA, SUBMIT_ARCHIVE_DOT_ORG, + ARCHIVE_DIR_NAME, ) ### Parsing Helpers @@ -271,7 +272,7 @@ def wget_output_path(link, look_in=None): if re.search(".+\\.[Hh][Tt][Mm][Ll]?$", f, re.I | re.M) ] if html_files: - return urlencode(os.path.join('archive', link['timestamp'], *wget_folder, html_files[0])) + return urlencode(os.path.join(*wget_folder, html_files[0])) return None @@ -389,6 +390,7 @@ def derived_link_info(link): extended_info = { **link, + 'link_dir': '{}/{}'.format(ARCHIVE_DIR_NAME, link['timestamp']), 'bookmarked_date': to_date_str(link['timestamp']), 'updated_date': to_date_str(link['updated']) if 'updated' in link else None, 'domain': domain(url), @@ -400,17 +402,17 @@ def derived_link_info(link): # Archive Method Output URLs extended_info = { **extended_info, - 'favicon_url': 'archive/{timestamp}/favicon.ico'.format(**extended_info), + 'index_url': 'index.html', + 'favicon_url': 'favicon.ico', 'google_favicon_url': 'https://www.google.com/s2/favicons?domain={domain}'.format(**extended_info), - 'files_url': 'archive/{timestamp}/index.html'.format(**extended_info), - 'archive_url': wget_output_path(link) or 'archive/{}/index.html'.format(link['timestamp']), - 'warc_url': 'archive/{timestamp}/warc'.format(**extended_info), - 'pdf_url': 'archive/{timestamp}/output.pdf'.format(**extended_info), - 'screenshot_url': 'archive/{timestamp}/screenshot.png'.format(**extended_info), - 'dom_url': 'archive/{timestamp}/output.html'.format(**extended_info), + 'archive_url': wget_output_path(link) or 'index.html', + 'warc_url': 'warc', + 'pdf_url': 'output.pdf', + 'screenshot_url': 'screenshot.png', + 'dom_url': 'output.html', 'archive_org_url': 'https://web.archive.org/web/{base_url}'.format(**extended_info), - 'git_url': 'archive/{timestamp}/git'.format(**extended_info), - 'media_url': 'archive/{timestamp}/media'.format(**extended_info), + 'git_url': 'git', + 'media_url': 'media', } @@ -419,10 +421,10 @@ def derived_link_info(link): if link['type'] in ('PDF', 'image'): extended_info.update({ 'title': basename(link['url']), - 'archive_url': 'archive/{timestamp}/{base_url}'.format(**extended_info), - 'pdf_url': 'archive/{timestamp}/{base_url}'.format(**extended_info), - 'screenshot_url': 'archive/{timestamp}/{base_url}'.format(**extended_info), - 'dom_url': 'archive/{timestamp}/{base_url}'.format(**extended_info), + 'archive_url': base_url(url), + 'pdf_url': base_url(url), + 'screenshot_url': base_url(url), + 'dom_url': base_url(url), }) return extended_info