From a9986f1f05bfcda8cbb6b7c915854560f98d3e3e Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 10 Apr 2021 04:19:30 -0400 Subject: [PATCH] add timezone support, tons of CSS and layout improvements, more detailed snapshot admin form info, ability to sort by recently updated, better grid view styling, better table layouts, better dark mode support --- archivebox/config.py | 7 +- archivebox/core/admin.py | 1 + archivebox/core/settings.py | 17 +- archivebox/core/templatetags/core_tags.py | 13 +- archivebox/extractors/__init__.py | 4 +- archivebox/extractors/readability.py | 1 + archivebox/extractors/wget.py | 4 +- archivebox/index/html.py | 14 +- archivebox/index/json.py | 4 +- archivebox/index/schema.py | 17 +- archivebox/logging_util.py | 30 +- archivebox/main.py | 1 + archivebox/parsers/__init__.py | 6 +- archivebox/parsers/generic_html.py | 4 +- archivebox/parsers/generic_json.py | 4 +- archivebox/parsers/generic_txt.py | 8 +- archivebox/parsers/pinboard_rss.py | 4 +- archivebox/parsers/url_list.py | 4 +- archivebox/templates/admin/base.html | 396 +++++++++--------- .../templates/admin/snapshots_grid.html | 305 ++++++++------ archivebox/templates/core/add.html | 2 +- archivebox/templates/core/base.html | 39 +- archivebox/templates/core/index_row.html | 41 +- archivebox/templates/core/progressbar.html | 45 ++ archivebox/templates/core/public_index.html | 71 ++-- archivebox/templates/core/snapshot.html | 118 +++--- archivebox/templates/static/admin.css | 60 ++- archivebox/util.py | 10 +- 28 files changed, 681 insertions(+), 549 deletions(-) create mode 100644 archivebox/templates/core/progressbar.html diff --git a/archivebox/config.py b/archivebox/config.py index b12cd59e..2ecc3415 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -34,7 +34,7 @@ import django from hashlib import md5 from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import Optional, Type, Tuple, Dict, Union, List from subprocess import run, PIPE, DEVNULL from configparser import ConfigParser @@ -80,7 +80,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. 
Contact server owner for any takedown requests.'}, 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, - 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None} + 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, + 'TIME_ZONE': {'type': str, 'default': 'UTC'}, }, 'ARCHIVE_METHOD_TOGGLES': { @@ -1105,7 +1106,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, # log startup message to the error log with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f: command = ' '.join(sys.argv) - ts = datetime.now().strftime('%Y-%m-%d__%H:%M:%S') + ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S') f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n") diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index a5bb1351..0329d9b0 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -3,6 +3,7 @@ __package__ = 'archivebox.core' from io import StringIO from pathlib import Path from contextlib import redirect_stdout +from datetime import datetime, timezone from django.contrib import admin from django.urls import path diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index fade85db..8bc44b60 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -19,9 +19,9 @@ from ..config import ( SQL_INDEX_FILENAME, OUTPUT_DIR, LOGS_DIR, + TIME_ZONE, ) - IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] @@ -154,6 +154,7 @@ DATABASES = { 'timeout': 60, 'check_same_thread': False, }, + 'TIME_ZONE': 'UTC', # DB setup is sometimes modified at runtime by setup_django() in config.py } } @@ -182,6 +183,7 @@ ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') SECURE_BROWSER_XSS_FILTER = True SECURE_CONTENT_TYPE_NOSNIFF = True +SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin' CSRF_COOKIE_SECURE = False SESSION_COOKIE_SECURE = False @@ -217,14 +219,17 @@ if IS_SHELL: ################################################################################ LANGUAGE_CODE = 'en-us' -TIME_ZONE = 'UTC' -USE_I18N = False -USE_L10N = False -USE_TZ = False - +USE_I18N = True +USE_L10N = True +USE_TZ = True DATETIME_FORMAT = 'Y-m-d g:iA' SHORT_DATETIME_FORMAT = 'Y-m-d h:iA' +from django.conf.locale.en import formats as en_formats + +en_formats.DATETIME_FORMAT = DATETIME_FORMAT +en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT + ################################################################################ ### Logging Settings diff --git a/archivebox/core/templatetags/core_tags.py b/archivebox/core/templatetags/core_tags.py index 9ac1ee27..4f53ac2a 100644 --- a/archivebox/core/templatetags/core_tags.py +++ b/archivebox/core/templatetags/core_tags.py @@ -1,22 +1,15 @@ from django import template -from django.urls import reverse from django.contrib.admin.templatetags.base import InclusionAdminNode -from django.templatetags.static import static from typing import Union -from core.models import ArchiveResult register = template.Library() -@register.simple_tag -def snapshot_image(snapshot): - result = ArchiveResult.objects.filter(snapshot=snapshot, extractor='screenshot', status='succeeded').first() - if result: - return reverse('Snapshot', args=[f'{str(snapshot.timestamp)}/{result.output}']) - - return static('archive.png') +@register.filter(name='split') +def split(value, separator: 
str=','): + return (value or '').split(separator) @register.filter def file_size(num_bytes: Union[int, float]) -> str: diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 09b56c66..7c71f241 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -4,7 +4,7 @@ import os from pathlib import Path from typing import Optional, List, Iterable, Union -from datetime import datetime +from datetime import datetime, timezone from django.db.models import QuerySet from ..index.schema import Link @@ -94,7 +94,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s link = load_link_details(link, out_dir=out_dir) write_link_details(link, out_dir=out_dir, skip_sql_index=False) log_link_archiving_started(link, out_dir, is_new) - link = link.overwrite(updated=datetime.now()) + link = link.overwrite(updated=datetime.now(timezone.utc)) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} for method_name, should_run, method_function in ARCHIVE_METHODS: diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index d7c1e303..bc6d6656 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -92,6 +92,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO result = run(cmd, cwd=out_dir, timeout=timeout) try: result_json = json.loads(result.stdout) + assert result_json and 'content' in result_json except json.JSONDecodeError: raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr) diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 4d04f673..d4e09aa3 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -4,7 +4,7 @@ import re from pathlib import Path from typing import Optional -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file @@ -51,7 +51,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> if SAVE_WARC: warc_dir = out_dir / "warc" warc_dir.mkdir(exist_ok=True) - warc_path = warc_dir / str(int(datetime.now().timestamp())) + warc_path = warc_dir / str(int(datetime.now(timezone.utc).timestamp())) # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html output: ArchiveOutput = None diff --git a/archivebox/index/html.py b/archivebox/index/html.py index b584b876..d45f66ea 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.index' from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from collections import defaultdict from typing import List, Optional, Iterator, Mapping @@ -13,7 +13,7 @@ from ..system import atomic_write from ..logging_util import printable_filesize from ..util import ( enforce_types, - ts_to_date, + ts_to_date_str, urlencode, htmlencode, urldecode, @@ -62,8 +62,8 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> 'version': VERSION, 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility 'num_links': str(len(links)), - 'date_updated': datetime.now().strftime('%Y-%m-%d'), - 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'), + 'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'), + 'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'), 'links': 
[link._asdict(extended=True) for link in links], 'FOOTER_INFO': FOOTER_INFO, }) @@ -103,7 +103,7 @@ def link_details_template(link: Link) -> str: 'size': printable_filesize(link.archive_size) if link.archive_size else 'pending', 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', - 'oldest_archive_date': ts_to_date(link.oldest_archive_date), + 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, }) @@ -120,7 +120,7 @@ def snapshot_icons(snapshot) -> str: def calc_snapshot_icons(): from core.models import EXTRACTORS - # start = datetime.now() + # start = datetime.now(timezone.utc) archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False) link = snapshot.as_link() @@ -183,7 +183,7 @@ def snapshot_icons(snapshot) -> str: "archive_org", icons.get("archive_org", "?")) result = format_html('{}', mark_safe(output)) - # end = datetime.now() + # end = datetime.now(timezone.utc) # print(((end - start).total_seconds()*1000) // 1, 'ms') return result diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 441e6854..6d564ae8 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -5,7 +5,7 @@ import sys import json as pyjson from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union from .schema import Link @@ -44,7 +44,7 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): output = { **MAIN_INDEX_HEADER, 'num_links': len(links), - 'updated': datetime.now(), + 'updated': datetime.now(timezone.utc), 'last_run_cmd': sys.argv, 'links': links, } diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 00831e19..480e9c7f 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -10,7 +10,7 @@ __package__ = 'archivebox.index' from pathlib import Path -from datetime import datetime, timedelta +from datetime import datetime, timezone, timedelta from typing import List, Dict, Any, Optional, Union @@ -19,7 +19,7 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property from ..system import get_dir_size - +from ..util import ts_to_date_str, parse_date from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME class ArchiveError(Exception): @@ -203,7 +203,7 @@ class Link: 'extension': self.extension, 'is_static': self.is_static, - 'tags_str': self.tags, # only used to render static index in index/html.py, remove if no longer needed there + 'tags_str': (self.tags or '').strip(','), # only used to render static index in index/html.py, remove if no longer needed there 'icons': None, # only used to render static index in index/html.py, remove if no longer needed there 'bookmarked_date': self.bookmarked_date, @@ -325,13 +325,11 @@ class Link: ### Pretty Printing Helpers @property def bookmarked_date(self) -> Optional[str]: - from ..util import ts_to_date - - max_ts = (datetime.now() + timedelta(days=30)).timestamp() + max_ts = (datetime.now(timezone.utc) + timedelta(days=30)).timestamp() if self.timestamp and self.timestamp.replace('.', '').isdigit(): if 0 < float(self.timestamp) < max_ts: - return ts_to_date(datetime.fromtimestamp(float(self.timestamp))) + return ts_to_date_str(datetime.fromtimestamp(float(self.timestamp))) else: return str(self.timestamp) return None @@ -339,13 +337,12 @@ class Link: @property def 
updated_date(self) -> Optional[str]: - from ..util import ts_to_date - return ts_to_date(self.updated) if self.updated else None + return ts_to_date_str(self.updated) if self.updated else None @property def archive_dates(self) -> List[datetime]: return [ - result.start_ts + parse_date(result.start_ts) for method in self.history.keys() for result in self.history[method] ] diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 92a0f61d..6cb34f47 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -10,7 +10,7 @@ from math import log from multiprocessing import Process from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from dataclasses import dataclass from typing import Any, Optional, List, Dict, Union, IO, TYPE_CHECKING @@ -138,17 +138,19 @@ class TimedProgress: """Show a progress bar and measure elapsed time until .end() is called""" def __init__(self, seconds, prefix=''): + self.SHOW_PROGRESS = SHOW_PROGRESS if self.SHOW_PROGRESS: self.p = Process(target=progress_bar, args=(seconds, prefix)) self.p.start() - self.stats = {'start_ts': datetime.now(), 'end_ts': None} + self.stats = {'start_ts': datetime.now(timezone.utc), 'end_ts': None} def end(self): """immediately end progress, clear the progressbar line, and save end_ts""" - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) self.stats['end_ts'] = end_ts if self.SHOW_PROGRESS: @@ -231,7 +233,7 @@ def progress_bar(seconds: int, prefix: str='') -> None: def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str): cmd = ' '.join(('archivebox', subcommand, *subcommand_args)) stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format( - now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), VERSION=VERSION, cmd=cmd, **ANSI, @@ -243,7 +245,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: bool): - _LAST_RUN_STATS.parse_start_ts = datetime.now() + _LAST_RUN_STATS.parse_start_ts = datetime.now(timezone.utc) print('{green}[+] [{}] Adding {} links to index (crawl depth={}){}...{reset}'.format( _LAST_RUN_STATS.parse_start_ts.strftime('%Y-%m-%d %H:%M:%S'), len(urls) if isinstance(urls, list) else len(urls.split('\n')), @@ -256,7 +258,7 @@ def log_source_saved(source_file: str): print(' > Saved verbatim input to {}/{}'.format(SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1])) def log_parsing_finished(num_parsed: int, parser_name: str): - _LAST_RUN_STATS.parse_end_ts = datetime.now() + _LAST_RUN_STATS.parse_end_ts = datetime.now(timezone.utc) print(' > Parsed {} URLs from input ({})'.format(num_parsed, parser_name)) def log_deduping_finished(num_new_links: int): @@ -270,7 +272,7 @@ def log_crawl_started(new_links): ### Indexing Stage def log_indexing_process_started(num_links: int): - start_ts = datetime.now() + start_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.index_start_ts = start_ts print() print('{black}[*] [{}] Writing {} links to main index...{reset}'.format( @@ -281,7 +283,7 @@ def log_indexing_process_started(num_links: int): def log_indexing_process_finished(): - end_ts = datetime.now() + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.index_end_ts = end_ts @@ -297,7 +299,8 @@ def log_indexing_finished(out_path: str): ### Archiving Stage def log_archiving_started(num_links: int, resume: Optional[float]=None): - 
start_ts = datetime.now() + + start_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_start_ts = start_ts print() if resume: @@ -315,7 +318,8 @@ def log_archiving_started(num_links: int, resume: Optional[float]=None): )) def log_archiving_paused(num_links: int, idx: int, timestamp: str): - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_end_ts = end_ts print() print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format( @@ -330,7 +334,8 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str): print(' archivebox update --resume={}'.format(timestamp)) def log_archiving_finished(num_links: int): - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_end_ts = end_ts assert _LAST_RUN_STATS.archiving_start_ts is not None seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp() @@ -356,6 +361,7 @@ def log_archiving_finished(num_links: int): def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): + # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford" # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/ # > output/archive/1478739709 @@ -363,7 +369,7 @@ def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( symbol_color=ANSI['green' if is_new else 'black'], symbol='+' if is_new else '√', - now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), title=link.title or link.base_url, **ANSI, )) diff --git a/archivebox/main.py b/archivebox/main.py index 3af26e5d..fa13dc34 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -585,6 +585,7 @@ def add(urls: Union[str, List[str]], update_all: bool=not ONLY_NEW, index_only: bool=False, overwrite: bool=False, + # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically init: bool=False, extractors: str="", parser: str="auto", diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index d040b23a..2451f0f5 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -11,7 +11,7 @@ import re from io import StringIO from typing import IO, Tuple, List, Optional -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from ..system import atomic_write @@ -147,7 +147,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None, @enforce_types def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str: - ts = str(datetime.now().timestamp()).split('.', 1)[0] + ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts)) atomic_write(source_path, raw_text) log_source_saved(source_file=source_path) @@ -157,7 +157,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: @enforce_types def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str: """download a given url's content into output/sources/domain-.txt""" - ts = str(datetime.now().timestamp()).split('.', 1)[0] + ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / 
filename.format(basename=basename(path), ts=ts)) if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')): diff --git a/archivebox/parsers/generic_html.py b/archivebox/parsers/generic_html.py index 6950dc1d..95adb018 100644 --- a/archivebox/parsers/generic_html.py +++ b/archivebox/parsers/generic_html.py @@ -4,7 +4,7 @@ __package__ = 'archivebox.parsers' import re from typing import IO, Iterable, Optional -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -46,7 +46,7 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None, for archivable_url in re.findall(URL_REGEX, url): yield Link( url=htmldecode(archivable_url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[html_file.name], diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index fff4d712..0466b0f6 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -3,7 +3,7 @@ __package__ = 'archivebox.parsers' import json from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -30,7 +30,7 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') # Parse the timestamp - ts_str = str(datetime.now().timestamp()) + ts_str = str(datetime.now(timezone.utc).timestamp()) if link.get('timestamp'): # chrome/ff histories use a very precise timestamp ts_str = str(link['timestamp'] / 10000000) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index a7ed8d54..80d97cf5 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -4,7 +4,7 @@ __description__ = 'Plain Text' import re from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from ..index.schema import Link @@ -29,7 +29,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: if Path(line).exists(): yield Link( url=line, - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], @@ -42,7 +42,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: for url in re.findall(URL_REGEX, line): yield Link( url=htmldecode(url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], @@ -54,7 +54,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: for sub_url in re.findall(URL_REGEX, line[1:]): yield Link( url=htmldecode(sub_url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], diff --git a/archivebox/parsers/pinboard_rss.py b/archivebox/parsers/pinboard_rss.py index 17d1025e..b7a77a00 100644 --- a/archivebox/parsers/pinboard_rss.py +++ b/archivebox/parsers/pinboard_rss.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.parsers' from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from xml.etree import ElementTree @@ -36,7 +36,7 @@ def parse_pinboard_rss_export(rss_file: IO[str], 
**_kwargs) -> Iterable[Link]: if ts_str: time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") else: - time = datetime.now() + time = datetime.now(timezone.utc) yield Link( url=htmldecode(url), diff --git a/archivebox/parsers/url_list.py b/archivebox/parsers/url_list.py index 66e3961c..e9a7bbb3 100644 --- a/archivebox/parsers/url_list.py +++ b/archivebox/parsers/url_list.py @@ -4,7 +4,7 @@ __description__ = 'URL list' import re from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -25,7 +25,7 @@ def parse_url_list(text_file: IO[str], **_kwargs) -> Iterable[Link]: yield Link( url=url, - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html index 436318ea..9dc62516 100644 --- a/archivebox/templates/admin/base.html +++ b/archivebox/templates/admin/base.html @@ -4,228 +4,228 @@ - -{% block title %}{% endblock %} | ArchiveBox - -{% block extrastyle %}{% endblock %} -{% if LANGUAGE_BIDI %}{% endif %} -{% block extrahead %}{% endblock %} -{% block responsive %} - - - {% if LANGUAGE_BIDI %}{% endif %} -{% endblock %} -{% block blockbots %}{% endblock %} - - -{% load i18n %} - - - - - - - - - -
- - {% if not is_popup %} - - - - {% block breadcrumbs %} - + + {{obj.title|default:'Not yet archived...'}} + + +
{% endfor %}
{% endblock %} diff --git a/archivebox/templates/core/add.html b/archivebox/templates/core/add.html index 4315ee76..978567a3 100644 --- a/archivebox/templates/core/add.html +++ b/archivebox/templates/core/add.html @@ -49,7 +49,7 @@ - - - + + + + + + {% block extra_head %} + {% endblock %}
@@ -48,6 +30,7 @@
{% block body %} + {% endblock %}
{% block footer %} diff --git a/archivebox/templates/core/index_row.html b/archivebox/templates/core/index_row.html index ba34a8c6..bfeed9fa 100644 --- a/archivebox/templates/core/index_row.html +++ b/archivebox/templates/core/index_row.html @@ -1,37 +1,44 @@ -{% load static %} +{% load static tz core_tags %} - {% if link.bookmarked_date %} {{ link.bookmarked_date }} {% else %} {{ link.added }} {% endif %} - + + {{ link.added|localtime }} + + {% if link.is_archived %} - + {% else %} - + {% endif %} - - {{link.title|default:'Loading...'|truncatechars:128}} + + + {{link.title|default:'Loading...'|truncatechars:128}} + {% if link.tags_str %} - - {% if link.tags_str != None %} - {{link.tags_str|default:''}} - {% else %} - {{ link.tags|default:'' }} - {% endif %} - + {% for tag in link.tags_str|split:',' %} + + {{tag}} + + {% endfor %} {% endif %} {% if link.icons %} - {{link.icons}} {{link.num_outputs}} + {{link.icons}}  {{link.num_outputs}} {% else %} - 📄 + + 📄   {{link.num_outputs}} {% endif %} - {{link.url|truncatechars:128}} + + + {{link.url}} + + diff --git a/archivebox/templates/core/progressbar.html b/archivebox/templates/core/progressbar.html new file mode 100644 index 00000000..34d6ce98 --- /dev/null +++ b/archivebox/templates/core/progressbar.html @@ -0,0 +1,45 @@ + + diff --git a/archivebox/templates/core/public_index.html b/archivebox/templates/core/public_index.html index c414cbf8..57bb802c 100644 --- a/archivebox/templates/core/public_index.html +++ b/archivebox/templates/core/public_index.html @@ -1,12 +1,7 @@ {% extends "base.html" %} -{% load static %} +{% load static tz %} {% block body %} -
+
BookmarkedSnapshot ({{page_obj.paginator.count}})FilesOriginal URL
+ + + + + + + + {% for link in object_list %} {% include 'index_row.html' with link=link %} {% endfor %}
BookmarkedSnapshot ({{page_obj.paginator.count}})FilesOriginal URL
-
-
- Showing {{ page_obj.start_index }}-{{ page_obj.end_index }} of {{ page_obj.paginator.count }} total -
- - {% if page_obj.has_previous %} - « first   - previous -   - {% endif %} - - - Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }} - - - {% if page_obj.has_next %} -   - next   - last » - {% endif %} + +
+
+ Showing {{ page_obj.start_index }}-{{ page_obj.end_index }} of {{ page_obj.paginator.count }} total +
+ + {% if page_obj.has_previous %} + « first   + previous +   + {% endif %} + + + Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }} + + {% if page_obj.has_next %} +   + next   + last » + {% endif %}
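The `split` filter registered in `core_tags.py` earlier in this patch is what the reworked `index_row.html` and public index templates rely on to turn the comma-separated `tags_str` value into individual tags. A minimal standalone sketch of that filter and its template-side usage (the filter body is taken verbatim from the patch; the loop is shown schematically as an illustration, not as the exact template markup):

```python
# Django template filter as added in archivebox/core/templatetags/core_tags.py
from django import template

register = template.Library()

@register.filter(name='split')
def split(value, separator: str = ','):
    # Treat None/empty as an empty string so templates can loop safely
    # over snapshots that have no tags.
    return (value or '').split(separator)


# Illustrative template usage (index_row.html loads the library via
# `{% load static tz core_tags %}` and renders one element per tag):
#
#   {% for tag in link.tags_str|split:',' %}
#       {{ tag }}
#   {% endfor %}
```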
diff --git a/archivebox/templates/core/snapshot.html b/archivebox/templates/core/snapshot.html index b4d34fd1..d562d038 100644 --- a/archivebox/templates/core/snapshot.html +++ b/archivebox/templates/core/snapshot.html @@ -1,3 +1,5 @@ +{% load tz core_tags %} + @@ -20,7 +22,6 @@ } header { background-color: #aa1e55; - padding-bottom: 12px; } small { font-weight: 200; @@ -34,15 +35,15 @@ min-height: 40px; margin: 0px; text-align: center; - color: white; - font-size: calc(11px + 0.84vw); + color: #f6f6f6; + font-size: calc(10px + 0.84vw); font-weight: 200; - padding: 4px 4px; + padding: 3px 4px; background-color: #aa1e55; } .nav > div { min-height: 30px; - line-height: 1.3; + line-height: 1.2; } .header-top a { text-decoration: none; @@ -68,9 +69,14 @@ .header-archivebox img:hover { opacity: 0.5; } - .header-url small { + header small code { white-space: nowrap; font-weight: 200; + display: block; + margin-top: -1px; + font-size: 13px; + opacity: 0.8; + user-select: all; } .header-url img { height: 20px; @@ -90,28 +96,38 @@ .info-row .alert { margin-bottom: 0px; } + .row.header-bottom { + margin-left: -10px; + margin-right: -10px; + } + .header-bottom .col-lg-2 { + padding-left: 4px; + padding-right: 4px; + } + .header-bottom-frames .card { - overflow: hidden; box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02); - margin-top: 10px; + margin-bottom: 5px; border: 1px solid rgba(0,0,0,3); - border-radius: 14px; + border-radius: 10px; background-color: black; + overflow: hidden; } .card h4 { font-size: 1.4vw; } .card-body { - font-size: 15px; + font-size: 14px; padding: 13px 10px; - padding-bottom: 6px; + padding-bottom: 1px; /* padding-left: 3px; */ /* padding-right: 3px; */ /* padding-bottom: 3px; */ - line-height: 1.1; + line-height: 1; word-wrap: break-word; max-height: 102px; overflow: hidden; + text-overflow: ellipsis; background-color: #1a1a1a; color: #d3d3d3; } @@ -146,22 +162,12 @@ border-top: 3px solid #aa1e55; } .card.selected-card { - border: 1px solid orange; + border: 2px solid orange; box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05); } .iframe-large { height: calc(100% - 40px); } - .pdf-frame { - transform: none; - width: 100%; - height: 160px; - margin-top: -60px; - margin-bottom: 0px; - transform: scale(1.1); - width: 100%; - margin-left: -10%; - } img.external { height: 30px; margin-right: -10px; @@ -185,7 +191,7 @@ } .header-bottom { border-top: 1px solid rgba(170, 30, 85, 0.9); - padding-bottom: 12px; + padding-bottom: 1px; border-bottom: 5px solid rgb(170, 30, 85); margin-bottom: -1px; @@ -215,10 +221,11 @@ } .info-chunk { width: auto; - display:inline-block; + display: inline-block; text-align: center; - margin: 10px 10px; + margin: 8px 4px; vertical-align: top; + font-size: 14px; } .info-chunk .badge { margin-top: 5px; @@ -226,13 +233,12 @@ .header-bottom-frames .card-title { width: 100%; text-align: center; - font-size: 18px; - margin-bottom: 5px; + font-size: 17px; + margin-bottom: 0px; display: inline-block; color: #d3d3d3; font-weight: 200; - vertical-align: 0px; - margin-top: -6px; + vertical-align: 3px; } .header-bottom-frames .card-text { width: 100%; @@ -277,8 +283,7 @@
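The change repeated across the Python modules above is the move from naive `datetime.now()` to timezone-aware `datetime.now(timezone.utc)`, matching the new `USE_TZ = True` and `TIME_ZONE = 'UTC'` settings. A minimal sketch (standard library only, not part of the patch) of why the call sites have to change along with the settings:

```python
from datetime import datetime, timezone

naive = datetime.now()               # no tzinfo attached
aware = datetime.now(timezone.utc)   # explicit UTC, matches TIME_ZONE='UTC'

print(naive.tzinfo)  # None
print(aware.tzinfo)  # UTC

# Mixing the two raises at comparison time, which is why every call site is
# converted rather than only the Django settings:
try:
    naive < aware
except TypeError as err:
    print(err)  # can't compare offset-naive and offset-aware datetimes
```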
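Similarly, the `en_formats` assignments added to `core/settings.py` are needed because enabling `USE_L10N = True` makes Django prefer the active locale's format module over the project-level `DATETIME_FORMAT`. A short sketch of that override with the reasoning spelled out (the values are the ones from the patch; `FORMAT_MODULE_PATH` is mentioned only as the stock Django alternative, not something this patch uses):

```python
# With USE_L10N = True, Django formats dates via the locale's format module,
# so DATETIME_FORMAT/SHORT_DATETIME_FORMAT alone stop affecting rendered dates.
USE_I18N = True
USE_L10N = True
USE_TZ = True

DATETIME_FORMAT = 'Y-m-d g:iA'
SHORT_DATETIME_FORMAT = 'Y-m-d h:iA'

# Overriding the 'en' locale module forces the custom formats even with
# localization on (Django's FORMAT_MODULE_PATH setting is the alternative).
from django.conf.locale.en import formats as en_formats

en_formats.DATETIME_FORMAT = DATETIME_FORMAT
en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT
```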