diff --git a/archivebox/config.py b/archivebox/config.py index b12cd59e..2ecc3415 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -34,7 +34,7 @@ import django from hashlib import md5 from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import Optional, Type, Tuple, Dict, Union, List from subprocess import run, PIPE, DEVNULL from configparser import ConfigParser @@ -80,7 +80,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = { 'PUBLIC_ADD_VIEW': {'type': bool, 'default': False}, 'FOOTER_INFO': {'type': str, 'default': 'Content is hosted for personal archiving purposes only. Contact server owner for any takedown requests.'}, 'SNAPSHOTS_PER_PAGE': {'type': int, 'default': 40}, - 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None} + 'CUSTOM_TEMPLATES_DIR': {'type': str, 'default': None}, + 'TIME_ZONE': {'type': str, 'default': 'UTC'}, }, 'ARCHIVE_METHOD_TOGGLES': { @@ -1105,7 +1106,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, # log startup message to the error log with open(settings.ERROR_LOG, "a+", encoding='utf-8') as f: command = ' '.join(sys.argv) - ts = datetime.now().strftime('%Y-%m-%d__%H:%M:%S') + ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S') f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n") diff --git a/archivebox/core/admin.py b/archivebox/core/admin.py index a5bb1351..0329d9b0 100644 --- a/archivebox/core/admin.py +++ b/archivebox/core/admin.py @@ -3,6 +3,7 @@ __package__ = 'archivebox.core' from io import StringIO from pathlib import Path from contextlib import redirect_stdout +from datetime import datetime, timezone from django.contrib import admin from django.urls import path diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index fade85db..8bc44b60 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -19,9 +19,9 @@ from ..config import ( SQL_INDEX_FILENAME, OUTPUT_DIR, LOGS_DIR, + TIME_ZONE, ) - IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] @@ -154,6 +154,7 @@ DATABASES = { 'timeout': 60, 'check_same_thread': False, }, + 'TIME_ZONE': 'UTC', # DB setup is sometimes modified at runtime by setup_django() in config.py } } @@ -182,6 +183,7 @@ ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') SECURE_BROWSER_XSS_FILTER = True SECURE_CONTENT_TYPE_NOSNIFF = True +SECURE_REFERRER_POLICY = 'strict-origin-when-cross-origin' CSRF_COOKIE_SECURE = False SESSION_COOKIE_SECURE = False @@ -217,14 +219,17 @@ if IS_SHELL: ################################################################################ LANGUAGE_CODE = 'en-us' -TIME_ZONE = 'UTC' -USE_I18N = False -USE_L10N = False -USE_TZ = False - +USE_I18N = True +USE_L10N = True +USE_TZ = True DATETIME_FORMAT = 'Y-m-d g:iA' SHORT_DATETIME_FORMAT = 'Y-m-d h:iA' +from django.conf.locale.en import formats as en_formats + +en_formats.DATETIME_FORMAT = DATETIME_FORMAT +en_formats.SHORT_DATETIME_FORMAT = SHORT_DATETIME_FORMAT + ################################################################################ ### Logging Settings diff --git a/archivebox/core/templatetags/core_tags.py b/archivebox/core/templatetags/core_tags.py index 9ac1ee27..4f53ac2a 100644 --- a/archivebox/core/templatetags/core_tags.py +++ b/archivebox/core/templatetags/core_tags.py @@ -1,22 +1,15 @@ from django import template -from django.urls import reverse from django.contrib.admin.templatetags.base import InclusionAdminNode -from django.templatetags.static import static from typing import Union -from core.models import ArchiveResult register = template.Library() -@register.simple_tag -def snapshot_image(snapshot): - result = ArchiveResult.objects.filter(snapshot=snapshot, extractor='screenshot', status='succeeded').first() - if result: - return reverse('Snapshot', args=[f'{str(snapshot.timestamp)}/{result.output}']) - - return static('archive.png') +@register.filter(name='split') +def split(value, separator: str=','): + return (value or '').split(separator) @register.filter def file_size(num_bytes: Union[int, float]) -> str: diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 09b56c66..7c71f241 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -4,7 +4,7 @@ import os from pathlib import Path from typing import Optional, List, Iterable, Union -from datetime import datetime +from datetime import datetime, timezone from django.db.models import QuerySet from ..index.schema import Link @@ -94,7 +94,7 @@ def archive_link(link: Link, overwrite: bool=False, methods: Optional[Iterable[s link = load_link_details(link, out_dir=out_dir) write_link_details(link, out_dir=out_dir, skip_sql_index=False) log_link_archiving_started(link, out_dir, is_new) - link = link.overwrite(updated=datetime.now()) + link = link.overwrite(updated=datetime.now(timezone.utc)) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} for method_name, should_run, method_function in ARCHIVE_METHODS: diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index d7c1e303..bc6d6656 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -92,6 +92,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO result = run(cmd, cwd=out_dir, timeout=timeout) try: result_json = json.loads(result.stdout) + assert result_json and 'content' in result_json except json.JSONDecodeError: raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr) diff --git a/archivebox/extractors/wget.py b/archivebox/extractors/wget.py index 4d04f673..d4e09aa3 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/extractors/wget.py @@ -4,7 +4,7 @@ import re from pathlib import Path from typing import Optional -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file @@ -51,7 +51,7 @@ def save_wget(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) -> if SAVE_WARC: warc_dir = out_dir / "warc" warc_dir.mkdir(exist_ok=True) - warc_path = warc_dir / str(int(datetime.now().timestamp())) + warc_path = warc_dir / str(int(datetime.now(timezone.utc).timestamp())) # WGET CLI Docs: https://www.gnu.org/software/wget/manual/wget.html output: ArchiveOutput = None diff --git a/archivebox/index/html.py b/archivebox/index/html.py index b584b876..d45f66ea 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.index' from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from collections import defaultdict from typing import List, Optional, Iterator, Mapping @@ -13,7 +13,7 @@ from ..system import atomic_write from ..logging_util import printable_filesize from ..util import ( enforce_types, - ts_to_date, + ts_to_date_str, urlencode, htmlencode, urldecode, @@ -62,8 +62,8 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> 'version': VERSION, 'git_sha': VERSION, # not used anymore, but kept for backwards compatibility 'num_links': str(len(links)), - 'date_updated': datetime.now().strftime('%Y-%m-%d'), - 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'), + 'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'), + 'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'), 'links': [link._asdict(extended=True) for link in links], 'FOOTER_INFO': FOOTER_INFO, }) @@ -103,7 +103,7 @@ def link_details_template(link: Link) -> str: 'size': printable_filesize(link.archive_size) if link.archive_size else 'pending', 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', - 'oldest_archive_date': ts_to_date(link.oldest_archive_date), + 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, }) @@ -120,7 +120,7 @@ def snapshot_icons(snapshot) -> str: def calc_snapshot_icons(): from core.models import EXTRACTORS - # start = datetime.now() + # start = datetime.now(timezone.utc) archive_results = snapshot.archiveresult_set.filter(status="succeeded", output__isnull=False) link = snapshot.as_link() @@ -183,7 +183,7 @@ def snapshot_icons(snapshot) -> str: "archive_org", icons.get("archive_org", "?")) result = format_html('{}', mark_safe(output)) - # end = datetime.now() + # end = datetime.now(timezone.utc) # print(((end - start).total_seconds()*1000) // 1, 'ms') return result diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 441e6854..6d564ae8 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -5,7 +5,7 @@ import sys import json as pyjson from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union from .schema import Link @@ -44,7 +44,7 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): output = { **MAIN_INDEX_HEADER, 'num_links': len(links), - 'updated': datetime.now(), + 'updated': datetime.now(timezone.utc), 'last_run_cmd': sys.argv, 'links': links, } diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 00831e19..480e9c7f 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -10,7 +10,7 @@ __package__ = 'archivebox.index' from pathlib import Path -from datetime import datetime, timedelta +from datetime import datetime, timezone, timedelta from typing import List, Dict, Any, Optional, Union @@ -19,7 +19,7 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property from ..system import get_dir_size - +from ..util import ts_to_date_str, parse_date from ..config import OUTPUT_DIR, ARCHIVE_DIR_NAME class ArchiveError(Exception): @@ -203,7 +203,7 @@ class Link: 'extension': self.extension, 'is_static': self.is_static, - 'tags_str': self.tags, # only used to render static index in index/html.py, remove if no longer needed there + 'tags_str': (self.tags or '').strip(','), # only used to render static index in index/html.py, remove if no longer needed there 'icons': None, # only used to render static index in index/html.py, remove if no longer needed there 'bookmarked_date': self.bookmarked_date, @@ -325,13 +325,11 @@ class Link: ### Pretty Printing Helpers @property def bookmarked_date(self) -> Optional[str]: - from ..util import ts_to_date - - max_ts = (datetime.now() + timedelta(days=30)).timestamp() + max_ts = (datetime.now(timezone.utc) + timedelta(days=30)).timestamp() if self.timestamp and self.timestamp.replace('.', '').isdigit(): if 0 < float(self.timestamp) < max_ts: - return ts_to_date(datetime.fromtimestamp(float(self.timestamp))) + return ts_to_date_str(datetime.fromtimestamp(float(self.timestamp))) else: return str(self.timestamp) return None @@ -339,13 +337,12 @@ class Link: @property def updated_date(self) -> Optional[str]: - from ..util import ts_to_date - return ts_to_date(self.updated) if self.updated else None + return ts_to_date_str(self.updated) if self.updated else None @property def archive_dates(self) -> List[datetime]: return [ - result.start_ts + parse_date(result.start_ts) for method in self.history.keys() for result in self.history[method] ] diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 92a0f61d..6cb34f47 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -10,7 +10,7 @@ from math import log from multiprocessing import Process from pathlib import Path -from datetime import datetime +from datetime import datetime, timezone from dataclasses import dataclass from typing import Any, Optional, List, Dict, Union, IO, TYPE_CHECKING @@ -138,17 +138,19 @@ class TimedProgress: """Show a progress bar and measure elapsed time until .end() is called""" def __init__(self, seconds, prefix=''): + self.SHOW_PROGRESS = SHOW_PROGRESS if self.SHOW_PROGRESS: self.p = Process(target=progress_bar, args=(seconds, prefix)) self.p.start() - self.stats = {'start_ts': datetime.now(), 'end_ts': None} + self.stats = {'start_ts': datetime.now(timezone.utc), 'end_ts': None} def end(self): """immediately end progress, clear the progressbar line, and save end_ts""" - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) self.stats['end_ts'] = end_ts if self.SHOW_PROGRESS: @@ -231,7 +233,7 @@ def progress_bar(seconds: int, prefix: str='') -> None: def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional[str], pwd: str): cmd = ' '.join(('archivebox', subcommand, *subcommand_args)) stderr('{black}[i] [{now}] ArchiveBox v{VERSION}: {cmd}{reset}'.format( - now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), VERSION=VERSION, cmd=cmd, **ANSI, @@ -243,7 +245,7 @@ def log_cli_command(subcommand: str, subcommand_args: List[str], stdin: Optional def log_importing_started(urls: Union[str, List[str]], depth: int, index_only: bool): - _LAST_RUN_STATS.parse_start_ts = datetime.now() + _LAST_RUN_STATS.parse_start_ts = datetime.now(timezone.utc) print('{green}[+] [{}] Adding {} links to index (crawl depth={}){}...{reset}'.format( _LAST_RUN_STATS.parse_start_ts.strftime('%Y-%m-%d %H:%M:%S'), len(urls) if isinstance(urls, list) else len(urls.split('\n')), @@ -256,7 +258,7 @@ def log_source_saved(source_file: str): print(' > Saved verbatim input to {}/{}'.format(SOURCES_DIR_NAME, source_file.rsplit('/', 1)[-1])) def log_parsing_finished(num_parsed: int, parser_name: str): - _LAST_RUN_STATS.parse_end_ts = datetime.now() + _LAST_RUN_STATS.parse_end_ts = datetime.now(timezone.utc) print(' > Parsed {} URLs from input ({})'.format(num_parsed, parser_name)) def log_deduping_finished(num_new_links: int): @@ -270,7 +272,7 @@ def log_crawl_started(new_links): ### Indexing Stage def log_indexing_process_started(num_links: int): - start_ts = datetime.now() + start_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.index_start_ts = start_ts print() print('{black}[*] [{}] Writing {} links to main index...{reset}'.format( @@ -281,7 +283,7 @@ def log_indexing_process_started(num_links: int): def log_indexing_process_finished(): - end_ts = datetime.now() + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.index_end_ts = end_ts @@ -297,7 +299,8 @@ def log_indexing_finished(out_path: str): ### Archiving Stage def log_archiving_started(num_links: int, resume: Optional[float]=None): - start_ts = datetime.now() + + start_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_start_ts = start_ts print() if resume: @@ -315,7 +318,8 @@ def log_archiving_started(num_links: int, resume: Optional[float]=None): )) def log_archiving_paused(num_links: int, idx: int, timestamp: str): - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_end_ts = end_ts print() print('\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'.format( @@ -330,7 +334,8 @@ def log_archiving_paused(num_links: int, idx: int, timestamp: str): print(' archivebox update --resume={}'.format(timestamp)) def log_archiving_finished(num_links: int): - end_ts = datetime.now() + + end_ts = datetime.now(timezone.utc) _LAST_RUN_STATS.archiving_end_ts = end_ts assert _LAST_RUN_STATS.archiving_start_ts is not None seconds = end_ts.timestamp() - _LAST_RUN_STATS.archiving_start_ts.timestamp() @@ -356,6 +361,7 @@ def log_archiving_finished(num_links: int): def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): + # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford" # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/ # > output/archive/1478739709 @@ -363,7 +369,7 @@ def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( symbol_color=ANSI['green' if is_new else 'black'], symbol='+' if is_new else '√', - now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + now=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S'), title=link.title or link.base_url, **ANSI, )) diff --git a/archivebox/main.py b/archivebox/main.py index 3af26e5d..fa13dc34 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -585,6 +585,7 @@ def add(urls: Union[str, List[str]], update_all: bool=not ONLY_NEW, index_only: bool=False, overwrite: bool=False, + # duplicate: bool=False, # TODO: reuse the logic from admin.py resnapshot to allow adding multiple snapshots by appending timestamp automatically init: bool=False, extractors: str="", parser: str="auto", diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py index d040b23a..2451f0f5 100644 --- a/archivebox/parsers/__init__.py +++ b/archivebox/parsers/__init__.py @@ -11,7 +11,7 @@ import re from io import StringIO from typing import IO, Tuple, List, Optional -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from ..system import atomic_write @@ -147,7 +147,7 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None, @enforce_types def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str: - ts = str(datetime.now().timestamp()).split('.', 1)[0] + ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts)) atomic_write(source_path, raw_text) log_source_saved(source_file=source_path) @@ -157,7 +157,7 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: @enforce_types def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str: """download a given url's content into output/sources/domain-.txt""" - ts = str(datetime.now().timestamp()).split('.', 1)[0] + ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0] source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts)) if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')): diff --git a/archivebox/parsers/generic_html.py b/archivebox/parsers/generic_html.py index 6950dc1d..95adb018 100644 --- a/archivebox/parsers/generic_html.py +++ b/archivebox/parsers/generic_html.py @@ -4,7 +4,7 @@ __package__ = 'archivebox.parsers' import re from typing import IO, Iterable, Optional -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -46,7 +46,7 @@ def parse_generic_html_export(html_file: IO[str], root_url: Optional[str]=None, for archivable_url in re.findall(URL_REGEX, url): yield Link( url=htmldecode(archivable_url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[html_file.name], diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index fff4d712..0466b0f6 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -3,7 +3,7 @@ __package__ = 'archivebox.parsers' import json from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -30,7 +30,7 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: raise Exception('JSON must contain URL in each entry [{"url": "http://...", ...}, ...]') # Parse the timestamp - ts_str = str(datetime.now().timestamp()) + ts_str = str(datetime.now(timezone.utc).timestamp()) if link.get('timestamp'): # chrome/ff histories use a very precise timestamp ts_str = str(link['timestamp'] / 10000000) diff --git a/archivebox/parsers/generic_txt.py b/archivebox/parsers/generic_txt.py index a7ed8d54..80d97cf5 100644 --- a/archivebox/parsers/generic_txt.py +++ b/archivebox/parsers/generic_txt.py @@ -4,7 +4,7 @@ __description__ = 'Plain Text' import re from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from ..index.schema import Link @@ -29,7 +29,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: if Path(line).exists(): yield Link( url=line, - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], @@ -42,7 +42,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: for url in re.findall(URL_REGEX, line): yield Link( url=htmldecode(url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], @@ -54,7 +54,7 @@ def parse_generic_txt_export(text_file: IO[str], **_kwargs) -> Iterable[Link]: for sub_url in re.findall(URL_REGEX, line[1:]): yield Link( url=htmldecode(sub_url), - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], diff --git a/archivebox/parsers/pinboard_rss.py b/archivebox/parsers/pinboard_rss.py index 17d1025e..b7a77a00 100644 --- a/archivebox/parsers/pinboard_rss.py +++ b/archivebox/parsers/pinboard_rss.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.parsers' from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from xml.etree import ElementTree @@ -36,7 +36,7 @@ def parse_pinboard_rss_export(rss_file: IO[str], **_kwargs) -> Iterable[Link]: if ts_str: time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z") else: - time = datetime.now() + time = datetime.now(timezone.utc) yield Link( url=htmldecode(url), diff --git a/archivebox/parsers/url_list.py b/archivebox/parsers/url_list.py index 66e3961c..e9a7bbb3 100644 --- a/archivebox/parsers/url_list.py +++ b/archivebox/parsers/url_list.py @@ -4,7 +4,7 @@ __description__ = 'URL list' import re from typing import IO, Iterable -from datetime import datetime +from datetime import datetime, timezone from ..index.schema import Link from ..util import ( @@ -25,7 +25,7 @@ def parse_url_list(text_file: IO[str], **_kwargs) -> Iterable[Link]: yield Link( url=url, - timestamp=str(datetime.now().timestamp()), + timestamp=str(datetime.now(timezone.utc).timestamp()), title=None, tags=None, sources=[text_file.name], diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html index 436318ea..9dc62516 100644 --- a/archivebox/templates/admin/base.html +++ b/archivebox/templates/admin/base.html @@ -4,228 +4,228 @@ - -{% block title %}{% endblock %} | ArchiveBox - -{% block extrastyle %}{% endblock %} -{% if LANGUAGE_BIDI %}{% endif %} -{% block extrahead %}{% endblock %} -{% block responsive %} - - - {% if LANGUAGE_BIDI %}{% endif %} -{% endblock %} -{% block blockbots %}{% endblock %} - - -{% load i18n %} - - - - - - - - - -
- - {% if not is_popup %} - - - - {% block breadcrumbs %} - + + {{obj.title|default:'Not yet archived...'}} + + +
{% endfor %} - - + +
{% endblock %} diff --git a/archivebox/templates/core/add.html b/archivebox/templates/core/add.html index 4315ee76..978567a3 100644 --- a/archivebox/templates/core/add.html +++ b/archivebox/templates/core/add.html @@ -49,7 +49,7 @@ - - - + + + + + + {% block extra_head %} + {% endblock %}
@@ -48,6 +30,7 @@
{% block body %} + {% endblock %}
{% block footer %} diff --git a/archivebox/templates/core/index_row.html b/archivebox/templates/core/index_row.html index ba34a8c6..bfeed9fa 100644 --- a/archivebox/templates/core/index_row.html +++ b/archivebox/templates/core/index_row.html @@ -1,37 +1,44 @@ -{% load static %} +{% load static tz core_tags %} - {% if link.bookmarked_date %} {{ link.bookmarked_date }} {% else %} {{ link.added }} {% endif %} - + + {{ link.added|localtime }} + + {% if link.is_archived %} - + {% else %} - + {% endif %} - - {{link.title|default:'Loading...'|truncatechars:128}} + + + {{link.title|default:'Loading...'|truncatechars:128}} + {% if link.tags_str %} - - {% if link.tags_str != None %} - {{link.tags_str|default:''}} - {% else %} - {{ link.tags|default:'' }} - {% endif %} - + {% for tag in link.tags_str|split:',' %} + + {{tag}} + + {% endfor %} {% endif %} {% if link.icons %} - {{link.icons}} {{link.num_outputs}} + {{link.icons}}  {{link.num_outputs}} {% else %} - 📄 + + 📄   {{link.num_outputs}} {% endif %} - {{link.url|truncatechars:128}} + + + {{link.url}} + + diff --git a/archivebox/templates/core/progressbar.html b/archivebox/templates/core/progressbar.html new file mode 100644 index 00000000..34d6ce98 --- /dev/null +++ b/archivebox/templates/core/progressbar.html @@ -0,0 +1,45 @@ + + diff --git a/archivebox/templates/core/public_index.html b/archivebox/templates/core/public_index.html index c414cbf8..57bb802c 100644 --- a/archivebox/templates/core/public_index.html +++ b/archivebox/templates/core/public_index.html @@ -1,12 +1,7 @@ {% extends "base.html" %} -{% load static %} +{% load static tz %} {% block body %} -
- - - - - - - - - +
+
BookmarkedSnapshot ({{page_obj.paginator.count}})FilesOriginal URL
+ + + + + + + + {% for link in object_list %} {% include 'index_row.html' with link=link %} {% endfor %}
BookmarkedSnapshot ({{page_obj.paginator.count}})FilesOriginal URL
-
-
- Showing {{ page_obj.start_index }}-{{ page_obj.end_index }} of {{ page_obj.paginator.count }} total -
- - {% if page_obj.has_previous %} - « first   - previous -   - {% endif %} - - - Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }} - - - {% if page_obj.has_next %} -   - next   - last » - {% endif %} + +
+
+ Showing {{ page_obj.start_index }}-{{ page_obj.end_index }} of {{ page_obj.paginator.count }} total +
+ + {% if page_obj.has_previous %} + « first   + previous +   + {% endif %} + + + Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }} + + {% if page_obj.has_next %} +   + next   + last » + {% endif %}
diff --git a/archivebox/templates/core/snapshot.html b/archivebox/templates/core/snapshot.html index b4d34fd1..d562d038 100644 --- a/archivebox/templates/core/snapshot.html +++ b/archivebox/templates/core/snapshot.html @@ -1,3 +1,5 @@ +{% load tz core_tags %} + @@ -20,7 +22,6 @@ } header { background-color: #aa1e55; - padding-bottom: 12px; } small { font-weight: 200; @@ -34,15 +35,15 @@ min-height: 40px; margin: 0px; text-align: center; - color: white; - font-size: calc(11px + 0.84vw); + color: #f6f6f6; + font-size: calc(10px + 0.84vw); font-weight: 200; - padding: 4px 4px; + padding: 3px 4px; background-color: #aa1e55; } .nav > div { min-height: 30px; - line-height: 1.3; + line-height: 1.2; } .header-top a { text-decoration: none; @@ -68,9 +69,14 @@ .header-archivebox img:hover { opacity: 0.5; } - .header-url small { + header small code { white-space: nowrap; font-weight: 200; + display: block; + margin-top: -1px; + font-size: 13px; + opacity: 0.8; + user-select: all; } .header-url img { height: 20px; @@ -90,28 +96,38 @@ .info-row .alert { margin-bottom: 0px; } + .row.header-bottom { + margin-left: -10px; + margin-right: -10px; + } + .header-bottom .col-lg-2 { + padding-left: 4px; + padding-right: 4px; + } + .header-bottom-frames .card { - overflow: hidden; box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02); - margin-top: 10px; + margin-bottom: 5px; border: 1px solid rgba(0,0,0,3); - border-radius: 14px; + border-radius: 10px; background-color: black; + overflow: hidden; } .card h4 { font-size: 1.4vw; } .card-body { - font-size: 15px; + font-size: 14px; padding: 13px 10px; - padding-bottom: 6px; + padding-bottom: 1px; /* padding-left: 3px; */ /* padding-right: 3px; */ /* padding-bottom: 3px; */ - line-height: 1.1; + line-height: 1; word-wrap: break-word; max-height: 102px; overflow: hidden; + text-overflow: ellipsis; background-color: #1a1a1a; color: #d3d3d3; } @@ -146,22 +162,12 @@ border-top: 3px solid #aa1e55; } .card.selected-card { - border: 1px solid orange; + border: 2px solid orange; box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05); } .iframe-large { height: calc(100% - 40px); } - .pdf-frame { - transform: none; - width: 100%; - height: 160px; - margin-top: -60px; - margin-bottom: 0px; - transform: scale(1.1); - width: 100%; - margin-left: -10%; - } img.external { height: 30px; margin-right: -10px; @@ -185,7 +191,7 @@ } .header-bottom { border-top: 1px solid rgba(170, 30, 85, 0.9); - padding-bottom: 12px; + padding-bottom: 1px; border-bottom: 5px solid rgb(170, 30, 85); margin-bottom: -1px; @@ -215,10 +221,11 @@ } .info-chunk { width: auto; - display:inline-block; + display: inline-block; text-align: center; - margin: 10px 10px; + margin: 8px 4px; vertical-align: top; + font-size: 14px; } .info-chunk .badge { margin-top: 5px; @@ -226,13 +233,12 @@ .header-bottom-frames .card-title { width: 100%; text-align: center; - font-size: 18px; - margin-bottom: 5px; + font-size: 17px; + margin-bottom: 0px; display: inline-block; color: #d3d3d3; font-weight: 200; - vertical-align: 0px; - margin-top: -6px; + vertical-align: 3px; } .header-bottom-frames .card-text { width: 100%; @@ -277,8 +283,7 @@