From 91f543ff8d3c30c4fa899bd80ffeb726073e5f43 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 19 Jan 2021 12:18:48 -0500 Subject: [PATCH] lint: Remove unused imports around the app --- archivebox/core/models.py | 3 +++ archivebox/extractors/__init__.py | 19 +++++++++---------- archivebox/extractors/archive_org.py | 2 +- archivebox/extractors/dom.py | 2 +- archivebox/extractors/favicon.py | 2 +- archivebox/extractors/git.py | 2 +- archivebox/extractors/headers.py | 2 +- archivebox/extractors/media.py | 2 +- archivebox/extractors/mercury.py | 2 +- archivebox/extractors/pdf.py | 2 +- archivebox/extractors/readability.py | 2 +- archivebox/extractors/screenshot.py | 2 +- archivebox/index/sql.py | 6 +++--- archivebox/logging_util.py | 10 +++++----- 14 files changed, 30 insertions(+), 28 deletions(-) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 2b555e08..241eb558 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -348,3 +348,6 @@ class ArchiveResult(models.Model): def __str__(self): return self.extractor + + class Meta: + ordering = ["-start_ts"] diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index ea12faec..f80e686b 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -5,10 +5,8 @@ from pathlib import Path from typing import Optional, List, Iterable, Union -from datetime import datetime from django.db.models import QuerySet, Model -from ..index.sql import write_snapshot_to_index from ..index import ( load_snapshot_details, write_snapshot_details, @@ -17,10 +15,9 @@ from ..util import enforce_types from ..logging_util import ( log_archiving_started, log_archiving_paused, - log_archiving_finished, - log_link_archiving_started, - log_link_archiving_finished, + log_snapshot_archiving_started, + log_snapshot_archiving_finished, log_archive_method_started, log_archive_method_finished, ) @@ -91,7 +88,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I details = snapshot.details #TODO: This can be retrieved from the sqlite database too. # If that makes more sense, it can be easily changed. - #log_link_archiving_started(link, out_dir, is_new) + log_snapshot_archiving_started(snapshot, out_dir, is_new) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} for method_name, should_run, method_function in ARCHIVE_METHODS: @@ -122,15 +119,17 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I # print(' ', stats) try: - latest_title = link.history['title'][-1].output.strip() - if latest_title and len(latest_title) >= len(link.title or ''): - snapshot.title = latest_title + latest_title_archive_result = snapshot.archiveresult_set.filter(extractor="title") + if latest_title_archive_result.count() > 0: + latest_title = latest_title_archive_result.output.strip() + if len(latest_title) >= len(snapshot.title or ''): + snapshot.title = latest_title except Exception: pass write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False) - log_link_archiving_finished(snapshot, snapshot.snapshot_dir, is_new, stats) + log_snapshot_archiving_finished(snapshot, snapshot.snapshot_dir, is_new, stats) except KeyboardInterrupt: try: diff --git a/archivebox/extractors/archive_org.py b/archivebox/extractors/archive_org.py index f88cdc9f..d1e15c11 100644 --- a/archivebox/extractors/archive_org.py +++ b/archivebox/extractors/archive_org.py @@ -7,7 +7,7 @@ from collections import defaultdict from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/archivebox/extractors/dom.py b/archivebox/extractors/dom.py index d2582f74..61038e76 100644 --- a/archivebox/extractors/dom.py +++ b/archivebox/extractors/dom.py @@ -5,7 +5,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file, atomic_write from ..util import ( enforce_types, diff --git a/archivebox/extractors/favicon.py b/archivebox/extractors/favicon.py index 7cd9c42c..b678776b 100644 --- a/archivebox/extractors/favicon.py +++ b/archivebox/extractors/favicon.py @@ -6,7 +6,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput +from ..index.schema import ArchiveResult, ArchiveOutput from ..system import chmod_file, run from ..util import enforce_types, domain from ..config import ( diff --git a/archivebox/extractors/git.py b/archivebox/extractors/git.py index a16e5bbc..6674cab8 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/extractors/git.py @@ -6,7 +6,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/archivebox/extractors/headers.py b/archivebox/extractors/headers.py index 6b453ccf..7104b499 100644 --- a/archivebox/extractors/headers.py +++ b/archivebox/extractors/headers.py @@ -6,7 +6,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput +from ..index.schema import ArchiveResult, ArchiveOutput from ..system import atomic_write from ..util import ( enforce_types, diff --git a/archivebox/extractors/media.py b/archivebox/extractors/media.py index a4d013a6..a865e572 100644 --- a/archivebox/extractors/media.py +++ b/archivebox/extractors/media.py @@ -5,7 +5,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/archivebox/extractors/mercury.py b/archivebox/extractors/mercury.py index f4a55707..135007ab 100644 --- a/archivebox/extractors/mercury.py +++ b/archivebox/extractors/mercury.py @@ -8,7 +8,7 @@ import json from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveError +from ..index.schema import ArchiveResult, ArchiveError from ..system import run, atomic_write from ..util import ( enforce_types, diff --git a/archivebox/extractors/pdf.py b/archivebox/extractors/pdf.py index 215a9de3..196c0089 100644 --- a/archivebox/extractors/pdf.py +++ b/archivebox/extractors/pdf.py @@ -5,7 +5,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/archivebox/extractors/readability.py b/archivebox/extractors/readability.py index e77e173c..655e9499 100644 --- a/archivebox/extractors/readability.py +++ b/archivebox/extractors/readability.py @@ -8,7 +8,7 @@ import json from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveError +from ..index.schema import ArchiveResult, ArchiveError from ..system import run, atomic_write from ..util import ( enforce_types, diff --git a/archivebox/extractors/screenshot.py b/archivebox/extractors/screenshot.py index 1cd2f8f8..723feb19 100644 --- a/archivebox/extractors/screenshot.py +++ b/archivebox/extractors/screenshot.py @@ -5,7 +5,7 @@ from typing import Optional from django.db.models import Model -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from ..index.schema import ArchiveResult, ArchiveOutput, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py index 1719da94..f0bb9fbf 100644 --- a/archivebox/index/sql.py +++ b/archivebox/index/sql.py @@ -47,10 +47,10 @@ def write_snapshot_to_index(snapshot: Model): @enforce_types -def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None: +def write_sql_main_index(snapshots: List[Model], out_dir: Path=OUTPUT_DIR) -> None: with transaction.atomic(): - for link in links: - write_snapshot_to_index(link) + for snapshot in snapshots: + write_snapshot_to_index(snapshot) @enforce_types diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index 2f564e6b..318b7a04 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -303,7 +303,7 @@ def log_archiving_finished(num_links: int): print(' archivebox server 0.0.0.0:8000') -def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): +def log_snapshot_archiving_started(snapshot: Model, snapshot_dir: str, is_new: bool): # [*] [2019-03-22 13:46:45] "Log Structured Merge Trees - ben stopford" # http://www.benstopford.com/2015/02/14/log-structured-merge-trees/ # > output/archive/1478739709 @@ -312,16 +312,16 @@ def log_link_archiving_started(link: "Link", link_dir: str, is_new: bool): symbol_color=ANSI['green' if is_new else 'black'], symbol='+' if is_new else '√', now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - title=link.title or link.base_url, + title=snapshot.title or snapshot.base_url, **ANSI, )) - print(' {blue}{url}{reset}'.format(url=link.url, **ANSI)) + print(' {blue}{url}{reset}'.format(url=snapshot.url, **ANSI)) print(' {} {}'.format( '>' if is_new else '√', - pretty_path(link_dir), + pretty_path(snapshot_dir), )) -def log_link_archiving_finished(link: "Link", link_dir: str, is_new: bool, stats: dict): +def log_snapshot_archiving_finished(snapshot: Model, snapshot_dir: str, is_new: bool, stats: dict): total = sum(stats.values()) if stats['failed'] > 0 :