From a4e1bebc46fa77a5f2edff988e452934288d4599 Mon Sep 17 00:00:00 2001 From: Cristian Date: Thu, 31 Dec 2020 12:59:06 -0500 Subject: [PATCH] refactor: list command is functional --- archivebox/core/models.py | 12 +++++++++++- archivebox/index/csv.py | 8 +++++--- archivebox/index/html.py | 12 ++++++------ archivebox/index/json.py | 8 ++++---- archivebox/main.py | 18 +++++++++--------- 5 files changed, 35 insertions(+), 23 deletions(-) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index dbad0f66..42fae496 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.core' import uuid from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Optional, List from django.db import models, transaction from django.utils.functional import cached_property @@ -91,6 +91,11 @@ class Snapshot(models.Model): title = self.title or '-' return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})' + def field_names(self) -> List[str]: + fields = self._meta.get_fields() + exclude = ["tags", "archiveresult"] # Exclude relationships for now + return [field.name for field in fields if field.name not in exclude] + @classmethod def from_json(cls, info: dict): @@ -105,6 +110,11 @@ class Snapshot(models.Model): for key in args } + + def as_csv(self, cols: Optional[List[str]]=None, separator: str=',', ljust: int=0) -> str: + from ..index.csv import to_csv + return to_csv(self, cols=cols or self.field_names(), separator=separator, ljust=ljust) + def as_link(self) -> Link: return Link.from_json(self.as_json()) diff --git a/archivebox/index/csv.py b/archivebox/index/csv.py index 804e6461..d6817ed7 100644 --- a/archivebox/index/csv.py +++ b/archivebox/index/csv.py @@ -2,12 +2,14 @@ __package__ = 'archivebox.index' from typing import List, Optional, Any +from django.db.models import Model + from ..util import enforce_types from .schema import Link @enforce_types -def links_to_csv(links: List[Link], +def 
snapshots_to_csv(snapshots: List[Model], cols: Optional[List[str]]=None, header: bool=True, separator: str=',', @@ -20,8 +22,8 @@ def links_to_csv(links: List[Link], header_str = separator.join(col.ljust(ljust) for col in cols) row_strs = ( - link.to_csv(cols=cols, ljust=ljust, separator=separator) - for link in links + snapshot.as_csv(cols=cols, ljust=ljust, separator=separator) + for snapshot in snapshots ) return '\n'.join((header_str, *row_strs)) diff --git a/archivebox/index/html.py b/archivebox/index/html.py index d1bd5ee2..b34d38bb 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -47,24 +47,24 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]: return () @enforce_types -def generate_index_from_links(links: List[Link], with_headers: bool): +def generate_index_from_snapshots(snapshots: List[Model], with_headers: bool): if with_headers: - output = main_index_template(links) + output = main_index_template(snapshots) else: - output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE) + output = main_index_template(snapshots, template=MINIMAL_INDEX_TEMPLATE) return output @enforce_types -def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str: +def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLATE) -> str: """render the template for the entire main index""" return render_django_template(template, { 'version': VERSION, 'git_sha': GIT_SHA, - 'num_links': str(len(links)), + 'num_links': str(len(snapshots)), 'date_updated': datetime.now().strftime('%Y-%m-%d'), 'time_updated': datetime.now().strftime('%Y-%m-%d %H:%M'), - 'links': [link._asdict(extended=True) for link in links], + 'links': [snapshot.as_json() for snapshot in snapshots], 'FOOTER_INFO': FOOTER_INFO, }) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index f6e54372..9097b95e 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -41,17 +41,17 @@ MAIN_INDEX_HEADER = 
{ } @enforce_types -def generate_json_index_from_links(links: List[Link], with_headers: bool): +def generate_json_index_from_snapshots(snapshots: List[Model], with_headers: bool): if with_headers: output = { **MAIN_INDEX_HEADER, - 'num_links': len(links), + 'num_links': len(snapshots), 'updated': datetime.now(), 'last_run_cmd': sys.argv, - 'links': links, + 'links': snapshots, } else: - output = links + output = snapshots return to_json(output, indent=4, sort_keys=True) diff --git a/archivebox/main.py b/archivebox/main.py index 3c3bbb5f..f33fabd8 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -49,7 +49,7 @@ from .index import ( from .index.json import ( parse_json_main_index, parse_json_snapshot_details, - generate_json_index_from_links, + generate_json_index_from_snapshots, ) from .index.sql import ( get_admins, @@ -57,9 +57,9 @@ from .index.sql import ( remove_from_sql_main_index, ) from .index.html import ( - generate_index_from_links, + generate_index_from_snapshots, ) -from .index.csv import links_to_csv +from .index.csv import snapshots_to_csv from .extractors import archive_snapshots, archive_snapshot, ignore_methods from .config import ( stderr, @@ -646,7 +646,7 @@ def remove(filter_str: Optional[str]=None, log_list_started(filter_patterns, filter_type) timer = TimedProgress(360, prefix=' ') try: - snapshots = list_links(**list_kwargs) + snapshots = list_snapshots(**list_kwargs) finally: timer.end() @@ -771,7 +771,7 @@ def list_all(filter_patterns_str: Optional[str]=None, elif filter_patterns_str: filter_patterns = filter_patterns_str.split('\n') - snapshots = list_links( + snapshots = list_snapshots( filter_patterns=filter_patterns, filter_type=filter_type, before=before, @@ -782,17 +782,17 @@ def list_all(filter_patterns_str: Optional[str]=None, snapshots = snapshots.order_by(sort) folders = list_folders( - links=snapshots, + snapshots=snapshots, status=status, out_dir=out_dir, ) if json: - output = 
generate_json_index_from_links(folders.values(), with_headers) + output = generate_json_index_from_snapshots(folders.values(), with_headers) elif html: - output = generate_index_from_links(folders.values(), with_headers) + output = generate_index_from_snapshots(folders.values(), with_headers) elif csv: - output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers) + output = snapshots_to_csv(folders.values(), cols=csv.split(','), header=with_headers) else: output = printable_folders(folders, with_headers=with_headers) print(output)