1
0
Fork 0
mirror of synced 2024-06-28 11:00:35 +12:00

refactor: list command is functional

This commit is contained in:
Cristian 2020-12-31 12:59:06 -05:00
parent c51d789ad4
commit a4e1bebc46
5 changed files with 35 additions and 23 deletions

View file

@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
import uuid import uuid
from pathlib import Path from pathlib import Path
from typing import Dict, Optional from typing import Dict, Optional, List
from django.db import models, transaction from django.db import models, transaction
from django.utils.functional import cached_property from django.utils.functional import cached_property
@ -91,6 +91,11 @@ class Snapshot(models.Model):
title = self.title or '-' title = self.title or '-'
return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})' return f'[{self.timestamp}] {self.url[:64]} ({title[:64]})'
def field_names(self):
    """Return the names of this model's fields, minus relationship fields.

    The ``tags`` and ``archiveresult`` relationships are left out for now,
    so the result only lists the remaining fields reported by ``_meta``.
    """
    # Exclude relationships for now
    exclude = ("tags", "archiveresult")
    # `get_fields()` is the documented Django `_meta` accessor; the method
    # also needs `self`, since callers invoke it as `self.field_names()`.
    return [
        field.name
        for field in self._meta.get_fields()
        if field.name not in exclude
    ]
@classmethod @classmethod
def from_json(cls, info: dict): def from_json(cls, info: dict):
@ -105,6 +110,11 @@ class Snapshot(models.Model):
for key in args for key in args
} }
def as_csv(self, cols: Optional[List[str]]=None, separator: str=',', ljust: int=0) -> str:
    """Serialize this object as one CSV row via ``index.csv.to_csv``.

    When ``cols`` is None or empty, the columns default to
    ``self.field_names()``. ``separator`` and ``ljust`` are forwarded
    unchanged.
    """
    # Imported at call time rather than at module level.
    from ..index.csv import to_csv

    columns = cols or self.field_names()
    return to_csv(self, cols=columns, separator=separator, ljust=ljust)
def as_link(self) -> Link:
    """Build a ``Link`` from this object's JSON representation."""
    json_info = self.as_json()
    return Link.from_json(json_info)

View file

@ -2,12 +2,14 @@ __package__ = 'archivebox.index'
from typing import List, Optional, Any from typing import List, Optional, Any
from django.db.models import Model
from ..util import enforce_types from ..util import enforce_types
from .schema import Link from .schema import Link
@enforce_types @enforce_types
def links_to_csv(links: List[Link], def snapshots_to_csv(snapshots: List[Model],
cols: Optional[List[str]]=None, cols: Optional[List[str]]=None,
header: bool=True, header: bool=True,
separator: str=',', separator: str=',',
@ -20,8 +22,8 @@ def links_to_csv(links: List[Link],
header_str = separator.join(col.ljust(ljust) for col in cols) header_str = separator.join(col.ljust(ljust) for col in cols)
row_strs = ( row_strs = (
link.to_csv(cols=cols, ljust=ljust, separator=separator) snapshot.as_csv(cols=cols, ljust=ljust, separator=separator)
for link in links for snapshot in snapshots
) )
return '\n'.join((header_str, *row_strs)) return '\n'.join((header_str, *row_strs))

View file

@ -47,24 +47,24 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
return () return ()
@enforce_types
def generate_index_from_snapshots(snapshots: List[Model], with_headers: bool):
    """Render the main index for ``snapshots``.

    With ``with_headers`` set, ``main_index_template`` is called with its
    default template; otherwise ``MINIMAL_INDEX_TEMPLATE`` is used.
    """
    if not with_headers:
        return main_index_template(snapshots, template=MINIMAL_INDEX_TEMPLATE)
    return main_index_template(snapshots)
@enforce_types
def main_index_template(snapshots: List[Model], template: str=MAIN_INDEX_TEMPLATE) -> str:
    """render the template for the entire main index

    The template context carries the version info, the number of snapshots
    (as a string), the current date and time, each snapshot's ``as_json()``
    output under ``links``, and the footer info.
    """
    # Read the clock once so `date_updated` and `time_updated` always agree,
    # even when rendering straddles a minute or midnight boundary.
    now = datetime.now()
    return render_django_template(template, {
        'version': VERSION,
        'git_sha': GIT_SHA,
        'num_links': str(len(snapshots)),
        'date_updated': now.strftime('%Y-%m-%d'),
        'time_updated': now.strftime('%Y-%m-%d %H:%M'),
        'links': [snapshot.as_json() for snapshot in snapshots],
        'FOOTER_INFO': FOOTER_INFO,
    })

View file

@ -41,17 +41,17 @@ MAIN_INDEX_HEADER = {
} }
@enforce_types
def generate_json_index_from_snapshots(snapshots: List[Model], with_headers: bool):
    """Serialize ``snapshots`` with ``to_json`` (indent 4, sorted keys).

    With ``with_headers`` set, the snapshots are nested under ``links`` in a
    dict that starts from ``MAIN_INDEX_HEADER`` and adds the snapshot count,
    the current time and ``sys.argv``. Otherwise the snapshots are
    serialized on their own.
    """
    if not with_headers:
        return to_json(snapshots, indent=4, sort_keys=True)

    index = {
        **MAIN_INDEX_HEADER,
        'num_links': len(snapshots),
        'updated': datetime.now(),
        'last_run_cmd': sys.argv,
        'links': snapshots,
    }
    return to_json(index, indent=4, sort_keys=True)

View file

@ -49,7 +49,7 @@ from .index import (
from .index.json import ( from .index.json import (
parse_json_main_index, parse_json_main_index,
parse_json_snapshot_details, parse_json_snapshot_details,
generate_json_index_from_links, generate_json_index_from_snapshots,
) )
from .index.sql import ( from .index.sql import (
get_admins, get_admins,
@ -57,9 +57,9 @@ from .index.sql import (
remove_from_sql_main_index, remove_from_sql_main_index,
) )
from .index.html import ( from .index.html import (
generate_index_from_links, generate_index_from_snapshots,
) )
from .index.csv import links_to_csv from .index.csv import snapshots_to_csv
from .extractors import archive_snapshots, archive_snapshot, ignore_methods from .extractors import archive_snapshots, archive_snapshot, ignore_methods
from .config import ( from .config import (
stderr, stderr,
@ -646,7 +646,7 @@ def remove(filter_str: Optional[str]=None,
log_list_started(filter_patterns, filter_type) log_list_started(filter_patterns, filter_type)
timer = TimedProgress(360, prefix=' ') timer = TimedProgress(360, prefix=' ')
try: try:
snapshots = list_links(**list_kwargs) snapshots = list_snapshots(**list_kwargs)
finally: finally:
timer.end() timer.end()
@ -771,7 +771,7 @@ def list_all(filter_patterns_str: Optional[str]=None,
elif filter_patterns_str: elif filter_patterns_str:
filter_patterns = filter_patterns_str.split('\n') filter_patterns = filter_patterns_str.split('\n')
snapshots = list_links( snapshots = list_snapshots(
filter_patterns=filter_patterns, filter_patterns=filter_patterns,
filter_type=filter_type, filter_type=filter_type,
before=before, before=before,
@ -782,17 +782,17 @@ def list_all(filter_patterns_str: Optional[str]=None,
snapshots = snapshots.order_by(sort) snapshots = snapshots.order_by(sort)
folders = list_folders( folders = list_folders(
links=snapshots, snapshots=snapshots,
status=status, status=status,
out_dir=out_dir, out_dir=out_dir,
) )
if json: if json:
output = generate_json_index_from_links(folders.values(), with_headers) output = generate_json_index_from_snapshots(folders.values(), with_headers)
elif html: elif html:
output = generate_index_from_links(folders.values(), with_headers) output = generate_index_from_snapshots(folders.values(), with_headers)
elif csv: elif csv:
output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers) output = snapshots_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
else: else:
output = printable_folders(folders, with_headers=with_headers) output = printable_folders(folders, with_headers=with_headers)
print(output) print(output)