
refactor: status command is functional

Cristian 2020-12-30 12:53:20 -05:00
parent d92083b928
commit 973f8b6abc
5 changed files with 33 additions and 26 deletions

View file

@@ -138,7 +138,7 @@ class Snapshot(models.Model):
     @cached_property
     def snapshot_dir(self):
         from ..config import CONFIG
-        return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
+        return Path(CONFIG['ARCHIVE_DIR']) / self.timestamp

     @cached_property
     def archive_path(self):
@@ -173,6 +173,12 @@ class Snapshot(models.Model):
         from ..util import is_static_file
         return is_static_file(self.url)

+    @cached_property
+    def details(self) -> Dict:
+        # TODO: Define what details are, and return them accordingly
+        return {"history": {}}
+
     def canonical_outputs(self) -> Dict[str, Optional[str]]:
         """predict the expected output paths that should be present after archiving"""

View file

@@ -88,7 +88,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
         details = {"history": {}}
         write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False)
     else:
-        details = load_snapshot_details(snapshot)
+        details = snapshot.details

     #log_link_archiving_started(link, out_dir, is_new)
     stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}
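
The effect of the one-line change above: the else branch no longer re-reads and re-parses index.json on every call, it reads the model's cached property instead. A toy, self-contained comparison of the two access patterns (all names here are assumed, for illustration only):

    import json
    from functools import cached_property
    from pathlib import Path
    from tempfile import mkdtemp

    out_dir = Path(mkdtemp())
    (out_dir / 'index.json').write_text(json.dumps({"history": {}}))


    def load_snapshot_details_old(snapshot_dir: Path) -> dict:
        # old path: every call pays for an open() plus a JSON parse
        return json.loads((snapshot_dir / 'index.json').read_text())


    class Snap:
        # toy stand-in for core.models.Snapshot
        @cached_property
        def details(self) -> dict:
            # new path: placeholder details, cached after first access, no disk read
            return {"history": {}}


    assert load_snapshot_details_old(out_dir) == Snap().details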

View file

@@ -42,7 +42,7 @@ from .html import (
     write_html_snapshot_details,
 )
 from .json import (
-    load_json_snapshot_details,
+    load_json_snapshot,
     parse_json_snapshot_details,
     write_json_snapshot_details,
 )
@@ -441,7 +441,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Opti
             path = path.snapshot_dir
         try:
-            snapshot = parse_json_snapshot_details(path)
+            snapshot = load_json_snapshot_details(path)
         except Exception:
             pass
@@ -530,7 +530,7 @@ def is_valid(snapshot: Model) -> bool:
     if dir_exists and index_exists:
         try:
             # TODO: review if the `guess` was necessary here
-            parsed_snapshot = parse_json_snapshot_details(snapshot.snapshot_dir)
+            parsed_snapshot = load_json_snapshot(snapshot.snapshot_dir)
             return snapshot.url == parsed_snapshot.url
         except Exception:
             pass
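
is_valid() now round-trips through the per-folder JSON index: a data directory only counts as valid if its index.json loads into a Snapshot whose URL matches the database record. A condensed, self-contained restatement of that check (the stub loader stands in for the real load_json_snapshot):

    from types import SimpleNamespace


    def load_json_snapshot(snapshot_dir):
        # stub standing in for the real loader; returns a model or None
        return SimpleNamespace(url='https://example.com')


    def is_valid_sketch(snapshot) -> bool:
        # restatement of the is_valid() logic from the diff above
        try:
            parsed = load_json_snapshot(snapshot.snapshot_dir)
            return parsed is not None and snapshot.url == parsed.url
        except Exception:
            return False


    snap = SimpleNamespace(url='https://example.com', snapshot_dir='archive/1609346000')
    assert is_valid_sketch(snap)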

View file

@@ -91,17 +91,18 @@ def write_json_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
 @enforce_types
-def load_json_snapshot_details(out_dir: Path) -> Optional[Model]:
+def load_json_snapshot(out_dir: Path) -> Optional[Model]:
     """
     Loads the detail from the local json index
     """
+    from core.models import Snapshot
+
     existing_index = Path(out_dir) / JSON_INDEX_FILENAME
     if existing_index.exists():
         with open(existing_index, 'r', encoding='utf-8') as f:
             try:
                 output = pyjson.load(f)
-                if "history" not in output.keys():
-                    output["history"] = {}
+                output = Snapshot.from_json(output)
                 return output
             except pyjson.JSONDecodeError:
                 pass
@@ -110,13 +111,13 @@ def load_json_snapshot_details(out_dir: Path) -> Optional[Model]:
 @enforce_types
 def parse_json_snapshot_details(out_dir: Union[Path, str]) -> Iterator[dict]:
-    """read through all the archive data folders and return the parsed links"""
+    """read through all the archive data folders and return the parsed snapshots"""

     for entry in os.scandir(Path(out_dir)):
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
-                    snapshot_details = load_snapshot_details(entry.path)
+                    snapshot_details = load_json_snapshot_details(entry.path)
                 except KeyError:
                     snapshot_details = None
                 if snapshot_details:
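
load_json_snapshot now delegates all normalization to Snapshot.from_json, which this commit calls but does not define here. A plausible minimal shape for it, assuming dict-to-model hydration plus the history defaulting that the deleted lines used to do (field names and behavior are guesses, not confirmed by the commit):

    from typing import Dict


    class Snapshot:
        # minimal stand-in; the real Snapshot is a Django model in core.models
        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

        @classmethod
        def from_json(cls, info: Dict) -> 'Snapshot':
            # hypothetical: keep only known fields, preserve the old history defaulting
            info.setdefault('history', {})
            known = {'url', 'timestamp', 'history'}  # assumed field set
            return cls(**{k: v for k, v in info.items() if k in known})


    snap = Snapshot.from_json({'url': 'https://example.com', 'timestamp': '1609346000'})
    assert snap.history == {}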

View file

@@ -427,11 +427,11 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     print(f'    Index size: {size} across {num_files} files')
     print()

-    links = load_main_index(out_dir=out_dir)
-    num_sql_links = links.count()
-    num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
-    print(f'    > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
-    print(f'    > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
+    snapshots = load_main_index(out_dir=out_dir)
+    num_sql_snapshots = snapshots.count()
+    num_snapshot_details = sum(1 for snapshot in parse_json_snapshot_details(out_dir=out_dir))
+    print(f'    > SQL Main Index: {num_sql_snapshots} snapshots'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
+    print(f'    > JSON Link Details: {num_snapshot_details} snapshots'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
     print()
     print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
     print(ANSI['lightyellow'], f'   {ARCHIVE_DIR}/*', ANSI['reset'])
@@ -439,23 +439,23 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     size = printable_filesize(num_bytes)
     print(f'    Size: {size} across {num_files} files in {num_dirs} directories')
     print(ANSI['black'])
-    num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
-    num_archived = len(get_archived_folders(links, out_dir=out_dir))
-    num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
+    num_indexed = len(get_indexed_folders(snapshots, out_dir=out_dir))
+    num_archived = len(get_archived_folders(snapshots, out_dir=out_dir))
+    num_unarchived = len(get_unarchived_folders(snapshots, out_dir=out_dir))
     print(f'    > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
     print(f'    > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
     print(f'    > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')

-    num_present = len(get_present_folders(links, out_dir=out_dir))
-    num_valid = len(get_valid_folders(links, out_dir=out_dir))
+    num_present = len(get_present_folders(snapshots, out_dir=out_dir))
+    num_valid = len(get_valid_folders(snapshots, out_dir=out_dir))
     print()
     print(f'    > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
     print(f'    > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')

-    duplicate = get_duplicate_folders(links, out_dir=out_dir)
-    orphaned = get_orphaned_folders(links, out_dir=out_dir)
-    corrupted = get_corrupted_folders(links, out_dir=out_dir)
-    unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
+    duplicate = get_duplicate_folders(snapshots, out_dir=out_dir)
+    orphaned = get_orphaned_folders(snapshots, out_dir=out_dir)
+    corrupted = get_corrupted_folders(snapshots, out_dir=out_dir)
+    unrecognized = get_unrecognized_folders(snapshots, out_dir=out_dir)
     num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
     print(f'    > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
     print(f'    > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
@@ -466,7 +466,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     print(ANSI['reset'])

     if num_indexed:
-        print('    {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
+        print('    {lightred}Hint:{reset} You can list snapshot data directories by status like so:'.format(**ANSI))
         print('        archivebox list --status=<status>   (e.g. indexed, corrupted, archived, etc.)')

     if orphaned:
@@ -495,7 +495,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
         print('        archivebox manage createsuperuser')

     print()
-    for snapshot in links.order_by('-updated')[:10]:
+    for snapshot in snapshots.order_by('-updated')[:10]:
         if not snapshot.updated:
             continue
         print(
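
One detail in the status() body worth calling out: invalid is not a fifth filesystem scan, it is the deduplicated union of the four problem dicts, merged by folder path. A standalone illustration of that counting trick, using placeholder paths:

    # toy folder-status dicts (folder path -> snapshot or None), placeholder data
    duplicate    = {'archive/111': None, 'archive/222': None}
    orphaned     = {'archive/222': None, 'archive/333': None}  # overlaps duplicate
    corrupted    = {}
    unrecognized = {'archive/444': None}

    # merging the dicts dedupes on folder path, so an overlapping folder counts once
    num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
    assert num_invalid == 4  # 111, 222, 333, 444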