
refactor: status command is functional

Cristian 2020-12-30 12:53:20 -05:00
parent d92083b928
commit 973f8b6abc
5 changed files with 33 additions and 26 deletions
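The command this refactor targets is ArchiveBox's status report, defined as status() in main.py (the last file in this diff). As a rough sketch of exercising it from Python — the import path is assumed from upstream ArchiveBox's layout and the data directory is hypothetical:

    # hedged sketch: run the refactored status report against a collection
    from pathlib import Path
    from archivebox.main import status   # import path assumed, not shown in this commit

    status(out_dir=Path('/data/archivebox'))   # hypothetical, already-initialized data dir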

View file

@@ -138,7 +138,7 @@ class Snapshot(models.Model):
     @cached_property
     def snapshot_dir(self):
         from ..config import CONFIG
-        return str(Path(CONFIG['ARCHIVE_DIR']) / self.timestamp)
+        return Path(CONFIG['ARCHIVE_DIR']) / self.timestamp
 
     @cached_property
     def archive_path(self):
@@ -173,6 +173,12 @@ class Snapshot(models.Model):
         from ..util import is_static_file
         return is_static_file(self.url)
 
+    @cached_property
+    def details(self) -> Dict:
+        # TODO: Define what details are, and return them accordingly
+        return {"history": {}}
+
     def canonical_outputs(self) -> Dict[str, Optional[str]]:
         """predict the expected output paths that should be present after archiving"""

View file

@@ -88,7 +88,7 @@ def archive_snapshot(snapshot: Model, overwrite: bool=False, methods: Optional[I
         details = {"history": {}}
         write_snapshot_details(snapshot, out_dir=out_dir, skip_sql_index=False)
     else:
-        details = load_snapshot_details(snapshot)
+        details = snapshot.details
 
     #log_link_archiving_started(link, out_dir, is_new)
     stats = {'skipped': 0, 'succeeded': 0, 'failed': 0}

View file

@@ -42,7 +42,7 @@ from .html import (
     write_html_snapshot_details,
 )
 from .json import (
-    load_json_snapshot_details,
+    load_json_snapshot,
     parse_json_snapshot_details,
     write_json_snapshot_details,
 )
@@ -441,7 +441,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Opti
             path = path.snapshot_dir
         try:
-            snapshot = parse_json_snapshot_details(path)
+            snapshot = load_json_snapshot_details(path)
         except Exception:
             pass
@@ -530,7 +530,7 @@ def is_valid(snapshot: Model) -> bool:
     if dir_exists and index_exists:
         try:
             # TODO: review if the `guess` was necessary here
-            parsed_snapshot = parse_json_snapshot_details(snapshot.snapshot_dir)
+            parsed_snapshot = load_json_snapshot(snapshot.snapshot_dir)
             return snapshot.url == parsed_snapshot.url
         except Exception:
             pass

View file

@@ -91,17 +91,18 @@ def write_json_snapshot_details(snapshot: Model, out_dir: Optional[str]=None) ->
 @enforce_types
-def load_json_snapshot_details(out_dir: Path) -> Optional[Model]:
+def load_json_snapshot(out_dir: Path) -> Optional[Model]:
     """
     Loads the detail from the local json index
     """
     from core.models import Snapshot
 
     existing_index = Path(out_dir) / JSON_INDEX_FILENAME
     if existing_index.exists():
         with open(existing_index, 'r', encoding='utf-8') as f:
             try:
                 output = pyjson.load(f)
                 if "history" not in output.keys():
                     output["history"] = {}
+                output = Snapshot.from_json(output)
                 return output
             except pyjson.JSONDecodeError:
                 pass
@@ -110,13 +111,13 @@ def load_json_snapshot_details(out_dir: Path) -> Optional[Model]:
 @enforce_types
 def parse_json_snapshot_details(out_dir: Union[Path, str]) -> Iterator[dict]:
-    """read through all the archive data folders and return the parsed links"""
+    """read through all the archive data folders and return the parsed snapshots"""
 
     for entry in os.scandir(Path(out_dir)):
         if entry.is_dir(follow_symlinks=True):
             if (Path(entry.path) / 'index.json').exists():
                 try:
-                    snapshot_details = load_snapshot_details(entry.path)
+                    snapshot_details = load_json_snapshot_details(entry.path)
                 except KeyError:
                     snapshot_details = None
                 if snapshot_details:
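A rough usage sketch of the renamed loader, assuming the index/json module layout implied by the import hunk earlier in this commit (the timestamp directory is hypothetical):

    # hedged sketch: load one snapshot's on-disk index.json as a Snapshot model
    from pathlib import Path
    from index.json import load_json_snapshot   # module path assumed from the import hunk above

    snapshot = load_json_snapshot(Path('archive/1609349600.123456'))   # hypothetical snapshot dir
    if snapshot is not None:   # falls through to None when index.json is missing or unparseable
        print(snapshot.url)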

View file

@@ -427,11 +427,11 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     print(f' Index size: {size} across {num_files} files')
     print()
 
-    links = load_main_index(out_dir=out_dir)
-    num_sql_links = links.count()
-    num_link_details = sum(1 for link in parse_json_links_details(out_dir=out_dir))
-    print(f' > SQL Main Index: {num_sql_links} links'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
-    print(f' > JSON Link Details: {num_link_details} links'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
+    snapshots = load_main_index(out_dir=out_dir)
+    num_sql_snapshots = snapshots.count()
+    num_snapshot_details = sum(1 for snapshot in parse_json_snapshot_details(out_dir=out_dir))
+    print(f' > SQL Main Index: {num_sql_snapshots} snapshots'.ljust(36), f'(found in {SQL_INDEX_FILENAME})')
+    print(f' > JSON Link Details: {num_snapshot_details} snapshots'.ljust(36), f'(found in {ARCHIVE_DIR_NAME}/*/index.json)')
     print()
     print('{green}[*] Scanning archive data directories...{reset}'.format(**ANSI))
     print(ANSI['lightyellow'], f' {ARCHIVE_DIR}/*', ANSI['reset'])
@@ -439,23 +439,23 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     size = printable_filesize(num_bytes)
     print(f' Size: {size} across {num_files} files in {num_dirs} directories')
     print(ANSI['black'])
 
-    num_indexed = len(get_indexed_folders(links, out_dir=out_dir))
-    num_archived = len(get_archived_folders(links, out_dir=out_dir))
-    num_unarchived = len(get_unarchived_folders(links, out_dir=out_dir))
+    num_indexed = len(get_indexed_folders(snapshots, out_dir=out_dir))
+    num_archived = len(get_archived_folders(snapshots, out_dir=out_dir))
+    num_unarchived = len(get_unarchived_folders(snapshots, out_dir=out_dir))
     print(f' > indexed: {num_indexed}'.ljust(36), f'({get_indexed_folders.__doc__})')
     print(f' > archived: {num_archived}'.ljust(36), f'({get_archived_folders.__doc__})')
     print(f' > unarchived: {num_unarchived}'.ljust(36), f'({get_unarchived_folders.__doc__})')
 
-    num_present = len(get_present_folders(links, out_dir=out_dir))
-    num_valid = len(get_valid_folders(links, out_dir=out_dir))
+    num_present = len(get_present_folders(snapshots, out_dir=out_dir))
+    num_valid = len(get_valid_folders(snapshots, out_dir=out_dir))
     print()
     print(f' > present: {num_present}'.ljust(36), f'({get_present_folders.__doc__})')
     print(f' > valid: {num_valid}'.ljust(36), f'({get_valid_folders.__doc__})')
 
-    duplicate = get_duplicate_folders(links, out_dir=out_dir)
-    orphaned = get_orphaned_folders(links, out_dir=out_dir)
-    corrupted = get_corrupted_folders(links, out_dir=out_dir)
-    unrecognized = get_unrecognized_folders(links, out_dir=out_dir)
+    duplicate = get_duplicate_folders(snapshots, out_dir=out_dir)
+    orphaned = get_orphaned_folders(snapshots, out_dir=out_dir)
+    corrupted = get_corrupted_folders(snapshots, out_dir=out_dir)
+    unrecognized = get_unrecognized_folders(snapshots, out_dir=out_dir)
     num_invalid = len({**duplicate, **orphaned, **corrupted, **unrecognized})
     print(f' > invalid: {num_invalid}'.ljust(36), f'({get_invalid_folders.__doc__})')
     print(f' > duplicate: {len(duplicate)}'.ljust(36), f'({get_duplicate_folders.__doc__})')
@@ -466,7 +466,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
     print(ANSI['reset'])
 
     if num_indexed:
-        print(' {lightred}Hint:{reset} You can list link data directories by status like so:'.format(**ANSI))
+        print(' {lightred}Hint:{reset} You can list snapshot data directories by status like so:'.format(**ANSI))
         print(' archivebox list --status=<status> (e.g. indexed, corrupted, archived, etc.)')
 
     if orphaned:
@@ -495,7 +495,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
         print(' archivebox manage createsuperuser')
     print()
 
-    for snapshot in links.order_by('-updated')[:10]:
+    for snapshot in snapshots.order_by('-updated')[:10]:
         if not snapshot.updated:
             continue
         print(