From fe0884f1ec5c311fddbcf33ff09fa7e0ad2b0961 Mon Sep 17 00:00:00 2001
From: Cristian
Date: Thu, 23 Jul 2020 15:07:00 -0500
Subject: [PATCH] fix: Remove link from sql index on remove command

---
Reviewer note: the line structure of this patch was restored from a copy in
which all newlines and indentation had been collapsed. Indentation was rebuilt
from the Python syntax and checked against the hunk line counts. Runs of spaces
inside string literals (the TimedProgress `prefix` context line in
archivebox/main.py) could not be recovered -- confirm that line against the
target tree before running `git apply`.

 archivebox/index/sql.py | 10 ++++++++++
 archivebox/main.py      | 12 +++++++++---
 tests/test_remove.py    |  8 ++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_remove.py

diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py
index b120738c..1043fa52 100644
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -20,6 +20,16 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
         for page in Snapshot.objects.all()
     )
 
+@enforce_types
+def remove_from_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
+    setup_django(out_dir, check_db=True)
+    from core.models import Snapshot
+    from django.db import transaction
+
+    with transaction.atomic():
+        for link in links:
+            Snapshot.objects.filter(url=link.url).delete()
+
 @enforce_types
 def write_sql_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
     setup_django(out_dir, check_db=True)
diff --git a/archivebox/main.py b/archivebox/main.py
index cd6c7492..1c7068b3 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -49,6 +49,7 @@ from .index.sql import (
     parse_sql_main_index,
     get_admins,
     apply_migrations,
+    remove_from_sql_main_index,
 )
 from .index.html import parse_html_main_index
 from .extractors import archive_links
@@ -600,6 +601,7 @@ def remove(filter_str: Optional[str]=None,
     timer = TimedProgress(360, prefix=' ')
     try:
         to_keep = []
+        to_delete = []
         all_links = load_main_index(out_dir=out_dir)
         for link in all_links:
             should_remove = (
@@ -607,13 +609,17 @@ def remove(filter_str: Optional[str]=None,
                 or (before is not None and float(link.timestamp) > before)
                 or link_matches_filter(link, filter_patterns, filter_type)
             )
-            if not should_remove:
+            if should_remove:
+                to_delete.append(link)
+
+                if delete:
+                    shutil.rmtree(link.link_dir, ignore_errors=True)
+            else:
                 to_keep.append(link)
-            elif should_remove and delete:
-                shutil.rmtree(link.link_dir, ignore_errors=True)
     finally:
         timer.end()
 
+    remove_from_sql_main_index(links=to_delete, out_dir=out_dir)
     write_main_index(links=to_keep, out_dir=out_dir, finished=True)
     log_removal_finished(len(all_links), len(to_keep))
 
diff --git a/tests/test_remove.py b/tests/test_remove.py
new file mode 100644
index 00000000..040dafdc
--- /dev/null
+++ b/tests/test_remove.py
@@ -0,0 +1,8 @@
+from .fixtures import *
+
+def test_remove_leaves_index_in_consistent_state(tmp_path, process):
+    os.chdir(tmp_path)
+    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True)
+    remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
+    list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
+    assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")
\ No newline at end of file