
feat: Add tests for remove command

Authored by Cristian on 2020-08-21 13:32:31 -05:00; committed by Cristian Vargas
parent a8ed72501d
commit fe9604a772
4 changed files with 68 additions and 8 deletions

View file

@@ -9,7 +9,6 @@ from ..index.schema import Link
from ..index import (
    load_link_details,
    write_link_details,
    write_main_index,
)
from ..util import enforce_types
from ..logging_util import (

View file

@@ -1,6 +1,5 @@
__package__ = 'archivebox.index'
import re
import os
import shutil
import json as pyjson
@@ -373,7 +372,7 @@ LINK_FILTERS = {
    'exact': lambda pattern: Q(url=pattern),
    'substring': lambda pattern: Q(url__icontains=pattern),
    'regex': lambda pattern: Q(url__iregex=pattern),
    'domain': lambda pattern: Q(domain=pattern),
    'domain': lambda pattern: Q(url__istartswith=f"http://{pattern}") | Q(url__istartswith=f"https://{pattern}") | Q(url__istartswith=f"ftp://{pattern}"),
}
@enforce_types
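For context, a minimal sketch (not part of this diff) of what the reworked 'domain' filter evaluates to for a given pattern; the Snapshot queryset shown in the comment is an assumption based on the core_snapshot references elsewhere in this commit.

# Hedged sketch: the Q expression produced by the new 'domain' filter.
# Assumes a Django model with a `url` field (e.g. core.models.Snapshot).
from django.db.models import Q

pattern = 'example.com'
domain_q = (
    Q(url__istartswith=f"http://{pattern}")
    | Q(url__istartswith=f"https://{pattern}")
    | Q(url__istartswith=f"ftp://{pattern}")
)
# e.g. snapshots = Snapshot.objects.filter(domain_q)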

View file

@@ -24,7 +24,6 @@ def parse_sql_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
@enforce_types
def remove_from_sql_main_index(snapshots: QuerySet, out_dir: str=OUTPUT_DIR) -> None:
    setup_django(out_dir, check_db=True)
    from core.models import Snapshot
    from django.db import transaction
    with transaction.atomic():
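A hedged sketch of how the new QuerySet-based signature might be called; the actual call site is outside this hunk, so the queryset construction below is an assumption based on the LINK_FILTERS change above.

# Hypothetical call site (not shown in this diff). Assumes Django is set up
# and core.models.Snapshot exists, as setup_django()/core_snapshot suggest.
from django.db.models import Q
from core.models import Snapshot

snapshots = Snapshot.objects.filter(Q(url__icontains='example.com'))  # 'substring'-style filter
remove_from_sql_main_index(snapshots=snapshots, out_dir=OUTPUT_DIR)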

View file

@@ -1,8 +1,71 @@
import os
import sqlite3

from .fixtures import *

def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict):
def test_remove_single_page(tmp_path, process, disable_extractors_dict):
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
    list_process = subprocess.run(['archivebox', 'list'], capture_output=True)
    assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8")
    remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
    assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    conn.commit()
    conn.close()
    assert count == 0

def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes', '--delete'], capture_output=True)
    assert list((tmp_path / "archive").iterdir()) == []

def test_remove_regex(tmp_path, process, disable_extractors_dict):
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
    assert list((tmp_path / "archive").iterdir()) == []

def test_remove_exact(tmp_path, process, disable_extractors_dict):
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []
    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)
    assert len(list((tmp_path / "archive").iterdir())) == 1

def test_remove_substr(tmp_path, process, disable_extractors_dict):
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []
    subprocess.run(['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete'], capture_output=True)
    assert len(list((tmp_path / "archive").iterdir())) == 1

def test_remove_domain(tmp_path, process, disable_extractors_dict):
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []
    remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)
    assert len(list((tmp_path / "archive").iterdir())) == 0
    conn = sqlite3.connect("index.sqlite3")
    c = conn.cursor()
    count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    conn.commit()
    conn.close()
    assert count == 0
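
The SQLite count check is duplicated verbatim in test_remove_single_page and test_remove_domain; a possible follow-up refactor (not part of this commit, untested) would factor it into a helper that reuses the module-level sqlite3 import:

# Hypothetical helper, assuming the index lives at tmp_path / "index.sqlite3"
# as the tests above imply.
def get_snapshot_count(tmp_path):
    conn = sqlite3.connect(str(tmp_path / "index.sqlite3"))
    try:
        return conn.cursor().execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    finally:
        conn.close()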