1
0
Fork 0
mirror of synced 2024-06-29 03:20:58 +12:00
ArchiveBox/tests/test_remove.py

71 lines
3.7 KiB
Python
Raw Normal View History

2020-08-22 06:32:31 +12:00
import os
import sqlite3
from .fixtures import *
2020-08-22 06:32:31 +12:00
def test_remove_single_page(tmp_path, process, disable_extractors_dict):
    """Removing a single added URL leaves no rows in ``core_snapshot``.

    Adds one page, removes it with ``--yes`` (without ``--delete``), checks
    that the CLI reported exactly one matching URL, and then verifies that
    the ``core_snapshot`` table in ``index.sqlite3`` is empty.

    Args:
        tmp_path: pytest temporary directory; the test changes into it.
        process: fixture provided by ``.fixtures`` (not visible in this
            file; presumably initialises the collection -- TODO confirm).
        disable_extractors_dict: environment mapping handed to
            ``archivebox add`` via ``env=``.
    """
    os.chdir(tmp_path)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)

    remove_process = subprocess.run(['archivebox', 'remove', 'http://127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True)
    assert "Found 1 matching URLs to remove" in remove_process.stdout.decode("utf-8")

    # Relative path: resolved against the working directory set by os.chdir above.
    conn = sqlite3.connect("index.sqlite3")
    try:
        # Read-only query, so no commit is needed afterwards.
        count = conn.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    finally:
        # Always release the database handle, even if the query raises
        # (e.g. the table is missing), so the failure is not compounded by a leak.
        conn.close()
    assert count == 0
def test_remove_single_page_filesystem(tmp_path, process, disable_extractors_dict):
    """Removing a URL with ``--delete`` empties the ``archive`` directory.

    Adds one page, asserts that ``tmp_path / "archive"`` has at least one
    entry, removes the same URL with ``--yes --delete``, and asserts that the
    directory is then empty.
    """
    page_url = 'http://127.0.0.1:8080/static/example.com.html'
    archive_dir = tmp_path / "archive"

    add_cmd = ['archivebox', 'add', page_url]
    subprocess.run(add_cmd, capture_output=True, env=disable_extractors_dict)
    # Something must have been written before the removal is meaningful.
    assert list(archive_dir.iterdir())

    remove_cmd = ['archivebox', 'remove', page_url, '--yes', '--delete']
    subprocess.run(remove_cmd, capture_output=True)
    assert not list(archive_dir.iterdir())
def test_remove_regex(tmp_path, process, disable_extractors_dict):
    """A ``.*`` regex filter with ``--delete`` empties the ``archive`` directory.

    Adds two pages, asserts the ``archive`` directory is non-empty, runs
    ``archivebox remove --filter-type=regex .* --yes --delete``, and asserts
    that no entries remain in the directory.
    """
    archive_dir = tmp_path / "archive"
    page_urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )

    # Add the pages one after another, in the same order as listed.
    for page_url in page_urls:
        subprocess.run(['archivebox', 'add', page_url], capture_output=True, env=disable_extractors_dict)
    assert list(archive_dir.iterdir())

    remove_cmd = ['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete']
    subprocess.run(remove_cmd, capture_output=True)
    assert not list(archive_dir.iterdir())
def test_remove_exact(tmp_path, process, disable_extractors_dict):
    """An ``exact`` filter removes only the URL that matches exactly.

    Adds two pages, asserts the ``archive`` directory is non-empty, removes
    the iana.org page with ``--filter-type=exact --yes --delete``, and
    asserts that exactly one entry is left in the ``archive`` directory.

    Args:
        tmp_path: pytest temporary directory holding the ``archive`` folder.
        process: fixture provided by ``.fixtures`` (not visible in this
            file; presumably initialises the collection -- TODO confirm).
        disable_extractors_dict: environment mapping handed to
            ``archivebox add`` via ``env=``.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []

    # The command's result is not inspected here, so it is not bound to a name.
    subprocess.run(['archivebox', 'remove', '--filter-type=exact', 'http://127.0.0.1:8080/static/iana.org.html', '--yes', '--delete'], capture_output=True)
    assert len(list((tmp_path / "archive").iterdir())) == 1
def test_remove_substr(tmp_path, process, disable_extractors_dict):
    """A ``substring`` filter removes only the URL containing the substring.

    Adds two pages, asserts the ``archive`` directory is non-empty, removes
    with ``--filter-type=substring example.com --yes --delete``, and asserts
    that exactly one entry is left in the ``archive`` directory.
    """
    archive_dir = tmp_path / "archive"
    page_urls = (
        'http://127.0.0.1:8080/static/example.com.html',
        'http://127.0.0.1:8080/static/iana.org.html',
    )

    for page_url in page_urls:
        subprocess.run(['archivebox', 'add', page_url], capture_output=True, env=disable_extractors_dict)
    assert list(archive_dir.iterdir())

    remove_cmd = ['archivebox', 'remove', '--filter-type=substring', 'example.com', '--yes', '--delete']
    subprocess.run(remove_cmd, capture_output=True)

    remaining = list(archive_dir.iterdir())
    assert len(remaining) == 1
def test_remove_domain(tmp_path, process, disable_extractors_dict):
    """A ``domain`` filter removes every URL served from that domain.

    Adds two pages that are both served from ``127.0.0.1``, asserts the
    ``archive`` directory is non-empty, removes with
    ``--filter-type=domain 127.0.0.1 --yes --delete``, and then asserts that
    both the ``archive`` directory and the ``core_snapshot`` table in
    ``index.sqlite3`` are empty.

    Args:
        tmp_path: pytest temporary directory holding the ``archive`` folder.
        process: fixture provided by ``.fixtures`` (not visible in this
            file; presumably initialises the collection -- TODO confirm).
        disable_extractors_dict: environment mapping handed to
            ``archivebox add`` via ``env=``.
    """
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict)
    subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/iana.org.html'], capture_output=True, env=disable_extractors_dict)
    assert list((tmp_path / "archive").iterdir()) != []

    # The command's result is not inspected here, so it is not bound to a name.
    subprocess.run(['archivebox', 'remove', '--filter-type=domain', '127.0.0.1', '--yes', '--delete'], capture_output=True)
    assert len(list((tmp_path / "archive").iterdir())) == 0

    # NOTE(review): relative path -- this relies on the current working
    # directory already being the collection directory; nothing in this test
    # changes directory itself, so confirm a fixture does.
    conn = sqlite3.connect("index.sqlite3")
    try:
        # Read-only query, so no commit is needed afterwards.
        count = conn.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
    finally:
        # Always release the database handle, even if the query raises.
        conn.close()
    assert count == 0