From 5429096c305500f586abfd44b4450e9917785c3b Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 4 Aug 2020 08:42:30 -0500 Subject: [PATCH] tests: Add mechanism to avoid using extractors that we are not testing --- tests/fixtures.py | 17 ++++++++++++++++- tests/test_args.py | 23 ++++++++++++++--------- tests/test_extractors.py | 14 ++++++++------ tests/test_init.py | 15 +++++++++------ tests/test_oneshot.py | 9 +++++---- tests/test_remove.py | 4 ++-- tests/test_title.py | 5 +++-- 7 files changed, 57 insertions(+), 30 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 9bf2640a..3d8dabfe 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -7,4 +7,19 @@ import pytest def process(tmp_path): os.chdir(tmp_path) process = subprocess.run(['archivebox', 'init'], capture_output=True) - return process \ No newline at end of file + return process + +@pytest.fixture +def disable_extractors_dict(): + env = os.environ.copy() + env.update({ + "USE_WGET": "false", + "USE_SINGLEFILE": "false", + "SAVE_PDF": "false", + "SAVE_SCREENSHOT": "false", + "SAVE_DOM": "false", + "USE_GIT": "false", + "SAVE_MEDIA": "false", + "SAVE_ARCHIVE_DOT_ORG": "false" + }) + return env \ No newline at end of file diff --git a/tests/test_args.py b/tests/test_args.py index ed132524..c616cb80 100644 --- a/tests/test_args.py +++ b/tests/test_args.py @@ -3,25 +3,30 @@ import json from .fixtures import * -def test_depth_flag_is_accepted(process): - arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], capture_output=True) +def test_depth_flag_is_accepted(process, disable_extractors_dict): + arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], + capture_output=True, env=disable_extractors_dict) assert 'unrecognized arguments: --depth' not in arg_process.stderr.decode("utf-8") -def test_depth_flag_fails_if_it_is_not_0_or_1(process): - arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=5"], capture_output=True) +def test_depth_flag_fails_if_it_is_not_0_or_1(process, disable_extractors_dict): + arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=5"], + capture_output=True, env=disable_extractors_dict) assert 'invalid choice' in arg_process.stderr.decode("utf-8") - arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=-1"], capture_output=True) + arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=-1"], + capture_output=True, env=disable_extractors_dict) assert 'invalid choice' in arg_process.stderr.decode("utf-8") -def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], capture_output=True) +def test_depth_flag_0_crawls_only_the_arg_page(tmp_path, process, disable_extractors_dict): + arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=0"], + capture_output=True, env=disable_extractors_dict) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] with open(archived_item_path / "index.json", "r") as f: output_json = json.load(f) assert output_json["base_url"] == "127.0.0.1:8080/static/example.com.html" -def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process): - arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=1"], capture_output=True) +def test_depth_flag_1_crawls_the_page_AND_links(tmp_path, process, disable_extractors_dict): + arg_process = subprocess.run(["archivebox", "add", "http://127.0.0.1:8080/static/example.com.html", "--depth=1"], + capture_output=True, env=disable_extractors_dict) with open(tmp_path / "index.json", "r") as f: archive_file = f.read() assert "http://127.0.0.1:8080/static/example.com.html" in archive_file diff --git a/tests/test_extractors.py b/tests/test_extractors.py index fb02044b..ffb933c1 100644 --- a/tests/test_extractors.py +++ b/tests/test_extractors.py @@ -1,8 +1,10 @@ from .fixtures import * from archivebox.extractors import ignore_methods, get_default_archive_methods, should_save_title -def test_wget_broken_pipe(tmp_path, process): - add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) +def test_wget_broken_pipe(tmp_path, process, disable_extractors_dict): + disable_extractors_dict.update({"USE_WGET": "true"}) + add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], + capture_output=True, env=disable_extractors_dict) assert "TypeError chmod_file(..., path: str) got unexpected NoneType argument path=None" not in add_process.stdout.decode("utf-8") def test_ignore_methods(): @@ -12,10 +14,10 @@ def test_ignore_methods(): ignored = ignore_methods(['title']) assert should_save_title not in ignored - - -def test_singlefile_works(tmp_path, process): - add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) +def test_singlefile_works(tmp_path, process, disable_extractors_dict): + disable_extractors_dict.update({"USE_SINGLEFILE": "true"}) + add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], + capture_output=True, env=disable_extractors_dict) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] output_file = archived_item_path / "singlefile.html" assert output_file.exists() diff --git a/tests/test_init.py b/tests/test_init.py index 133aaaa9..bd1ad516 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -18,9 +18,10 @@ def test_update(tmp_path, process): update_process = subprocess.run(['archivebox', 'init'], capture_output=True) assert "Updating existing ArchiveBox collection in this folder" in update_process.stdout.decode("utf-8") -def test_add_link(tmp_path, process): +def test_add_link(tmp_path, process, disable_extractors_dict): os.chdir(tmp_path) - add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) + add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], + capture_output=True, env=disable_extractors_dict) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] assert "index.json" in [x.name for x in archived_item_path.iterdir()] @@ -33,9 +34,10 @@ def test_add_link(tmp_path, process): output_html = f.read() assert "Example Domain" in output_html -def test_add_link_support_stdin(tmp_path, process): +def test_add_link_support_stdin(tmp_path, process, disable_extractors_dict): os.chdir(tmp_path) - stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stdin_process = subprocess.Popen(["archivebox", "add"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + env=disable_extractors_dict) stdin_process.communicate(input="http://127.0.0.1:8080/static/example.com.html".encode()) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] @@ -51,9 +53,10 @@ def test_correct_permissions_output_folder(tmp_path, process): file_path = tmp_path / file assert oct(file_path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS -def test_correct_permissions_add_command_results(tmp_path, process): +def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict): os.chdir(tmp_path) - add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) + add_process = subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, + env=disable_extractors_dict) archived_item_path = list(tmp_path.glob('archive/**/*'))[0] for path in archived_item_path.iterdir(): assert oct(path.stat().st_mode)[-3:] == OUTPUT_PERMISSIONS diff --git a/tests/test_oneshot.py b/tests/test_oneshot.py index 7ff9867f..66a567e1 100644 --- a/tests/test_oneshot.py +++ b/tests/test_oneshot.py @@ -2,13 +2,14 @@ from pathlib import Path from .fixtures import * -def test_oneshot_command_exists(tmp_path): +def test_oneshot_command_exists(tmp_path, disable_extractors_dict): os.chdir(tmp_path) - process = subprocess.run(['archivebox', 'oneshot'], capture_output=True) + process = subprocess.run(['archivebox', 'oneshot'], capture_output=True, env=disable_extractors_dict) assert not "invalid choice: 'oneshot'" in process.stderr.decode("utf-8") -def test_oneshot_commad_saves_page_in_right_folder(tmp_path): - process = subprocess.run(["archivebox", "oneshot", f"--out-dir={tmp_path}", "http://127.0.0.1:8080/static/example.com.html"], capture_output=True) +def test_oneshot_commad_saves_page_in_right_folder(tmp_path, disable_extractors_dict): + process = subprocess.run(["archivebox", "oneshot", f"--out-dir={tmp_path}", "http://127.0.0.1:8080/static/example.com.html"], + capture_output=True, env=disable_extractors_dict) items = ' '.join([str(x) for x in tmp_path.iterdir()]) current_path = ' '.join([str(x) for x in Path.cwd().iterdir()]) assert "index.json" in items diff --git a/tests/test_remove.py b/tests/test_remove.py index 040dafdc..d26c96bb 100644 --- a/tests/test_remove.py +++ b/tests/test_remove.py @@ -1,8 +1,8 @@ from .fixtures import * -def test_remove_leaves_index_in_consistent_state(tmp_path, process): +def test_remove_leaves_index_in_consistent_state(tmp_path, process, disable_extractors_dict): os.chdir(tmp_path) - subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True) + subprocess.run(['archivebox', 'add', 'http://127.0.0.1:8080/static/example.com.html'], capture_output=True, env=disable_extractors_dict) remove_process = subprocess.run(['archivebox', 'remove', '127.0.0.1:8080/static/example.com.html', '--yes'], capture_output=True) list_process = subprocess.run(['archivebox', 'list'], capture_output=True) assert "Warning: SQL index does not match JSON index!" not in list_process.stderr.decode("utf-8") \ No newline at end of file diff --git a/tests/test_title.py b/tests/test_title.py index b5090844..24b2cc28 100644 --- a/tests/test_title.py +++ b/tests/test_title.py @@ -1,12 +1,13 @@ from .fixtures import * -def test_title_is_htmlencoded_in_index_html(tmp_path, process): +def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict): """ https://github.com/pirate/ArchiveBox/issues/330 Unencoded content should not be rendered as it facilitates xss injections and breaks the layout. """ - add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'], capture_output=True) + add_process = subprocess.run(['archivebox', 'add', 'http://localhost:8080/static/title_with_html.com.html'], + capture_output=True, env=disable_extractors_dict) with open(tmp_path / "index.html", "r") as f: output_html = f.read()