diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index b34c0212..dbad0f66 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -14,7 +14,7 @@
 from ..index.schema import Link
 from ..config import CONFIG
 #EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
-EXTRACTORS = ["title", "wget"]
+EXTRACTORS = [("title", "title"), ("wget", "wget")]
 STATUS_CHOICES = [
     ("succeeded", "succeeded"),
     ("failed", "failed"),
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index b46e364e..ed549aa4 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -41,6 +41,7 @@ class MainIndex(View):
 
 
 class LinkDetails(View):
+
     def get(self, request, path):
         # missing trailing slash -> redirect to index
         if '/' not in path:
diff --git a/archivebox/index/sql.py b/archivebox/index/sql.py
index d32a1468..1719da94 100644
--- a/archivebox/index/sql.py
+++ b/archivebox/index/sql.py
@@ -61,7 +61,7 @@ def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> None:
     try:
         snap = Snapshot.objects.get(url=snapshot.url)
     except Snapshot.DoesNotExist:
-        snap = write_snapshot_to_sql_index(snapshot)
+        snap = write_snapshot_to_index(snapshot)
     snap.title = snapshot.title
 
     # TODO: If there are actual tags, this will break
diff --git a/archivebox/main.py b/archivebox/main.py
index 443a5d0a..3c3bbb5f 100644
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -22,7 +22,7 @@ from .cli import (
 from .parsers import (
     save_text_as_source,
     save_file_as_source,
-    parse_links_memory,
+    parse_snapshots_memory,
 )
 from .index.schema import Link
 from .util import enforce_types  # type: ignore
@@ -516,8 +516,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.
     """
-    oneshot_link, _ = parse_links_memory([url])
-    if len(oneshot_link) > 1:
+    oneshot_snapshots, _ = parse_snapshots_memory([url])
+    if len(oneshot_snapshots) > 1:
         stderr(
             '[X] You should pass a single url to the oneshot command',
             color='red'
@@ -525,8 +525,10 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
         raise SystemExit(2)
     methods = extractors.split(",") if extractors else ignore_methods(['title'])
-    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
-    return oneshot_link
+    snapshot = oneshot_snapshots[0]
+    snapshot.save()  # Oneshot uses an in-memory database, so this is safe
+    archive_snapshot(snapshot, out_dir=out_dir, methods=methods)
+    return snapshot
 
 
 @enforce_types
 def add(urls: Union[str, List[str]],
diff --git a/archivebox/parsers/__init__.py b/archivebox/parsers/__init__.py
index 8b10d794..72ae4860 100644
--- a/archivebox/parsers/__init__.py
+++ b/archivebox/parsers/__init__.py
@@ -66,7 +66,7 @@ PARSERS = (
 )
 
 @enforce_types
-def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
+def parse_snapshots_memory(urls: List[str], root_url: Optional[str]=None):
     """
     parse a list of URLS without touching the filesystem
     """
@@ -77,12 +77,12 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
     file = StringIO()
     file.writelines(urls)
     file.name = "io_string"
-    links, parser = run_parser_functions(file, timer, root_url=root_url)
+    snapshots, parser = run_parser_functions(file, timer, root_url=root_url)
 
     timer.end()
     if parser is None:
         return [], 'Failed to parse'
-    return links, parser
+    return snapshots, parser
 
 
 @enforce_types