
refactor: oneshot command is functional

Cristian 2020-12-31 12:19:14 -05:00
parent 973f8b6abc
commit c51d789ad4
5 changed files with 13 additions and 10 deletions

View file

@@ -14,7 +14,7 @@ from ..index.schema import Link
 from ..config import CONFIG
 
 #EXTRACTORS = [(extractor[0], extractor[0]) for extractor in get_default_archive_methods()]
-EXTRACTORS = ["title", "wget"]
+EXTRACTORS = [("title", "title"), ("wget", "wget")]
 STATUS_CHOICES = [
     ("succeeded", "succeeded"),
     ("failed", "failed"),

View file

@@ -41,6 +41,7 @@ class MainIndex(View):
 class LinkDetails(View):
     def get(self, request, path):
+        # missing trailing slash -> redirect to index
         if '/' not in path:
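
The body of the "if '/' not in path:" guard is cut off in this view of the diff. As a hedged sketch only, such a guard typically redirects a bare snapshot path to its index page; the redirect target below is an assumption, not something this commit shows:

    from django.shortcuts import redirect
    from django.views import View

    class LinkDetails(View):
        def get(self, request, path):
            # missing trailing slash -> redirect to index
            if '/' not in path:
                # assumed target: send /<timestamp> to /<timestamp>/index.html
                return redirect(f'{path}/index.html')
            ...  # normal detail-page handling continues here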

View file

@@ -61,7 +61,7 @@ def write_sql_snapshot_details(snapshot: Model, out_dir: Path=OUTPUT_DIR) -> None:
     try:
         snap = Snapshot.objects.get(url=snapshot.url)
     except Snapshot.DoesNotExist:
-        snap = write_snapshot_to_sql_index(snapshot)
+        snap = write_snapshot_to_index(snapshot)
     snap.title = snapshot.title
     # TODO: If there are actual tags, this will break
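
The try/except here is a manual get-or-create: look up an existing Snapshot row by URL, and fall back to writing a fresh index entry on DoesNotExist. For comparison, a sketch of the same pattern using Django's built-in helper (a hypothetical simplification, assuming a plain row insert were acceptable, which it may not be if write_snapshot_to_index does extra bookkeeping):

    # Hypothetical: the real code calls write_snapshot_to_index() so the
    # new row goes through the normal indexing path.
    snap, created = Snapshot.objects.get_or_create(
        url=snapshot.url,
        defaults={'title': snapshot.title},
    )
    if not created:
        snap.title = snapshot.title  # refresh the title on an existing row
        snap.save()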

View file

@@ -22,7 +22,7 @@ from .cli import (
 from .parsers import (
     save_text_as_source,
     save_file_as_source,
-    parse_links_memory,
+    parse_snapshots_memory,
 )
 from .index.schema import Link
 from .util import enforce_types # type: ignore
@@ -516,8 +516,8 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
     Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
     You can run this to archive single pages without needing to create a whole collection with archivebox init.
     """
-    oneshot_link, _ = parse_links_memory([url])
-    if len(oneshot_link) > 1:
+    oneshot_snapshots, _ = parse_snapshots_memory([url])
+    if len(oneshot_snapshots) > 1:
         stderr(
             '[X] You should pass a single url to the oneshot command',
             color='red'
@@ -525,8 +525,10 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
         )
         raise SystemExit(2)
     methods = extractors.split(",") if extractors else ignore_methods(['title'])
-    archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
-    return oneshot_link
+    snapshot = oneshot_snapshots[0]
+    snapshot.save()  # Oneshot uses an in-memory database, so this is safe
+    archive_snapshot(snapshot, out_dir=out_dir, methods=methods)
+    return snapshot
 
 @enforce_types
 def add(urls: Union[str, List[str]],
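
After this change, oneshot parses the URL into in-memory Snapshot objects, persists the first one (safe because oneshot runs against an in-memory database rather than a collection's on-disk index), and hands it to archive_snapshot. A sketch of how a caller might drive the new signature (the extractor names and output directory are examples, not mandated by the commit):

    from pathlib import Path

    # Archive one page with just the title and wget extractors,
    # writing outputs under ./my-archive instead of a full collection.
    snapshot = oneshot(
        'https://example.com',
        extractors='title,wget',
        out_dir=Path('./my-archive'),
    )
    print(snapshot.url)  # the function now returns the single Snapshot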

View file

@@ -66,7 +66,7 @@ PARSERS = (
 
 @enforce_types
-def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
+def parse_snapshots_memory(urls: List[str], root_url: Optional[str]=None):
     """
     parse a list of URLS without touching the filesystem
     """
@@ -77,12 +77,12 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
     file = StringIO()
     file.writelines(urls)
     file.name = "io_string"
-    links, parser = run_parser_functions(file, timer, root_url=root_url)
+    snapshots, parser = run_parser_functions(file, timer, root_url=root_url)
     timer.end()
 
     if parser is None:
         return [], 'Failed to parse'
-    return links, parser
+    return snapshots, parser
 
 @enforce_types
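
The StringIO trick above is what makes "without touching the filesystem" work: the in-memory buffer gets a fake .name attribute so downstream parser functions that expect a real file object still behave. A self-contained sketch of the same idea (run_parser_functions and the timer belong to this module; the one-line parser below is a stand-in):

    from io import StringIO

    def parse_urls_in_memory(urls):
        # Build a file-like object entirely in memory
        file = StringIO()
        file.writelines(urls)
        file.seek(0)             # rewind so reads start at the beginning
        file.name = "io_string"  # fake filename for code that inspects file.name

        # Stand-in for run_parser_functions(): one record per non-empty line
        return [{'url': line.strip()} for line in file if line.strip()]

    print(parse_urls_in_memory(['https://example.com\n']))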