1
0
Fork 0
mirror of synced 2024-06-25 01:20:30 +12:00

allow passing import list via stdin

This commit is contained in:
Nick Sweeting 2019-01-14 18:11:48 -05:00
parent cb60bad1d7
commit 39f5e12364
2 changed files with 37 additions and 2 deletions

View file

@@ -31,6 +31,7 @@ from config import (
)
from util import (
download_url,
save_source,
progress,
cleanup_archive,
pretty_path,
@@ -39,14 +40,18 @@ from util import (
__AUTHOR__ = 'Nick Sweeting <git@nicksweeting.com>'
__VERSION__ = GIT_SHA
__DESCRIPTION__ = 'ArchiveBox: Create a browsable html archive of a list of links.'
__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox'
__DESCRIPTION__ = 'ArchiveBox Usage: Create a browsable html archive of a list of links.'
__DOCUMENTATION__ = 'https://github.com/pirate/ArchiveBox/wiki'
def print_help():
    """Print the CLI usage banner: description, docs link, and example invocations."""
    print(__DESCRIPTION__)
    print("Documentation: {}\n".format(__DOCUMENTATION__))
    print("Usage:")
    print(" ./bin/archivebox ~/Downloads/bookmarks_export.html\n")
    print("")
    print(" ./bin/archivebox https://example.com/feed.rss\n")
    print("")
    # stdin mode (added in this commit): pipe a list of links instead of passing a file path.
    # Fixed the example URL here: it was missing the dot ('https://examplecom').
    print(" echo 'https://example.com' | ./bin/archivebox\n")
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
@@ -138,6 +143,20 @@ if __name__ == '__main__':
source = sys.argv[1] if argc > 1 else None # path of links file to import
resume = sys.argv[2] if argc > 2 else None # timestamp to resume downloading from
stdin_raw_text = []
if not sys.stdin.isatty():
stdin_raw_text = sys.stdin.read()
if source and stdin_raw_text:
print(
'[X] You should pass either a path as an argument, '
'or pass a list of links via stdin, but not both.\n'
)
print_help()
raise SystemExit(1)
if argc == 1:
source, resume = None, None
elif argc == 2:
@@ -163,6 +182,8 @@ if __name__ == '__main__':
# Step 0: Download url to local file (only happens if a URL is specified instead of local path)
if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
source = download_url(source)
elif stdin_raw_text:
source = save_source(stdin_raw_text)
# Step 1: Parse the links and dedupe them with existing archive
links = merge_links(archive_path=out_dir, import_path=source, only_new=False)

View file

@@ -180,6 +180,20 @@ def pretty_path(path):
return path.replace(REPO_DIR + '/', '')
def save_source(raw_text):
    """Persist text piped in via stdin to a timestamped file under SOURCES_DIR.

    Returns the path of the file written, so the caller can feed it to the
    same link-parsing pipeline used for imported files.
    """
    if not os.path.exists(SOURCES_DIR):
        os.makedirs(SOURCES_DIR)

    # Whole-second unix timestamp keeps the filename readable and unique enough.
    timestamp = str(datetime.now().timestamp()).split('.', 1)[0]
    out_path = os.path.join(SOURCES_DIR, '{}-{}.txt'.format('stdin', timestamp))

    with open(out_path, 'w', encoding='utf-8') as source_file:
        source_file.write(raw_text)

    return out_path
def download_url(url):
"""download a given url's content into downloads/domain.txt"""