ArchiveBox/archive.py

#!/usr/bin/env python3
# Bookmark Archiver
# Nick Sweeting 2017 | MIT License
# https://github.com/pirate/bookmark-archiver

import os
import sys

from datetime import datetime
from parse import parse_export
from index import dump_index
from fetch import dump_website
from config import (
    ARCHIVE_PERMISSIONS,
    ANSI,
    check_dependencies,
)
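# Note (inferred from how they are used below, not from their own docs): parse,
# index, fetch, and config are sibling modules in this repository; parse_export()
# detects the export format and extracts links, dump_index() writes the browsable
# index.html, dump_website() downloads the archive for a single link, and
# check_dependencies() presumably verifies that the required external tools exist.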

DESCRIPTION = 'Bookmark Archiver: Create a browsable html archive of a list of links.'
__DOCUMENTATION__ = 'https://github.com/pirate/bookmark-archiver'


def create_archive(export_file, service=None, resume=None):
"""update or create index.html and download archive of all links"""
2017-07-06 09:33:51 +12:00
print('[*] [{}] Starting archive from {} export file.'.format(
datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
export_file,
))
2017-06-16 11:31:33 +12:00
2017-07-06 09:33:51 +12:00
with open(export_file, 'r', encoding='utf-8') as f:
links, service = parse_export(f, service=service)
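    # Assumption (inferred from usage below): parse_export() returns the parsed
    # link dicts and the detected service name, and each link dict carries at
    # least a numeric-string 'timestamp' key used for ordering and resuming.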

    if resume:
        try:
            links = [
                link
                for link in links
                if float(link['timestamp']) >= float(resume)
            ]
        except (TypeError, ValueError):
            # float() raises ValueError for non-numeric strings and TypeError for None
            print('Resume value and all timestamp values must be valid numbers.')

    if not links or not service:
        print('[X] No links found in {}, is it a {} export file?'.format(export_file, service))
        raise SystemExit(1)

    if not os.path.exists(service):
        os.makedirs(service)
    if not os.path.exists(os.path.join(service, 'archive')):
        os.makedirs(os.path.join(service, 'archive'))

    dump_index(links, service)
    check_dependencies()
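    # Links are archived one at a time below; if the run is interrupted or a
    # fetch fails, the timestamp of the link being processed is printed so the
    # run can be resumed from that point via the third command-line argument.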
    try:
        for link in links:
            dump_website(link, service)
    except (KeyboardInterrupt, SystemExit, Exception) as e:
        print('{red}[X] Archive creation stopped.{reset}'.format(**ANSI))
        print('    Continue where you left off by running:')
        print('        ./archive.py {} {} {}'.format(
            export_file,
            service,
            link['timestamp'],
        ))
        if not isinstance(e, KeyboardInterrupt):
            raise e
        raise SystemExit(1)

    print('{}[√] [{}] Archive update complete.{}'.format(ANSI['green'], datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ANSI['reset']))


if __name__ == '__main__':
    argc = len(sys.argv)

    if argc < 2 or sys.argv[1] in ('-h', '--help', 'help'):
        print(DESCRIPTION)
        print("Documentation: {}".format(__DOCUMENTATION__))
        print("")
        print("Usage:")
        print("    ./archive.py ~/Downloads/bookmarks_export.html")
        print("")
        raise SystemExit(0)

    export_file = sys.argv[1]                        # path to the bookmarks export file
    export_type = sys.argv[2] if argc > 2 else None  # optional: which service/format the export came from
    resume_from = sys.argv[3] if argc > 3 else None  # optional: timestamp to resume downloading from
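    # Example invocations (hypothetical values; the accepted service names and
    # timestamp format depend on parse_export() and the export file being used):
    #   ./archive.py ~/Downloads/bookmarks_export.html
    #   ./archive.py ~/Downloads/bookmarks_export.html pocket 1498364787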
    create_archive(export_file, service=export_type, resume=resume_from)