"""Configuration constants and shared utility helpers for the archiver.

Settings are read from environment variables at import time; the helpers
below check external dependencies, fix permissions on archived output and
render a terminal progress bar.
"""

import os
import sys
import time
import shutil

from subprocess import run, PIPE, DEVNULL
from multiprocessing import Process

# os.getenv('VARIABLE', 'DEFAULT') gets the value of environment
# variable "VARIABLE" and if it is not set, sets it to 'DEFAULT'

# for boolean values, check to see if the string is 'true', and
# if so, the python variable will be True

IS_TTY = sys.stdout.isatty()
USE_COLOR = os.getenv('USE_COLOR', str(IS_TTY)).lower() == 'true'
SHOW_PROGRESS = os.getenv('SHOW_PROGRESS', str(IS_TTY)).lower() == 'true'
FETCH_WGET = os.getenv('FETCH_WGET', 'True').lower() == 'true'
FETCH_WGET_REQUISITES = os.getenv('FETCH_WGET_REQUISITES', 'True').lower() == 'true'
FETCH_AUDIO = os.getenv('FETCH_AUDIO', 'False').lower() == 'true'
FETCH_VIDEO = os.getenv('FETCH_VIDEO', 'False').lower() == 'true'
FETCH_PDF = os.getenv('FETCH_PDF', 'True').lower() == 'true'
FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True').lower() == 'true'
FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True').lower() == 'true'
SUBMIT_ARCHIVE_DOT_ORG = os.getenv('SUBMIT_ARCHIVE_DOT_ORG', 'True').lower() == 'true'
RESOLUTION = os.getenv('RESOLUTION', '1440,900')
ARCHIVE_PERMISSIONS = os.getenv('ARCHIVE_PERMISSIONS', '755')
CHROME_BINARY = os.getenv('CHROME_BINARY', 'chromium-browser')  # change to google-chrome browser if using google-chrome
WGET_BINARY = os.getenv('WGET_BINARY', 'wget')
WGET_USER_AGENT = os.getenv('WGET_USER_AGENT', None)
TIMEOUT = int(os.getenv('TIMEOUT', '60'))
INDEX_TEMPLATE = os.getenv('INDEX_TEMPLATE', 'templates/index.html')
INDEX_ROW_TEMPLATE = os.getenv('INDEX_ROW_TEMPLATE', 'templates/index_row.html')

# fall back to 100 columns when the terminal size cannot be determined
TERM_WIDTH = shutil.get_terminal_size((100, 10)).columns

ANSI = {
    'reset': '\033[00;00m',
    'lightblue': '\033[01;30m',
    'lightyellow': '\033[01;33m',
    'lightred': '\033[01;35m',
    'red': '\033[01;31m',
    'green': '\033[01;32m',
    'blue': '\033[01;34m',
    'white': '\033[01;37m',
    'black': '\033[01;30m',
}

if not USE_COLOR:
    # dont show colors if USE_COLOR is False
    ANSI = {k: '' for k in ANSI}


def _is_utf8(encoding):
    """Return True if `encoding` names UTF-8, ignoring case and separator.

    Accepts the spellings 'UTF-8', 'utf-8', 'utf8' and 'utf_8'; a missing
    encoding (None or '') is treated as not UTF-8.
    """
    return (encoding or '').upper().replace('_', '-') in ('UTF-8', 'UTF8')


if not _is_utf8(sys.stdout.encoding):
    print('[X] Your system is running python3 scripts with a bad locale setting: {} (it should be UTF-8).'.format(sys.stdout.encoding))
    print(' To fix it, add the line "export PYTHONIOENCODING=utf8" to your ~/.bashrc file (without quotes)')
    print('')
    print(' Confirm that it\'s fixed by opening a new shell and running:')
    print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8')
    print('')
    print(' Alternatively, run this script with:')
    print(' env PYTHONIOENCODING=utf8 ./archive.py export.html')


### Util Functions

def _python_is_supported(version_info, minimum=(3, 5)):
    """Return True if the (major, minor) prefix of `version_info` is >= `minimum`."""
    # compare as tuples: formatting "3.10" as a float would collapse it to 3.1
    return tuple(version_info[:2]) >= tuple(minimum)


def _chrome_major_version(version_output):
    """Extract the integer major version from `chrome --version` output.

    e.g. 'Google Chrome 61.0.3114.0 canary' -> 61
         'Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04' -> 59

    Raises ValueError when no leading integer version can be found.
    """
    # TODO: regex might be better
    without_name = version_output.replace('Google Chrome ', '').replace('Chromium ', '')
    full_version = without_name.split(' ', 1)[0]
    return int(full_version.split('.', 1)[0])


def _binary_missing(binary):
    """Return True if `binary` is not on the PATH or `binary --version` fails."""
    # `which` runs first so a missing binary short-circuits before we try to execute it
    return bool(
        run(['which', binary]).returncode
        or run([binary, '--version'], stdout=DEVNULL).returncode
    )


def _exit_missing_dependency(binary):
    """Print the missing-dependency help for `binary` and raise SystemExit(1)."""
    print('{}[X] Missing dependency: {}{}'.format(ANSI['red'], binary, ANSI['reset']))
    print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(binary))
    print(' See https://github.com/pirate/bookmark-archiver for help.')
    raise SystemExit(1)


def check_dependencies():
    """Check that all necessary dependencies are installed, and have valid versions

    Only the tools needed by the enabled FETCH_*/SUBMIT_* settings are checked.
    Raises SystemExit(1) after printing a help message when the Python version
    is too old, a required binary is missing, or Chrome is older than 59.
    """
    print('[*] Checking Dependencies:')

    if not _python_is_supported(sys.version_info):
        python_vers = '{}.{}'.format(sys.version_info.major, sys.version_info.minor)
        print('{}[X] Python version is not new enough: {} (>=3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
        print(' See https://github.com/pirate/bookmark-archiver#troubleshooting for help upgrading your Python installation.')
        raise SystemExit(1)

    if FETCH_PDF or FETCH_SCREENSHOT:
        if run(['which', CHROME_BINARY]).returncode:
            _exit_missing_dependency(CHROME_BINARY)

        # parse chrome --version e.g. Google Chrome 61.0.3114.0 canary / Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04
        try:
            result = run([CHROME_BINARY, '--version'], stdout=PIPE)
            version = _chrome_major_version(result.stdout.decode('utf-8'))
        except (TypeError, ValueError, OSError):
            # ValueError covers output that does not start with a numeric version
            print('{red}[X] Failed to parse Chrome version, is it installed properly?{reset}'.format(**ANSI))
            print(' Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
            print(' See https://github.com/pirate/bookmark-archiver for help.')
            raise SystemExit(1)

        if version < 59:
            print('{red}[X] Chrome version must be 59 or greater for headless PDF and screenshot saving{reset}'.format(**ANSI))
            print(' See https://github.com/pirate/bookmark-archiver for help.')
            raise SystemExit(1)

    # honour the configurable WGET_BINARY, the same way CHROME_BINARY is honoured above
    if FETCH_WGET and _binary_missing(WGET_BINARY):
        _exit_missing_dependency(WGET_BINARY)

    if (FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG) and _binary_missing('curl'):
        _exit_missing_dependency('curl')

    if (FETCH_AUDIO or FETCH_VIDEO) and _binary_missing('youtube-dl'):
        _exit_missing_dependency('youtube-dl')


def chmod_file(path, cwd='.', permissions=ARCHIVE_PERMISSIONS, timeout=30):
    """chmod -R <permissions> <cwd>/<path>

    path:        file or directory to chmod, relative to `cwd`
    cwd:         working directory the chmod command is run from
    permissions: mode string handed to chmod (defaults to ARCHIVE_PERMISSIONS)
    timeout:     seconds before the chmod subprocess is aborted

    Raises Exception if the path does not exist or chmod exits with a
    non-zero status; subprocess.TimeoutExpired propagates on timeout.
    """
    if not os.path.exists(os.path.join(cwd, path)):
        raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))

    chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
    # any non-zero status is a failure, not just exit code 1
    if chmod_result.returncode:
        print(' ', chmod_result.stderr.decode())
        raise Exception('Failed to chmod {}/{}'.format(cwd, path))


def progress(seconds=TIMEOUT, prefix=''):
    """Show a (subprocess-controlled) progress bar with a timeout,
    returns end() function to instantly finish the progress

    seconds: how long the bar takes to fill
    prefix:  text written before the bar on the same line

    When SHOW_PROGRESS is False nothing is drawn and the returned
    function is a no-op.
    """
    if not SHOW_PROGRESS:
        return lambda: None

    chunk = '█' if _is_utf8(sys.stdout.encoding) else '#'
    # number of progress chunks to show (aka max bar width); clamp to 1 so a
    # narrow terminal or long prefix still yields a bar that takes `seconds` to fill
    chunks = max(1, TERM_WIDTH - len(prefix) - 20)

    def progress_bar(seconds=seconds, prefix=prefix):
        """show timer in the form of progress bar, with percentage and seconds remaining"""
        try:
            for s in range(seconds * chunks):
                percent = s / chunks / seconds * 100
                bar_width = round(percent / (100 / chunks))

                # ████████████████████ 0.9% (1/60sec)
                sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)'.format(
                    prefix,
                    ANSI['green'],
                    (chunk * bar_width).ljust(chunks),
                    ANSI['reset'],
                    round(percent, 1),
                    round(s / chunks),
                    seconds,
                ))
                sys.stdout.flush()
                time.sleep(1 / chunks)

            # ██████████████████████████████████ 100.0% (60/60sec)
            sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)\n'.format(
                prefix,
                ANSI['red'],
                chunk * chunks,
                ANSI['reset'],
                100.0,
                seconds,
                seconds,
            ))
            sys.stdout.flush()
        except KeyboardInterrupt:
            # finish the bar's line on Ctrl-C instead of dumping a traceback
            print()

    p = Process(target=progress_bar)
    p.start()

    def end():
        """immediately finish progress and clear the progressbar line"""
        p.terminate()
        sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH), ANSI['reset']))  # clear whole terminal line
        sys.stdout.flush()

    return end