diff --git a/Dockerfile b/Dockerfile index 97bd1bd1..7d76ea1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,8 @@ FROM python:3.8-slim-buster LABEL name="archivebox" \ - maintainer="Nick Sweeting " \ - description="All-in-one personal internet archiving container" + maintainer="Nick Sweeting " \ + description="All-in-one personal internet archiving container" ENV TZ=UTC \ LANGUAGE=en_US:en \ @@ -22,28 +22,41 @@ ENV TZ=UTC \ APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \ CODE_PATH=/app \ VENV_PATH=/venv \ - DATA_PATH=/data + DATA_PATH=/data \ + EXTRA_PATH=/extra -# First install CLI utils and base deps, then Chrome + Fons +# First install CLI utils and base deps, then Chrome + Fonts + nodejs RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \ && apt-get update -qq \ && apt-get install -qq -y --no-install-recommends \ - apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \ - dumb-init jq git wget curl youtube-dl ffmpeg \ + apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \ + dumb-init jq git wget curl youtube-dl ffmpeg \ && curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \ && echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \ + && curl -sL https://deb.nodesource.com/setup_14.x | bash - \ && apt-get update -qq \ && apt-get install -qq -y --no-install-recommends \ - google-chrome-stable \ - fontconfig \ - fonts-ipafont-gothic \ - fonts-wqy-zenhei \ - fonts-thai-tlwg \ - fonts-kacst \ - fonts-symbola \ - fonts-noto \ - fonts-freefont-ttf \ - && rm -rf /var/lib/apt/lists/* + google-chrome-stable \ + fontconfig \ + fonts-ipafont-gothic \ + fonts-wqy-zenhei \ + fonts-thai-tlwg \ + fonts-kacst \ + fonts-symbola \ + fonts-noto \ + fonts-freefont-ttf \ + nodejs \ + unzip \ + && rm -rf /var/lib/apt/lists/* + +# Download SingleFile and symlink its CLI into /bin so archivebox can find it 
+ +WORKDIR "$EXTRA_PATH" +RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip > SingleFile.zip \ + && unzip -q SingleFile.zip \ + && npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \ + && chmod +x SingleFile-master/cli/single-file \ + && ln -s "$EXTRA_PATH/SingleFile-master/cli/single-file" "/bin/single-file" # Run everything from here on out as non-privileged user RUN groupadd --system archivebox \ diff --git a/archivebox/extractors/singlefile.py b/archivebox/extractors/singlefile.py index 60ebdab6..3a1a3759 100644 --- a/archivebox/extractors/singlefile.py +++ b/archivebox/extractors/singlefile.py @@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors' from pathlib import Path from typing import Optional +import json from ..index.schema import Link, ArchiveResult, ArchiveError from ..system import run, chmod_file from ..util import ( enforce_types, + chrome_args ) from ..config import ( TIMEOUT, @@ -34,10 +36,13 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU out_dir = out_dir or link.link_dir output = str(Path(out_dir).absolute() / "singlefile.html") + browser_args = chrome_args(TIMEOUT=0) + # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli cmd = [ SINGLEFILE_BINARY, '--browser-executable-path={}'.format(CHROME_BINARY), + '--browser-args="{}"'.format(json.dumps(browser_args[1:])), link.url, output ] diff --git a/archivebox/logging_util.py b/archivebox/logging_util.py index c44f87f1..684f3d80 100644 --- a/archivebox/logging_util.py +++ b/archivebox/logging_util.py @@ -518,6 +518,7 @@ def printable_folder_status(name: str, folder: Dict) -> str: @enforce_types def printable_dependency_version(name: str, dependency: Dict) -> str: + version = None if dependency['enabled']: if dependency['is_valid']: color, symbol, note, version = 'green', '√', 'valid', ''