1
0
Fork 0
mirror of synced 2024-06-02 02:25:20 +12:00

feat: Add support for singlefile in docker

This commit is contained in:
Cristian 2020-08-03 13:19:47 -05:00
parent 5b6eb5e4ad
commit 06d0e9de6c
3 changed files with 35 additions and 16 deletions

View file

@ -10,8 +10,8 @@
FROM python:3.8-slim-buster FROM python:3.8-slim-buster
LABEL name="archivebox" \ LABEL name="archivebox" \
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \ maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
description="All-in-one personal internet archiving container" description="All-in-one personal internet archiving container"
ENV TZ=UTC \ ENV TZ=UTC \
LANGUAGE=en_US:en \ LANGUAGE=en_US:en \
@ -22,28 +22,41 @@ ENV TZ=UTC \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \ APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
CODE_PATH=/app \ CODE_PATH=/app \
VENV_PATH=/venv \ VENV_PATH=/venv \
DATA_PATH=/data DATA_PATH=/data \
EXTRA_PATH=/extra
# First install CLI utils and base deps, then Chrome + Fonts # First install CLI utils and base deps, then Chrome + Fonts + nodejs
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \ RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
&& apt-get update -qq \ && apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \ && apt-get install -qq -y --no-install-recommends \
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \ apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
dumb-init jq git wget curl youtube-dl ffmpeg \ dumb-init jq git wget curl youtube-dl ffmpeg \
&& curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \ && curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
&& echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \ && echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
&& curl -sL https://deb.nodesource.com/setup_14.x | bash - \
&& apt-get update -qq \ && apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \ && apt-get install -qq -y --no-install-recommends \
google-chrome-stable \ google-chrome-stable \
fontconfig \ fontconfig \
fonts-ipafont-gothic \ fonts-ipafont-gothic \
fonts-wqy-zenhei \ fonts-wqy-zenhei \
fonts-thai-tlwg \ fonts-thai-tlwg \
fonts-kacst \ fonts-kacst \
fonts-symbola \ fonts-symbola \
fonts-noto \ fonts-noto \
fonts-freefont-ttf \ fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/* nodejs \
unzip \
&& rm -rf /var/lib/apt/lists/*
# Download a snapshot of the SingleFile repo (master branch), install the CLI's
# production npm dependencies, and symlink the launcher into /bin so archivebox
# can find it.
# - The zip is removed in the same layer so it does not add to the image size.
# - npm output is limited to errors instead of being discarded entirely, so a
#   failed install can still be diagnosed from the build log.
WORKDIR "$EXTRA_PATH"
RUN wget -q -O SingleFile.zip https://github.com/gildas-lormeau/SingleFile/archive/master.zip \
    && unzip -q SingleFile.zip \
    && rm SingleFile.zip \
    && npm install --prefix SingleFile-master/cli --production --loglevel=error \
    && chmod +x SingleFile-master/cli/single-file \
    && ln -s "$EXTRA_PATH/SingleFile-master/cli/single-file" "/bin/single-file"
# Run everything from here on out as non-privileged user # Run everything from here on out as non-privileged user
RUN groupadd --system archivebox \ RUN groupadd --system archivebox \

View file

@ -3,11 +3,13 @@ __package__ = 'archivebox.extractors'
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
import json
from ..index.schema import Link, ArchiveResult, ArchiveError from ..index.schema import Link, ArchiveResult, ArchiveError
from ..system import run, chmod_file from ..system import run, chmod_file
from ..util import ( from ..util import (
enforce_types, enforce_types,
chrome_args
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
@ -34,10 +36,13 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
out_dir = out_dir or link.link_dir out_dir = out_dir or link.link_dir
output = str(Path(out_dir).absolute() / "singlefile.html") output = str(Path(out_dir).absolute() / "singlefile.html")
browser_args = chrome_args(TIMEOUT=0)
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli # SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
cmd = [ cmd = [
SINGLEFILE_BINARY, SINGLEFILE_BINARY,
'--browser-executable-path={}'.format(CHROME_BINARY), '--browser-executable-path={}'.format(CHROME_BINARY),
'--browser-args="{}"'.format(json.dumps(browser_args[1:])),
link.url, link.url,
output output
] ]

View file

@ -518,6 +518,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
@enforce_types @enforce_types
def printable_dependency_version(name: str, dependency: Dict) -> str: def printable_dependency_version(name: str, dependency: Dict) -> str:
version = None
if dependency['enabled']: if dependency['enabled']:
if dependency['is_valid']: if dependency['is_valid']:
color, symbol, note, version = 'green', '', 'valid', '' color, symbol, note, version = 'green', '', 'valid', ''