1
0
Fork 0
mirror of synced 2024-05-17 02:43:16 +12:00

Add ARGS and EXTRA_ARGS for Mercury extractor

This commit is contained in:
Ben Muthalaly 2024-03-05 21:15:38 -06:00
parent d8cf09c21e
commit f4deb97f59
2 changed files with 14 additions and 4 deletions

View file

@ -199,6 +199,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'GIT_ARGS': {'type': list, 'default': ['--recursive']}, 'GIT_ARGS': {'type': list, 'default': ['--recursive']},
'SINGLEFILE_ARGS': {'type': list, 'default': None}, 'SINGLEFILE_ARGS': {'type': list, 'default': None},
'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None}, 'SINGLEFILE_EXTRA_ARGS': {'type': list, 'default': None},
'MERCURY_ARGS': {'type': list, 'default': ['--format=text']},
'MERCURY_EXTRA_ARGS': {'type': list, 'default': None},
'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'}, 'FAVICON_PROVIDER': {'type': str, 'default': 'https://www.google.com/s2/favicons?domain={}'},
}, },
@ -561,6 +563,8 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']}, 'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750 'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
'MERCURY_ARGS': {'default': lambda c: c['MERCURY_ARGS'] or []},
'MERCURY_EXTRA_ARGS': {'default': lambda c: c['MERCURY_EXTRA_ARGS'] or []},
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']}, 'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None}, 'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},

View file

@ -11,13 +11,15 @@ from ..system import run, atomic_write
from ..util import ( from ..util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
dedupe,
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
SAVE_MERCURY, SAVE_MERCURY,
DEPENDENCIES, DEPENDENCIES,
MERCURY_VERSION, MERCURY_VERSION,
MERCURY_ARGS,
MERCURY_EXTRA_ARGS,
) )
from ..logging_util import TimedProgress from ..logging_util import TimedProgress
@ -60,12 +62,16 @@ def save_mercury(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT)
timer = TimedProgress(timeout, prefix=' ') timer = TimedProgress(timeout, prefix=' ')
try: try:
output_folder.mkdir(exist_ok=True) output_folder.mkdir(exist_ok=True)
# later options take precedence
# Get plain text version of article options = [
*MERCURY_ARGS,
*MERCURY_EXTRA_ARGS,
]
# By default, get plain text version of article
cmd = [ cmd = [
DEPENDENCIES['MERCURY_BINARY']['path'], DEPENDENCIES['MERCURY_BINARY']['path'],
link.url, link.url,
"--format=text" *dedupe(options)
] ]
result = run(cmd, cwd=out_dir, timeout=timeout) result = run(cmd, cwd=out_dir, timeout=timeout)
try: try: