1
0
Fork 0
Mirror of the upstream repository, last synced 2024-06-02 02:25:20 +12:00

Merge pull request #584 from jdcaballerov/ripgrep-configs

This commit is contained in:
Nick Sweeting 2020-12-12 10:21:49 -05:00 committed by GitHub
commit 154d31263b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 4 deletions

View file

@@ -161,6 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True},
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
@@ -170,6 +171,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'NODE_BINARY': {'type': str, 'default': 'node'},
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
'CHROME_BINARY': {'type': str, 'default': None},
'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
@@ -312,6 +314,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
@@ -827,6 +830,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_CHROME'],
'is_valid': bool(config['CHROME_VERSION']),
},
'RIPGREP_BINARY': {
'path': bin_path(config['RIPGREP_BINARY']),
'version': config['RIPGREP_VERSION'],
'hash': bin_hash(config['RIPGREP_BINARY']),
'enabled': config['USE_RIPGREP'],
'is_valid': bool(config['RIPGREP_VERSION']),
},
}
def get_chrome_info(config: ConfigDict) -> ConfigValue:

View file

@@ -2,7 +2,7 @@ import re
from subprocess import run, PIPE, DEVNULL
from typing import List, Generator
from archivebox.config import ARCHIVE_DIR
from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION
from archivebox.util import enforce_types
RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
@@ -26,8 +26,7 @@ def flush(snapshot_ids: Generator[str, None, None]):
@enforce_types
def search(text: str) -> List[str]:
is_rg_installed = run(['which', 'rg'], stdout=DEVNULL, stderr=DEVNULL)
if is_rg_installed.returncode:
if not RIPGREP_VERSION:
raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
from core.models import Snapshot
@@ -44,4 +43,3 @@ def search(text: str) -> List[str]:
snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
return snap_ids