diff --git a/archivebox/core/mixins.py b/archivebox/core/mixins.py
index 28f79b38..afae2d78 100644
--- a/archivebox/core/mixins.py
+++ b/archivebox/core/mixins.py
@@ -1,10 +1,10 @@
-from django.db.models import Q, Case, When, Value, IntegerField
+from django.contrib import messages
 
-from archivebox.search import search_index
+from archivebox.search import query_search_index
 
 class SearchResultsAdminMixin(object):
     def get_search_results(self, request, queryset, search_term):
-        ''' Show exact match for title and slug at top of admin search results.
+        ''' Enhances the search queryset with results from the search backend.
         '''
         qs, use_distinct = \
             super(SearchResultsAdminMixin, self).get_search_results(
@@ -13,9 +13,13 @@ class SearchResultsAdminMixin(object):
         search_term = search_term.strip()
         if not search_term:
             return qs, use_distinct
+        try:
+            snapshot_ids = query_search_index(search_term)
+        except Exception as err:
+            messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
+        else:
+            qsearch = queryset.filter(id__in=snapshot_ids)
+            qs |= qsearch
 
-        snapshot_ids = search_index(search_term)
-        qsearch = queryset.filter(id__in=snapshot_ids)
-        qs |= qsearch
-
-        return qs, use_distinct
\ No newline at end of file
+        # Return after the try statement: a `return` inside `finally` would swallow any in-flight exception.
+        return qs, use_distinct
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index f503e9fa..6e604224 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -1,40 +1,51 @@
-from typing import List, Optional, Union
+from typing import List, Union
 from pathlib import Path
-
-from sonic import IngestClient, SearchClient
-
-from ..index.schema import Link, ArchiveResult
-from ..util import enforce_types
-from ..config import setup_django, OUTPUT_DIR
+from importlib import import_module
 
 
-@enforce_types
-def write_sonic_index(snapshot_id: str, texts: List[str]):
-    # TODO add variables to localhost, port, password, bucket, collection
-    with IngestClient("localhost", 1491, "SecretPassword") as ingestcl:
-        for text in texts:
-            ingestcl.push("archivebox", "snapshots", snapshot_id, str(text))
-
-@enforce_types
-def search_sonic_index(text: str) -> List:
-    with SearchClient("localhost", 1491, "SecretPassword") as querycl:
-        snap_ids = querycl.query("archivebox", "snapshots", text)
-    return snap_ids
+from archivebox.index.schema import Link
+from archivebox.util import enforce_types
+from archivebox.config import setup_django, OUTPUT_DIR
 
 
-@enforce_types
-def search_index(text: str) -> List:
-    # get backend
-    return search_sonic_index(text)
+def indexing_enabled():
+    return True
+    # return FULLTEXT_INDEXING_ENABLED
 
+def search_backend_enabled():
+    return True
+    # return FULLTEXT_SEARCH_ENABLED
+
+def get_backend():
+    return 'search.backends.sonic'
+
+def import_backend():
+    backend_string = get_backend()
+    try:
+        backend = import_module(backend_string)
+    except Exception as err:
+        raise Exception("Could not load '%s' as a backend: %s" % (backend_string, err)) from err
+    return backend
 
 @enforce_types
 def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: Path=OUTPUT_DIR, skip_text_index: bool=False) -> None:
-    setup_django(out_dir, check_db=True)
-    from core.models import Snapshot
+    if not indexing_enabled():
+        return
 
     if not skip_text_index and texts:
+        setup_django(out_dir, check_db=True)
+        from core.models import Snapshot
+
         snap = Snapshot.objects.filter(url=link.url).first()
+        backend = import_backend()
         if snap:
-            # get backend
-            write_sonic_index(str(snap.id), texts)
\ No newline at end of file
+            backend.index(snapshot_id=str(snap.id), texts=texts)
+
+@enforce_types
+def query_search_index(text: str) -> List:
+    if search_backend_enabled():
+        backend = import_backend()
+        return backend.search(text)
+    else:
+        return []
+        
\ No newline at end of file
diff --git a/archivebox/search/backends/__init__.py b/archivebox/search/backends/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/search/backends/sonic.py b/archivebox/search/backends/sonic.py
new file mode 100644
index 00000000..28725f27
--- /dev/null
+++ b/archivebox/search/backends/sonic.py
@@ -0,0 +1,19 @@
+from typing import List
+
+from sonic import IngestClient, SearchClient
+
+from archivebox.util import enforce_types
+
+@enforce_types
+def index(snapshot_id: str, texts: List[str]):
+    # TODO add variables to localhost, port, password, bucket, collection
+    with IngestClient("localhost", 1491, "SecretPassword") as ingestcl:
+        for text in texts:
+            ingestcl.push("archivebox", "snapshots", snapshot_id, str(text))
+
+@enforce_types
+def search(text: str) -> List:
+    with SearchClient("localhost", 1491, "SecretPassword") as querycl:
+        snap_ids = querycl.query("archivebox", "snapshots", text)
+    return snap_ids
+    
\ No newline at end of file