2020-11-19 11:54:13 +13:00
|
|
|
from typing import List
|
|
|
|
|
|
|
|
from sonic import IngestClient, SearchClient
|
|
|
|
|
|
|
|
from archivebox.util import enforce_types
|
2020-11-20 02:06:13 +13:00
|
|
|
from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
|
|
|
|
|
2020-11-19 11:54:13 +13:00
|
|
|
|
|
|
|
@enforce_types
def index(snapshot_id: str, texts: List[str]):
    """Push each entry of ``texts`` to the Sonic ingest channel.

    Every entry is converted with ``str()`` and pushed under the configured
    ``SONIC_COLLECTION`` / ``SONIC_BUCKET`` with ``snapshot_id`` as the
    object identifier. One ingest connection is opened for the whole batch.
    """
    connection = IngestClient(
        SEARCH_BACKEND_HOST_NAME,
        SEARCH_BACKEND_PORT,
        SEARCH_BACKEND_PASSWORD,
    )
    with connection as ingest:
        for entry in texts:
            ingest.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(entry))
|
2020-11-19 11:54:13 +13:00
|
|
|
|
|
|
|
@enforce_types
def search(text: str) -> List:
    """Query the Sonic search channel for ``text``.

    Returns whatever ``SearchClient.query`` yields for the configured
    ``SONIC_COLLECTION`` / ``SONIC_BUCKET`` (presumably the matching
    snapshot ids -- confirm against the sonic client documentation).
    """
    connection = SearchClient(
        SEARCH_BACKEND_HOST_NAME,
        SEARCH_BACKEND_PORT,
        SEARCH_BACKEND_PASSWORD,
    )
    with connection as searcher:
        matches = searcher.query(SONIC_COLLECTION, SONIC_BUCKET, text)
    return matches
|
2020-11-20 10:45:12 +13:00
|
|
|
|
|
|
|
@enforce_types
def flush(snapshot_ids: List[str]):
    """Call ``flush_object`` on the Sonic ingest channel for each snapshot id.

    Each id is converted with ``str()`` and flushed from the configured
    ``SONIC_COLLECTION`` / ``SONIC_BUCKET``. One ingest connection is
    opened for the whole batch.
    """
    connection = IngestClient(
        SEARCH_BACKEND_HOST_NAME,
        SEARCH_BACKEND_PORT,
        SEARCH_BACKEND_PASSWORD,
    )
    with connection as ingest:
        # Named ``object_id`` rather than ``id`` to avoid shadowing the builtin.
        for object_id in snapshot_ids:
            ingest.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(object_id))
|