1
0
Fork 0
mirror of synced 2024-05-29 00:30:32 +12:00

bail out on sonic indexing after 5 errors

This commit is contained in:
Nick Sweeting 2021-04-10 05:18:03 -04:00
parent 89158d5dc7
commit 3093057675

View file

@ -7,10 +7,11 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
MAX_SONIC_TEXT_TOTAL_LENGTH = 100000000 # dont index more than 100 million characters per text
MAX_SONIC_TEXT_CHUNK_LENGTH = 2000 # dont index more than 2000 characters per chunk
MAX_SONIC_ERRORS_BEFORE_ABORT = 5
@enforce_types
def index(snapshot_id: str, texts: List[str]):
error_count = 0
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
for text in texts:
chunks = (
@ -21,8 +22,14 @@ def index(snapshot_id: str, texts: List[str]):
MAX_SONIC_TEXT_CHUNK_LENGTH,
)
)
for chunk in chunks:
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
try:
for chunk in chunks:
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
except Exception as err:
print(f'[!] Sonic search backend threw an error while indexing: {err.__class__.__name__} {err}')
error_count += 1
if error_count > MAX_SONIC_ERRORS_BEFORE_ABORT:
raise
@enforce_types
def search(text: str) -> List[str]: