def index(snapshot_id: str, texts: List[str]):
    """Push the given texts for one snapshot through an IngestClient.

    Every text is cut into consecutive slices of at most
    MAX_SONIC_TEXT_LENGTH characters, and each slice is pushed separately
    to SONIC_COLLECTION / SONIC_BUCKET under the same ``snapshot_id``.
    A text of length zero yields no slices, so nothing is pushed for it.

    Args:
        snapshot_id: Identifier the pushed slices are stored under.
        texts: Texts to push; each one is sliced independently.
    """
    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
        for text in texts:
            # Walk the text in fixed-size windows; the final window may be shorter.
            for start in range(0, len(text), MAX_SONIC_TEXT_LENGTH):
                piece = text[start:start + MAX_SONIC_TEXT_LENGTH]
                ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(piece))