1
0
Fork 0
mirror of synced 2024-06-16 09:24:52 +12:00

feat: add search filter-type to list command

This commit is contained in:
JDC 2020-11-19 23:39:28 -05:00 committed by Nick Sweeting
parent fb67d6684c
commit 0f7dba07df
2 changed files with 34 additions and 2 deletions

View file

@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
parser.add_argument(
'--filter-type',
type=str,
choices=('exact', 'substring', 'domain', 'regex','tag'),
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
default='exact',
help='Type of pattern matching to use when filtering URLs',
)

View file

@ -51,6 +51,8 @@ from .sql import (
write_sql_link_details,
)
from ..search import search_backend_enabled, query_search_index
### Link filtering and checking
@enforce_types
@ -365,7 +367,7 @@ LINK_FILTERS = {
}
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
q_filter = Q()
for pattern in filter_patterns:
try:
@ -380,6 +382,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
raise SystemExit(2)
return snapshots.filter(q_filter)
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
    """Narrow ``snapshots`` down to the ones returned by the search backend.

    Each entry of ``filter_patterns`` is passed to ``query_search_index`` and
    the results are OR-ed together; the combined result is then intersected
    with ``snapshots``.

    ``filter_type`` is accepted but not read by this function.

    Raises:
        SystemExit: with status 2 when the search backend is not enabled, or
            when querying/combining results for a pattern raises an exception.
    """
    backend_available = search_backend_enabled()
    if not backend_available:
        stderr()
        stderr('[X] The search backend is not enabled', color='red')
        raise SystemExit(2)

    # Start from an empty queryset and accumulate the hits of every pattern.
    matched = get_empty_snapshot_queryset()
    for search_term in filter_patterns:
        try:
            matched |= query_search_index(search_term)
        except Exception as err:
            # Any backend failure aborts the whole command.
            stderr()
            stderr(f'[X] The search backend threw an exception={err}:', color='red')
            raise SystemExit(2)

    # Keep only the snapshots that are in both the input set and the hits.
    return snapshots & matched
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
    """Filter ``snapshots`` by ``filter_patterns`` using the given ``filter_type``.

    The ``'search'`` filter type is delegated to ``search_filter``; every
    other filter type is delegated to ``q_filter``. All three arguments are
    forwarded unchanged.
    """
    if filter_type == 'search':
        return search_filter(snapshots, filter_patterns, filter_type)
    return q_filter(snapshots, filter_patterns, filter_type)
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""indexed links without checking archive status or data directory validity"""