#!/usr/bin/env python3 import argparse import re from typing import List from archive import parse_json_link_index from config import OUTPUT_DIR from index import write_json_links_index def cleanup_index(patterns: List[str], yes=False): regexes = [re.compile(p) for p in patterns] index = parse_json_link_index(OUTPUT_DIR) links = index['links'] filtered = [] remaining = [] for l in links: url = l['url'] for r in regexes: if r.search(url): filtered.append((l, r)) break else: remaining.append(l) print("Filtered out {}/{} urls:".format(len(filtered), len(links))) for link, regex in filtered: url = link['url'] print(" {url} via {regex}".format(url=url, regex=regex.pattern)) proceed = False if yes: proceed = True else: res = input("Remove {} entries from index? [y/n] ".format(len(filtered))) proceed = res.strip().lower() in ('y', 'yes') if proceed: write_json_links_index(OUTPUT_DIR, remaining) else: exit('aborting') if __name__ == '__main__': p = argparse.ArgumentParser('Index purging tool') p.add_argument('--regex', '-r', action='append', help='Python regex to filter out') p.add_argument('--yes', action='store_true', default=False, help='Do not propmpt for confirmation') args = p.parse_args() regexes = args.regex cleanup_index(regexes, yes=args.yes)