From aaca74f6a898ac3f1644d774a6f00fabe7e572bc Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 3 Sep 2023 21:40:12 -0700 Subject: [PATCH] only start parsing json after the first open brace --- archivebox/parsers/generic_json.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 703c5d65..daebb7c4 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -17,8 +17,10 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)""" json_file.seek(0) - next(json_file) - links = json.load(json_file) + + # sometimes the first line is a comment or filepath, so we get everything after the first { + json_file_json_str = '{' + json_file.read().split('{', 1)[-1] + links = json.loads(json_file_json_str) json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') for link in links: