From eb97fd427b406a332ff7b10180da769067b2d769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bart=C5=82omiej=20Piotrowski?= Date: Tue, 5 Jul 2022 10:56:40 +0200 Subject: [PATCH 1/2] Skip first line of the "JSON" file ArchiveBox moves the file to parse to the sources directory and adds the original filename at the top, making the file invalid. --- archivebox/parsers/generic_json.py | 1 + 1 file changed, 1 insertion(+) diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 0466b0f6..703c5d65 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -17,6 +17,7 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)""" json_file.seek(0) + next(json_file) links = json.load(json_file) json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') From aaca74f6a898ac3f1644d774a6f00fabe7e572bc Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 3 Sep 2023 21:40:12 -0700 Subject: [PATCH 2/2] only start parsing json after the first open brace --- archivebox/parsers/generic_json.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/archivebox/parsers/generic_json.py b/archivebox/parsers/generic_json.py index 703c5d65..daebb7c4 100644 --- a/archivebox/parsers/generic_json.py +++ b/archivebox/parsers/generic_json.py @@ -17,8 +17,10 @@ def parse_generic_json_export(json_file: IO[str], **_kwargs) -> Iterable[Link]: """Parse JSON-format bookmarks export files (produced by pinboard.in/export/, or wallabag)""" json_file.seek(0) - next(json_file) - links = json.load(json_file) + + # sometimes the first line is a comment or filepath, so we get everything after the first { + json_file_json_str = '{' + json_file.read().split('{', 1)[-1] + links = json.loads(json_file_json_str) json_date = lambda s: datetime.strptime(s, '%Y-%m-%dT%H:%M:%S%z') for link in links: