From 5498822a9702e9c81943ef64fe32f5d38d78fc74 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 10 Jun 2018 22:13:56 -0400 Subject: [PATCH] fix parsing of chrome and ff histories --- archiver/parse.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/archiver/parse.py b/archiver/parse.py index c2e72306..c84edf88 100644 --- a/archiver/parse.py +++ b/archiver/parse.py @@ -93,14 +93,19 @@ def parse_json_export(json_file): # {"href":"http:\/\/www.reddit.com\/r\/example","description":"title here","extended":"","meta":"18a973f09c9cc0608c116967b64e0419","hash":"910293f019c2f4bb1a749fb937ba58e3","time":"2014-06-14T15:51:42Z","shared":"no","toread":"no","tags":"reddit android"}] if line: erg = line - time = datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ') + if erg.get('timestamp'): + timestamp = str(erg['timestamp']/10000000) # chrome/ff histories use a very precise timestamp + elif erg.get('time'): + timestamp = str(datetime.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ').timestamp()) + else: + timestamp = str(datetime.now().timestamp()) info = { 'url': erg['href'], 'domain': domain(erg['href']), 'base_url': base_url(erg['href']), - 'timestamp': erg.get('timestamp') or str(time.timestamp()), - 'tags': erg['tags'], - 'title': erg['description'].replace(' — Readability', ''), + 'timestamp': timestamp, + 'tags': erg.get('tags') or '', + 'title': (erg.get('description') or '').replace(' — Readability', ''), 'sources': [json_file.name], } info['type'] = get_link_type(info)