1
0
Fork 0
mirror of synced 2024-06-29 03:20:58 +12:00

add comment about why DOM is preferred over singlefile for readability parsing

This commit is contained in:
Nick Sweeting 2024-01-03 19:00:19 -08:00
parent fcdc41a1ab
commit 5b07a1126c
2 changed files with 3 additions and 0 deletions

View file

@@ -99,6 +99,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
except (Exception, OSError) as err:
status = 'failed'
output = err
# prefer Chrome dom output to singlefile because singlefile often contains huge url(data:image/...base64) strings that make the html too long to parse with readability
cmd = [cmd[0], './{dom,singlefile}.html']
finally:
timer.end()

View file

@@ -177,6 +177,7 @@
}
</script>
{% endif %}
<script>
$ = django.jQuery;
$.fn.reverse = [].reverse;