Merge pull request #607 from ArchiveBox/dev
This commit is contained in:
commit
eb2eecc24a
|
@ -26,7 +26,7 @@
|
||||||
<hr/>
|
<hr/>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a varitety of formats depending on the configuration and the content it detects.
|
ArchiveBox is a powerful self-hosted internet archiving solution written in Python 3. You feed it URLs of pages you want to archive, and it saves them to disk in a variety of formats depending on the configuration and the content it detects.
|
||||||
|
|
||||||
Your archive can be managed through the command line with commands like `archivebox add`, through the built-in Web UI `archivebox server`, or via the Python library API (beta). It can ingest bookmarks from a browser or service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time. You can also schedule regular/realtime imports with `archivebox schedule`.
|
Your archive can be managed through the command line with commands like `archivebox add`, through the built-in Web UI `archivebox server`, or via the Python library API (beta). It can ingest bookmarks from a browser or service like Pocket/Pinboard, your entire browsing history, RSS feeds, or URLs one at a time. You can also schedule regular/realtime imports with `archivebox schedule`.
|
||||||
|
|
||||||
|
|
18
archivebox/core/migrations/0008_auto_20210105_1421.py
Normal file
18
archivebox/core/migrations/0008_auto_20210105_1421.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by Django 3.1.3 on 2021-01-05 14:21
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('core', '0007_archiveresult'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='archiveresult',
|
||||||
|
name='cmd_version',
|
||||||
|
field=models.CharField(blank=True, default=None, max_length=32, null=True),
|
||||||
|
),
|
||||||
|
]
|
|
@ -181,7 +181,7 @@ class ArchiveResult(models.Model):
|
||||||
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
|
||||||
cmd = JSONField()
|
cmd = JSONField()
|
||||||
pwd = models.CharField(max_length=256)
|
pwd = models.CharField(max_length=256)
|
||||||
cmd_version = models.CharField(max_length=32)
|
cmd_version = models.CharField(max_length=32, default=None, null=True, blank=True)
|
||||||
output = models.CharField(max_length=512)
|
output = models.CharField(max_length=512)
|
||||||
start_ts = models.DateTimeField()
|
start_ts = models.DateTimeField()
|
||||||
end_ts = models.DateTimeField()
|
end_ts = models.DateTimeField()
|
||||||
|
|
|
@ -124,7 +124,7 @@ def validate_links(links: Iterable[Link]) -> List[Link]:
|
||||||
timer = TimedProgress(TIMEOUT * 4)
|
timer = TimedProgress(TIMEOUT * 4)
|
||||||
try:
|
try:
|
||||||
links = archivable_links(links) # remove chrome://, about:, mailto: etc.
|
links = archivable_links(links) # remove chrome://, about:, mailto: etc.
|
||||||
links = sorted_links(links) # deterministically sort the links based on timstamp, url
|
links = sorted_links(links) # deterministically sort the links based on timestamp, url
|
||||||
links = fix_duplicate_links(links) # merge/dedupe duplicate timestamps & urls
|
links = fix_duplicate_links(links) # merge/dedupe duplicate timestamps & urls
|
||||||
finally:
|
finally:
|
||||||
timer.end()
|
timer.end()
|
||||||
|
|
Loading…
Reference in a new issue