1
0
Fork 0
mirror of synced 2024-06-02 10:34:43 +12:00

wip initial django setup

This commit is contained in:
Nick Sweeting 2019-04-02 16:36:41 -04:00
parent 03f300d050
commit 27708152d2
36 changed files with 257 additions and 59 deletions

View file

@ -1,5 +1,5 @@
__name__ = 'archivebox'
__package__ = 'archivebox'
#__name__ = 'archivebox'
#__package__ = 'archivebox'

View file

@ -0,0 +1 @@
../VERSION

View file

View file

@ -0,0 +1,123 @@
"""
Django settings for archivebox project.
Generated by 'django-admin startproject' using Django 2.1.7.
For more information on this file, see
https://docs.djangoproject.com/en/2.1/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/2.1/ref/settings/
"""
import os
# Build paths inside the project like this: os.path.join(COLLECTION_DIR, ...)

# REPO_DIR is the parent of the directory containing this settings file;
# COLLECTION_DIR is whatever directory the process was started from.
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
COLLECTION_DIR = os.path.abspath(os.curdir)

# NOTE: the debug print() calls and the module-level `raise SystemExit(0)` that
# used to follow were removed: they terminated every process that imported this
# module, so none of the settings below were ever defined.


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/2.1/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'm-ma!-z^0b5w4%**le#ig!7-d@h($t02q*96h*-ua+$lm9bvao'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',

    'core',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'archivebox.urls'

# Templates are looked up in <REPO_DIR>/themes/<ACTIVE_THEME>/ in addition to
# each installed app's own templates directory (APP_DIRS).
ACTIVE_THEME = 'default'
TEMPLATES_DIR = os.path.join(REPO_DIR, 'themes', ACTIVE_THEME)
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [TEMPLATES_DIR],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'archivebox.wsgi.application'


# Database
# https://docs.djangoproject.com/en/2.1/ref/settings/#databases

# The SQLite file lives in the directory the process was started from
# (COLLECTION_DIR), not inside the repository checkout.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(COLLECTION_DIR, 'database.sqlite3'),
    }
}


# Password validation
# https://docs.djangoproject.com/en/2.1/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/2.1/topics/i18n/

LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/2.1/howto/static-files/

STATIC_URL = '/static/'

View file

@ -0,0 +1,21 @@
"""archivebox URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/2.1/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path
# Route table for the project: for now only the built-in Django admin site
# is exposed, under the /admin/ prefix.
urlpatterns = [path('admin/', admin.site.urls)]

View file

@ -0,0 +1,16 @@
"""
WSGI config for archivebox project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/2.1/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Default the settings module to archivebox.settings; setdefault() leaves any
# DJANGO_SETTINGS_MODULE value already present in the environment untouched.
# This must run before the application object is built on the next line.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
# Module-level WSGI callable (the name ``application`` is what
# WSGI_APPLICATION = 'archivebox.wsgi.application' in the settings refers to).
application = get_wsgi_application()

View file

3
archivebox/core/admin.py Normal file
View file

@ -0,0 +1,3 @@
from django.contrib import admin
# Register your models here.

5
archivebox/core/apps.py Normal file
View file

@ -0,0 +1,5 @@
from django.apps import AppConfig
class CoreConfig(AppConfig):
    """Django application configuration for the ``core`` app."""

    # Dotted import path of the application this config applies to.
    name = 'core'

View file

@ -16,11 +16,11 @@ import shutil
from typing import List, Optional
from .schema import Link
from .links import links_after_timestamp
from .index import write_links_index, load_links_index
from .archive_methods import archive_link
from .config import (
from core.schema import Link
from core.links import links_after_timestamp
from core.index import write_links_index, load_links_index
from core.archive_methods import archive_link
from core.config import (
ONLY_NEW,
OUTPUT_DIR,
VERSION,
@ -41,12 +41,12 @@ from .config import (
FETCH_GIT,
FETCH_MEDIA,
)
from .util import (
from core.util import (
enforce_types,
handle_stdin_import,
handle_file_import,
)
from .logs import (
from core.logs import (
log_archiving_started,
log_archiving_paused,
log_archiving_finished,
@ -142,7 +142,7 @@ def main(args=None) -> None:
" If you're trying to create a new archive, you must run archivebox inside a completely empty directory."
"\n\n"
" {lightred}Hint:{reset} To import a data folder created by an older version of ArchiveBox, \n"
" just cd into the folder and run the archivebox comamnd to pick up where you left off.\n\n"
" just cd into the folder and run the archivebox command to pick up where you left off.\n\n"
" (Always make sure your data folder is backed up first before updating ArchiveBox)"
).format(OUTPUT_DIR, **ANSI)
)

View file

@ -4,13 +4,13 @@ from typing import Dict, List, Tuple, Optional
from collections import defaultdict
from datetime import datetime
from .schema import Link, ArchiveResult, ArchiveOutput
from .index import (
from core.schema import Link, ArchiveResult, ArchiveOutput
from core.index import (
write_link_index,
patch_links_index,
load_json_link_index,
)
from .config import (
from core.config import (
CURL_BINARY,
GIT_BINARY,
WGET_BINARY,
@ -40,7 +40,7 @@ from .config import (
YOUTUBEDL_VERSION,
WGET_AUTO_COMPRESSION,
)
from .util import (
from core.util import (
enforce_types,
domain,
extension,
@ -54,7 +54,7 @@ from .util import (
chrome_args,
run, PIPE, DEVNULL,
)
from .logs import (
from core.logs import (
log_link_archiving_started,
log_link_archiving_finished,
log_archive_method_started,

View file

@ -5,8 +5,8 @@ from datetime import datetime
from string import Template
from typing import List, Tuple, Iterator, Optional, Mapping
from .schema import Link, ArchiveResult
from .config import (
from core.schema import Link, ArchiveResult
from core.config import (
OUTPUT_DIR,
TEMPLATES_DIR,
VERSION,
@ -14,7 +14,8 @@ from .config import (
FOOTER_INFO,
TIMEOUT,
)
from .util import (
from core.util import (
ts_to_date,
merge_links,
urlencode,
htmlencode,
@ -26,9 +27,9 @@ from .util import (
copy_and_overwrite,
atomic_write,
)
from .parse import parse_links
from .links import validate_links
from .logs import (
from core.parse import parse_links
from core.links import validate_links
from core.logs import (
log_indexing_process_started,
log_indexing_started,
log_indexing_finished,
@ -284,6 +285,7 @@ def write_html_link_index(link: Link, link_dir: Optional[str]=None) -> None:
'tags': link.tags or 'untagged',
'status': 'archived' if link.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date(link.oldest_archive_date),
}
html_index = Template(link_html).substitute(**template_vars)

View file

@ -1,14 +1,14 @@
from typing import Iterable
from collections import OrderedDict
from .schema import Link
from .util import (
from core.schema import Link
from core.util import (
scheme,
fuzzy_url,
merge_links,
)
from .config import URL_BLACKLIST_PTN
from core.config import URL_BLACKLIST_PTN
def validate_links(links: Iterable[Link]) -> Iterable[Link]:

View file

@ -5,8 +5,8 @@ from datetime import datetime
from dataclasses import dataclass
from typing import Optional
from .schema import Link, ArchiveResult
from .config import ANSI, OUTPUT_DIR
from core.schema import Link, ArchiveResult
from core.config import ANSI, OUTPUT_DIR
@dataclass

View file

@ -0,0 +1,10 @@
from django.core.management.base import BaseCommand
from core.archive import main
class Command(BaseCommand):
    """Management command that hands control to ``core.archive.main``."""

    help = 'ArchiveBox test.bee'

    def handle(self, *args, **kwargs):
        """Invoke the archiver entry point.

        The positional and keyword options received from Django are accepted
        but not forwarded; ``main`` is called with no arguments.
        """
        main()

View file

View file

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

View file

@ -24,8 +24,8 @@ from typing import Tuple, List, IO, Iterable
from datetime import datetime
import xml.etree.ElementTree as etree
from .config import TIMEOUT
from .util import (
from core.config import TIMEOUT
from core.util import (
htmldecode,
str_between,
URL_REGEX,

View file

@ -6,8 +6,8 @@ from os.path import exists, join
from shutil import rmtree
from typing import List
from .config import ARCHIVE_DIR, OUTPUT_DIR
from .index import parse_json_links_index, write_html_links_index, write_json_links_index
from core.config import ARCHIVE_DIR, OUTPUT_DIR
from core.index import parse_json_links_index, write_html_links_index, write_json_links_index
def cleanup_index(regexes: List[str], proceed: bool, delete: bool) -> None:

View file

@ -221,28 +221,20 @@ class Link:
return ts_to_date(self.updated) if self.updated else None
@property
def oldest_archive_date(self) -> Optional[datetime]:
from .util import ts_to_date
def archive_dates(self) -> List[datetime]:
return [
result.start_ts
for method in self.history.keys()
for result in self.history[method]
]
most_recent = min(
(ts_to_date(result.start_ts)
for method in self.history.keys()
for result in self.history[method]),
default=None,
)
return ts_to_date(most_recent) if most_recent else None
@property
def oldest_archive_date(self) -> Optional[datetime]:
return min(self.archive_dates, default=None)
@property
def newest_archive_date(self) -> Optional[datetime]:
from .util import ts_to_date
most_recent = max(
(ts_to_date(result.start_ts)
for method in self.history.keys()
for result in self.history[method]),
default=None,
)
return ts_to_date(most_recent) if most_recent else None
return max(self.archive_dates, default=None)
### Archive Status Helpers
@property

3
archivebox/core/tests.py Normal file
View file

@ -0,0 +1,3 @@
from django.test import TestCase
# Create your tests here.

View file

@ -26,8 +26,8 @@ from subprocess import (
from base32_crockford import encode as base32_encode # type: ignore
from .schema import Link
from .config import (
from core.schema import Link
from core.config import (
ANSI,
TERM_WIDTH,
SOURCES_DIR,
@ -40,7 +40,7 @@ from .config import (
CHROME_OPTIONS,
PYTHON_DIR,
)
from .logs import pretty_path
from core.logs import pretty_path
### Parsing Helpers
@ -62,17 +62,17 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links
without_www = lambda url: url.replace('://www.', '://', 1)
without_trailing_slash = lambda url: url[:-1] if url[-1] == '/' else url.replace('/?', '?')
fuzzy_url = lambda url: without_trailing_slash(without_www(without_scheme(url.lower())))
short_ts = lambda ts: str(parse_date(ts).timestamp()).split('.')[0]
ts_to_date = lambda ts: parse_date(ts).strftime('%Y-%m-%d %H:%M')
ts_to_iso = lambda ts: parse_date(ts).isoformat()
hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20]
urlencode = lambda s: s and quote(s, encoding='utf-8', errors='replace')
urldecode = lambda s: s and unquote(s)
htmlencode = lambda s: s and escape(s, quote=True)
htmldecode = lambda s: s and unescape(s)
hashurl = lambda url: base32_encode(int(sha256(base_url(url).encode('utf-8')).hexdigest(), 16))[:20]
short_ts = lambda ts: str(parse_date(ts).timestamp()).split('.')[0]
ts_to_date = lambda ts: ts and parse_date(ts).strftime('%Y-%m-%d %H:%M')
ts_to_iso = lambda ts: ts and parse_date(ts).isoformat()
URL_REGEX = re.compile(
r'http[s]?://' # start matching from allowed schemes
@ -357,11 +357,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
def parse_date(date: Any) -> Optional[datetime]:
"""Parse unix timestamps, iso format, and human-readable strings"""
if isinstance(date, datetime):
return date
if date is None:
return None
if isinstance(date, datetime):
return date
if isinstance(date, (float, int)):
date = str(date)

3
archivebox/core/views.py Normal file
View file

@ -0,0 +1,3 @@
from django.shortcuts import render
# Create your views here.

15
archivebox/manage.py Executable file
View file

@ -0,0 +1,15 @@
#!/usr/bin/env python
import os
import sys
def main():
    """Select the archivebox settings module and dispatch the CLI arguments to Django."""
    # Respect a DJANGO_SETTINGS_MODULE already set by the caller.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'archivebox.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        # Re-raise with a friendlier hint while keeping the original cause.
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()

View file

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

View file

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

View file

Before

Width:  |  Height:  |  Size: 158 B

After

Width:  |  Height:  |  Size: 158 B

View file

Before

Width:  |  Height:  |  Size: 201 B

After

Width:  |  Height:  |  Size: 201 B

View file

Before

Width:  |  Height:  |  Size: 157 B

After

Width:  |  Height:  |  Size: 157 B

View file

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View file

@ -1,3 +1,4 @@
django
base32-crockford
setuptools