diff --git a/archivebox/cli/archivebox_manage.py b/archivebox/cli/archivebox_manage.py
new file mode 100644
index 00000000..9d1c8eb3
--- /dev/null
+++ b/archivebox/cli/archivebox_manage.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+__package__ = 'archivebox.cli'
+__command__ = 'archivebox manage'
+__description__ = 'Run an ArchiveBox Django management command'
+
+import sys
+
+from ..legacy.config import OUTPUT_DIR, setup_django, check_data_folder
+
+
+def main(args=None):
+    """Run a Django management command, falling back to `help` when none is given."""
+    check_data_folder()
+
+    setup_django(OUTPUT_DIR)
+    from django.core.management import execute_from_command_line
+
+    # copy the argv list so the edits below never modify sys.argv itself
+    args = list(sys.argv) if args is None else ['archivebox', *args]
+
+    args[0] = f'{sys.argv[0]} manage'
+
+    if args[1:] == []:
+        args.append('help')
+
+    execute_from_command_line(args)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index 2d429ee2..a30043c1 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -2,16 +2,46 @@ from django.shortcuts import render
 
 from django.views import View
 
+from legacy.config import OUTPUT_DIR
+from legacy.index import load_main_index, load_main_index_meta
+
 
 class MainIndex(View):
     template = 'main_index.html'
 
     def get(self, request):
-        return render(self.template, {})
+        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        # load_main_index_meta() returns None when there is no JSON index file
+        meta_info = load_main_index_meta(out_dir=OUTPUT_DIR) or {}
+
+        context = {
+            'updated': meta_info.get('updated'),
+            'num_links': meta_info.get('num_links', len(all_links)),
+            'links': all_links,
+        }
+
+        return render(template_name=self.template, request=request, context=context)
+
+
+class AddLinks(View):
+    template = 'add_links.html'
+
+    def get(self, request):
+        context = {}
+
+        return render(template_name=self.template, request=request, context=context)
+
+
+    def post(self, request):
+        import_path = request.POST['url']
+        print(f'Adding URL: {import_path}')
+        context = {}
+
+        return render(template_name=self.template, request=request, context=context)
 
 
 class LinkDetails(View):
     template = 'link_details.html'
 
     def get(self, request):
-        return render(self.template, {})
+        return render(template_name=self.template, request=request, context={})
diff --git a/archivebox/core/welcome_message.py b/archivebox/core/welcome_message.py
new file mode 100644
index 00000000..b3a9ebf8
--- /dev/null
+++ b/archivebox/core/welcome_message.py
@@ -0,0 +1,4 @@
+print()
+print('[i] Welcome to the ArchiveBox Shell! Example usage:')
+print(' Page.objects.all()')
+print(' User.objects.all()')
diff --git a/archivebox/legacy/index.py b/archivebox/legacy/index.py
index c063b1e2..2ce7bb82 100644
--- a/archivebox/legacy/index.py
+++ b/archivebox/legacy/index.py
@@ -240,16 +240,29 @@ def write_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=
 
 
 @enforce_types
-def load_main_index(out_dir: str=OUTPUT_DIR) -> List[Link]:
+def load_main_index(out_dir: str=OUTPUT_DIR, warn: bool=True) -> List[Link]:
     """parse and load existing index with any new links from import_path merged in"""
 
     all_links: List[Link] = []
     all_links = list(parse_json_main_index(out_dir))
     links_from_sql = list(parse_sql_main_index())
-    assert set(l.url for l in all_links) == set(l['url'] for l in links_from_sql)
+
+    if warn and not set(l.url for l in all_links) == set(l['url'] for l in links_from_sql):
+        stderr('{red}[!] Warning: SQL index does not match JSON index!{reset}'.format(**ANSI))
 
     return all_links
 
+@enforce_types
+def load_main_index_meta(out_dir: str=OUTPUT_DIR) -> Optional[dict]:
+    """load the main JSON index minus its 'links' entry, or None if the file is missing"""
+    index_path = os.path.join(out_dir, JSON_INDEX_FILENAME)
+    if os.path.exists(index_path):
+        with open(index_path, 'r', encoding='utf-8') as f:
+            meta_dict = json.load(f)
+            meta_dict.pop('links', None)
+            return meta_dict
+
+    return None
 
 @enforce_types
 def import_new_links(existing_links: List[Link], import_path: str) -> Tuple[List[Link], List[Link]]:
diff --git a/archivebox/legacy/main.py b/archivebox/legacy/main.py
index 3ecdc887..20315231 100644
--- a/archivebox/legacy/main.py
+++ b/archivebox/legacy/main.py
@@ -110,7 +110,7 @@ def init():
     # call_command("createsuperuser", interactive=True)
 
     if existing_index:
-        all_links = load_main_index(out_dir=OUTPUT_DIR)
+        all_links = load_main_index(out_dir=OUTPUT_DIR, warn=False)
         write_main_index(links=list(all_links), out_dir=OUTPUT_DIR)
     else:
         write_main_index([], out_dir=OUTPUT_DIR)
@@ -130,12 +130,23 @@ def info():
 
     print('{green}[*] Scanning archive collection main index with {} links:{reset}'.format(len(all_links), **ANSI))
     print(f' {OUTPUT_DIR}')
-
     num_bytes, num_dirs, num_files = get_dir_size(OUTPUT_DIR, recursive=False)
     size = human_readable_size(num_bytes)
     print(f' > Index Size: {size} across {num_files} files')
     print()
 
+    setup_django()
+    from django.contrib.auth.models import User
+    from core.models import Page
+
+    users = User.objects.all()
+    num_pages = Page.objects.count()
+
+    print(f' > {len(users)} admin users:', ', '.join(u.username for u in users))
+    print(f' > {num_pages} pages in SQL database {SQL_INDEX_FILENAME}')
+    print(f' > {len(all_links)} pages in JSON database {JSON_INDEX_FILENAME}')
+    print()
+
     print('{green}[*] Scanning archive collection data directory with {} entries:{reset}'.format(len(all_links), **ANSI))
     print(f' {ARCHIVE_DIR}')
 
@@ -165,7 +176,6 @@ def info():
     print(f' > {num_orphaned} orphaned data directories (directories present for links that don\'t exist in the index)')
 
 
-
 @enforce_types
 def update_archive_data(import_path: Optional[str]=None, resume: Optional[float]=None, only_new: bool=False) -> List[Link]:
     """The main ArchiveBox entrancepoint. Everything starts here."""