diff --git a/.gitmodules b/.gitmodules
index fb41f516..0993934a 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -14,3 +14,12 @@
[submodule "docker"]
path = docker
url = https://github.com/ArchiveBox/docker-archivebox.git
+[submodule "archivebox/vendor/base32-crockford"]
+ path = archivebox/vendor/base32-crockford
+ url = https://github.com/jbittel/base32-crockford
+[submodule "archivebox/vendor/pocket"]
+ path = archivebox/vendor/pocket
+ url = https://github.com/tapanpandita/pocket
+[submodule "archivebox/vendor/django-taggit"]
+ path = archivebox/vendor/django-taggit
+ url = https://github.com/jazzband/django-taggit
diff --git a/archivebox/vendor/base32-crockford b/archivebox/vendor/base32-crockford
new file mode 160000
index 00000000..1ffb6021
--- /dev/null
+++ b/archivebox/vendor/base32-crockford
@@ -0,0 +1 @@
+Subproject commit 1ffb6021485b666ea6a562abd0a1ea6f7021188f
diff --git a/archivebox/vendor/base32_crockford.py b/archivebox/vendor/base32_crockford.py
deleted file mode 100644
index 07dac08c..00000000
--- a/archivebox/vendor/base32_crockford.py
+++ /dev/null
@@ -1,172 +0,0 @@
-"""
-base32-crockford
-================
-
-A Python module implementing the alternate base32 encoding as described
-by Douglas Crockford at: http://www.crockford.com/wrmg/base32.html.
-
-He designed the encoding to:
-
- * Be human and machine readable
- * Be compact
- * Be error resistant
- * Be pronounceable
-
-It uses a symbol set of 10 digits and 22 letters, excluding I, L O and
-U. Decoding is not case sensitive, and 'i' and 'l' are converted to '1'
-and 'o' is converted to '0'. Encoding uses only upper-case characters.
-
-Hyphens may be present in symbol strings to improve readability, and
-are removed when decoding.
-
-A check symbol can be appended to a symbol string to detect errors
-within the string.
-
-"""
-
-import re
-import sys
-
-PY3 = sys.version_info[0] == 3
-
-if not PY3:
- import string as str
-
-
-__all__ = ["encode", "decode", "normalize"]
-
-
-if PY3:
- string_types = (str,)
-else:
- string_types = (basestring,) # noqa
-
-# The encoded symbol space does not include I, L, O or U
-symbols = '0123456789ABCDEFGHJKMNPQRSTVWXYZ'
-# These five symbols are exclusively for checksum values
-check_symbols = '*~$=U'
-
-encode_symbols = dict((i, ch) for (i, ch) in enumerate(symbols + check_symbols))
-decode_symbols = dict((ch, i) for (i, ch) in enumerate(symbols + check_symbols))
-normalize_symbols = str.maketrans('IiLlOo', '111100')
-valid_symbols = re.compile('^[%s]+[%s]?$' % (symbols,
- re.escape(check_symbols)))
-
-base = len(symbols)
-check_base = len(symbols + check_symbols)
-
-
-def encode(number, checksum=False, split=0):
- """Encode an integer into a symbol string.
-
- A ValueError is raised on invalid input.
-
- If checksum is set to True, a check symbol will be
- calculated and appended to the string.
-
- If split is specified, the string will be divided into
- clusters of that size separated by hyphens.
-
- The encoded string is returned.
- """
- number = int(number)
- if number < 0:
- raise ValueError("number '%d' is not a positive integer" % number)
-
- split = int(split)
- if split < 0:
- raise ValueError("split '%d' is not a positive integer" % split)
-
- check_symbol = ''
- if checksum:
- check_symbol = encode_symbols[number % check_base]
-
- if number == 0:
- return '0' + check_symbol
-
- symbol_string = ''
- while number > 0:
- remainder = number % base
- number //= base
- symbol_string = encode_symbols[remainder] + symbol_string
- symbol_string = symbol_string + check_symbol
-
- if split:
- chunks = []
- for pos in range(0, len(symbol_string), split):
- chunks.append(symbol_string[pos:pos + split])
- symbol_string = '-'.join(chunks)
-
- return symbol_string
-
-
-def decode(symbol_string, checksum=False, strict=False):
- """Decode an encoded symbol string.
-
- If checksum is set to True, the string is assumed to have a
- trailing check symbol which will be validated. If the
- checksum validation fails, a ValueError is raised.
-
- If strict is set to True, a ValueError is raised if the
- normalization step requires changes to the string.
-
- The decoded string is returned.
- """
- symbol_string = normalize(symbol_string, strict=strict)
- if checksum:
- symbol_string, check_symbol = symbol_string[:-1], symbol_string[-1]
-
- number = 0
- for symbol in symbol_string:
- number = number * base + decode_symbols[symbol]
-
- if checksum:
- check_value = decode_symbols[check_symbol]
- modulo = number % check_base
- if check_value != modulo:
- raise ValueError("invalid check symbol '%s' for string '%s'" %
- (check_symbol, symbol_string))
-
- return number
-
-
-def normalize(symbol_string, strict=False):
- """Normalize an encoded symbol string.
-
- Normalization provides error correction and prepares the
- string for decoding. These transformations are applied:
-
- 1. Hyphens are removed
- 2. 'I', 'i', 'L' or 'l' are converted to '1'
- 3. 'O' or 'o' are converted to '0'
- 4. All characters are converted to uppercase
-
- A TypeError is raised if an invalid string type is provided.
-
- A ValueError is raised if the normalized string contains
- invalid characters.
-
- If the strict parameter is set to True, a ValueError is raised
- if any of the above transformations are applied.
-
- The normalized string is returned.
- """
- if isinstance(symbol_string, string_types):
- if not PY3:
- try:
- symbol_string = symbol_string.encode('ascii')
- except UnicodeEncodeError:
- raise ValueError("string should only contain ASCII characters")
- else:
- raise TypeError("string is of invalid type %s" %
- symbol_string.__class__.__name__)
-
- norm_string = symbol_string.replace('-', '').translate(normalize_symbols).upper()
-
- if not valid_symbols.match(norm_string):
- raise ValueError("string '%s' contains invalid characters" % norm_string)
-
- if strict and norm_string != symbol_string:
- raise ValueError("string '%s' requires normalization" % symbol_string)
-
- return norm_string
diff --git a/archivebox/vendor/base32_crockford.py b/archivebox/vendor/base32_crockford.py
new file mode 120000
index 00000000..a5d9c64f
--- /dev/null
+++ b/archivebox/vendor/base32_crockford.py
@@ -0,0 +1 @@
+base32-crockford/base32_crockford.py
\ No newline at end of file
diff --git a/archivebox/vendor/django-taggit b/archivebox/vendor/django-taggit
new file mode 160000
index 00000000..1e4dca37
--- /dev/null
+++ b/archivebox/vendor/django-taggit
@@ -0,0 +1 @@
+Subproject commit 1e4dca37e534ca70e99c39fb4198970eb8aad5aa
diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket
new file mode 160000
index 00000000..3a0c5c76
--- /dev/null
+++ b/archivebox/vendor/pocket
@@ -0,0 +1 @@
+Subproject commit 3a0c5c76832b0e92923383af3f9831ece7901c2f
diff --git a/archivebox/vendor/pocket.py b/archivebox/vendor/pocket.py
deleted file mode 100644
index bd49aa29..00000000
--- a/archivebox/vendor/pocket.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# https://github.com/tapanpandita/pocket/blob/master/pocket.py
-
-import requests
-import json
-from functools import wraps
-
-
-class PocketException(Exception):
- '''
- Base class for all pocket exceptions
- http://getpocket.com/developer/docs/errors
-
- '''
- pass
-
-
-class InvalidQueryException(PocketException):
- pass
-
-
-class AuthException(PocketException):
- pass
-
-
-class RateLimitException(PocketException):
- '''
- http://getpocket.com/developer/docs/rate-limits
-
- '''
- pass
-
-
-class ServerMaintenanceException(PocketException):
- pass
-
-EXCEPTIONS = {
- 400: InvalidQueryException,
- 401: AuthException,
- 403: RateLimitException,
- 503: ServerMaintenanceException,
-}
-
-
-def method_wrapper(fn):
-
- @wraps(fn)
- def wrapped(self, *args, **kwargs):
- arg_names = list(fn.__code__.co_varnames)
- arg_names.remove('self')
- kwargs.update(dict(zip(arg_names, args)))
-
- url = self.api_endpoints[fn.__name__]
- payload = dict([
- (k, v) for k, v in kwargs.items()
- if v is not None
- ])
- payload.update(self.get_payload())
-
- return self.make_request(url, payload)
-
- return wrapped
-
-
-def bulk_wrapper(fn):
-
- @wraps(fn)
- def wrapped(self, *args, **kwargs):
- arg_names = list(fn.__code__.co_varnames)
- arg_names.remove('self')
- kwargs.update(dict(zip(arg_names, args)))
-
- wait = kwargs.get('wait', True)
- query = dict(
- [(k, v) for k, v in kwargs.items() if v is not None]
- )
- # TODO: Fix this hack
- query['action'] = 'add' if fn.__name__ == 'bulk_add' else fn.__name__
-
- if wait:
- self.add_bulk_query(query)
- return self
- else:
- url = self.api_endpoints['send']
- payload = {
- 'actions': [query],
- }
- payload.update(self.get_payload())
- return self.make_request(
- url,
- json.dumps(payload),
- headers={'content-type': 'application/json'},
- )
-
- return wrapped
-
-
-class Pocket(object):
- '''
- This class implements a basic python wrapper around the pocket api. For a
- detailed documentation of the methods and what they do please refer the
- official pocket api documentation at
- http://getpocket.com/developer/docs/overview
-
- '''
- api_endpoints = dict(
- (method, 'https://getpocket.com/v3/%s' % method)
- for method in "add,send,get".split(",")
- )
-
- statuses = {
- 200: 'Request was successful',
- 400: 'Invalid request, please make sure you follow the '
- 'documentation for proper syntax',
- 401: 'Problem authenticating the user',
- 403: 'User was authenticated, but access denied due to lack of '
- 'permission or rate limiting',
- 503: 'Pocket\'s sync server is down for scheduled maintenance.',
- }
-
- def __init__(self, consumer_key, access_token):
- self.consumer_key = consumer_key
- self.access_token = access_token
- self._bulk_query = []
-
- self._payload = {
- 'consumer_key': self.consumer_key,
- 'access_token': self.access_token,
- }
-
- def get_payload(self):
- return self._payload
-
- def add_bulk_query(self, query):
- self._bulk_query.append(query)
-
- @staticmethod
- def _post_request(url, payload, headers):
- r = requests.post(url, data=payload, headers=headers)
- return r
-
- @classmethod
- def _make_request(cls, url, payload, headers=None):
- r = cls._post_request(url, payload, headers)
-
- if r.status_code > 399:
- error_msg = cls.statuses.get(r.status_code)
- extra_info = r.headers.get('X-Error')
- raise EXCEPTIONS.get(r.status_code, PocketException)(
- '%s. %s' % (error_msg, extra_info)
- )
-
- return r.json() or r.text, r.headers
-
- @classmethod
- def make_request(cls, url, payload, headers=None):
- return cls._make_request(url, payload, headers)
-
- @method_wrapper
- def add(self, url, title=None, tags=None, tweet_id=None):
- '''
- This method allows you to add a page to a user's list.
- In order to use the /v3/add endpoint, your consumer key must have the
- "Add" permission.
- http://getpocket.com/developer/docs/v3/add
-
- '''
-
- @method_wrapper
- def get(
- self, state=None, favorite=None, tag=None, contentType=None,
- sort=None, detailType=None, search=None, domain=None, since=None,
- count=None, offset=None
- ):
- '''
- This method allows you to retrieve a user's list. It supports
- retrieving items changed since a specific time to allow for syncing.
- http://getpocket.com/developer/docs/v3/retrieve
-
- '''
-
- @method_wrapper
- def send(self, actions):
- '''
- This method allows you to make changes to a user's list. It supports
- adding new pages, marking pages as read, changing titles, or updating
- tags. Multiple changes to items can be made in one request.
- http://getpocket.com/developer/docs/v3/modify
-
- '''
-
- @bulk_wrapper
- def bulk_add(
- self, item_id, ref_id=None, tags=None, time=None, title=None,
- url=None, wait=True
- ):
- '''
- Add a new item to the user's list
- http://getpocket.com/developer/docs/v3/modify#action_add
-
- '''
-
- @bulk_wrapper
- def archive(self, item_id, time=None, wait=True):
- '''
- Move an item to the user's archive
- http://getpocket.com/developer/docs/v3/modify#action_archive
-
- '''
-
- @bulk_wrapper
- def readd(self, item_id, time=None, wait=True):
- '''
- Re-add (unarchive) an item to the user's list
- http://getpocket.com/developer/docs/v3/modify#action_readd
-
- '''
-
- @bulk_wrapper
- def favorite(self, item_id, time=None, wait=True):
- '''
- Mark an item as a favorite
- http://getpocket.com/developer/docs/v3/modify#action_favorite
-
- '''
-
- @bulk_wrapper
- def unfavorite(self, item_id, time=None, wait=True):
- '''
- Remove an item from the user's favorites
- http://getpocket.com/developer/docs/v3/modify#action_unfavorite
-
- '''
-
- @bulk_wrapper
- def delete(self, item_id, time=None, wait=True):
- '''
- Permanently remove an item from the user's account
- http://getpocket.com/developer/docs/v3/modify#action_delete
-
- '''
-
- @bulk_wrapper
- def tags_add(self, item_id, tags, time=None, wait=True):
- '''
- Add one or more tags to an item
- http://getpocket.com/developer/docs/v3/modify#action_tags_add
-
- '''
-
- @bulk_wrapper
- def tags_remove(self, item_id, tags, time=None, wait=True):
- '''
- Remove one or more tags from an item
- http://getpocket.com/developer/docs/v3/modify#action_tags_remove
-
- '''
-
- @bulk_wrapper
- def tags_replace(self, item_id, tags, time=None, wait=True):
- '''
- Replace all of the tags for an item with one or more provided tags
- http://getpocket.com/developer/docs/v3/modify#action_tags_replace
-
- '''
-
- @bulk_wrapper
- def tags_clear(self, item_id, time=None, wait=True):
- '''
- Remove all tags from an item.
- http://getpocket.com/developer/docs/v3/modify#action_tags_clear
-
- '''
-
- @bulk_wrapper
- def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True):
- '''
- Rename a tag. This affects all items with this tag.
- http://getpocket.com/developer/docs/v3/modify#action_tag_rename
-
- '''
-
- def commit(self):
- '''
- This method executes the bulk query, flushes stored queries and
- returns the response
-
- '''
- url = self.api_endpoints['send']
- payload = {
- 'actions': self._bulk_query,
- }
- payload.update(self._payload)
- self._bulk_query = []
-
- return self._make_request(
- url,
- json.dumps(payload),
- headers={'content-type': 'application/json'},
- )
-
- @classmethod
- def get_request_token(
- cls, consumer_key, redirect_uri='http://example.com/', state=None
- ):
- '''
- Returns the request token that can be used to fetch the access token
-
- '''
- headers = {
- 'X-Accept': 'application/json',
- }
- url = 'https://getpocket.com/v3/oauth/request'
- payload = {
- 'consumer_key': consumer_key,
- 'redirect_uri': redirect_uri,
- }
-
- if state:
- payload['state'] = state
-
- return cls._make_request(url, payload, headers)[0]['code']
-
- @classmethod
- def get_credentials(cls, consumer_key, code):
- '''
- Fetches access token from using the request token and consumer key
-
- '''
- headers = {
- 'X-Accept': 'application/json',
- }
- url = 'https://getpocket.com/v3/oauth/authorize'
- payload = {
- 'consumer_key': consumer_key,
- 'code': code,
- }
-
- return cls._make_request(url, payload, headers)[0]
-
- @classmethod
- def get_access_token(cls, consumer_key, code):
- return cls.get_credentials(consumer_key, code)['access_token']
-
- @classmethod
- def get_auth_url(cls, code, redirect_uri='http://example.com'):
- auth_url = ('https://getpocket.com/auth/authorize'
- '?request_token=%s&redirect_uri=%s' % (code, redirect_uri))
- return auth_url
-
- @classmethod
- def auth(
- cls, consumer_key, redirect_uri='http://example.com/', state=None,
- ):
- '''
- This is a test method for verifying if oauth worked
- http://getpocket.com/developer/docs/authentication
-
- '''
- code = cls.get_request_token(consumer_key, redirect_uri, state)
-
- auth_url = 'https://getpocket.com/auth/authorize?request_token='\
- '%s&redirect_uri=%s' % (code, redirect_uri)
- raw_input(
- 'Please open %s in your browser to authorize the app and '
- 'press enter:' % auth_url
- )
-
- return cls.get_access_token(consumer_key, code)
diff --git a/archivebox/vendor/pocket.py b/archivebox/vendor/pocket.py
new file mode 120000
index 00000000..37352d27
--- /dev/null
+++ b/archivebox/vendor/pocket.py
@@ -0,0 +1 @@
+pocket/pocket.py
\ No newline at end of file
diff --git a/archivebox/vendor/taggit_utils.py b/archivebox/vendor/taggit_utils.py
deleted file mode 100644
index 5a2d511d..00000000
--- a/archivebox/vendor/taggit_utils.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Taken from https://github.com/jazzband/django-taggit/blob/3b56adb637ab95aca5036c37a358402c825a367c/taggit/utils.py
-
-def parse_tags(tagstring):
- """
- Parses tag input, with multiple word input being activated and
- delineated by commas and double quotes. Quotes take precedence, so
- they may contain commas.
-
- Returns a sorted list of unique tag names.
-
- Ported from Jonathan Buchanan's `django-tagging
- `_
- """
- if not tagstring:
- return []
-
- # Special case - if there are no commas or double quotes in the
- # input, we don't *do* a recall... I mean, we know we only need to
- # split on spaces.
- if "," not in tagstring and '"' not in tagstring:
- words = list(set(split_strip(tagstring, " ")))
- words.sort()
- return words
-
- words = []
- buffer = []
- # Defer splitting of non-quoted sections until we know if there are
- # any unquoted commas.
- to_be_split = []
- saw_loose_comma = False
- open_quote = False
- i = iter(tagstring)
- try:
- while True:
- c = next(i)
- if c == '"':
- if buffer:
- to_be_split.append("".join(buffer))
- buffer = []
- # Find the matching quote
- open_quote = True
- c = next(i)
- while c != '"':
- buffer.append(c)
- c = next(i)
- if buffer:
- word = "".join(buffer).strip()
- if word:
- words.append(word)
- buffer = []
- open_quote = False
- else:
- if not saw_loose_comma and c == ",":
- saw_loose_comma = True
- buffer.append(c)
- except StopIteration:
- # If we were parsing an open quote which was never closed treat
- # the buffer as unquoted.
- if buffer:
- if open_quote and "," in buffer:
- saw_loose_comma = True
- to_be_split.append("".join(buffer))
- if to_be_split:
- if saw_loose_comma:
- delimiter = ","
- else:
- delimiter = " "
- for chunk in to_be_split:
- words.extend(split_strip(chunk, delimiter))
- words = list(set(words))
- words.sort()
- return words
-
-
-def split_strip(string, delimiter=","):
- """
- Splits ``string`` on ``delimiter``, stripping each resulting string
- and returning a list of non-empty strings.
-
- Ported from Jonathan Buchanan's `django-tagging
- `_
- """
- if not string:
- return []
-
- words = [w.strip() for w in string.split(delimiter)]
- return [w for w in words if w]
-
-
-def edit_string_for_tags(tags):
- """
- Given list of ``Tag`` instances, creates a string representation of
- the list suitable for editing by the user, such that submitting the
- given string representation back without changing it will give the
- same list of tags.
-
- Tag names which contain commas will be double quoted.
-
- If any tag name which isn't being quoted contains whitespace, the
- resulting string of tag names will be comma-delimited, otherwise
- it will be space-delimited.
-
- Ported from Jonathan Buchanan's `django-tagging
- `_
- """
- names = []
- for tag in tags:
- name = tag.name
- if "," in name or " " in name:
- names.append('"%s"' % name)
- else:
- names.append(name)
- return ", ".join(sorted(names))
diff --git a/archivebox/vendor/taggit_utils.py b/archivebox/vendor/taggit_utils.py
new file mode 120000
index 00000000..f36776db
--- /dev/null
+++ b/archivebox/vendor/taggit_utils.py
@@ -0,0 +1 @@
+django-taggit/taggit/utils.py
\ No newline at end of file