alnoda-workspaces/workspaces/notebook-old-workspace/nbviewer/nbviewer/utils.py
2022-05-30 07:24:06 +00:00

248 lines
6.5 KiB
Python

# -----------------------------------------------------------------------------
# Copyright (C) Jupyter Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
# -----------------------------------------------------------------------------
import cgi
import json
import os
import re
import time
from base64 import decodebytes
from base64 import encodebytes
from contextlib import contextmanager
from functools import lru_cache
from subprocess import check_output
from urllib.parse import parse_qs
from urllib.parse import quote as stdlib_quote
from urllib.parse import urlencode
from urllib.parse import urlparse
from urllib.parse import urlunparse
# Query-string keys that carry credentials; parse_header_links() removes them
# from the URLs it returns.
STRIP_PARAMS = [
    "client_id",
    "client_secret",
    "access_token",
]

# Directory containing this module.
HERE = os.path.dirname(__file__)

# Optional JSON file next to this module; git_info() reads it when it exists.
GIT_INFO_JSON = os.path.join(HERE, "git_info.json")
class EmptyClass:
    """No-op object that accepts any method call and returns itself.

    Any attribute that is not found through normal lookup resolves to
    :meth:`empty_function`, which ignores its arguments and returns the same
    instance, so arbitrary call chains such as ``obj.a().b(1).c(x=2)`` work.

    Primarily used to mock out the statsd client when statsd is not being used.
    """

    def __getattr__(self, attr):
        """Return the no-op callable for any attribute not otherwise defined."""
        return self.empty_function

    def empty_function(self, *args, **kwargs):
        """Ignore all arguments and return this same instance."""
        return self
def quote(s):
    """Percent-encode *s* for use in a URL and always return ``str``.

    Thin wrapper around :func:`urllib.parse.quote` with its default
    arguments, so ``/`` is left unescaped and text is encoded as UTF-8.

    :param s: ``str`` or ``bytes`` to quote.
    :return: the quoted value as ``str``.
    :raises TypeError: if *s* is neither ``str`` nor bytes-like.
    """
    # The previous Python 2 era code called ``s.encode`` on every non-str
    # input, which raises AttributeError for bytes on Python 3. The stdlib
    # function already accepts both types and always returns str.
    return stdlib_quote(s)
# One or more consecutive characters that are not ASCII letters or digits.
_NON_ALNUM_RUN = re.compile("[^0-9a-zA-Z]+")


def clean_filename(fn):
    """Recreate the sanitized gist filename GitHub uses in permalinks.

    GitHub does not expose the sanitized name over its API, so it is rebuilt
    here: every run of characters other than ASCII letters and digits is
    collapsed into a single ``-``.

    :param fn: original filename.
    :return: sanitized filename.
    """
    return _NON_ALNUM_RUN.sub("-", fn)
def url_path_join(*pieces):
    """Join URL path components without producing doubled slashes.

    Slashes at the edges of each piece are stripped and empty pieces are
    dropped before joining with ``/``. A leading slash on the first piece and
    a trailing slash on the last piece are kept in the result.

    :param pieces: one or more path fragments (an empty call raises
        ``IndexError``).
    :return: the joined relative URL.
    """
    leading = "/" if pieces[0].startswith("/") else ""
    trailing = "/" if pieces[-1].endswith("/") else ""

    body = "/".join(filter(None, (piece.strip("/") for piece in pieces)))
    joined = leading + body + trailing

    # Only slashes were given: collapse the leading + trailing pair into one.
    return "/" if joined == "//" else joined
def transform_ipynb_uri(uri, uri_rewrite_list):
    """Turn an ipynb URI into an app URL using the first matching rewrite.

    This is the state-free part of mapping URIs onto nbviewer URLs.

    :param uri: uri to transform
    :param uri_rewrite_list: list of (URI regex, URL template) tuples; the
        template is filled with the regex's groups via ``str.format``.
    :return: the rewritten URL, with any query string percent-encoded and
        appended as a final path segment.
    """
    for pattern, template in uri_rewrite_list:
        found = re.match(pattern, uri)
        if found is not None:
            # Only the first matching rule is applied.
            uri = template.format(*found.groups())
            break

    # Encode query parameters as the last URL part.
    base, marker, query = uri.partition("?")
    if marker:
        uri = base + "/" + quote("?" + query)
    return uri
# get_encoding_from_headers from requests.utils (1.2.3)
# (c) 2013 Kenneth Reitz
# used under Apache 2.0
def get_encoding_from_headers(headers):
    """Return the text encoding announced by an HTTP header mapping.

    :param headers: dictionary to extract encoding from; the
        ``content-type`` entry is looked up via ``headers.get``.
    :return: the declared ``charset`` with surrounding quotes removed,
        ``"utf-8"`` for undeclared ``text`` content types, or ``None`` when
        there is no content type or no encoding can be determined.
    """
    content_type = headers.get("content-type")
    if not content_type:
        return None

    # Parsed locally rather than with cgi.parse_header: the cgi module was
    # deprecated in Python 3.11 and removed in 3.13 (PEP 594).
    content_type, params = _parse_content_type_header(content_type)
    if "charset" in params:
        return params["charset"].strip("'\"")

    # per #507, at least some hosts are providing UTF-8 without declaring it,
    # while the former choice of ISO-8859-1 wasn't known to be causing
    # problems in the wild
    if "text" in content_type:
        return "utf-8"
    return None


def _parse_content_type_header(line):
    """Split a header like ``text/html; charset="utf-8"`` into value and params.

    Follows the rules of the former ``cgi.parse_header``: pieces are separated
    by ``;`` (semicolons inside double quotes do not split), parameter names
    are lower-cased, double-quoted values are unquoted, and pieces without
    ``=`` are ignored.

    :return: ``(main_value, params_dict)``.
    """

    def _segments(rest):
        # Yield each ';'-delimited piece; *rest* always starts with ';'.
        while rest[:1] == ";":
            rest = rest[1:]
            end = rest.find(";")
            # An odd number of unescaped quotes before `end` means that this
            # semicolon sits inside a quoted string: look for the next one.
            while end > 0 and (
                rest.count('"', 0, end) - rest.count('\\"', 0, end)
            ) % 2:
                end = rest.find(";", end + 1)
            if end < 0:
                end = len(rest)
            yield rest[:end].strip()
            rest = rest[end:]

    segments = _segments(";" + line)
    main_value = next(segments)

    params = {}
    for segment in segments:
        name, sep, value = segment.partition("=")
        if not sep:
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1].replace("\\\\", "\\").replace('\\"', '"')
        params[name.strip().lower()] = value
    return main_value, params
def response_text(response, encoding=None):
    """Decode ``response.body`` to text, like ``requests``' ``.text`` property.

    :param response: object exposing ``body`` (bytes) and ``headers``.
    :param encoding: codec to use; when falsy, the encoding is taken from the
        response headers, falling back to ``"utf-8"``.
    :return: the decoded body; undecodable bytes are replaced rather than
        raising.
    """
    if not encoding:
        encoding = get_encoding_from_headers(response.headers) or "utf-8"
    return response.body.decode(encoding, "replace")
# parse_header_links from requests.util
# modified to actually return a dict, like the docstring says.
def parse_header_links(value):
    """Parse an HTTP ``Link`` header into a dict keyed by ``rel``.

    i.e. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    Each entry is a dict holding ``url`` plus every ``key=value`` parameter of
    that link. Query parameters named in ``STRIP_PARAMS`` are removed from the
    URL, and only the first value of a repeated query parameter is kept.
    Links without a ``rel`` parameter are omitted.

    :param value: raw header value.
    :return: ``{rel: {"url": ..., "rel": ..., ...}}``.
    """
    links = {}
    replace_chars = " '\""

    # NOTE: links are separated with a plain split on ",", so a URL that
    # itself contains a comma is not handled.
    for val in value.split(","):
        url, _, params = val.partition(";")

        # Rebuild the URL without credential-bearing query parameters.
        parts = list(urlparse(url.strip("<> '\"")))
        query = parse_qs(parts[4])
        parts[4] = urlencode(
            {
                name: values[0]
                for name, values in query.items()
                if name not in STRIP_PARAMS
            }
        )
        link = {"url": urlunparse(parts)}

        for param in params.split(";"):
            # Split on the first "=" only so that values containing "=" stay
            # intact, and skip valueless parameters instead of abandoning the
            # rest of the link (which could hide a later ``rel``).
            key, sep, param_value = param.partition("=")
            if not sep:
                continue
            link[key.strip(replace_chars)] = param_value.strip(replace_chars)

        if "rel" in link:
            links[link["rel"]] = link
    return links
def git_info(path, force_git=False):
    """Return a dict with ``sha``, ``date`` and ``msg`` of the latest commit.

    When the ``GIT_INFO_JSON`` file exists and *force_git* is false, its
    parsed JSON content is returned as-is. Otherwise ``git log -1`` is run in
    *path* and its three output lines (commit hash, subject, committer date)
    are used.

    :param path: working directory for the ``git`` command.
    :param force_git: ignore the JSON file and always ask git.
    """
    use_cached_file = not force_git and os.path.exists(GIT_INFO_JSON)
    if use_cached_file:
        with open(GIT_INFO_JSON, "r") as info_file:
            return json.load(info_file)

    raw = check_output(["git", "log", "-1", "--format=%H\n%s\n%cD"], cwd=path)
    # The format string yields exactly three lines: hash, subject, date.
    sha, msg, date = raw.decode("utf8").splitlines()
    return {"sha": sha, "date": date, "msg": msg}
def jupyter_info():
    """Return a dict describing the Jupyter stack in use.

    Currently it holds a single key, ``nbconvert_version``.
    """
    # nbconvert is imported at call time rather than at module import.
    from nbconvert import __version__ as nbconvert_version

    return {"nbconvert_version": nbconvert_version}
def base64_decode(s):
    """Decode base64 data given as either text or bytes.

    The base64 API only talks bytes, so text input is first converted to
    ASCII bytes (characters outside ASCII become ``?``).

    :param s: base64 payload as ``str`` or ``bytes``.
    :return: the decoded ``bytes``.
    """
    payload = s if isinstance(s, bytes) else s.encode("ascii", "replace")
    return decodebytes(payload)
def base64_encode(s):
    """Base64-encode text or bytes and return the result as ``str``.

    The base64 API only talks bytes, so text input is encoded as UTF-8 first.
    (It used to be encoded as ASCII with ``"replace"``, which silently turned
    every non-ASCII character into ``?`` before encoding; output for pure
    ASCII text is unchanged.)

    :param s: data to encode, as ``str`` or ``bytes``.
    :return: base64 text as produced by :func:`base64.encodebytes`, i.e. with
        a newline after every 76 output characters and at the end.
    """
    if not isinstance(s, bytes):
        s = s.encode("utf-8")
    return encodebytes(s).decode("ascii")
@contextmanager
def time_block(message, logger, debug_limit=1):
    """Context manager that logs how long the wrapped block took.

    The elapsed wall-clock time is logged in milliseconds. If the block took
    longer than *debug_limit* seconds the message goes to ``logger.info``,
    otherwise to ``logger.debug``. Nothing is logged when the block raises.

    :param message: text placed before the timing in the log line.
    :param logger: object providing ``info`` and ``debug`` methods.
    :param debug_limit: threshold in seconds between DEBUG and INFO.
    """
    started = time.time()
    yield
    elapsed = time.time() - started

    if elapsed > debug_limit:
        emit = logger.info
    else:
        emit = logger.debug
    emit("%s in %.2f ms", message, 1e3 * elapsed)
def cached_property(method):
    """Decorator: a read-only property whose value is computed once per instance.

    The first access calls *method* and stores the result in the instance's
    ``__dict__``; later accesses on that instance return the stored value.

    The previous implementation wrapped *method* in ``lru_cache(1)``, i.e. a
    single cache slot shared by every instance and keyed on ``self``: using
    the property on two instances alternately recomputed it on every access,
    and the cache kept the most recent instance alive.

    :param method: zero-argument instance method computing the value.
    :return: a ``property`` (no setter or deleter) wrapping *method*.
    """
    # Qualified name keeps the storage key distinct for same-named properties
    # defined on different classes in one hierarchy.
    slot = "_cached_property:" + getattr(method, "__qualname__", method.__name__)

    def getter(self):
        try:
            store = self.__dict__
        except AttributeError:
            # No per-instance storage (e.g. __slots__): compute on each access.
            return method(self)
        try:
            return store[slot]
        except KeyError:
            result = store[slot] = method(self)
            return result

    return property(getter, doc=method.__doc__)