"""Async HTTP client with bonus features!
- Support caching via upstream 304 with ETag, Last-Modified
- Log request timings for profiling
"""
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
import asyncio
import hashlib
import pickle
import time
from tornado.curl_httpclient import CurlAsyncHTTPClient
from tornado.httpclient import HTTPRequest
from nbviewer.utils import time_block
# -----------------------------------------------------------------------------
# Async HTTP Client
# -----------------------------------------------------------------------------
# Map cache-validation headers in a cached response to the conditional
# request headers that should be set when requesting the same URL again.
cache_headers = {"ETag": "If-None-Match", "Last-Modified": "If-Modified-Since"}
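# For example, a cached response carrying 'ETag: "abc123"' produces the
# conditional request header 'If-None-Match: "abc123"'.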


class NBViewerAsyncHTTPClient(object):
    """Wrapper around AsyncHTTPClient with bonus logging and caching!

    Successful responses are cached for as long as possible, keyed by the
    request URL. On a cache hit, the cached response is returned directly
    and no upstream request is made. Cache-validation headers found on a
    cached response are mapped onto the request as conditional headers:

    - ETag : If-None-Match
    - Last-Modified : If-Modified-Since
    """
    cache = None

    def __init__(self, log, client=None):
self.log = log
self.client = client or CurlAsyncHTTPClient()

    def fetch(self, url, params=None, **kwargs):
        # NOTE: `params` is accepted but currently unused; query parameters
        # should be encoded into `url` by the caller.
        request = HTTPRequest(url, **kwargs)
        if request.user_agent is None:
            request.user_agent = "Tornado-Async-Client"
# The future which will become the response upon awaiting.
response_future = asyncio.ensure_future(self.smart_fetch(request))
return response_future
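
    # Usage sketch (hypothetical caller; must run inside an event loop):
    #
    #     client = NBViewerAsyncHTTPClient(log)
    #     response = await client.fetch("https://example.com/notebook.ipynb")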

    async def smart_fetch(self, request):
        """
        Look for a cached response before fetching upstream.

        On a cache hit, return the cached response without contacting
        upstream. On a miss, fetch from upstream and cache the response;
        only successful responses reach the cache, since `self.client.fetch`
        raises on error responses by default.
        """
tic = time.time()
# when logging, use the URL without params
name = request.url.split("?")[0]
self.log.debug("Fetching %s", name)
# look for a cached response
cached_response = None
cache_key = hashlib.sha256(request.url.encode("utf8")).hexdigest()
cached_response = await self._get_cached_response(cache_key, name)
toc = time.time()
self.log.info("Upstream cache get %s %.2f ms", name, 1e3 * (toc - tic))
        if cached_response:
            self.log.info("Upstream cache hit %s", name)
            # copy cache-validation headers from the cached response onto the
            # request as conditional headers; the cached response itself is
            # returned directly, without revalidating upstream
            for resp_key, req_key in cache_headers.items():
                value = cached_response.headers.get(resp_key)
                if value:
                    request.headers[req_key] = value
            return cached_response
else:
self.log.info("Upstream cache miss %s", name)
response = await self.client.fetch(request)
dt = time.time() - tic
self.log.info("Fetched %s in %.2f ms", name, 1e3 * dt)
await self._cache_response(cache_key, name, response)
return response

    async def _get_cached_response(self, cache_key, name):
        """Get the cached response, if any."""
        if not self.cache:
            return
try:
cached_pickle = await self.cache.get(cache_key)
if cached_pickle:
self.log.info("Type of self.cache is: %s", type(self.cache))
return pickle.loads(cached_pickle)
except Exception:
self.log.error("Upstream cache get failed %s", name, exc_info=True)

    async def _cache_response(self, cache_key, name, response):
        """Cache the response, if a cache backend is configured."""
if not self.cache:
return
with time_block("Upstream cache set %s" % name, logger=self.log):
# cache the response
try:
pickle_response = pickle.dumps(response, pickle.HIGHEST_PROTOCOL)
await self.cache.set(cache_key, pickle_response)
except Exception:
self.log.error("Upstream cache failed %s" % name, exc_info=True)