1
0
Fork 0
mirror of synced 2024-06-16 17:24:40 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/base_downloader.py

38 lines
1.2 KiB
Python
Raw Normal View History

2021-02-07 14:33:19 +13:00
#!/usr/bin/env python3
# coding=utf-8
import logging
from abc import ABC, abstractmethod
2021-02-25 23:40:08 +13:00
from typing import Optional
2021-02-07 14:33:19 +13:00
2021-04-05 19:21:04 +12:00
import requests
2021-02-11 12:10:40 +13:00
from praw.models import Submission
from bdfr.exceptions import ResourceNotFound, SiteDownloaderError
2021-04-12 19:58:32 +12:00
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class BaseDownloader(ABC):
    """Abstract base class for downloaders that extract Resources from a submission.

    Subclasses must implement :meth:`find_resources`. The static helper
    :meth:`retrieve_url` performs an HTTP GET and translates failures into
    the project's exception types.
    """

    def __init__(self, post: Submission, typical_extension: Optional[str] = None):
        """Store the submission and an optional typical extension.

        Args:
            post: The submission this downloader operates on.
            typical_extension: Optional extension string; stored as-is.
                NOTE(review): presumably the file extension usually seen for
                this site's resources -- confirm against subclasses.
        """
        self.post = post
        self.typical_extension = typical_extension

    @abstractmethod
    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return list of all un-downloaded Resources from submission

        Args:
            authenticator: Optional site authenticator made available to the
                implementation.

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses must override it.
        """
        raise NotImplementedError

    @staticmethod
    def retrieve_url(
        url: str,
        cookies: Optional[dict] = None,
        headers: Optional[dict] = None,
        *,
        timeout: Optional[float] = 10,
    ) -> requests.Response:
        """Fetch ``url`` with an HTTP GET and return the successful response.

        Args:
            url: Address to request.
            cookies: Optional cookies passed through to ``requests.get``.
            headers: Optional headers passed through to ``requests.get``.
            timeout: Seconds to wait on the server before giving up. Pass
                ``None`` to wait without limit (the previous behaviour).

        Returns:
            The response object, guaranteed to have status code 200.

        Raises:
            SiteDownloaderError: If the request itself fails (connection
                error, timeout, or any other ``RequestException``).
            ResourceNotFound: If the server answers with a non-200 status.
        """
        try:
            # Without an explicit timeout requests will wait indefinitely on
            # a stalled server; timeouts are RequestException subclasses, so
            # they are reported through the handler below.
            res = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
        except requests.exceptions.RequestException as e:
            logger.exception(e)
            # Chain explicitly so the underlying network error is kept as the cause.
            raise SiteDownloaderError(f'Failed to get page {url}') from e
        if res.status_code != 200:
            raise ResourceNotFound(f'Server responded with {res.status_code} to {url}')
        return res