1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/base_downloader.py

38 lines
1.3 KiB
Python
Raw Permalink Normal View History

2021-02-07 14:33:19 +13:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from abc import ABC, abstractmethod
2021-02-25 23:40:08 +13:00
from typing import Optional
2021-02-07 14:33:19 +13:00
2021-04-05 19:21:04 +12:00
import requests
2021-02-11 12:10:40 +13:00
from praw.models import Submission
from bdfr.exceptions import ResourceNotFound, SiteDownloaderError
2021-04-12 19:58:32 +12:00
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class BaseDownloader(ABC):
    """Abstract base class for downloaders that turn a submission into Resources.

    Subclasses must implement :meth:`find_resources`. The static helper
    :meth:`retrieve_url` is a shared HTTP GET wrapper that converts transport
    failures and non-200 responses into this package's exceptions.
    """

    def __init__(self, post: Submission, typical_extension: Optional[str] = None):
        """Store the submission and an optional extension hint.

        Args:
            post: The submission this downloader operates on.
            typical_extension: Stored as given; presumably the file extension
                usually produced by the site -- confirm against subclasses.
        """
        self.post = post
        self.typical_extension = typical_extension

    @abstractmethod
    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return list of all un-downloaded Resources from submission"""
        raise NotImplementedError

    @staticmethod
    def retrieve_url(
        url: str,
        cookies: Optional[dict] = None,
        headers: Optional[dict] = None,
        timeout: Optional[float] = 10,
    ) -> requests.Response:
        """Perform an HTTP GET on ``url`` and return the response if it is a 200.

        Args:
            url: Address to fetch.
            cookies: Optional cookies forwarded to ``requests.get``.
            headers: Optional headers forwarded to ``requests.get``.
            timeout: Forwarded to ``requests.get`` as its ``timeout`` argument.
                Defaults to 10 so that an unresponsive server cannot block the
                caller forever; pass ``None`` to wait indefinitely.

        Returns:
            The ``requests.Response`` object, only when its status code is 200.

        Raises:
            SiteDownloaderError: If ``requests`` raises a ``RequestException``
                (this includes timeouts).
            ResourceNotFound: If the server answers with any status other
                than 200.
        """
        try:
            res = requests.get(url, cookies=cookies, headers=headers, timeout=timeout)
        except requests.exceptions.RequestException as e:
            logger.exception(e)
            # Chain explicitly so the transport error is recorded as the cause.
            raise SiteDownloaderError(f"Failed to get page {url}") from e
        if res.status_code != 200:
            raise ResourceNotFound(f"Server responded with {res.status_code} to {url}")
        return res