2021-02-07 14:33:19 +13:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# coding=utf-8
|
2021-02-07 17:46:20 +13:00
|
|
|
|
|
|
|
import logging
|
|
|
|
from abc import ABC, abstractmethod
|
2021-02-07 14:33:19 +13:00
|
|
|
|
2021-02-07 17:46:20 +13:00
|
|
|
import requests
|
2021-02-11 12:10:40 +13:00
|
|
|
from praw.models import Submission
|
2021-02-07 17:46:20 +13:00
|
|
|
|
2021-02-15 19:30:39 +13:00
|
|
|
from bulkredditdownloader.errors import SiteDownloaderError
|
2021-02-11 12:10:40 +13:00
|
|
|
from bulkredditdownloader.resource import Resource
|
2021-02-07 17:46:20 +13:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
2021-02-07 14:33:19 +13:00
|
|
|
|
|
|
|
|
|
|
|
class BaseDownloader(ABC):
    """Abstract base class for downloaders that operate on a single submission.

    Subclasses must implement :meth:`download`. The ``_download_resource``
    helper fetches one URL and wraps the bytes in a ``Resource``.
    """

    def __init__(self, post: Submission):
        """Remember the submission and initialise an empty ``hashes`` list.

        Args:
            post: The submission this downloader works on.
        """
        self.post = post
        # Not read anywhere in this class; presumably populated by subclasses
        # or callers -- TODO confirm.
        self.hashes = []

    @abstractmethod
    def download(self) -> list[Resource]:
        """Produce the resources for ``self.post``; subclasses must override."""
        raise NotImplementedError

    def _download_resource(self, resource_url: str) -> Resource:
        """Fetch ``resource_url`` and wrap its content in a ``Resource``.

        The request is attempted up to three times. A connection failure on
        one attempt moves on to the next attempt instead of aborting.

        Args:
            resource_url: URL to fetch with an HTTP GET.

        Returns:
            A ``Resource`` built from ``self.post``, the URL and the raw
            response body.

        Raises:
            SiteDownloaderError: If every attempt fails with a connection
                error; the last underlying error is chained as the cause.
        """
        headers = {
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                "Safari/537.36 OPR/54.0.2952.64",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }
        last_error = None
        # Attempt the download up to 3 times before giving up.
        for _ in range(3):
            try:
                download_content = requests.get(resource_url, headers=headers).content
            except (ConnectionResetError, requests.exceptions.ConnectionError) as err:
                # requests normally surfaces connection resets as its own
                # ConnectionError; the builtin is kept for compatibility with
                # the previous handler. Remember the error and try again.
                last_error = err
                continue
            return Resource(self.post, resource_url, download_content)

        # All attempts failed; keep the original cause for debugging.
        raise SiteDownloaderError from last_error
|