1
0
Fork 0
mirror of synced 2024-06-25 17:40:17 +12:00
bulk-downloader-for-reddit/bulkredditdownloader/site_downloaders/base_downloader.py

45 lines
1.4 KiB
Python
Raw Normal View History

2021-02-07 14:33:19 +13:00
#!/usr/bin/env python3
# coding=utf-8
import logging
from abc import ABC, abstractmethod
2021-02-07 14:33:19 +13:00
import requests
2021-02-11 12:10:40 +13:00
from praw.models import Submission
2021-02-15 19:30:39 +13:00
from bulkredditdownloader.errors import SiteDownloaderError
2021-02-11 12:10:40 +13:00
from bulkredditdownloader.resource import Resource
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class BaseDownloader(ABC):
    """Abstract base class for downloaders that operate on one submission.

    Subclasses must implement :meth:`download`. :meth:`_download_resource`
    is a shared helper that fetches a URL and wraps the response body in a
    ``Resource``.
    """

    def __init__(self, post: Submission):
        """Store ``post`` and initialise ``hashes`` to an empty list."""
        self.post = post
        self.hashes = []

    @abstractmethod
    def download(self) -> list[Resource]:
        """Return the resources for ``self.post``; subclasses must override."""
        raise NotImplementedError

    def _download_resource(self, resource_url: str, max_attempts: int = 3) -> Resource:
        """Fetch ``resource_url`` and wrap the response body in a ``Resource``.

        The request is retried when the connection is reset, up to
        ``max_attempts`` times in total.

        Args:
            resource_url: URL to fetch with an HTTP GET.
            max_attempts: Total number of attempts before giving up.

        Returns:
            A ``Resource`` built from ``self.post``, the URL and the raw
            response content.

        Raises:
            SiteDownloaderError: If every attempt ended in a
                ``ConnectionResetError`` (or ``max_attempts`` is below 1).
        """
        # Browser-like headers sent with every request.
        headers = {
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                "Safari/537.36 OPR/54.0.2952.64",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }

        # NOTE(review): only ConnectionResetError is treated as retryable, as
        # in the original code. Other failures raised by requests propagate
        # unchanged - confirm whether those should be retried/wrapped as well.
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                download_content = requests.get(resource_url, headers=headers).content
            except ConnectionResetError as err:
                # Remember the failure and try again instead of aborting on
                # the first reset, so the loop really makes several attempts.
                last_error = err
                logger.warning(
                    "Connection reset while downloading %s (attempt %d of %d)",
                    resource_url, attempt, max_attempts,
                )
            else:
                return Resource(self.post, resource_url, download_content)

        # All attempts failed; keep the last low-level error as the cause.
        raise SiteDownloaderError from last_error