Refactor method to base class
parent 500cee4bae
commit 2384c03170
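Move the duplicated requests.get call and status-code handling out of the individual site downloaders and into a single BaseDownloader.get_link static method, then switch Erome, Gallery, Gfycat, GifDeliveryNetwork, Imgur, and Redgifs over to the shared helper. Previously only Imgur checked the response status; every downloader now raises ResourceNotFound on a non-200 response.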

bulkredditdownloader/site_downloaders/base_downloader.py

@@ -5,10 +5,12 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional
 
+import requests
 from praw.models import Submission
 
-from bulkredditdownloader.site_authenticator import SiteAuthenticator
+from bulkredditdownloader.exceptions import ResourceNotFound
 from bulkredditdownloader.resource import Resource
+from bulkredditdownloader.site_authenticator import SiteAuthenticator
 
 logger = logging.getLogger(__name__)
 
@@ -22,3 +24,10 @@ class BaseDownloader(ABC):
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         """Return list of all un-downloaded Resources from submission"""
         raise NotImplementedError
+
+    @staticmethod
+    def get_link(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
+        res = requests.get(url, cookies=cookies, headers=headers)
+        if res.status_code != 200:
+            raise ResourceNotFound(f'Server responded with {res.status_code} to {url}')
+        return res
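The call sites below swap requests.get for this helper. A minimal sketch of the resulting pattern for a site downloader (hypothetical class, not part of this commit):

    class ExampleDownloader(BaseDownloader):
        @staticmethod
        def _get_link(url: str) -> str:
            # get_link raises ResourceNotFound on any non-200 response,
            # so per-site code no longer repeats the status check
            page = ExampleDownloader.get_link(url)
            return page.url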

bulkredditdownloader/site_downloaders/erome.py

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
 from bulkredditdownloader.exceptions import NotADownloadableLinkError
@@ -34,7 +33,7 @@ class Erome(BaseDownloader):
 
     @staticmethod
     def _get_links(url: str) -> set[str]:
-        page = requests.get(url)
+        page = Erome.get_link(url)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
         front_images = soup.find_all('img', attrs={'class': 'lasyload'})
         out = [im.get('data-src') for im in front_images]

bulkredditdownloader/site_downloaders/gallery.py

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
 from bulkredditdownloader.exceptions import ResourceNotFound
@@ -28,12 +27,12 @@ class Gallery(BaseDownloader):
 
     @staticmethod
     def _get_links(url: str) -> list[str]:
-        page = requests.get(url, headers={
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-                          " Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        resource_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
         }
-        )
+        page = Gallery.get_link(url, headers=resource_headers)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
         links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})

bulkredditdownloader/site_downloaders/gfycat.py

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
 
@@ -22,19 +21,14 @@ class Gfycat(GifDeliveryNetwork):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'\.(webm|mp4|gif)$', url):
             return url
 
         gfycat_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://gfycat.com/' + gfycat_id
-
-        response = requests.get(url)
-        page_source = response.text
-
+        response = Gfycat.get_link(url)
         if 'gifdeliverynetwork' in response.url:
             return GifDeliveryNetwork._get_link(url)
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
-
         out = json.loads(content.contents[0]).get('video').get('contentUrl')
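Gfycat extends GifDeliveryNetwork, which in turn extends BaseDownloader, so the Gfycat.get_link call above resolves to the new base-class helper through the inheritance chain; the gifdeliverynetwork redirect case still delegates to GifDeliveryNetwork._get_link. A quick illustration (class names taken from the hunks in this commit):

    assert issubclass(Gfycat, GifDeliveryNetwork)
    assert issubclass(GifDeliveryNetwork, BaseDownloader)
    # Gfycat.get_link resolves to BaseDownloader.get_link via the MRO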

bulkredditdownloader/site_downloaders/gif_delivery_network.py

@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
-
@@ -23,12 +21,9 @@ class GifDeliveryNetwork(BaseDownloader):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
-
-        page_source = requests.get(url).text
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        page = GifDeliveryNetwork.get_link(url)
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('source', attrs={'id': 'mp4Source', 'type': 'video/mp4'})
-
         if content is None or content.get('src') is None:

bulkredditdownloader/site_downloaders/imgur.py

@@ -5,10 +5,9 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
-from bulkredditdownloader.exceptions import NotADownloadableLinkError, ResourceNotFound, SiteDownloaderError
+from bulkredditdownloader.exceptions import NotADownloadableLinkError, SiteDownloaderError
 from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_authenticator import SiteAuthenticator
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
@@ -42,10 +41,7 @@ class Imgur(BaseDownloader):
         link = link.replace('i.imgur', 'imgur')
         link = link.rstrip('.gifv')
 
-        res = requests.get(link, cookies={'over18': '1', 'postpagebeta': '0'})
-
-        if res.status_code != 200:
-            raise ResourceNotFound(f'Server responded with {res.status_code} to {link}')
+        res = Imgur.get_link(link, cookies={'over18': '1', 'postpagebeta': '0'})
 
         soup = bs4.BeautifulSoup(res.text, 'html.parser')
         scripts = soup.find_all('script', attrs={'type': 'text/javascript'})

bulkredditdownloader/site_downloaders/redgifs.py

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
 
@@ -23,20 +22,17 @@ class Redgifs(GifDeliveryNetwork):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
 
         redgif_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://redgifs.com/watch/' + redgif_id
-
-        headers = {'User-Agent':
-                   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-                   ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64'
-                   }
-
-        page_source = requests.get(url, headers=headers).text
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+        }
+        page = Redgifs.get_link(url, headers=headers)
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
 
         if content is None:
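With the helper centralised, the failure path is uniform across sites. A minimal sketch of what callers can now rely on (the URL is a placeholder):

    from bulkredditdownloader.exceptions import ResourceNotFound
    from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader

    try:
        res = BaseDownloader.get_link('https://example.com/missing')
    except ResourceNotFound as error:
        print(error)  # e.g. 'Server responded with 404 to https://example.com/missing'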