
Refactor method to base class

Serene-Arc 2021-04-05 17:21:04 +10:00 committed by Ali Parlakci
parent 500cee4bae
commit 2384c03170
7 changed files with 28 additions and 40 deletions

View file

@@ -5,10 +5,12 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional
+import requests
 from praw.models import Submission
-from bulkredditdownloader.site_authenticator import SiteAuthenticator
+from bulkredditdownloader.exceptions import ResourceNotFound
 from bulkredditdownloader.resource import Resource
+from bulkredditdownloader.site_authenticator import SiteAuthenticator
 logger = logging.getLogger(__name__)
@@ -22,3 +24,10 @@ class BaseDownloader(ABC):
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         """Return list of all un-downloaded Resources from submission"""
         raise NotImplementedError
+    @staticmethod
+    def get_link(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
+        res = requests.get(url, cookies=cookies, headers=headers)
+        if res.status_code != 200:
+            raise ResourceNotFound(f'Server responded with {res.status_code} to {url}')
+        return res
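
The helper added above is the whole refactor in miniature: each site downloader now routes its HTTP requests through the shared BaseDownloader.get_link instead of calling requests.get and checking the status code itself. A minimal, self-contained sketch of the pattern, where ExampleDownloader and its User-Agent value are illustrative and not part of this commit, while the get_link body is the one added above:

from abc import ABC

import requests


class ResourceNotFound(Exception):
    """Stand-in for bulkredditdownloader.exceptions.ResourceNotFound."""


class BaseDownloader(ABC):
    @staticmethod
    def get_link(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
        # Shared request logic: any non-200 response becomes a ResourceNotFound error.
        res = requests.get(url, cookies=cookies, headers=headers)
        if res.status_code != 200:
            raise ResourceNotFound(f'Server responded with {res.status_code} to {url}')
        return res


class ExampleDownloader(BaseDownloader):
    @staticmethod
    def _get_link(url: str) -> str:
        # Subclasses delegate the request and status check to the base class,
        # optionally passing site-specific headers or cookies.
        page = ExampleDownloader.get_link(url, headers={'User-Agent': 'example-agent'})
        return page.text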

View file

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 import bs4
-import requests
 from praw.models import Submission
 from bulkredditdownloader.exceptions import NotADownloadableLinkError
@@ -34,7 +33,7 @@ class Erome(BaseDownloader):
     @staticmethod
     def _get_links(url: str) -> set[str]:
-        page = requests.get(url)
+        page = Erome.get_link(url)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
         front_images = soup.find_all('img', attrs={'class': 'lasyload'})
         out = [im.get('data-src') for im in front_images]

View file

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 import bs4
-import requests
 from praw.models import Submission
 from bulkredditdownloader.exceptions import ResourceNotFound
@@ -28,12 +27,12 @@ class Gallery(BaseDownloader):
     @staticmethod
     def _get_links(url: str) -> list[str]:
-        page = requests.get(url, headers={
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-                          " Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        resource_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
         }
-        )
+        page = Gallery.get_link(url, headers=resource_headers)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
         links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})

View file

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
@@ -22,19 +21,14 @@ class Gfycat(GifDeliveryNetwork):
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'\.(webm|mp4|gif)$', url):
             return url
         gfycat_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://gfycat.com/' + gfycat_id
-        response = requests.get(url)
-        page_source = response.text
+        response = Gfycat.get_link(url)
         if 'gifdeliverynetwork' in response.url:
             return GifDeliveryNetwork._get_link(url)
-        soup = BeautifulSoup(page_source, 'html.parser')
+        soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
         out = json.loads(content.contents[0]).get('video').get('contentUrl')
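
Both the Gfycat and Redgifs downloaders pull the media URL out of the page's application/ld+json script tag, as in the last two lines above. A brief illustrative sketch of that parsing step against a made-up page source (only the selector and JSON path come from the diff; the HTML and URL are invented for the example):

import json

from bs4 import BeautifulSoup

# Made-up page source containing the ld+json payload these downloaders look for.
page_source = '''
<html><head>
<script data-react-helmet="true" type="application/ld+json">
{"video": {"contentUrl": "https://example.com/video.mp4"}}
</script>
</head></html>
'''

soup = BeautifulSoup(page_source, 'html.parser')
content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
out = json.loads(content.contents[0]).get('video').get('contentUrl')
print(out)  # https://example.com/video.mp4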

View file

@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 import re
 from typing import Optional
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
@@ -23,12 +21,9 @@ class GifDeliveryNetwork(BaseDownloader):
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
+        page = GifDeliveryNetwork.get_link(url)
-        page_source = requests.get(url).text
-        soup = BeautifulSoup(page_source, 'html.parser')
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('source', attrs={'id': 'mp4Source', 'type': 'video/mp4'})
         if content is None or content.get('src') is None:

View file

@@ -5,10 +5,9 @@ import re
 from typing import Optional
 import bs4
-import requests
 from praw.models import Submission
-from bulkredditdownloader.exceptions import NotADownloadableLinkError, ResourceNotFound, SiteDownloaderError
+from bulkredditdownloader.exceptions import NotADownloadableLinkError, SiteDownloaderError
 from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_authenticator import SiteAuthenticator
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
@@ -42,10 +41,7 @@ class Imgur(BaseDownloader):
             link = link.replace('i.imgur', 'imgur')
             link = link.rstrip('.gifv')
-        res = requests.get(link, cookies={'over18': '1', 'postpagebeta': '0'})
-        if res.status_code != 200:
-            raise ResourceNotFound(f'Server responded with {res.status_code} to {link}')
+        res = Imgur.get_link(link, cookies={'over18': '1', 'postpagebeta': '0'})
         soup = bs4.BeautifulSoup(res.text, 'html.parser')
         scripts = soup.find_all('script', attrs={'type': 'text/javascript'})

View file

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
@@ -23,20 +22,17 @@ class Redgifs(GifDeliveryNetwork):
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
         redgif_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://redgifs.com/watch/' + redgif_id
-        headers = {'User-Agent':
-                   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-                   ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64'
-                   }
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+        }
-        page_source = requests.get(url, headers=headers).text
+        page = Redgifs.get_link(url, headers=headers)
-        soup = BeautifulSoup(page_source, 'html.parser')
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
         if content is None: