Refactor method to base class
parent 500cee4bae
commit 2384c03170
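Move the duplicated requests.get call and status-code handling out of the individual site downloaders and into a single BaseDownloader.get_link static method, then switch Erome, Gallery, Gfycat, GifDeliveryNetwork, Imgur, and Redgifs over to the shared helper. Previously only Imgur checked the response status; every downloader now raises ResourceNotFound on a non-200 response.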

bulkredditdownloader/site_downloaders/base_downloader.py

@@ -5,10 +5,12 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional
 
+import requests
 from praw.models import Submission
 
-from bulkredditdownloader.site_authenticator import SiteAuthenticator
+from bulkredditdownloader.exceptions import ResourceNotFound
 from bulkredditdownloader.resource import Resource
+from bulkredditdownloader.site_authenticator import SiteAuthenticator
 
 logger = logging.getLogger(__name__)
 
@@ -22,3 +24,10 @@ class BaseDownloader(ABC):
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         """Return list of all un-downloaded Resources from submission"""
         raise NotImplementedError
+
+    @staticmethod
+    def get_link(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
+        res = requests.get(url, cookies=cookies, headers=headers)
+        if res.status_code != 200:
+            raise ResourceNotFound(f'Server responded with {res.status_code} to {url}')
+        return res
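The call sites below swap requests.get for this helper. A minimal sketch of the resulting pattern for a site downloader (hypothetical class, not part of this commit):

    class ExampleDownloader(BaseDownloader):
        @staticmethod
        def _get_link(url: str) -> str:
            # get_link raises ResourceNotFound on any non-200 response,
            # so per-site code no longer repeats the status check
            page = ExampleDownloader.get_link(url)
            return page.url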

bulkredditdownloader/site_downloaders/erome.py

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
 from bulkredditdownloader.exceptions import NotADownloadableLinkError
@@ -34,7 +33,7 @@ class Erome(BaseDownloader):
 
     @staticmethod
     def _get_links(url: str) -> set[str]:
-        page = requests.get(url)
+        page = Erome.get_link(url)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
         front_images = soup.find_all('img', attrs={'class': 'lasyload'})
         out = [im.get('data-src') for im in front_images]

bulkredditdownloader/site_downloaders/gallery.py

@@ -5,7 +5,6 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
 from bulkredditdownloader.exceptions import ResourceNotFound
@@ -28,12 +27,12 @@ class Gallery(BaseDownloader):
 
     @staticmethod
     def _get_links(url: str) -> list[str]:
-        page = requests.get(url, headers={
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
-                          " Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        resource_headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
         }
-        )
+        page = Gallery.get_link(url, headers=resource_headers)
         soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
         links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})

bulkredditdownloader/site_downloaders/gfycat.py

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
 
@@ -22,19 +21,14 @@ class Gfycat(GifDeliveryNetwork):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'\.(webm|mp4|gif)$', url):
             return url
 
         gfycat_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://gfycat.com/' + gfycat_id
-
-        response = requests.get(url)
-        page_source = response.text
-
+        response = Gfycat.get_link(url)
         if 'gifdeliverynetwork' in response.url:
             return GifDeliveryNetwork._get_link(url)
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
-
         out = json.loads(content.contents[0]).get('video').get('contentUrl')
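Gfycat extends GifDeliveryNetwork, which in turn extends BaseDownloader, so the Gfycat.get_link call above resolves to the new base-class helper through the inheritance chain; the gifdeliverynetwork redirect case still delegates to GifDeliveryNetwork._get_link. A quick illustration (class names taken from the hunks in this commit):

    assert issubclass(Gfycat, GifDeliveryNetwork)
    assert issubclass(GifDeliveryNetwork, BaseDownloader)
    # Gfycat.get_link resolves to BaseDownloader.get_link via the MRO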

bulkredditdownloader/site_downloaders/gif_delivery_network.py

@@ -1,9 +1,7 @@
 #!/usr/bin/env python3
 
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
-
@@ -23,12 +21,9 @@ class GifDeliveryNetwork(BaseDownloader):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
-
-        page_source = requests.get(url).text
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        page = GifDeliveryNetwork.get_link(url)
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('source', attrs={'id': 'mp4Source', 'type': 'video/mp4'})
-
         if content is None or content.get('src') is None:

bulkredditdownloader/site_downloaders/imgur.py

@@ -5,10 +5,9 @@ import re
 from typing import Optional
 
 import bs4
-import requests
 from praw.models import Submission
 
-from bulkredditdownloader.exceptions import NotADownloadableLinkError, ResourceNotFound, SiteDownloaderError
+from bulkredditdownloader.exceptions import NotADownloadableLinkError, SiteDownloaderError
 from bulkredditdownloader.resource import Resource
 from bulkredditdownloader.site_authenticator import SiteAuthenticator
 from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader
@@ -42,10 +41,7 @@ class Imgur(BaseDownloader):
         link = link.replace('i.imgur', 'imgur')
         link = link.rstrip('.gifv')
 
-        res = requests.get(link, cookies={'over18': '1', 'postpagebeta': '0'})
-
-        if res.status_code != 200:
-            raise ResourceNotFound(f'Server responded with {res.status_code} to {link}')
+        res = Imgur.get_link(link, cookies={'over18': '1', 'postpagebeta': '0'})
 
         soup = bs4.BeautifulSoup(res.text, 'html.parser')
         scripts = soup.find_all('script', attrs={'type': 'text/javascript'})

bulkredditdownloader/site_downloaders/redgifs.py

@@ -4,7 +4,6 @@ import json
 import re
 from typing import Optional
 
-import requests
 from bs4 import BeautifulSoup
 from praw.models import Submission
 
@@ -23,20 +22,17 @@ class Redgifs(GifDeliveryNetwork):
 
     @staticmethod
     def _get_link(url: str) -> str:
         if re.match(r'https://.*\.(mp4|webm|gif)(\?.*)?$', url):
             return url
 
         redgif_id = re.match(r'.*/(.*?)/?$', url).group(1)
         url = 'https://redgifs.com/watch/' + redgif_id
-
-        headers = {'User-Agent':
-                   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
-                   ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64'
-                   }
-
-        page_source = requests.get(url, headers=headers).text
-
-        soup = BeautifulSoup(page_source, 'html.parser')
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
+                          ' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
+        }
+        page = Redgifs.get_link(url, headers=headers)
+        soup = BeautifulSoup(page.text, 'html.parser')
         content = soup.find('script', attrs={'data-react-helmet': 'true', 'type': 'application/ld+json'})
 
         if content is None:
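With the helper centralised, the failure path is uniform across sites. A minimal sketch of what callers can now rely on (the URL is a placeholder):

    from bulkredditdownloader.exceptions import ResourceNotFound
    from bulkredditdownloader.site_downloaders.base_downloader import BaseDownloader

    try:
        res = BaseDownloader.get_link('https://example.com/missing')
    except ResourceNotFound as error:
        print(error)  # e.g. 'Server responded with 404 to https://example.com/missing'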