Merge branch 'development' into imgur

2024-04-29 01:42:31 +12:00 · 2022-09-20 17:28:48 +10:00 · 2022-09-20 17:28:48 +10:00 · 06816098dc
parent 106d7596b1 f4598c4bec
commit 06816098dc
7 changed files with 29 additions and 35 deletions
--- a/bdfr/site_downloaders/download_factory.py
+++ b/bdfr/site_downloaders/download_factory.py
@ -27,6 +27,8 @@ class DownloadFactory:
        sanitised_url = DownloadFactory.sanitise_url(url)
        if re.match(r'(i\.|m\.)?imgur', sanitised_url):
            return Imgur
+        elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url):
+            return Redgifs
        elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \
                not DownloadFactory.is_web_resource(sanitised_url):
            return Direct
@ -40,6 +42,8 @@ class DownloadFactory:
            return Gfycat
        elif re.match(r'(redgifs|gifdeliverynetwork)', sanitised_url):
            return Redgifs
+        elif re.match(r'(m\.)?imgur.*', sanitised_url):
+            return Imgur
        elif re.match(r'reddit\.com/r/', sanitised_url):
            return SelfPost
        elif re.match(r'(m\.)?youtu\.?be', sanitised_url):
--- a/bdfr/site_downloaders/redgifs.py
+++ b/bdfr/site_downloaders/redgifs.py
@ -24,16 +24,11 @@ class Redgifs(BaseDownloader):
    @staticmethod
    def _get_link(url: str) -> set[str]:
        try:
-            redgif_id = re.match(r'.*/(.*?)/?$', url).group(1)
+            redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1)
        except AttributeError:
            raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}')

-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
-                          'Chrome/90.0.4430.93 Safari/537.36',
-        }
-
-        content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}', headers=headers)
+        content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}')

        if content is None:
            raise SiteDownloaderError('Could not read the page source')
@ -50,9 +45,7 @@ class Redgifs(BaseDownloader):
            elif response_json['gif']['type'] == 2:  # type 2 is an image
                if response_json['gif']['gallery']:
                    content = Redgifs.retrieve_url(
-                        f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}',
-                        headers=headers,
-                    )
+                        f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}')
                    response_json = json.loads(content.text)
                    out = {p['urls']['hd'] for p in response_json['gifs']}
                else:
@ -62,14 +55,7 @@ class Redgifs(BaseDownloader):
        except (KeyError, AttributeError):
            raise SiteDownloaderError('Failed to find JSON data in page')

-        # returned domain seems to be being phased out
+        # Update subdomain if old one is returned
        out = {re.sub('thumbs2', 'thumbs3', link) for link in out}
-        out = {Redgifs._clean_thumbs4_link(link) for link in out}
-        return out
-
-    @staticmethod
-    def _clean_thumbs4_link(url: str) -> str:
-        split_url = urllib.parse.urlsplit(url)
-        out = split_url.scheme + '://' + split_url.netloc + split_url.path
-        out = re.sub('thumbs4', 'thumbs3', out)
+        out = {re.sub('thumbs3', 'thumbs4', link) for link in out}
        return out
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@ -0,0 +1 @@
+pytest
--- a/tests/integration_tests/test_clone_integration.py
+++ b/tests/integration_tests/test_clone_integration.py
@ -17,11 +17,12 @@ def copy_test_config(run_path: Path):


 def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
+    copy_test_config(tmp_path)
    out = [
        'clone',
        str(tmp_path),
        '-v',
-        '--config', 'test_config.cfg',
+        '--config', str(Path(tmp_path, 'test_config.cfg')),
        '--log', str(Path(tmp_path, 'test_log.txt')),
    ] + test_args
    return out
@ -33,6 +34,8 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
@pytest.mark.parametrize('test_args', (
    ['-l', 'm2601g'],
    ['-s', 'TrollXChromosomes/', '-L', 1],
+    ['-l', 'tr79b'],
+    ['-l', 'tr6ky'],
 ))
 def test_cli_scrape_general(test_args: list[str], tmp_path: Path):
    runner = CliRunner()
--- a/tests/site_downloaders/test_redgifs.py
+++ b/tests/site_downloaders/test_redgifs.py
@ -2,6 +2,7 @@
 # coding=utf-8

 from unittest.mock import Mock
+import re

 import pytest

@ -12,24 +13,26 @@ from bdfr.site_downloaders.redgifs import Redgifs
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected'), (
    ('https://redgifs.com/watch/frighteningvictorioussalamander',
-     {'https://thumbs3.redgifs.com/FrighteningVictoriousSalamander.mp4'}),
+     {'FrighteningVictoriousSalamander.mp4'}),
    ('https://redgifs.com/watch/springgreendecisivetaruca',
-     {'https://thumbs3.redgifs.com/SpringgreenDecisiveTaruca.mp4'}),
+     {'SpringgreenDecisiveTaruca.mp4'}),
    ('https://www.redgifs.com/watch/palegoldenrodrawhalibut',
-     {'https://thumbs3.redgifs.com/PalegoldenrodRawHalibut.mp4'}),
+     {'PalegoldenrodRawHalibut.mp4'}),
    ('https://redgifs.com/watch/hollowintentsnowyowl',
-     {'https://thumbs3.redgifs.com/HollowIntentSnowyowl-large.jpg'}),
+     {'HollowIntentSnowyowl-large.jpg'}),
    ('https://www.redgifs.com/watch/lustrousstickywaxwing',
-     {'https://thumbs3.redgifs.com/EntireEnchantingHypsilophodon-large.jpg',
-      'https://thumbs3.redgifs.com/FancyMagnificentAdamsstaghornedbeetle-large.jpg',
-      'https://thumbs3.redgifs.com/LustrousStickyWaxwing-large.jpg',
-      'https://thumbs3.redgifs.com/ParchedWindyArmyworm-large.jpg',
-      'https://thumbs3.redgifs.com/ThunderousColorlessErmine-large.jpg',
-      'https://thumbs3.redgifs.com/UnripeUnkemptWoodpecker-large.jpg'}),
+     {'EntireEnchantingHypsilophodon-large.jpg',
+      'FancyMagnificentAdamsstaghornedbeetle-large.jpg',
+      'LustrousStickyWaxwing-large.jpg',
+      'ParchedWindyArmyworm-large.jpg',
+      'ThunderousColorlessErmine-large.jpg',
+      'UnripeUnkemptWoodpecker-large.jpg'}),
 ))
 def test_get_link(test_url: str, expected: set[str]):
    result = Redgifs._get_link(test_url)
-    assert result == expected
+    # Compare file names only: the thumbs subdomain digit and any trailing query string may vary.
+    patterns = [r'https://thumbs\d\.redgifs\.com/' + re.escape(e) for e in expected]
+    assert len(result) == len(expected) and all(any(re.match(p, r) for p in patterns) for r in result)


@pytest.mark.online
--- a/tests/site_downloaders/test_vidble.py
+++ b/tests/site_downloaders/test_vidble.py
@ -30,9 +30,6 @@ def test_change_med_url(test_url: str, expected: str):
        'https://www.vidble.com/VWuNsnLJMD.jpg',
        'https://www.vidble.com/sMmM8O650W.jpg',
    }),
-    ('https://www.vidble.com/watch?v=joC6b7cgs2Tnucx7dhDoyqKPbr7TQUA5', {
-        'https://www.vidble.com/joC6b7cgs2Tnucx7dhDoyqKPbr7TQUA5.mp4',
-    }),
    ('https://www.vidble.com/pHuwWkOcEb', {
        'https://www.vidble.com/pHuwWkOcEb.jpg',
    }),
@ -42,6 +39,7 @@ def test_get_links(test_url: str, expected: set[str]):
    assert results == expected


+@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected_hashes'), (
    ('https://www.vidble.com/show/UxsvAssYe5', {
        '0ef2f8e0e0b45936d2fb3e6fbdf67e28',
--- a/tests/site_downloaders/test_youtube.py
+++ b/tests/site_downloaders/test_youtube.py
@ -15,7 +15,6 @@ from bdfr.site_downloaders.youtube import Youtube
@pytest.mark.parametrize(('test_url', 'expected_hash'), (
    ('https://www.youtube.com/watch?v=uSm2VDgRIUs', '2d60b54582df5b95ec72bb00b580d2ff'),
    ('https://www.youtube.com/watch?v=GcI7nxQj7HA', '5db0fc92a0a7fb9ac91e63505eea9cf0'),
-    ('https://youtu.be/TMqPOlp4tNo', 'ceb4c2cb1a9bf79617623b2aa57e18fd'),  # Age restricted
 ))
 def test_find_resources_good(test_url: str, expected_hash: str):
    test_submission = MagicMock()