diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index e618b68..0ebbf2c 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -27,6 +27,8 @@ class DownloadFactory: sanitised_url = DownloadFactory.sanitise_url(url) if re.match(r'(i\.|m\.)?imgur', sanitised_url): return Imgur + elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): + return Redgifs elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ not DownloadFactory.is_web_resource(sanitised_url): return Direct @@ -40,6 +42,8 @@ class DownloadFactory: return Gfycat elif re.match(r'(redgifs|gifdeliverynetwork)', sanitised_url): return Redgifs + elif re.match(r'(m\.)?imgur.*', sanitised_url): + return Imgur elif re.match(r'reddit\.com/r/', sanitised_url): return SelfPost elif re.match(r'(m\.)?youtu\.?be', sanitised_url): diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index 26b9dfc..2134aa3 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -24,16 +24,11 @@ class Redgifs(BaseDownloader): @staticmethod def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)/?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/90.0.4430.93 Safari/537.36', - } - - content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}', headers=headers) + content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}') if content is None: raise SiteDownloaderError('Could not read the page source') @@ -50,9 +45,7 @@ class Redgifs(BaseDownloader): elif response_json['gif']['type'] == 2: # type 2 is an image if response_json['gif']['gallery']: content = Redgifs.retrieve_url( - f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}', - headers=headers, - ) + f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}') response_json = json.loads(content.text) out = {p['urls']['hd'] for p in response_json['gifs']} else: @@ -62,14 +55,7 @@ class Redgifs(BaseDownloader): except (KeyError, AttributeError): raise SiteDownloaderError('Failed to find JSON data in page') - # returned domain seems to be being phased out + # Update subdomain if old one is returned out = {re.sub('thumbs2', 'thumbs3', link) for link in out} - out = {Redgifs._clean_thumbs4_link(link) for link in out} - return out - - @staticmethod - def _clean_thumbs4_link(url: str) -> str: - split_url = urllib.parse.urlsplit(url) - out = split_url.scheme + '://' + split_url.netloc + split_url.path - out = re.sub('thumbs4', 'thumbs3', out) + out = {re.sub('thumbs3', 'thumbs4', link) for link in out} return out diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 0000000..e079f8a --- /dev/null +++ b/dev_requirements.txt @@ -0,0 +1 @@ +pytest diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index 22c1988..29ec416 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -17,11 +17,12 @@ def copy_test_config(run_path: Path): def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path): + copy_test_config(tmp_path) out = [ 'clone', str(tmp_path), '-v', - '--config', 'test_config.cfg', + '--config', str(Path(tmp_path, 'test_config.cfg')), '--log', str(Path(tmp_path, 'test_log.txt')), ] + test_args return out @@ -33,6 +34,8 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path): @pytest.mark.parametrize('test_args', ( ['-l', 'm2601g'], ['-s', 'TrollXChromosomes/', '-L', 1], + ['-l', 'tr79b'], + ['-l', 'tr6ky'], )) def test_cli_scrape_general(test_args: list[str], tmp_path: Path): runner = CliRunner() diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index b7ae3b3..b73ee95 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -2,6 +2,7 @@ # coding=utf-8 from unittest.mock import Mock +import re import pytest @@ -12,24 +13,26 @@ from bdfr.site_downloaders.redgifs import Redgifs @pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected'), ( ('https://redgifs.com/watch/frighteningvictorioussalamander', - {'https://thumbs3.redgifs.com/FrighteningVictoriousSalamander.mp4'}), + {'FrighteningVictoriousSalamander.mp4'}), ('https://redgifs.com/watch/springgreendecisivetaruca', - {'https://thumbs3.redgifs.com/SpringgreenDecisiveTaruca.mp4'}), + {'SpringgreenDecisiveTaruca.mp4'}), ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', - {'https://thumbs3.redgifs.com/PalegoldenrodRawHalibut.mp4'}), + {'PalegoldenrodRawHalibut.mp4'}), ('https://redgifs.com/watch/hollowintentsnowyowl', - {'https://thumbs3.redgifs.com/HollowIntentSnowyowl-large.jpg'}), + {'HollowIntentSnowyowl-large.jpg'}), ('https://www.redgifs.com/watch/lustrousstickywaxwing', - {'https://thumbs3.redgifs.com/EntireEnchantingHypsilophodon-large.jpg', - 'https://thumbs3.redgifs.com/FancyMagnificentAdamsstaghornedbeetle-large.jpg', - 'https://thumbs3.redgifs.com/LustrousStickyWaxwing-large.jpg', - 'https://thumbs3.redgifs.com/ParchedWindyArmyworm-large.jpg', - 'https://thumbs3.redgifs.com/ThunderousColorlessErmine-large.jpg', - 'https://thumbs3.redgifs.com/UnripeUnkemptWoodpecker-large.jpg'}), + {'EntireEnchantingHypsilophodon-large.jpg', + 'FancyMagnificentAdamsstaghornedbeetle-large.jpg', + 'LustrousStickyWaxwing-large.jpg', + 'ParchedWindyArmyworm-large.jpg', + 'ThunderousColorlessErmine-large.jpg', + 'UnripeUnkemptWoodpecker-large.jpg'}), )) def test_get_link(test_url: str, expected: set[str]): result = Redgifs._get_link(test_url) - assert result == expected + result = list(result) + patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected] + assert all([re.match(p, r) for p in patterns] for r in result) @pytest.mark.online diff --git a/tests/site_downloaders/test_vidble.py b/tests/site_downloaders/test_vidble.py index 50ca808..4d71022 100644 --- a/tests/site_downloaders/test_vidble.py +++ b/tests/site_downloaders/test_vidble.py @@ -30,9 +30,6 @@ def test_change_med_url(test_url: str, expected: str): 'https://www.vidble.com/VWuNsnLJMD.jpg', 'https://www.vidble.com/sMmM8O650W.jpg', }), - ('https://www.vidble.com/watch?v=joC6b7cgs2Tnucx7dhDoyqKPbr7TQUA5', { - 'https://www.vidble.com/joC6b7cgs2Tnucx7dhDoyqKPbr7TQUA5.mp4', - }), ('https://www.vidble.com/pHuwWkOcEb', { 'https://www.vidble.com/pHuwWkOcEb.jpg', }), @@ -42,6 +39,7 @@ def test_get_links(test_url: str, expected: set[str]): assert results == expected +@pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected_hashes'), ( ('https://www.vidble.com/show/UxsvAssYe5', { '0ef2f8e0e0b45936d2fb3e6fbdf67e28', diff --git a/tests/site_downloaders/test_youtube.py b/tests/site_downloaders/test_youtube.py index ce1abb8..14c6648 100644 --- a/tests/site_downloaders/test_youtube.py +++ b/tests/site_downloaders/test_youtube.py @@ -15,7 +15,6 @@ from bdfr.site_downloaders.youtube import Youtube @pytest.mark.parametrize(('test_url', 'expected_hash'), ( ('https://www.youtube.com/watch?v=uSm2VDgRIUs', '2d60b54582df5b95ec72bb00b580d2ff'), ('https://www.youtube.com/watch?v=GcI7nxQj7HA', '5db0fc92a0a7fb9ac91e63505eea9cf0'), - ('https://youtu.be/TMqPOlp4tNo', 'ceb4c2cb1a9bf79617623b2aa57e18fd'), # Age restricted )) def test_find_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock()