1
0
Fork 0
mirror of synced 2024-06-27 18:40:34 +12:00

Merge pull request #477 from Serene-Arc/bug_fix_472

This commit is contained in:
Serene 2021-06-30 12:03:46 +10:00 committed by GitHub
commit fdda8f95e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 49 deletions

View file

@ -5,6 +5,7 @@ import re
from typing import Optional from typing import Optional
import bs4 import bs4
import requests
from praw.models import Submission from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError from bdfr.exceptions import SiteDownloaderError
@ -20,21 +21,21 @@ class Gallery(BaseDownloader):
super().__init__(post) super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
image_urls = self._get_links(self.post.url) image_urls = self._get_links(self.post.gallery_data['items'])
if not image_urls: if not image_urls:
raise SiteDownloaderError('No images found in Reddit gallery') raise SiteDownloaderError('No images found in Reddit gallery')
return [Resource(self.post, url) for url in image_urls] return [Resource(self.post, url) for url in image_urls]
@staticmethod @ staticmethod
def _get_links(url: str) -> list[str]: def _get_links(id_dict: list[dict]) -> list[str]:
resource_headers = { out = []
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)' for item in id_dict:
' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64', image_id = item['media_id']
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
} for extension in possible_extensions:
page = Gallery.retrieve_url(url, headers=resource_headers) test_url = f'https://i.redd.it/{image_id}{extension}'
soup = bs4.BeautifulSoup(page.text, 'html.parser') response = requests.head(test_url)
if response.status_code == 200:
links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')}) out.append(test_url)
links = [link.get('href') for link in links] break
return links return out

View file

@ -37,9 +37,10 @@ class Imgur(BaseDownloader):
@staticmethod @staticmethod
def _get_data(link: str) -> dict: def _get_data(link: str) -> dict:
if re.match(r'.*\.gifv$', link): link = link.rstrip('?')
if re.match(r'(?i).*\.gifv$', link):
link = link.replace('i.imgur', 'imgur') link = link.replace('i.imgur', 'imgur')
link = re.sub('\\.gifv$', '', link) link = re.sub('(?i)\\.gifv$', '', link)
res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})

View file

@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery
@pytest.mark.online @pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected'), ( @pytest.mark.parametrize(('test_ids', 'expected'), (
('https://www.reddit.com/gallery/m6lvrh', { ([
'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&' {'media_id': '18nzv9ch0hn61'},
'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8', {'media_id': 'jqkizcch0hn61'},
'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&' {'media_id': 'k0fnqzbh0hn61'},
'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805', {'media_id': 'm3gamzbh0hn61'},
'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&' ], {
'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76', 'https://i.redd.it/18nzv9ch0hn61.jpg',
'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&' 'https://i.redd.it/jqkizcch0hn61.jpg',
'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444' 'https://i.redd.it/k0fnqzbh0hn61.jpg',
'https://i.redd.it/m3gamzbh0hn61.jpg'
}), }),
('https://www.reddit.com/gallery/ljyy27', { ([
'https://preview.redd.it/04vxj25uqih61.png?width=92&' {'media_id': '04vxj25uqih61'},
'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4', {'media_id': '0fnx83kpqih61'},
'https://preview.redd.it/0fnx83kpqih61.png?width=241&' {'media_id': '7zkmr1wqqih61'},
'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a', {'media_id': 'u37k5gxrqih61'},
'https://preview.redd.it/7zkmr1wqqih61.png?width=237&' ], {
'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0', 'https://i.redd.it/04vxj25uqih61.png',
'https://preview.redd.it/u37k5gxrqih61.png?width=443&' 'https://i.redd.it/0fnx83kpqih61.png',
'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862' 'https://i.redd.it/7zkmr1wqqih61.png',
'https://i.redd.it/u37k5gxrqih61.png'
}), }),
)) ))
def test_gallery_get_links(test_url: str, expected: set[str]): def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
results = Gallery._get_links(test_url) results = Gallery._get_links(test_ids)
assert set(results) == expected assert set(results) == expected
@ -39,16 +41,20 @@ def test_gallery_get_links(test_url: str, expected: set[str]):
@pytest.mark.reddit @pytest.mark.reddit
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), ( @pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
('m6lvrh', { ('m6lvrh', {
'6c8a892ae8066cbe119218bcaac731e1', '5c42b8341dd56eebef792e86f3981c6a',
'93ce177f8cb7994906795f4615114d13', '8f38d76da46f4057bf2773a778e725ca',
'9a293adf19354f14582608cf22124574', 'f5776f8f90491c8b770b8e0a6bfa49b3',
'b73e2c3daee02f99404644ea02f1ae65' 'fa1a43c94da30026ad19a9813a0ed2c2',
}), }),
('ljyy27', { ('ljyy27', {
'1bc38bed88f9c4770e22a37122d5c941', '359c203ec81d0bc00e675f1023673238',
'2539a92b78f3968a069df2dffe2279f9', '79262fd46bce5bfa550d878a3b898be4',
'37dea50281c219b905e46edeefc1a18d', '808c35267f44acb523ce03bfa5687404',
'ec4924cf40549728dcf53dd40bc7a73c' 'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
}),
('nxyahw', {
'b89a3f41feb73ec1136ec4ffa7353eb1',
'cabb76fd6fd11ae6e115a2039eb09f04',
}), }),
)) ))
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit): def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):

View file

@ -132,10 +132,16 @@ def test_imgur_extension_validation_bad(test_extension: str):
), ),
( (
'https://i.imgur.com/lFJai6i.gifv', 'https://i.imgur.com/lFJai6i.gifv',
( ('01a6e79a30bec0e644e5da12365d5071',),
'01a6e79a30bec0e644e5da12365d5071', ),
), (
) 'https://i.imgur.com/ywSyILa.gifv?',
('56d4afc32d2966017c38d98568709b45',),
),
(
'https://imgur.com/ubYwpbk.GIFV',
('d4a774aac1667783f9ed3a1bd02fac0c',),
),
)) ))
def test_find_resources(test_url: str, expected_hashes: list[str]): def test_find_resources(test_url: str, expected_hashes: list[str]):
mock_download = Mock() mock_download = Mock()

View file

@ -31,6 +31,7 @@ def test_get_link(test_url: str, expected: str):
('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'), ('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'), ('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'), ('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
)) ))
def test_download_resource(test_url: str, expected_hash: str): def test_download_resource(test_url: str, expected_hash: str):
mock_submission = Mock() mock_submission = Mock()