Merge pull request #477 from Serene-Arc/bug_fix_472
This commit is contained in:
commit
fdda8f95e6
|
@ -5,6 +5,7 @@ import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
|
import requests
|
||||||
from praw.models import Submission
|
from praw.models import Submission
|
||||||
|
|
||||||
from bdfr.exceptions import SiteDownloaderError
|
from bdfr.exceptions import SiteDownloaderError
|
||||||
|
@ -20,21 +21,21 @@ class Gallery(BaseDownloader):
|
||||||
super().__init__(post)
|
super().__init__(post)
|
||||||
|
|
||||||
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
|
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
|
||||||
image_urls = self._get_links(self.post.url)
|
image_urls = self._get_links(self.post.gallery_data['items'])
|
||||||
if not image_urls:
|
if not image_urls:
|
||||||
raise SiteDownloaderError('No images found in Reddit gallery')
|
raise SiteDownloaderError('No images found in Reddit gallery')
|
||||||
return [Resource(self.post, url) for url in image_urls]
|
return [Resource(self.post, url) for url in image_urls]
|
||||||
|
|
||||||
@staticmethod
|
@ staticmethod
|
||||||
def _get_links(url: str) -> list[str]:
|
def _get_links(id_dict: list[dict]) -> list[str]:
|
||||||
resource_headers = {
|
out = []
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)'
|
for item in id_dict:
|
||||||
' Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64',
|
image_id = item['media_id']
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
|
||||||
}
|
for extension in possible_extensions:
|
||||||
page = Gallery.retrieve_url(url, headers=resource_headers)
|
test_url = f'https://i.redd.it/{image_id}{extension}'
|
||||||
soup = bs4.BeautifulSoup(page.text, 'html.parser')
|
response = requests.head(test_url)
|
||||||
|
if response.status_code == 200:
|
||||||
links = soup.findAll('a', attrs={'target': '_blank', 'href': re.compile(r'https://preview\.redd\.it.*')})
|
out.append(test_url)
|
||||||
links = [link.get('href') for link in links]
|
break
|
||||||
return links
|
return out
|
||||||
|
|
|
@ -37,9 +37,10 @@ class Imgur(BaseDownloader):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_data(link: str) -> dict:
|
def _get_data(link: str) -> dict:
|
||||||
if re.match(r'.*\.gifv$', link):
|
link = link.rstrip('?')
|
||||||
|
if re.match(r'(?i).*\.gifv$', link):
|
||||||
link = link.replace('i.imgur', 'imgur')
|
link = link.replace('i.imgur', 'imgur')
|
||||||
link = re.sub('\\.gifv$', '', link)
|
link = re.sub('(?i)\\.gifv$', '', link)
|
||||||
|
|
||||||
res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})
|
res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'})
|
||||||
|
|
||||||
|
|
|
@ -8,30 +8,32 @@ from bdfr.site_downloaders.gallery import Gallery
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
@pytest.mark.parametrize(('test_url', 'expected'), (
|
@pytest.mark.parametrize(('test_ids', 'expected'), (
|
||||||
('https://www.reddit.com/gallery/m6lvrh', {
|
([
|
||||||
'https://preview.redd.it/18nzv9ch0hn61.jpg?width=4160&'
|
{'media_id': '18nzv9ch0hn61'},
|
||||||
'format=pjpg&auto=webp&s=470a825b9c364e0eace0036882dcff926f821de8',
|
{'media_id': 'jqkizcch0hn61'},
|
||||||
'https://preview.redd.it/jqkizcch0hn61.jpg?width=4160&'
|
{'media_id': 'k0fnqzbh0hn61'},
|
||||||
'format=pjpg&auto=webp&s=ae4f552a18066bb6727676b14f2451c5feecf805',
|
{'media_id': 'm3gamzbh0hn61'},
|
||||||
'https://preview.redd.it/k0fnqzbh0hn61.jpg?width=4160&'
|
], {
|
||||||
'format=pjpg&auto=webp&s=c6a10fececdc33983487c16ad02219fd3fc6cd76',
|
'https://i.redd.it/18nzv9ch0hn61.jpg',
|
||||||
'https://preview.redd.it/m3gamzbh0hn61.jpg?width=4160&'
|
'https://i.redd.it/jqkizcch0hn61.jpg',
|
||||||
'format=pjpg&auto=webp&s=0dd90f324711851953e24873290b7f29ec73c444'
|
'https://i.redd.it/k0fnqzbh0hn61.jpg',
|
||||||
|
'https://i.redd.it/m3gamzbh0hn61.jpg'
|
||||||
}),
|
}),
|
||||||
('https://www.reddit.com/gallery/ljyy27', {
|
([
|
||||||
'https://preview.redd.it/04vxj25uqih61.png?width=92&'
|
{'media_id': '04vxj25uqih61'},
|
||||||
'format=png&auto=webp&s=6513f3a5c5128ee7680d402cab5ea4fb2bbeead4',
|
{'media_id': '0fnx83kpqih61'},
|
||||||
'https://preview.redd.it/0fnx83kpqih61.png?width=241&'
|
{'media_id': '7zkmr1wqqih61'},
|
||||||
'format=png&auto=webp&s=655e9deb6f499c9ba1476eaff56787a697e6255a',
|
{'media_id': 'u37k5gxrqih61'},
|
||||||
'https://preview.redd.it/7zkmr1wqqih61.png?width=237&'
|
], {
|
||||||
'format=png&auto=webp&s=19de214e634cbcad9959f19570c616e29be0c0b0',
|
'https://i.redd.it/04vxj25uqih61.png',
|
||||||
'https://preview.redd.it/u37k5gxrqih61.png?width=443&'
|
'https://i.redd.it/0fnx83kpqih61.png',
|
||||||
'format=png&auto=webp&s=e74dae31841fe4a2545ffd794d3b25b9ff0eb862'
|
'https://i.redd.it/7zkmr1wqqih61.png',
|
||||||
|
'https://i.redd.it/u37k5gxrqih61.png'
|
||||||
}),
|
}),
|
||||||
))
|
))
|
||||||
def test_gallery_get_links(test_url: str, expected: set[str]):
|
def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
|
||||||
results = Gallery._get_links(test_url)
|
results = Gallery._get_links(test_ids)
|
||||||
assert set(results) == expected
|
assert set(results) == expected
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,16 +41,20 @@ def test_gallery_get_links(test_url: str, expected: set[str]):
|
||||||
@pytest.mark.reddit
|
@pytest.mark.reddit
|
||||||
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
|
@pytest.mark.parametrize(('test_submission_id', 'expected_hashes'), (
|
||||||
('m6lvrh', {
|
('m6lvrh', {
|
||||||
'6c8a892ae8066cbe119218bcaac731e1',
|
'5c42b8341dd56eebef792e86f3981c6a',
|
||||||
'93ce177f8cb7994906795f4615114d13',
|
'8f38d76da46f4057bf2773a778e725ca',
|
||||||
'9a293adf19354f14582608cf22124574',
|
'f5776f8f90491c8b770b8e0a6bfa49b3',
|
||||||
'b73e2c3daee02f99404644ea02f1ae65'
|
'fa1a43c94da30026ad19a9813a0ed2c2',
|
||||||
}),
|
}),
|
||||||
('ljyy27', {
|
('ljyy27', {
|
||||||
'1bc38bed88f9c4770e22a37122d5c941',
|
'359c203ec81d0bc00e675f1023673238',
|
||||||
'2539a92b78f3968a069df2dffe2279f9',
|
'79262fd46bce5bfa550d878a3b898be4',
|
||||||
'37dea50281c219b905e46edeefc1a18d',
|
'808c35267f44acb523ce03bfa5687404',
|
||||||
'ec4924cf40549728dcf53dd40bc7a73c'
|
'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
|
||||||
|
}),
|
||||||
|
('nxyahw', {
|
||||||
|
'b89a3f41feb73ec1136ec4ffa7353eb1',
|
||||||
|
'cabb76fd6fd11ae6e115a2039eb09f04',
|
||||||
}),
|
}),
|
||||||
))
|
))
|
||||||
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):
|
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):
|
||||||
|
|
|
@ -132,10 +132,16 @@ def test_imgur_extension_validation_bad(test_extension: str):
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
'https://i.imgur.com/lFJai6i.gifv',
|
'https://i.imgur.com/lFJai6i.gifv',
|
||||||
(
|
('01a6e79a30bec0e644e5da12365d5071',),
|
||||||
'01a6e79a30bec0e644e5da12365d5071',
|
),
|
||||||
|
(
|
||||||
|
'https://i.imgur.com/ywSyILa.gifv?',
|
||||||
|
('56d4afc32d2966017c38d98568709b45',),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
'https://imgur.com/ubYwpbk.GIFV',
|
||||||
|
('d4a774aac1667783f9ed3a1bd02fac0c',),
|
||||||
),
|
),
|
||||||
)
|
|
||||||
))
|
))
|
||||||
def test_find_resources(test_url: str, expected_hashes: list[str]):
|
def test_find_resources(test_url: str, expected_hashes: list[str]):
|
||||||
mock_download = Mock()
|
mock_download = Mock()
|
||||||
|
|
|
@ -31,6 +31,7 @@ def test_get_link(test_url: str, expected: str):
|
||||||
('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
|
('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
|
||||||
('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
|
('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
|
||||||
('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
|
('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
|
||||||
|
('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
|
||||||
))
|
))
|
||||||
def test_download_resource(test_url: str, expected_hash: str):
|
def test_download_resource(test_url: str, expected_hash: str):
|
||||||
mock_submission = Mock()
|
mock_submission = Mock()
|
||||||
|
|
Loading…
Reference in a new issue