1
0
Fork 0
mirror of synced 2024-05-14 01:02:42 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/gallery.py

49 lines
1.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import logging
2021-02-25 23:40:08 +13:00
from typing import Optional
import requests
2021-02-11 12:10:40 +13:00
from praw.models import Submission
2021-04-12 19:58:32 +12:00
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class Gallery(BaseDownloader):
    """Downloader for Reddit gallery submissions.

    Each gallery item carries a ``media_id``; the direct image URL is found by
    probing ``https://i.redd.it/<media_id><ext>`` for a fixed set of image
    extensions and keeping the first one that answers with HTTP 200.
    """

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one ``Resource`` per image found in the submission's gallery.

        The gallery items are read from ``self.post.gallery_data['items']``.
        If that lookup raises ``AttributeError`` or ``TypeError``, the first
        entry of ``self.post.crosspost_parent_list`` is tried instead.

        Args:
            authenticator: Not used by this downloader.

        Returns:
            A list of ``Resource`` objects, one per resolved image URL.

        Raises:
            SiteDownloaderError: If no gallery data can be located on the
                submission or its first crosspost parent, or if no image URL
                could be resolved for any gallery item.
        """
        try:
            image_urls = self._get_links(self.post.gallery_data['items'])
        except (AttributeError, TypeError):
            # No usable gallery data on the post itself: fall back to the
            # first crosspost parent.
            try:
                image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items'])
            except (AttributeError, IndexError, TypeError, KeyError) as err:
                logger.error(f'Could not find gallery data in submission {self.post.id}')
                logger.exception('Gallery image find failure')
                # Chain explicitly so the underlying lookup failure is kept as the cause.
                raise SiteDownloaderError('No images found in Reddit gallery') from err

        if not image_urls:
            raise SiteDownloaderError('No images found in Reddit gallery')
        return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls]

    @staticmethod
    def _get_links(id_dict: list[dict], timeout: float = 10.0) -> list[str]:
        """Resolve gallery items to direct ``i.redd.it`` image URLs.

        For every item, each candidate extension is probed in order with an
        HTTP HEAD request; the first URL answering with status 200 is kept.
        Items for which no candidate answers with 200 are omitted from the
        result (a warning is logged for each).

        Args:
            id_dict: Iterable of gallery items; each item must provide a
                ``'media_id'`` key.
            timeout: Seconds to wait for each HEAD request before
                ``requests`` gives up. The default of 10 seconds is an
                arbitrary bound chosen so a stalled connection cannot hang
                the download forever.

        Returns:
            The resolved image URLs, in the same order as the input items.

        Raises:
            KeyError: If an item has no ``'media_id'`` key.

        Exceptions raised by ``requests.head`` (including timeouts) are not
        caught here and propagate to the caller.
        """
        # Loop-invariant, so built once rather than once per gallery item.
        possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
        out = []
        for item in id_dict:
            image_id = item['media_id']
            for extension in possible_extensions:
                test_url = f'https://i.redd.it/{image_id}{extension}'
                response = requests.head(test_url, timeout=timeout)
                if response.status_code == 200:
                    out.append(test_url)
                    break
            else:
                # No extension matched: the item is skipped, but not silently.
                logger.warning(f'Could not resolve an image URL for gallery item {image_id}')
        return out