1
0
Fork 0
mirror of synced 2024-04-29 01:42:31 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/gallery.py
2021-12-20 20:43:09 +10:00

49 lines
1.8 KiB
Python

#!/usr/bin/env python3
import logging
from typing import Optional
import requests
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
logger = logging.getLogger(__name__)
class Gallery(BaseDownloader):
def __init__(self, post: Submission):
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
try:
image_urls = self._get_links(self.post.gallery_data['items'])
except (AttributeError, TypeError):
try:
image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items'])
except (AttributeError, IndexError, TypeError, KeyError):
logger.error(f'Could not find gallery data in submission {self.post.id}')
logger.exception('Gallery image find failure')
raise SiteDownloaderError('No images found in Reddit gallery')
if not image_urls:
raise SiteDownloaderError('No images found in Reddit gallery')
return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls]
@ staticmethod
def _get_links(id_dict: list[dict]) -> list[str]:
out = []
for item in id_dict:
image_id = item['media_id']
possible_extensions = ('.jpg', '.png', '.gif', '.gifv', '.jpeg')
for extension in possible_extensions:
test_url = f'https://i.redd.it/{image_id}{extension}'
response = requests.head(test_url)
if response.status_code == 200:
out.append(test_url)
break
return out