1
0
Fork 0
mirror of synced 2024-06-19 02:34:45 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/gallery.py

50 lines
1.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
2021-02-25 23:40:08 +13:00
from typing import Optional
import requests
2021-02-11 12:10:40 +13:00
from praw.models import Submission
2021-04-12 19:58:32 +12:00
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class Gallery(BaseDownloader):
    """Site downloader that resolves the images of a Reddit gallery submission.

    Image URLs are not read from the submission directly; they are discovered
    by probing ``https://i.redd.it/<media_id><extension>`` with HEAD requests
    for each gallery item and a fixed list of candidate extensions.
    """

    def __init__(self, post: Submission):
        """Initialise the downloader for ``post`` by delegating to the base class."""
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one ``Resource`` per image found in the gallery.

        The gallery items are read from ``self.post.gallery_data["items"]``.
        If that lookup fails, the first entry of
        ``self.post.crosspost_parent_list`` is tried instead.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Returns:
            A list of ``Resource`` objects, each built from the post, an image
            URL and the result of ``Resource.retry_download(url)``.

        Raises:
            SiteDownloaderError: If no gallery data can be located in either
                place, or if no image URL could be resolved.
        """
        try:
            image_urls = self._get_links(self.post.gallery_data["items"])
        # KeyError is handled as well so that a gallery_data mapping without
        # "items" (or an item without "media_id") falls through to the
        # crosspost lookup instead of escaping as a raw KeyError.
        except (AttributeError, TypeError, KeyError):
            try:
                image_urls = self._get_links(self.post.crosspost_parent_list[0]["gallery_data"]["items"])
            except (AttributeError, IndexError, TypeError, KeyError) as e:
                logger.error(f"Could not find gallery data in submission {self.post.id}")
                logger.exception("Gallery image find failure")
                # Chain explicitly so the underlying lookup failure stays attached.
                raise SiteDownloaderError("No images found in Reddit gallery") from e

        if not image_urls:
            raise SiteDownloaderError("No images found in Reddit gallery")
        return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls]

    @staticmethod
    def _get_links(id_dict: list[dict], timeout: float = 10.0) -> list[str]:
        """Resolve a direct ``i.redd.it`` URL for each gallery item.

        For every item, each candidate extension is tried in order with a HEAD
        request; the first URL answering with status 200 is kept. Items for
        which no candidate answers 200 are silently omitted from the result.

        Args:
            id_dict: Gallery items; each must provide a ``"media_id"`` key.
            timeout: Seconds to wait on each HEAD request before ``requests``
                gives up. Without a timeout a stalled connection would block
                the downloader indefinitely.

        Returns:
            The resolved image URLs, in the order of the input items.

        Raises:
            KeyError: If an item has no ``"media_id"`` key.
            requests.RequestException: Network failures (including timeouts)
                raised by ``requests.head`` are not caught here.
        """
        # Loop-invariant, so built once rather than once per item.
        possible_extensions = (".jpg", ".png", ".gif", ".gifv", ".jpeg")
        out = []
        for item in id_dict:
            image_id = item["media_id"]
            for extension in possible_extensions:
                test_url = f"https://i.redd.it/{image_id}{extension}"
                response = requests.head(test_url, timeout=timeout)
                if response.status_code == 200:
                    out.append(test_url)
                    # First matching extension wins; stop probing this item.
                    break
        return out