bulk-downloader-for-reddit/bdfr/site_downloaders/redgifs.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import re
from typing import Optional

import requests
from praw.models import Submission

from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader


class Redgifs(BaseDownloader):
    """Downloader that resolves a Redgifs post to its direct media URLs through the Redgifs v2 API."""

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one ``Resource`` per media URL resolved for ``self.post.url``.

        ``authenticator`` is not used by this downloader.

        Raises:
            SiteDownloaderError: if the ID cannot be extracted or the API responses cannot be interpreted.
        """
        media_urls = self._get_link(self.post.url)
        return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]

    @staticmethod
    def _get_id(url: str) -> str:
        """Extract the lower-cased Redgifs ID from ``url``.

        The ID is the last path segment with any fragment, query string, file extension and
        ``-mobile`` suffix removed.

        Raises:
            SiteDownloaderError: if ``url`` has no path segment to read or the resulting ID is empty.
        """
        try:
            # Strip every trailing slash; otherwise the last path segment (and so the ID) is empty.
            url = url.rstrip("/")
            redgif_id = re.match(r".*/(.*?)(?:#.*|\?.*|\..{0,})?$", url).group(1).lower()
        except AttributeError as e:
            # re.match() returned None (no "/" in the URL) or url is not a string.
            raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}") from e
        redgif_id = redgif_id.removesuffix("-mobile")
        if not redgif_id:
            # e.g. ".../watch/?x" matches the pattern but captures nothing; don't query the API with it.
            raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}")
        return redgif_id

    @staticmethod
    def _hd_is_available(hd_url: str, headers: dict[str, str]) -> bool:
        """Return True when a GET for ``hd_url`` answers with a success status.

        The request is streamed and closed immediately so only the status line and headers are
        fetched rather than the whole video. Connection errors and timeouts count as "unavailable".
        """
        try:
            with requests.get(hd_url, headers=headers, stream=True, timeout=30) as response:
                return response.ok
        except requests.RequestException:
            return False

    @staticmethod
    def _get_link(url: str) -> set[str]:
        """Resolve a Redgifs URL to the set of direct media URLs it refers to.

        A temporary API token is requested for every call, then the gif metadata is fetched.
        Videos (type 1) yield the HD URL when it is reachable and the SD URL otherwise; images
        (type 2) yield either the single HD URL or the HD URL of every gallery entry.

        Raises:
            SiteDownloaderError: if the token, the metadata or the expected JSON fields are missing
                or malformed.
        """
        redgif_id = Redgifs._get_id(url)

        token_response = Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary")
        try:
            auth_token = json.loads(token_response.text).get("token")
        except (json.JSONDecodeError, AttributeError, TypeError) as e:
            # Invalid JSON, a non-object payload, or no response at all.
            raise SiteDownloaderError("Unable to retrieve Redgifs API token") from e
        if not auth_token:
            raise SiteDownloaderError("Unable to retrieve Redgifs API token")

        headers = {
            "referer": "https://www.redgifs.com/",
            "origin": "https://www.redgifs.com",
            "content-type": "application/json",
            "Authorization": f"Bearer {auth_token}",
        }

        content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers)

        if content is None:
            raise SiteDownloaderError("Could not read the page source")

        try:
            response_json = json.loads(content.text)
        except json.JSONDecodeError as e:
            raise SiteDownloaderError(f"Received data was not valid JSON: {e}") from e

        out = set()
        try:
            gif = response_json["gif"]
            if gif["type"] == 1:  # type 1 is a video
                hd_url = gif["urls"].get("hd")
                if hd_url and Redgifs._hd_is_available(hd_url, headers):
                    out.add(hd_url)
                else:
                    # HD is missing or unreachable: fall back to the SD rendition.
                    out.add(gif["urls"]["sd"])
            elif gif["type"] == 2:  # type 2 is an image
                gallery_id = gif.get("gallery")
                if gallery_id:
                    # Send the same authenticated headers as every other API request.
                    content = Redgifs.retrieve_url(
                        f"https://api.redgifs.com/v2/gallery/{gallery_id}",
                        headers=headers,
                    )
                    gallery_json = json.loads(content.text)
                    out = {p["urls"]["hd"] for p in gallery_json["gifs"]}
                else:
                    out.add(gif["urls"]["hd"])
            else:
                raise KeyError(f"Unknown Redgifs media type {gif['type']!r}")
        except json.JSONDecodeError as e:
            raise SiteDownloaderError(f"Received data was not valid JSON: {e}") from e
        except (KeyError, AttributeError, TypeError) as e:
            # TypeError covers null / non-mapping values where an object was expected.
            raise SiteDownloaderError("Failed to find JSON data in page") from e

        # Update subdomain if an old one is returned (thumbs2 and thumbs3 both become thumbs4).
        return {re.sub(r"thumbs[23]", "thumbs4", link) for link in out}
Move to inheritance system for downloaders 2021-02-07 17:46:20 +13:00			`#!/usr/bin/env python3`
Standardize shebang and coding declaration Standardizes shebang and coding declarations. Coding matches what's used by install tools such as pip(x). Removes a few init files that were not needed. 2022-12-20 12:32:37 +13:00			`# -*- coding: utf-8 -*-`
Move to inheritance system for downloaders 2021-02-07 17:46:20 +13:00
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00			`import json`
Refactor redgifs 2021-03-20 01:28:41 +13:00			`import re`
Integrate new base_downloader class 2021-02-25 23:40:08 +13:00			`from typing import Optional`
Pep8 format (#184) * Format file to be PEP8 compliant * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Format file to PEP8 * Remove unused imports * Format file to PEP8 * Remove unused imports * Condense spacing 2021-02-06 21:35:50 +13:00
Format according to the black standard 2022-12-03 18:11:17 +13:00			`import requests`
Move to different program structure 2021-02-11 12:10:40 +13:00			`from praw.models import Submission`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
Remove unused imports 2021-04-23 23:06:16 +12:00			`from bdfr.exceptions import SiteDownloaderError`
Rename module 2021-04-12 19:58:32 +12:00			`from bdfr.resource import Resource`
			`from bdfr.site_authenticator import SiteAuthenticator`
Invert inheritance direction 2021-04-28 20:50:18 +12:00			`from bdfr.site_downloaders.base_downloader import BaseDownloader`
(maint) code clean up (#187) ## bdfr - Add the bound instance as method parameter - Change methods not using its bound instance to staticmethods - Fix dangerous default argument - Refactor the comparison involving `not` - Refactor unnecessary `else` / `elif` when `if` block has a `raise` statement - Refactor unnecessary `else` / `elif` when `if` block has a `return` statement - Refactor useless `else` block in the loop - Remove implicit `object` from the base class - Remove reimported module - Remove unnecessary generator - Remove unnecessary return statement - Remove unnecessary use of comprehension - Remove unused imports - Use `is` to compare type of objects - Using not x can cause unwanted results ## Dockerfile - use a pinned Python version tag instead of latest - leverage cached requirements Signed-off-by: Vladislav Doster <mvdoster@gmail.com> Co-authored-by: Ali Parlakçı <parlakciali@gmail.com> 2021-02-25 22:32:06 +13:00
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
Invert inheritance direction 2021-04-28 20:50:18 +12:00			`class Redgifs(BaseDownloader):`
Remove unused parameter 2021-02-15 18:12:27 +13:00			`def __init__(self, post: Submission):`
			`super().__init__(post)`
(maint) code clean up (#187) ## bdfr - Add the bound instance as method parameter - Change methods not using its bound instance to staticmethods - Fix dangerous default argument - Refactor the comparison involving `not` - Refactor unnecessary `else` / `elif` when `if` block has a `raise` statement - Refactor unnecessary `else` / `elif` when `if` block has a `return` statement - Refactor useless `else` block in the loop - Remove implicit `object` from the base class - Remove reimported module - Remove unnecessary generator - Remove unnecessary return statement - Remove unnecessary use of comprehension - Remove unused imports - Use `is` to compare type of objects - Using not x can cause unwanted results ## Dockerfile - use a pinned Python version tag instead of latest - leverage cached requirements Signed-off-by: Vladislav Doster <mvdoster@gmail.com> Co-authored-by: Ali Parlakçı <parlakciali@gmail.com> 2021-02-25 22:32:06 +13:00
Rename file and class 2021-02-26 21:57:05 +13:00			`def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`media_urls = self._get_link(self.post.url)`
Switch redgifs to dynamic file extensions 2022-02-20 18:48:02 +13:00			`return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
(maint) code clean up (#187) ## bdfr - Add the bound instance as method parameter - Change methods not using its bound instance to staticmethods - Fix dangerous default argument - Refactor the comparison involving `not` - Refactor unnecessary `else` / `elif` when `if` block has a `raise` statement - Refactor unnecessary `else` / `elif` when `if` block has a `return` statement - Refactor useless `else` block in the loop - Remove implicit `object` from the base class - Remove reimported module - Remove unnecessary generator - Remove unnecessary return statement - Remove unnecessary use of comprehension - Remove unused imports - Use `is` to compare type of objects - Using not x can cause unwanted results ## Dockerfile - use a pinned Python version tag instead of latest - leverage cached requirements Signed-off-by: Vladislav Doster <mvdoster@gmail.com> Co-authored-by: Ali Parlakçı <parlakciali@gmail.com> 2021-02-25 22:32:06 +13:00			`@staticmethod`
Redgifs updates Update Redgifs regex for further edge case. Add test for checking ID. 2022-12-25 14:52:45 +13:00			`def _get_id(url: str) -> str:`
Add defensive programming to site downloaders 2021-04-06 13:04:08 +12:00			`try:`
Redgifs fix Handle redgifs link with trailing / causing id to return empty string. 2022-12-20 05:02:06 +13:00			`if url.endswith("/"):`
			`url = url.removesuffix("/")`
More Redgifs coverage 2023-01-25 22:43:06 +13:00			`redgif_id = re.match(r".*/(.*?)(?:#.*|\?.*|\..{0,})?$", url).group(1).lower()`
Redgif updates Coverage for direct links. The direct link won't work because it will have the wrong auth anyway but this will at least end up with the right API call. 2022-12-27 14:25:20 +13:00			`if redgif_id.endswith("-mobile"):`
			`redgif_id = redgif_id.removesuffix("-mobile")`
Add defensive programming to site downloaders 2021-04-06 13:04:08 +12:00			`except AttributeError:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}")`
Redgifs updates Update Redgifs regex for further edge case. Add test for checking ID. 2022-12-25 14:52:45 +13:00			`return redgif_id`

			`@staticmethod`
			`def _get_link(url: str) -> set[str]:`
			`redgif_id = Redgifs._get_id(url)`
Add defensive programming to site downloaders 2021-04-06 13:04:08 +12:00
Format according to the black standard 2022-12-03 18:11:17 +13:00			`auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]`
Redgifs improvements Add check to verify token was received. Update headers sent to content API. Add availability check for videos to resolve last part of #472 where only SD version is available. 2022-11-28 12:07:43 +13:00			`if not auth_token:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`raise SiteDownloaderError("Unable to retrieve Redgifs API token")`
Redgifs improvements Add check to verify token was received. Update headers sent to content API. Add availability check for videos to resolve last part of #472 where only SD version is available. 2022-11-28 12:07:43 +13:00
Switch Redgifs to temporary tokens Initial switch to temporary tokens for Redgifs. Gets a new auth token for every API request. 2022-10-25 05:45:26 +13:00			`headers = {`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`"referer": "https://www.redgifs.com/",`
			`"origin": "https://www.redgifs.com",`
			`"content-type": "application/json",`
			`"Authorization": f"Bearer {auth_token}",`
Switch Redgifs to temporary tokens Initial switch to temporary tokens for Redgifs. Gets a new auth token for every API request. 2022-10-25 05:45:26 +13:00			`}`

Format according to the black standard 2022-12-03 18:11:17 +13:00			`content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers)`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
			`if content is None:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`raise SiteDownloaderError("Could not read the page source")`
Add defensive programming to site downloaders 2021-04-06 13:04:08 +12:00
			`try:`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`response_json = json.loads(content.text)`
Add defensive programming to site downloaders 2021-04-06 13:04:08 +12:00			`except json.JSONDecodeError as e:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`raise SiteDownloaderError(f"Received data was not valid JSON: {e}")`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`out = set()`
			`try:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`if response_json["gif"]["type"] == 1: # type 1 is a video`
			`if requests.get(response_json["gif"]["urls"]["hd"], headers=headers).ok:`
			`out.add(response_json["gif"]["urls"]["hd"])`
Redgifs improvements Add check to verify token was received. Update headers sent to content API. Add availability check for videos to resolve last part of #472 where only SD version is available. 2022-11-28 12:07:43 +13:00			`else:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`out.add(response_json["gif"]["urls"]["sd"])`
			`elif response_json["gif"]["type"] == 2: # type 2 is an image`
			`if response_json["gif"]["gallery"]:`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`content = Redgifs.retrieve_url(`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}'`
			`)`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`response_json = json.loads(content.text)`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`out = {p["urls"]["hd"] for p in response_json["gifs"]}`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`else:`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`out.add(response_json["gif"]["urls"]["hd"])`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00			`else:`
			`raise KeyError`
			`except (KeyError, AttributeError):`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`raise SiteDownloaderError("Failed to find JSON data in page")`
Add support for Redgifs images and galleries 2022-02-18 15:04:37 +13:00
Redgifs fixed? If this doesn't work then I give up... 2022-09-17 12:41:17 +12:00			`# Update subdomain if old one is returned`
Format according to the black standard 2022-12-03 18:11:17 +13:00			`out = {re.sub("thumbs2", "thumbs3", link) for link in out}`
			`out = {re.sub("thumbs3", "thumbs4", link) for link in out}`
Refactor redgifs 2021-03-20 01:28:41 +13:00			`return out`