1
0
Fork 0
mirror of synced 2024-05-18 19:22:38 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/gfycat.py
Soulsuck24 0e23dcb8ad
Gfycat/Redgifs coverage
Coverage for direct gfycat links that redirect to redgifs. The redirect through the sites themselves are broken but this fixes that.

Coverage for o.imgur links and incorrect capitalisation of domains in download_factory.

Changed tests for direct as gfycat is handled by the gfycat downloader.

fix pornhub test as the previous video was removed.
2023-01-30 14:52:08 -05:00

46 lines
1.5 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from typing import Optional
from bs4 import BeautifulSoup
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.redgifs import Redgifs
class Gfycat(Redgifs):
def __init__(self, post: Submission):
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
return super().find_resources(authenticator)
@staticmethod
def _get_link(url: str) -> set[str]:
gfycat_id = re.match(r".*/(.*?)(?:/?|-.*|\..{3-4})$", url).group(1)
url = "https://gfycat.com/" + gfycat_id
response = Gfycat.retrieve_url(url)
if re.search(r"(redgifs|gifdeliverynetwork)", response.url):
url = url.lower() # Fixes error with old gfycat/redgifs links
return Redgifs._get_link(url)
soup = BeautifulSoup(response.text, "html.parser")
content = soup.find("script", attrs={"data-react-helmet": "true", "type": "application/ld+json"})
try:
out = json.loads(content.contents[0])["video"]["contentUrl"]
except (IndexError, KeyError, AttributeError) as e:
raise SiteDownloaderError(f"Failed to download Gfycat link {url}: {e}")
except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Did not receive valid JSON data: {e}")
return {
out,
}