1
0
Fork 0
mirror of synced 2024-05-18 19:22:38 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/download_factory.py
Soulsuck24 0e23dcb8ad
Gfycat/Redgifs coverage
Coverage for direct gfycat links that redirect to redgifs. The redirect through the sites themselves are broken but this fixes that.

Coverage for o.imgur links and incorrect capitalisation of domains in download_factory.

Changed tests for direct as gfycat is handled by the gfycat downloader.

fix pornhub test as the previous video was removed.
2023-01-30 14:52:08 -05:00

90 lines
3.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import urllib.parse
from bdfr.exceptions import NotADownloadableLinkError
from bdfr.site_downloaders.base_downloader import BaseDownloader
from bdfr.site_downloaders.delay_for_reddit import DelayForReddit
from bdfr.site_downloaders.direct import Direct
from bdfr.site_downloaders.erome import Erome
from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfr.site_downloaders.gallery import Gallery
from bdfr.site_downloaders.gfycat import Gfycat
from bdfr.site_downloaders.imgur import Imgur
from bdfr.site_downloaders.pornhub import PornHub
from bdfr.site_downloaders.redgifs import Redgifs
from bdfr.site_downloaders.self_post import SelfPost
from bdfr.site_downloaders.vidble import Vidble
from bdfr.site_downloaders.vreddit import VReddit
from bdfr.site_downloaders.youtube import Youtube
class DownloadFactory:
@staticmethod
def pull_lever(url: str) -> type[BaseDownloader]:
sanitised_url = DownloadFactory.sanitise_url(url).lower()
if re.match(r"(i\.|m\.|o\.)?imgur", sanitised_url):
return Imgur
elif re.match(r"(i\.|thumbs\d\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
return Redgifs
elif re.match(r"(thumbs\.|giant\.)?gfycat\.", sanitised_url):
return Gfycat
elif re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource(
sanitised_url
):
return Direct
elif re.match(r"erome\.com.*", sanitised_url):
return Erome
elif re.match(r"delayforreddit\.com", sanitised_url):
return DelayForReddit
elif re.match(r"reddit\.com/gallery/.*", sanitised_url):
return Gallery
elif re.match(r"patreon\.com.*", sanitised_url):
return Gallery
elif re.match(r"reddit\.com/r/", sanitised_url):
return SelfPost
elif re.match(r"(m\.)?youtu\.?be", sanitised_url):
return Youtube
elif re.match(r"i\.redd\.it.*", sanitised_url):
return Direct
elif re.match(r"v\.redd\.it.*", sanitised_url):
return VReddit
elif re.match(r"pornhub\.com.*", sanitised_url):
return PornHub
elif re.match(r"vidble\.com", sanitised_url):
return Vidble
elif YtdlpFallback.can_handle_link(sanitised_url):
return YtdlpFallback
else:
raise NotADownloadableLinkError(f"No downloader module exists for url {url}")
@staticmethod
def sanitise_url(url: str) -> str:
beginning_regex = re.compile(r"\s*(www\.?)?")
split_url = urllib.parse.urlsplit(url)
split_url = split_url.netloc + split_url.path
split_url = re.sub(beginning_regex, "", split_url)
return split_url
@staticmethod
def is_web_resource(url: str) -> bool:
web_extensions = (
"asp",
"aspx",
"cfm",
"cfml",
"css",
"htm",
"html",
"js",
"php",
"php3",
"xhtml",
)
if re.match(rf'(?i).*/.*\.({"|".join(web_extensions)})$', url):
return True
else:
return False