2021-02-11 12:09:37 +13:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
import re
|
2021-04-18 23:24:11 +12:00
|
|
|
import urllib.parse
|
2021-02-11 12:09:37 +13:00
|
|
|
|
2021-04-12 19:58:32 +12:00
|
|
|
from bdfr.exceptions import NotADownloadableLinkError
|
|
|
|
from bdfr.site_downloaders.base_downloader import BaseDownloader
|
2022-11-06 03:51:33 +13:00
|
|
|
from bdfr.site_downloaders.delay_for_reddit import DelayForReddit
|
2021-04-12 19:58:32 +12:00
|
|
|
from bdfr.site_downloaders.direct import Direct
|
|
|
|
from bdfr.site_downloaders.erome import Erome
|
2021-11-24 13:40:18 +13:00
|
|
|
from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
|
2021-04-12 19:58:32 +12:00
|
|
|
from bdfr.site_downloaders.gallery import Gallery
|
|
|
|
from bdfr.site_downloaders.gfycat import Gfycat
|
|
|
|
from bdfr.site_downloaders.imgur import Imgur
|
2021-06-25 19:47:49 +12:00
|
|
|
from bdfr.site_downloaders.pornhub import PornHub
|
2021-04-12 19:58:32 +12:00
|
|
|
from bdfr.site_downloaders.redgifs import Redgifs
|
|
|
|
from bdfr.site_downloaders.self_post import SelfPost
|
2021-09-11 14:15:35 +12:00
|
|
|
from bdfr.site_downloaders.vidble import Vidble
|
2022-07-15 17:05:07 +12:00
|
|
|
from bdfr.site_downloaders.vreddit import VReddit
|
2021-04-12 19:58:32 +12:00
|
|
|
from bdfr.site_downloaders.youtube import Youtube
|
2021-02-11 12:09:37 +13:00
|
|
|
|
|
|
|
|
|
|
|
class DownloadFactory:
    """Pick the downloader class that should handle a given URL.

    The URL is first reduced to ``host + path`` (see :meth:`sanitise_url`) and
    then tested against a fixed, ordered list of site patterns. The order of
    the checks is significant: the first matching rule wins.
    """

    # Anchored at the start so that only a *leading* "www"/"www." is removed.
    # An unanchored pattern used with ``re.sub`` would also delete every
    # "www" and every whitespace run found later in the host or path.
    _LEADING_WWW_REGEX = re.compile(r"^\s*(www\.?)?")

    # File extensions that denote a web page/script rather than a media file.
    _WEB_EXTENSIONS = (
        "asp",
        "aspx",
        "cfm",
        "cfml",
        "css",
        "htm",
        "html",
        "js",
        "php",
        "php3",
        "xhtml",
    )
    # Compiled once at class creation instead of being rebuilt on every call.
    _WEB_RESOURCE_REGEX = re.compile(rf'(?i).*/.*\.({"|".join(_WEB_EXTENSIONS)})$')

    @staticmethod
    def pull_lever(url: str) -> type[BaseDownloader]:
        """Return the downloader class matching ``url``.

        Args:
            url: The submission URL to dispatch on.

        Returns:
            The downloader class (not an instance) for the first rule that
            matches the sanitised URL.

        Raises:
            NotADownloadableLinkError: If no rule matches and the yt-dlp
                fallback reports that it cannot handle the link either.
        """
        sanitised_url = DownloadFactory.sanitise_url(url)

        # Imgur and Redgifs are tested before the generic direct-file rule, so
        # file-like links on those hosts still go to their dedicated modules.
        if re.match(r"(i\.|m\.)?imgur", sanitised_url):
            return Imgur
        if re.match(r"(i\.|thumbs\d\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
            return Redgifs

        # Anything that ends in a 3-4 character file extension is treated as a
        # direct file link, unless that extension is a web page/script type.
        looks_like_file = re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url)
        if looks_like_file and not DownloadFactory.is_web_resource(sanitised_url):
            return Direct

        if re.match(r"erome\.com.*", sanitised_url):
            return Erome
        if re.match(r"delayforreddit\.com", sanitised_url):
            return DelayForReddit
        if re.match(r"reddit\.com/gallery/.*", sanitised_url):
            return Gallery
        # NOTE(review): patreon.com links are routed to Gallery, exactly as in
        # the original code - confirm this mapping is intended.
        if re.match(r"patreon\.com.*", sanitised_url):
            return Gallery
        if re.match(r"gfycat\.", sanitised_url):
            return Gfycat
        if re.match(r"reddit\.com/r/", sanitised_url):
            return SelfPost
        if re.match(r"(m\.)?youtu\.?be", sanitised_url):
            return Youtube
        if re.match(r"i\.redd\.it.*", sanitised_url):
            return Direct
        if re.match(r"v\.redd\.it.*", sanitised_url):
            return VReddit
        if re.match(r"pornhub\.com.*", sanitised_url):
            return PornHub
        if re.match(r"vidble\.com", sanitised_url):
            return Vidble

        # Last resort: ask the yt-dlp fallback whether it can handle the link.
        if YtdlpFallback.can_handle_link(sanitised_url):
            return YtdlpFallback

        raise NotADownloadableLinkError(f"No downloader module exists for url {url}")

    @staticmethod
    def sanitise_url(url: str) -> str:
        """Reduce ``url`` to ``host + path`` without a leading ``www.``.

        The scheme, query string and fragment are discarded. Whitespace
        surrounding the URL is ignored. Only a ``www``/``www.`` prefix at the
        very start is removed; occurrences of "www" elsewhere in the host or
        path are left untouched.

        Args:
            url: The raw URL.

        Returns:
            The sanitised ``host + path`` string used for pattern matching.
        """
        # Strip first so stray surrounding whitespace neither confuses scheme
        # detection nor leaves a trailing blank that defeats "$"-anchored rules.
        parts = urllib.parse.urlsplit(url.strip())
        host_and_path = parts.netloc + parts.path
        return DownloadFactory._LEADING_WWW_REGEX.sub("", host_and_path)

    @staticmethod
    def is_web_resource(url: str) -> bool:
        """Tell whether ``url`` ends in a web page/script file extension.

        The check is case-insensitive and requires a ``/`` somewhere before
        the file name, so it is meant for already-sanitised ``host/path``
        strings.

        Args:
            url: The (sanitised) URL to inspect.

        Returns:
            ``True`` if the URL ends in one of the known web extensions,
            otherwise ``False``.
        """
        return DownloadFactory._WEB_RESOURCE_REGEX.match(url) is not None
|