1
0
Fork 0
mirror of synced 2024-06-30 03:50:32 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/download_factory.py
2021-04-18 16:44:52 +03:00

60 lines
2.3 KiB
Python

#!/usr/bin/env python3
# coding=utf-8
import re
import urllib.parse
from typing import Type
from bdfr.exceptions import NotADownloadableLinkError
from bdfr.site_downloaders.base_downloader import BaseDownloader
from bdfr.site_downloaders.direct import Direct
from bdfr.site_downloaders.erome import Erome
from bdfr.site_downloaders.gallery import Gallery
from bdfr.site_downloaders.gfycat import Gfycat
from bdfr.site_downloaders.gif_delivery_network import GifDeliveryNetwork
from bdfr.site_downloaders.imgur import Imgur
from bdfr.site_downloaders.redgifs import Redgifs
from bdfr.site_downloaders.self_post import SelfPost
from bdfr.site_downloaders.vreddit import VReddit
from bdfr.site_downloaders.youtube import Youtube
class DownloadFactory:
    """Pick the site downloader class that is responsible for a given URL."""

    @staticmethod
    def pull_lever(url: str) -> 'Type[BaseDownloader]':
        """Return the downloader class that handles ``url``.

        The URL is reduced to ``host + path`` by ``_sanitise_url`` and then
        tested against the patterns below in order; the first match wins, so
        the order of the checks is significant.

        Args:
            url: The link to classify.

        Returns:
            The downloader class (not an instance) selected for the link.

        Raises:
            NotADownloadableLinkError: If none of the patterns match.
        """
        sanitised_url = DownloadFactory._sanitise_url(url)

        # Imgur .gifv links must be tested before the generic "has a file
        # extension" rule below, which would otherwise claim them as Direct.
        if re.match(r'(i\.)?imgur.*\.gifv$', sanitised_url):
            return Imgur
        # Anything whose path ends in a 3-4 character extension is treated as
        # a direct file link.
        if re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url):
            return Direct
        if re.match(r'erome\.com.*', sanitised_url):
            return Erome
        if re.match(r'reddit\.com/gallery/.*', sanitised_url):
            return Gallery
        if re.match(r'gfycat\.', sanitised_url):
            return Gfycat
        if re.match(r'gifdeliverynetwork', sanitised_url):
            return GifDeliveryNetwork
        if re.match(r'(m\.)?imgur.*', sanitised_url):
            return Imgur
        # The dot is escaped so that it only matches a literal '.'.
        if re.match(r'redgifs\.com', sanitised_url):
            return Redgifs
        if re.match(r'reddit\.com/r/', sanitised_url):
            return SelfPost
        if re.match(r'v\.redd\.it', sanitised_url):
            return VReddit
        if re.match(r'(m\.)?youtu\.?be', sanitised_url):
            return Youtube
        if re.match(r'i\.redd\.it.*', sanitised_url):
            return Direct
        raise NotADownloadableLinkError(f'No downloader module exists for url {url}')

    @staticmethod
    def _sanitise_url(url: str) -> str:
        """Reduce ``url`` to ``host + path`` without a leading ``www.``.

        The scheme, query string and fragment are discarded because only the
        network location and path are kept.

        Args:
            url: The link to normalise.

        Returns:
            The host and path joined together, with any leading whitespace
            and a single leading ``www.`` removed.
        """
        split_url = urllib.parse.urlsplit(url)
        host_and_path = split_url.netloc + split_url.path
        # Anchored with '^' so that only a prefix is stripped; an unanchored
        # substitution would also delete 'www.' (and whitespace) occurring
        # later in the path, e.g. in a file name such as 'www.image.png'.
        return re.sub(r'^\s*(www\.)?', '', host_and_path)