1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/youtube.py

59 lines
2.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import logging
2021-02-11 12:10:40 +13:00
import tempfile
2021-02-28 17:52:20 +13:00
from pathlib import Path
2021-02-25 23:40:08 +13:00
from typing import Optional
import youtube_dl
2021-02-11 12:10:40 +13:00
from praw.models import Submission
from bdfr.exceptions import (NotADownloadableLinkError, SiteDownloaderError)
2021-04-12 19:58:32 +12:00
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class Youtube(BaseDownloader):
    """Site downloader that fetches a submission's media through youtube-dl.

    The media is downloaded into a temporary directory, read fully into
    memory, and returned wrapped in a ``Resource``.
    """

    def __init__(self, post: Submission):
        """Initialise the downloader by delegating to ``BaseDownloader``.

        Args:
            post: The submission to download; its ``url`` attribute is what
                gets handed to youtube-dl.
        """
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Download the media behind the post's URL.

        Args:
            authenticator: Accepted for interface compatibility; not used by
                this downloader.

        Returns:
            A single-element list holding the downloaded ``Resource``.

        Raises:
            SiteDownloaderError: If youtube-dl reports a download error.
            NotADownloadableLinkError: If youtube-dl produced no file.
        """
        ytdl_options = {
            'format': 'best',
            # Per the youtube-dl option docs this is the playlist item to end
            # at, i.e. only the first entry of a playlist URL is fetched.
            'playlistend': 1,
            'nooverwrites': True,
        }
        return [self._download_video(ytdl_options)]

    def _download_video(self, ytdl_options: dict) -> Resource:
        """Run youtube-dl on the post URL and wrap the result in a ``Resource``.

        The caller's ``ytdl_options`` mapping is left untouched; the
        ``quiet``, ``logger`` and ``outtmpl`` keys are overridden on a copy.

        Args:
            ytdl_options: Base options passed to ``youtube_dl.YoutubeDL``.

        Returns:
            A ``Resource`` for the post whose ``content`` is the raw bytes of
            the downloaded file and whose extension is that file's suffix.

        Raises:
            SiteDownloaderError: If youtube-dl raises ``DownloadError``.
            NotADownloadableLinkError: If the download directory is empty
                after youtube-dl finishes.
        """
        # Silence youtube-dl's own logging for everything below CRITICAL.
        yt_logger = logging.getLogger('youtube-dl')
        yt_logger.setLevel(logging.CRITICAL)

        with tempfile.TemporaryDirectory() as temp_dir:
            download_path = Path(temp_dir).resolve()
            # Build a fresh dict so the caller's options are never mutated.
            options = {
                **ytdl_options,
                'quiet': True,
                'logger': yt_logger,
                'outtmpl': str(download_path / 'test.%(ext)s'),
            }
            try:
                with youtube_dl.YoutubeDL(options) as ydl:
                    ydl.download([self.post.url])
            except youtube_dl.DownloadError as e:
                # Chain explicitly so the original youtube-dl error is kept
                # as the cause of the project-level exception.
                raise SiteDownloaderError(f'Youtube download failed: {e}') from e

            # The directory was empty before the download, so any entry in it
            # is the file youtube-dl wrote.
            downloaded_file = next(download_path.iterdir(), None)
            if downloaded_file is None:
                raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")

            # Read everything before the temporary directory is removed.
            extension = downloaded_file.suffix
            content = downloaded_file.read_bytes()

        out = Resource(self.post, self.post.url, extension)
        out.content = content
        out.create_hash()
        return out