1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00
bulk-downloader-for-reddit/bdfr/site_downloaders/youtube.py

86 lines
3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
2021-02-11 12:10:40 +13:00
import tempfile
2023-01-26 16:23:59 +13:00
from collections.abc import Callable
2021-02-28 17:52:20 +13:00
from pathlib import Path
2023-01-26 16:23:59 +13:00
from typing import Optional
2021-10-02 15:23:13 +13:00
import yt_dlp
2021-02-11 12:10:40 +13:00
from praw.models import Submission
2021-07-27 15:39:49 +12:00
from bdfr.exceptions import NotADownloadableLinkError, SiteDownloaderError
2021-04-12 19:58:32 +12:00
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
logger = logging.getLogger(__name__)
2021-02-07 14:33:19 +13:00
class Youtube(BaseDownloader):
    """Site downloader that retrieves the video behind a submission URL via yt-dlp."""

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return a one-element list holding the resource for ``self.post.url``.

        Args:
            authenticator: Not used by this method.

        Returns:
            A list with a single ``Resource`` built from the post, its URL, a
            download callable, and the extension reported by yt-dlp.

        Raises:
            NotADownloadableLinkError: If video information cannot be extracted
                or does not include an ``"ext"`` entry.
        """
        ytdl_options = {
            "format": "best",
            "playlistend": 1,
            "nooverwrites": True,
        }
        download_function = self._download_video(ytdl_options)
        extension = self.get_video_attributes(self.post.url)["ext"]
        res = Resource(self.post, self.post.url, download_function, extension)
        return [res]

    def _download_video(self, ytdl_options: dict) -> Callable:
        """Create a callable that downloads ``self.post.url`` and returns its bytes.

        Args:
            ytdl_options: Base options passed to ``yt_dlp.YoutubeDL``. The dict is
                copied, so the caller's mapping is left unmodified. ``"quiet"``,
                ``"logger"`` and ``"outtmpl"`` are always overridden.

        Returns:
            A function taking one (ignored) argument and returning the content
            of the downloaded file as ``bytes``.
        """
        # Copy instead of mutating the argument: callers may reuse their dict.
        base_options = {
            **ytdl_options,
            "quiet": True,
            "logger": self._quiet_ytdl_logger(),
        }

        def download(_: dict) -> bytes:
            """Download into a temporary directory and return the file content.

            Raises:
                SiteDownloaderError: If yt-dlp reports a download error.
                NotADownloadableLinkError: If no file was produced.
            """
            with tempfile.TemporaryDirectory() as temp_dir:
                download_path = Path(temp_dir).resolve()
                # Fresh dict per call so repeated calls never share state.
                options = {
                    **base_options,
                    "outtmpl": str(download_path / "test.%(ext)s"),
                }
                try:
                    with yt_dlp.YoutubeDL(options) as ydl:
                        ydl.download([self.post.url])
                except yt_dlp.DownloadError as e:
                    raise SiteDownloaderError(f"Youtube download failed: {e}") from e

                downloaded_files = list(download_path.iterdir())
                if not downloaded_files:
                    raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")
                # Read everything before the temporary directory is removed.
                return downloaded_files[0].read_bytes()

        return download

    @staticmethod
    def _quiet_ytdl_logger() -> logging.Logger:
        """Return the ``"youtube-dl"`` logger with its level set to CRITICAL."""
        yt_logger = logging.getLogger("youtube-dl")
        yt_logger.setLevel(logging.CRITICAL)
        return yt_logger

    @staticmethod
    def get_video_data(url: str) -> dict:
        """Extract video information for ``url`` without downloading the media.

        Args:
            url: The link to pass to ``yt_dlp.YoutubeDL.extract_info``.

        Returns:
            Whatever ``extract_info`` returns for the URL.

        Raises:
            NotADownloadableLinkError: If extraction raises any exception; the
                original exception is logged and chained as the cause.
        """
        yt_logger = Youtube._quiet_ytdl_logger()
        with yt_dlp.YoutubeDL(
            {
                "logger": yt_logger,
            }
        ) as ydl:
            try:
                result = ydl.extract_info(url, download=False)
            except Exception as e:
                logger.exception(e)
                raise NotADownloadableLinkError(f"Video info extraction failed for {url}") from e
        return result

    @staticmethod
    def get_video_attributes(url: str) -> dict:
        """Return the extracted video information, requiring an ``"ext"`` entry.

        Args:
            url: The link to inspect.

        Returns:
            The information returned by ``get_video_data``.

        Raises:
            NotADownloadableLinkError: If extraction fails, yields nothing, or
                the result has no ``"ext"`` key.
        """
        result = Youtube.get_video_data(url)
        # An empty/None result must not surface as a TypeError from the
        # membership test; report it as a non-downloadable link instead.
        if not result or "ext" not in result:
            raise NotADownloadableLinkError(f"Video info extraction failed for {url}")
        return result