diff --git a/bulkredditdownloader/downloaders/base_downloader.py b/bulkredditdownloader/downloaders/base_downloader.py
index 5580d70..eb30431 100644
--- a/bulkredditdownloader/downloaders/base_downloader.py
+++ b/bulkredditdownloader/downloaders/base_downloader.py
@@ -1,15 +1,18 @@
 #!/usr/bin/env python3
 # coding=utf-8
+
 import hashlib
-import os
-import sys
-import urllib.request
-from abc import ABC
+import logging
+import re
+from abc import ABC, abstractmethod
 from pathlib import Path
 
+import requests
+
 from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+
+logger = logging.getLogger(__name__)
 
 
 class BaseDownloader(ABC):
@@ -17,22 +20,17 @@ class BaseDownloader(ABC):
         self.directory = directory
         self.post = post
 
+    @abstractmethod
+    def download(self):
+        raise NotImplementedError
+
     @staticmethod
-    def createHash(filename: str) -> str:
-        hash_md5 = hashlib.md5()
-        with open(filename, "rb") as f:
-            for chunk in iter(lambda: f.read(4096), b""):
-                hash_md5.update(chunk)
+    def _create_hash(content: bytes) -> str:
+        hash_md5 = hashlib.md5(content)
         return hash_md5.hexdigest()
 
     @staticmethod
-    def getFile(
-            filename: str,
-            short_filename: str,
-            folder_dir: Path,
-            image_url: str,
-            indent: int = 0,
-            silent: bool = False):
+    def _download_resource(filename: Path, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
         formats = {
             "videos": [".mp4", ".webm"],
             "images": [".jpg", ".jpeg", ".png", ".bmp"],
@@ -52,69 +50,55 @@ class BaseDownloader(ABC):
             ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                            "Safari/537.36 OPR/54.0.2952.64"),
-            ("Accept", "text/html,application/xhtml+xml,application/xml;"
-                       "q=0.9,image/webp,image/apng,*/*;q=0.8"),
+            ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"),
             ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
             ("Accept-Encoding", "none"),
             ("Accept-Language", "en-US,en;q=0.8"),
             ("Connection", "keep-alive")
         ]
 
-        if not os.path.exists(folder_dir):
-            os.makedirs(folder_dir)
+        folder_dir.mkdir(parents=True, exist_ok=True)
 
-        opener = urllib.request.build_opener()
         if "imgur" not in image_url:
-            opener.addheaders = headers
-        urllib.request.install_opener(opener)
+            addheaders = dict(headers)
+        else:
+            addheaders = None
 
         if not silent:
-            print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
-
-        def dlProgress(count: int, block_size: int, total_size: int):
-            """Function for writing download progress to console
-            """
-            download_mbs = int(count * block_size * (10 ** (-6)))
-            file_size = int(total_size * (10 ** (-6)))
-            sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
-            sys.stdout.flush()
+            logger.info("%s\n%s", " " * indent + str(folder_dir), " " * indent + str(filename))
 
+        # Loop to attempt the download up to 3 times
         for i in range(3):
-            file_dir = Path(folder_dir) / filename
-            temp_dir = Path(folder_dir) / (filename + ".tmp")
+            file_path = Path(folder_dir) / filename
 
-            if not (os.path.isfile(file_dir)):
+            if file_path.is_file():
+                raise FileAlreadyExistsError
+            else:
                 try:
-                    urllib.request.urlretrieve(image_url, temp_dir, reporthook=dlProgress)
-
-                    file_hash = BaseDownloader.createHash(temp_dir)
-                    if GLOBAL.arguments.no_dupes:
-                        if file_hash in GLOBAL.downloadedPosts():
-                            os.remove(temp_dir)
-                            raise FileAlreadyExistsError
-                    GLOBAL.downloadedPosts.add(file_hash)
-
-                    os.rename(temp_dir, file_dir)
-                    if not silent:
-                        print(" " * indent + "Downloaded" + " " * 10)
-                    return None
+                    download_content = requests.get(image_url, headers=addheaders).content
                 except ConnectionResetError:
                     raise FailedToDownload
-                except FileNotFoundError:
-                    filename = short_filename
-            else:
-                raise FileAlreadyExistsError
+
+            file_hash = BaseDownloader._create_hash(download_content)
+            if GLOBAL.arguments.no_dupes:
+                if file_hash in GLOBAL.downloadedPosts():
+                    raise FileAlreadyExistsError
+            GLOBAL.downloadedPosts.add(file_hash)
+
+            with open(file_path, 'wb') as file:
+                file.write(download_content)
+            if not silent:
+                logger.info(" " * indent + "Downloaded" + " " * 10)
+            return
+
         raise FailedToDownload
 
     @staticmethod
-    def getExtension(link: str):
-        """Extract file extension from image link.
-        If didn't find any, return '.jpg' """
-        image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']
-        parsed = link.split('.')
-        for fileType in image_types:
-            if fileType in parsed:
-                return "." + parsed[-1]
+    def _get_extension(url: str) -> str:
+        pattern = re.compile(r'\.(jpg|jpeg|png|mp4|webm|gif)')
+        if match := re.search(pattern, url):
+            return match.group(0)
+        if "v.redd.it" not in url:
+            return '.jpg'
         else:
-            if "v.redd.it" not in link:
-                return '.jpg'
-            else:
-                return '.mp4'
+            return '.mp4'
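Note on the switch from `urllib.request.urlretrieve` to `requests`: `_download_resource` now buffers the whole payload in memory and `_create_hash` hashes it in one shot, where the old `createHash` read 4096-byte chunks. If memory use on large videos matters, a streamed variant could restore the old behaviour — a sketch using requests' `stream=True`/`iter_content` (the helper name is invented):

```python
import hashlib

import requests


def stream_to_file_with_hash(image_url: str, file_path, chunk_size: int = 4096) -> str:
    """Stream image_url to file_path in chunks, returning the MD5 hex digest."""
    hash_md5 = hashlib.md5()
    with requests.get(image_url, stream=True) as response:
        response.raise_for_status()
        with open(file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size):
                hash_md5.update(chunk)
                file.write(chunk)
    return hash_md5.hexdigest()
```

The trade-off is that the no-dupes check would then run after the file is already on disk, so a duplicate would have to be unlinked rather than never written.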
diff --git a/bulkredditdownloader/downloaders/direct.py b/bulkredditdownloader/downloaders/direct.py
index 23a5c2f..5fe97cd 100644
--- a/bulkredditdownloader/downloaders/direct.py
+++ b/bulkredditdownloader/downloaders/direct.py
@@ -1,4 +1,5 @@
-import os
+#!/usr/bin/env python3
+
 import pathlib
 
 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
@@ -8,11 +9,11 @@ from bulkredditdownloader.utils import GLOBAL
 class Direct(BaseDownloader):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
-        post['EXTENSION'] = self.getExtension(post['CONTENTURL'])
-        if not os.path.exists(directory):
-            os.makedirs(directory)
+        self.download()
 
-        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-        short_filename = post['POSTID'] + post['EXTENSION']
+    def download(self):
+        self.post['EXTENSION'] = self._get_extension(self.post['CONTENTURL'])
+        self.directory.mkdir(parents=True, exist_ok=True)
 
-        self.getFile(filename, short_filename, directory, post['CONTENTURL'])
+        filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
+        self._download_resource(pathlib.Path(filename), self.directory, self.post['CONTENTURL'])
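For illustration, the `filename` assembled in `download` comes from the user-configurable template in `GLOBAL.config['filename']`; with a hypothetical template and post dict:

```python
post = {'REDDITOR': 'example_user', 'POSTID': 'abc123', 'EXTENSION': '.jpg'}  # invented sample
template = '{REDDITOR}_{POSTID}'  # assumed config value; the real format may differ
filename = template.format(**post) + post['EXTENSION']
assert filename == 'example_user_abc123.jpg'
```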
diff --git a/bulkredditdownloader/downloaders/erome.py b/bulkredditdownloader/downloaders/erome.py
index f54e6f5..2df5937 100644
--- a/bulkredditdownloader/downloaders/erome.py
+++ b/bulkredditdownloader/downloaders/erome.py
@@ -1,5 +1,8 @@
-import os
+#!/usr/bin/env python3
+
+import logging
 import pathlib
+import re
 import urllib.error
 import urllib.request
 from html.parser import HTMLParser
@@ -7,70 +10,64 @@ from html.parser import HTMLParser
 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+
+logger = logging.getLogger(__name__)
 
 
 class Erome(BaseDownloader):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
+        self.download()
+
+    def download(self):
         try:
-            images = self.getLinks(post['CONTENTURL'])
+            images = self._get_links(self.post['CONTENTURL'])
         except urllib.error.HTTPError:
             raise NotADownloadableLinkError("Not a downloadable link")
 
         images_length = len(images)
         how_many_downloaded = images_length
         duplicates = 0
 
         if images_length == 1:
-            extension = self.getExtension(images[0])
-
-            """Filenames are declared here"""
-            filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-            short_filename = post['POSTID'] + extension
+            filename = GLOBAL.config['filename'].format(**self.post) + self._get_extension(images[0])
 
-            image_url = images[0]
-            if 'https://' not in image_url or 'http://' not in image_url:
-                image_url = "https://" + image_url
+            image = images[0]
+            if not re.match(r'https?://.*', image):
+                image = "https://" + image
 
-            self.getFile(filename, short_filename, directory, image_url)
+            self._download_resource(pathlib.Path(filename), self.directory, image)
 
         else:
-            filename = GLOBAL.config['filename'].format(**post)
-            print(filename)
+            filename = GLOBAL.config['filename'].format(**self.post)
+            logger.info(filename)
 
-            folder_dir = directory / filename
+            folder_dir = self.directory / filename
 
-            try:
-                if not os.path.exists(folder_dir):
-                    os.makedirs(folder_dir)
-            except FileNotFoundError:
-                folder_dir = directory / post['POSTID']
-                os.makedirs(folder_dir)
+            folder_dir.mkdir(parents=True, exist_ok=True)
 
-            for i in range(images_length):
-                extension = self.getExtension(images[i])
+            for i, image in enumerate(images):
+                extension = self._get_extension(image)
                 filename = str(i + 1) + extension
-                image_url = images[i]
-                if 'https://' not in image_url and 'http://' not in image_url:
-                    image_url = "https://" + image_url
 
-                print("  ({}/{})".format(i + 1, images_length))
-                print("  {}".format(filename))
+                if not re.match(r'https?://.*', image):
+                    image = "https://" + image
+
+                logger.info("  ({}/{})".format(i + 1, images_length))
+                logger.info("  {}".format(filename))
 
                 try:
-                    self.getFile(filename, filename, folder_dir, image_url, indent=2)
-                    print()
+                    self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
                 except FileAlreadyExistsError:
-                    print("  The file already exists" + " " * 10, end="\n\n")
+                    logger.info("  The file already exists")
                     duplicates += 1
                     how_many_downloaded -= 1
 
                 except Exception as exception:
                     # raise exception
-                    print("\n  Could not get the file")
-                    print(
+                    logger.error("\n  Could not get the file")
+                    logger.error(
                         "  "
                         + "{class_name}: {info}".format(class_name=exception.__class__.__name__,
                                                         info=str(exception))
                         + "\n"
@@ -82,10 +79,12 @@ class Erome(BaseDownloader):
         elif how_many_downloaded + duplicates < images_length:
             raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
 
-    def getLinks(self, url: str) -> list[str]:
+    @staticmethod
+    def _get_links(url: str) -> list[str]:
         content = []
         line_number = None
 
+        # TODO: move to bs4 and requests
        class EromeParser(HTMLParser):
            tag = None
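The `# TODO: move to bs4 and requests` in `_get_links` could look roughly like the following; the markup of Erome album pages is assumed here (`img`/`source` tags carrying `src`), not verified:

```python
import requests
from bs4 import BeautifulSoup


def _get_links(url: str) -> list[str]:
    page = requests.get(url)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")
    # Collect every candidate media URL on the album page
    tags = soup.find_all("img") + soup.find_all("source")
    return [tag.get("src") for tag in tags if tag.get("src")]
```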
diff --git a/bulkredditdownloader/downloaders/gallery.py b/bulkredditdownloader/downloaders/gallery.py
index 597d653..d877e4e 100644
--- a/bulkredditdownloader/downloaders/gallery.py
+++ b/bulkredditdownloader/downloaders/gallery.py
@@ -1,7 +1,9 @@
+#!/usr/bin/env python3
+
 import json
-import os
+import logging
 import pathlib
-import urllib
+import urllib.parse
 
 import requests
 
@@ -9,15 +11,18 @@ from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
                                          NotADownloadableLinkError, TypeInSkip)
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+
+logger = logging.getLogger(__name__)
 
 
 class Gallery(BaseDownloader):
     def __init__(self, directory: pathlib.Path, post):
         super().__init__(directory, post)
-        link = post['CONTENTURL']
-        self.raw_data = self.getData(link)
+        link = self.post['CONTENTURL']
+        self.raw_data = self._get_data(link)
+        self.download()
 
+    def download(self):
         images = {}
         count = 0
         for model in self.raw_data['posts']['models']:
@@ -27,15 +32,15 @@ class Gallery(BaseDownloader):
                     images[count] = {'id': item['mediaId'], 'url': self.raw_data['posts']
                                      ['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
                     count += 1
-                except Exception:
+                except KeyError:
                     continue
-            except Exception:
+            except KeyError:
                 continue
 
-        self.downloadAlbum(images, count)
+        self._download_album(images, count)
 
     @staticmethod
-    def getData(link: str) -> dict:
+    def _get_data(link: str) -> dict:
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
             "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
@@ -58,50 +63,42 @@ class Gallery(BaseDownloader):
         data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
         return data
 
-    def downloadAlbum(self, images: dict, count: int):
+    def _download_album(self, images: dict, count: int):
         folder_name = GLOBAL.config['filename'].format(**self.post)
         folder_dir = self.directory / folder_name
 
         how_many_downloaded = 0
         duplicates = 0
 
-        try:
-            if not os.path.exists(folder_dir):
-                os.makedirs(folder_dir)
-        except FileNotFoundError:
-            folder_dir = self.directory / self.post['POSTID']
-            os.makedirs(folder_dir)
+        folder_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(folder_name)
 
-        print(folder_name)
+        for i, image in enumerate(images.values()):
+            path = urllib.parse.urlparse(image['url']).path
+            extension = pathlib.Path(path).suffix
 
-        for i in range(count):
-            path = urllib.parse.urlparse(images[i]['url']).path
-            extension = os.path.splitext(path)[1]
+            filename = pathlib.Path("_".join([str(i + 1), image['id']]) + extension)
 
-            filename = "_".join([str(i + 1), images[i]['id']]) + extension
-            short_filename = str(i + 1) + "_" + images[i]['id']
-
-            print("\n  ({}/{})".format(i + 1, count))
+            logger.info("\n  ({}/{})".format(i + 1, count))
 
             try:
-                self.getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
+                self._download_resource(filename, folder_dir, image['url'], indent=2)
                 how_many_downloaded += 1
-                print()
 
             except FileAlreadyExistsError:
-                print("  The file already exists" + " " * 10, end="\n\n")
+                logger.info("  The file already exists")
                 duplicates += 1
 
             except TypeInSkip:
-                print("  Skipping...")
+                logger.info("  Skipping...")
                 how_many_downloaded += 1
 
             except Exception as exception:
-                print("\n  Could not get the file")
-                print("  " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+                logger.error("\n  Could not get the file")
+                logger.error("  " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                     class_name=exception.__class__.__name__,
                     info=str(exception)) + "\n"
                 )
-                print(GLOBAL.log_stream.getvalue(), no_print=True)
+                logger.debug(GLOBAL.log_stream.getvalue())
 
         if duplicates == count:
             raise FileAlreadyExistsError
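For reference, `download` walks a structure of roughly this shape, scraped out of the gallery page's embedded JSON (values invented; the path down to `items` is inferred from context elided in this hunk, only the `mediaMetadata` lookup is shown verbatim above):

```python
raw_data = {
    "posts": {
        "models": {
            "t3_abc123": {  # hypothetical post id
                "media": {
                    "gallery": {"items": [{"mediaId": "xyz789"}]},  # inferred
                    "mediaMetadata": {
                        "xyz789": {"s": {"u": "https://preview.redd.it/xyz789.jpg"}}
                    },
                }
            }
        }
    }
}
```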
diff --git a/bulkredditdownloader/downloaders/gfycat.py b/bulkredditdownloader/downloaders/gfycat.py
index 1bedeb4..9d2b3bb 100644
--- a/bulkredditdownloader/downloaders/gfycat.py
+++ b/bulkredditdownloader/downloaders/gfycat.py
@@ -1,43 +1,32 @@
+#!/usr/bin/env python3
+
 import json
-import os
+import pathlib
+import re
 import urllib.request
 
 from bs4 import BeautifulSoup
 
-from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
-from bulkredditdownloader.errors import NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL
-import pathlib
 
 
-class Gfycat(BaseDownloader):
+class Gfycat(GifDeliveryNetwork):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
-        try:
-            post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
-        except IndexError:
-            raise NotADownloadableLinkError("Could not read the page source")
+        # GifDeliveryNetwork.__init__ already triggers download()
 
-        post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
-
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-
-        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-        short_filename = post['POSTID'] + post['EXTENSION']
-
-        self.getFile(filename, short_filename, directory, post['MEDIAURL'])
+    def download(self):
+        super().download()
 
     @staticmethod
-    def getLink(url: str) -> str:
+    def _get_link(url: str) -> str:
         """Extract direct link to the video from page's source
         and return it
         """
-        if '.webm' in url or '.mp4' in url or '.gif' in url:
+        if re.search(r'\.(webm|mp4|gif)$', url):
             return url
 
-        if url[-1:] == '/':
+        if url.endswith('/'):
             url = url[:-1]
 
         url = "https://gfycat.com/" + url.split('/')[-1]
@@ -49,6 +38,6 @@ class Gfycat(BaseDownloader):
         content = soup.find("script", attrs=attributes)
 
         if content is None:
-            return GifDeliveryNetwork.getLink(url)
+            return GifDeliveryNetwork._get_link(url)
 
         return json.loads(content.contents[0])["video"]["contentUrl"]
diff --git a/bulkredditdownloader/downloaders/gif_delivery_network.py b/bulkredditdownloader/downloaders/gif_delivery_network.py
index 2c66e1b..52caf4c 100644
--- a/bulkredditdownloader/downloaders/gif_delivery_network.py
+++ b/bulkredditdownloader/downloaders/gif_delivery_network.py
@@ -1,4 +1,5 @@
-import os
+#!/usr/bin/env python3
+
 import pathlib
 import urllib.request
 
@@ -12,23 +13,23 @@ from bulkredditdownloader.utils import GLOBAL
 class GifDeliveryNetwork(BaseDownloader):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
+        self.download()
+
+    def download(self):
         try:
-            post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
+            self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
         except IndexError:
             raise NotADownloadableLinkError("Could not read the page source")
 
-        post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
+        self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
+        self.directory.mkdir(parents=True, exist_ok=True)
 
-        if not os.path.exists(directory):
-            os.makedirs(directory)
+        filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
 
-        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-        short_filename = post['POSTID'] + post['EXTENSION']
-
-        self.getFile(filename, short_filename, directory, post['MEDIAURL'])
+        self._download_resource(pathlib.Path(filename), self.directory, self.post['MEDIAURL'])
 
     @staticmethod
-    def getLink(url: str) -> str:
+    def _get_link(url: str) -> str:
         """Extract direct link to the video from page's source
         and return it
         """
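A note on the new hierarchy: `GifDeliveryNetwork.__init__` already triggers `download()`, and `download()` resolves `self._get_link` dynamically, so subclasses such as `Gfycat` and `Redgifs` only need to override `_get_link` — a subclass that called `download()` again from its own `__init__` would fetch everything twice. Minimal sketch of the dispatch (generic names, not the real classes):

```python
class Parent:
    def __init__(self):
        self.download()  # template method: runs once, here

    def download(self):
        print(self._get_link("https://example.com/page"))

    @staticmethod
    def _get_link(url: str) -> str:
        return url


class Child(Parent):
    @staticmethod
    def _get_link(url: str) -> str:
        return url + "/direct.mp4"  # site-specific resolution


Child()  # prints https://example.com/page/direct.mp4
```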
diff --git a/bulkredditdownloader/downloaders/imgur.py b/bulkredditdownloader/downloaders/imgur.py
index 3b816ff..6f05b26 100644
--- a/bulkredditdownloader/downloaders/imgur.py
+++ b/bulkredditdownloader/downloaders/imgur.py
@@ -1,6 +1,8 @@
+#!/usr/bin/env python3
+
 import json
-import os
+import logging
 import pathlib
 
 import requests
 
@@ -9,7 +11,8 @@ from bulkredditdownloader.downloaders.direct import Direct
 from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
                                          ImageNotFound, NotADownloadableLinkError, TypeInSkip)
 from bulkredditdownloader.utils import GLOBAL, nameCorrector
-from bulkredditdownloader.utils import printToFile as print
+
+logger = logging.getLogger(__name__)
 
 
 class Imgur(BaseDownloader):
@@ -18,24 +21,28 @@ class Imgur(BaseDownloader):
 
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
-        link = post['CONTENTURL']
+        self.raw_data = {}
+        self.download()
+
+    def download(self):
+        link = self.post['CONTENTURL']
 
         if link.endswith(".gifv"):
             link = link.replace(".gifv", ".mp4")
-            Direct(directory, {**post, 'CONTENTURL': link})
+            Direct(self.directory, {**self.post, 'CONTENTURL': link})
             return
 
-        self.raw_data = self.getData(link)
+        self.raw_data = self._get_data(link)
 
-        if self.isAlbum:
+        if self._is_album():
             if self.raw_data["album_images"]["count"] != 1:
-                self.downloadAlbum(self.raw_data["album_images"])
+                self._download_album(self.raw_data["album_images"])
             else:
-                self.download(self.raw_data["album_images"]["images"][0])
+                self._download_image(self.raw_data["album_images"]["images"][0])
         else:
-            self.download(self.raw_data)
+            self._download_image(self.raw_data)
 
-    def downloadAlbum(self, images: dict):
+    def _download_album(self, images: dict):
         folder_name = GLOBAL.config['filename'].format(**self.post)
         folder_dir = self.directory / folder_name
 
@@ -43,70 +50,60 @@ class Imgur(BaseDownloader):
         how_many_downloaded = 0
         duplicates = 0
 
-        try:
-            if not os.path.exists(folder_dir):
-                os.makedirs(folder_dir)
-        except FileNotFoundError:
-            folder_dir = self.directory / self.post['POSTID']
-            os.makedirs(folder_dir)
-
-        print(folder_name)
+        folder_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(folder_name)
 
         for i in range(images_length):
-            extension = self.validateExtension(images["images"][i]["ext"])
+            extension = self._validate_extension(images["images"][i]["ext"])
 
             image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
 
-            filename = "_".join([str(i + 1),
-                                 nameCorrector(images["images"][i]['title']),
-                                 images["images"][i]['hash']]) + extension
-            short_filename = str(i + 1) + "_" + images["images"][i]['hash']
+            filename = pathlib.Path("_".join([str(i + 1),
+                                              nameCorrector(images["images"][i]['title']),
+                                              images["images"][i]['hash']]) + extension)
 
-            print("\n  ({}/{})".format(i + 1, images_length))
+            logger.info("\n  ({}/{})".format(i + 1, images_length))
 
             try:
-                self.getFile(filename, short_filename, folder_dir, image_url, indent=2)
+                self._download_resource(filename, folder_dir, image_url, indent=2)
                 how_many_downloaded += 1
-                print()
 
             except FileAlreadyExistsError:
-                print("  The file already exists" + " " * 10, end="\n\n")
+                logger.info("  The file already exists")
                 duplicates += 1
 
             except TypeInSkip:
-                print("  Skipping...")
+                logger.info("  Skipping...")
                 how_many_downloaded += 1
 
             except Exception as exception:
-                print("\n  Could not get the file")
-                print(
-                    "  " +
-                    "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+                logger.error("\n  Could not get the file")
+                logger.error(
+                    "  "
+                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                         class_name=exception.__class__.__name__,
                         info=str(exception)
                     ) + "\n"
                )
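`_validate_extension` (see its definition at the end of this file's diff) uses substring containment rather than equality, which tolerates trailing noise in imgur's `ext` field; illustrative values only, the exact field contents are unverified:

```python
assert Imgur._validate_extension(".jpg?1") == ".jpg"  # trailing noise is stripped
assert Imgur._validate_extension(".mp4") == ".mp4"
# Imgur._validate_extension(".tiff")  # would raise ExtensionError
```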
                logger.debug(GLOBAL.log_stream.getvalue())
 
         if duplicates == images_length:
             raise FileAlreadyExistsError
         elif how_many_downloaded + duplicates < images_length:
             raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
 
-    def download(self, image: dict):
-        extension = self.validateExtension(image["ext"])
+    def _download_image(self, image: dict):
+        extension = self._validate_extension(image["ext"])
 
         image_url = self.imgur_image_domain + image["hash"] + extension
 
         filename = GLOBAL.config['filename'].format(**self.post) + extension
-        short_filename = self.post['POSTID'] + extension
 
-        self.getFile(filename, short_filename, self.directory, image_url)
+        self._download_resource(pathlib.Path(filename), self.directory, image_url)
 
-    @property
-    def isAlbum(self) -> bool:
+    def _is_album(self) -> bool:
         return "album_images" in self.raw_data
 
     @staticmethod
-    def getData(link: str) -> dict:
+    def _get_data(link: str) -> dict:
         cookies = {"over18": "1", "postpagebeta": "0"}
         res = requests.get(link, cookies=cookies)
         if res.status_code != 200:
@@ -128,18 +125,18 @@ class Imgur(BaseDownloader):
             end_index -= 1
 
         try:
             data = page_source[start_index:end_index + 2].strip()[:-1]
-        except Exception:
+        except IndexError:
             page_source[end_index + 1] = '}'
             data = page_source[start_index:end_index + 3].strip()[:-1]
 
         return json.loads(data)
 
     @staticmethod
-    def validateExtension(string: str) -> str:
+    def _validate_extension(extension_suffix: str) -> str:
         possible_extensions = [".jpg", ".png", ".mp4", ".gif"]
 
         for extension in possible_extensions:
-            if extension in string:
+            if extension in extension_suffix:
                 return extension
         else:
-            raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
+            raise ExtensionError(f"\"{extension_suffix}\" is not recognized as a valid extension.")
diff --git a/bulkredditdownloader/downloaders/redgifs.py b/bulkredditdownloader/downloaders/redgifs.py
index ff63780..98224aa 100644
--- a/bulkredditdownloader/downloaders/redgifs.py
+++ b/bulkredditdownloader/downloaders/redgifs.py
@@ -1,35 +1,25 @@
+#!/usr/bin/env python3
+
 import json
-import os
 import pathlib
 import urllib.request
 
 from bs4 import BeautifulSoup
 
-from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
+from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
 from bulkredditdownloader.errors import NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL
 
 
-class Redgifs(BaseDownloader):
+class Redgifs(GifDeliveryNetwork):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
-        try:
-            post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
-        except IndexError:
-            raise NotADownloadableLinkError("Could not read the page source")
+        # GifDeliveryNetwork.__init__ already triggers download()
 
-        post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
-
-        if not os.path.exists(directory):
-            os.makedirs(directory)
-
-        filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-        short_filename = post['POSTID'] + post['EXTENSION']
-
-        self.getFile(filename, short_filename, directory, post['MEDIAURL'])
+    def download(self):
+        super().download()
 
     @staticmethod
-    def getLink(url: str) -> str:
+    def _get_link(url: str) -> str:
         """Extract direct link to the video from page's source
         and return it
         """
diff --git a/bulkredditdownloader/downloaders/self_post.py b/bulkredditdownloader/downloaders/self_post.py
index 05a7249..2325711 100644
--- a/bulkredditdownloader/downloaders/self_post.py
+++ b/bulkredditdownloader/downloaders/self_post.py
@@ -1,45 +1,46 @@
-from src.utils import printToFile as print
+#!/usr/bin/env python3
+
 import io
-import os
+import logging
 import pathlib
 from pathlib import Path
 
 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
 
-VanillaPrint = print
+logger = logging.getLogger(__name__)
 
 
 class SelfPost(BaseDownloader):
     def __init__(self, directory: pathlib.Path, post: dict):
         super().__init__(directory, post)
+        self.download()
+
+    def download(self):
         if "self" in GLOBAL.arguments.skip:
             raise TypeInSkip
 
-        if not os.path.exists(directory):
-            os.makedirs(directory)
+        self.directory.mkdir(parents=True, exist_ok=True)
+        filename = GLOBAL.config['filename'].format(**self.post)
 
-        filename = GLOBAL.config['filename'].format(**post)
-
-        file_dir = directory / (filename + ".md")
-        print(file_dir)
-        print(filename + ".md")
+        file_dir = self.directory / (filename + ".md")
+        logger.info(file_dir)
+        logger.info(filename + ".md")
 
         if Path.is_file(file_dir):
             raise FileAlreadyExistsError
 
         try:
-            self.writeToFile(file_dir, post)
+            self._write_to_file(file_dir, self.post)
         except FileNotFoundError:
-            file_dir = post['POSTID'] + ".md"
-            file_dir = directory / file_dir
+            file_dir = self.post['POSTID'] + ".md"
+            file_dir = self.directory / file_dir
 
-            self.writeToFile(file_dir, post)
+            self._write_to_file(file_dir, self.post)
 
     @staticmethod
-    def writeToFile(directory: pathlib.Path, post: dict):
+    def _write_to_file(directory: pathlib.Path, post: dict):
         """Self posts are formatted here"""
         content = ("## ["
                    + post["TITLE"]
@@ -59,5 +60,5 @@ class SelfPost(BaseDownloader):
                    + ")")
 
         with io.open(directory, "w", encoding="utf-8") as FILE:
-            VanillaPrint(content, file=FILE)
-            print("Downloaded")
+            print(content, file=FILE)
+            logger.info("Downloaded")
"_audio" + video_name = self.post['POSTID'] + "_video" + video_url = self.post['CONTENTURL'] + audio_name = self.post['POSTID'] + "_audio" audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4' - print(directory, filename, sep="\n") + logger.info(self.directory, filename, sep="\n") - self.getFile(video_name, video_name, directory, video_url, silent=True) - self.getFile(audio_name, audio_name, directory, audio_url, silent=True) + self._download_resource(video_name, self.directory, video_url, silent=True) + self._download_resource(audio_name, self.directory, audio_url, silent=True) try: - self._mergeAudio(video_name, audio_name, filename, short_filename, directory) + self._merge_audio(video_name, audio_name, filename, self.directory) except KeyboardInterrupt: - os.remove(directory / filename) - os.remove(directory / audio_name) - os.rename(directory / video_name, directory / filename) + (self.directory / filename).unlink() + (self.directory / audio_name).unlink() + (self.directory / video_name).unlink() + (self.directory / filename).unlink() @staticmethod - def _mergeAudio( + def _merge_audio( video: pathlib.Path, audio: pathlib.Path, filename: pathlib.Path, - short_filename, directory: pathlib.Path): input_video = str(directory / video) input_audio = str(directory / audio) @@ -55,5 +60,5 @@ class VReddit(BaseDownloader): input_audio, input_video, str(directory / filename)) subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT) - os.remove(directory / video) - os.remove(directory / audio) + (directory / video).unlink() + (directory / audio).unlink() diff --git a/bulkredditdownloader/downloaders/youtube.py b/bulkredditdownloader/downloaders/youtube.py index bcc0c2f..abde54a 100644 --- a/bulkredditdownloader/downloaders/youtube.py +++ b/bulkredditdownloader/downloaders/youtube.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python3 + +import logging import os import pathlib import sys @@ -7,21 +10,24 @@ import youtube_dl from bulkredditdownloader.downloaders.base_downloader import BaseDownloader from bulkredditdownloader.errors import FileAlreadyExistsError from bulkredditdownloader.utils import GLOBAL -from bulkredditdownloader.utils import printToFile as print + +logger = logging.getLogger(__name__) class Youtube(BaseDownloader): def __init__(self, directory: pathlib.Path, post: dict): super().__init__(directory, post) - if not os.path.exists(directory): - os.makedirs(directory) + self.download() - filename = GLOBAL.config['filename'].format(**post) - print(filename) + def download(self): + self.directory.mkdir(exist_ok=True) - self.download(filename, directory, post['CONTENTURL']) + filename = GLOBAL.config['filename'].format(**self.post) + logger.info(filename) - def download(self, filename: str, directory: pathlib.Path, url: str): + self._download_video(filename, self.directory, self.post['CONTENTURL']) + + def _download_video(self, filename: str, directory: pathlib.Path, url: str): ydl_opts = { "format": "best", "outtmpl": str(directory / (filename + ".%(ext)s")), @@ -35,9 +41,12 @@ class Youtube(BaseDownloader): location = directory / (filename + ".mp4") + with open(location, 'rb') as file: + content = file.read() + if GLOBAL.arguments.no_dupes: try: - file_hash = self.createHash(str(location)) + file_hash = self._create_hash(content) except FileNotFoundError: return None if file_hash in GLOBAL.downloadedPosts(): @@ -48,7 +57,7 @@ class Youtube(BaseDownloader): @staticmethod def _hook(d): if d['status'] == 'finished': - return print("Downloaded") + return 
logger.info("Downloaded") downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6))) file_size = int(d['total_bytes'] * (10**(-6))) sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size))