1
0
Fork 0
mirror of synced 2024-06-25 17:40:17 +12:00

Move to inheritance system for downloaders

This commit is contained in:
Serene-Arc 2021-02-07 14:46:20 +10:00 committed by Ali Parlakci
parent 69e21e46a2
commit f573038a21
11 changed files with 253 additions and 280 deletions

View file

@ -1,15 +1,18 @@
#!/usr/bin/env python3
# coding=utf-8
import hashlib
import os
import sys
import urllib.request
from abc import ABC
import logging
import re
from abc import ABC, abstractmethod
from pathlib import Path
import requests
from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class BaseDownloader(ABC):
@ -17,22 +20,17 @@ class BaseDownloader(ABC):
self.directory = directory
self.post = post
@abstractmethod
def download(self):
raise NotImplementedError
@staticmethod
def createHash(filename: str) -> str:
hash_md5 = hashlib.md5()
with open(filename, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
def _create_hash(content: bytes) -> str:
hash_md5 = hashlib.md5(content)
return hash_md5.hexdigest()
@staticmethod
def getFile(
filename: str,
short_filename: str,
folder_dir: Path,
image_url: str,
indent: int = 0,
silent: bool = False):
def _download_resource(filename: Path, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
formats = {
"videos": [".mp4", ".webm"],
"images": [".jpg", ".jpeg", ".png", ".bmp"],
@ -52,69 +50,55 @@ class BaseDownloader(ABC):
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
"Safari/537.36 OPR/54.0.2952.64"),
("Accept", "text/html,application/xhtml+xml,application/xml;"
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"),
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
("Accept-Encoding", "none"),
("Accept-Language", "en-US,en;q=0.8"),
("Connection", "keep-alive")
]
if not os.path.exists(folder_dir):
os.makedirs(folder_dir)
folder_dir.mkdir(exist_ok=True)
opener = urllib.request.build_opener()
if "imgur" not in image_url:
opener.addheaders = headers
urllib.request.install_opener(opener)
addheaders = headers
else:
addheaders = None
if not silent:
print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
def dlProgress(count: int, block_size: int, total_size: int):
"""Function for writing download progress to console """
download_mbs = int(count * block_size * (10 ** (-6)))
file_size = int(total_size * (10 ** (-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
sys.stdout.flush()
logger.info(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
# Loop to attempt download 3 times
for i in range(3):
file_dir = Path(folder_dir) / filename
temp_dir = Path(folder_dir) / (filename + ".tmp")
file_path = Path(folder_dir) / filename
if not (os.path.isfile(file_dir)):
if file_path.is_file():
raise FileAlreadyExistsError
else:
try:
urllib.request.urlretrieve(image_url, temp_dir, reporthook=dlProgress)
file_hash = BaseDownloader.createHash(temp_dir)
if GLOBAL.arguments.no_dupes:
if file_hash in GLOBAL.downloadedPosts():
os.remove(temp_dir)
raise FileAlreadyExistsError
GLOBAL.downloadedPosts.add(file_hash)
os.rename(temp_dir, file_dir)
if not silent:
print(" " * indent + "Downloaded" + " " * 10)
return None
download_content = requests.get(image_url, headers=addheaders).content
except ConnectionResetError:
raise FailedToDownload
except FileNotFoundError:
filename = short_filename
else:
raise FileAlreadyExistsError
file_hash = BaseDownloader._create_hash(download_content)
if GLOBAL.arguments.no_dupes:
if file_hash in GLOBAL.downloadedPosts():
raise FileAlreadyExistsError
GLOBAL.downloadedPosts.add(file_hash)
with open(file_path, 'wb') as file:
file.write(download_content)
if not silent:
logger.info(" " * indent + "Downloaded" + " " * 10)
return
raise FailedToDownload
@staticmethod
def getExtension(link: str):
"""Extract file extension from image link. If didn't find any, return '.jpg' """
image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']
parsed = link.split('.')
for fileType in image_types:
if fileType in parsed:
return "." + parsed[-1]
def _get_extension(url: str) -> str:
pattern = re.compile(r'(\.(jpg|jpeg|png|mp4|webm|gif))')
if len(results := re.search(pattern, url).groups()) > 1:
return results[1]
if "v.redd.it" not in url:
return '.jpg'
else:
if "v.redd.it" not in link:
return '.jpg'
else:
return '.mp4'
return '.mp4'

View file

@ -1,4 +1,5 @@
import os
#!/usr/bin/env python3
import pathlib
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
@ -8,11 +9,11 @@ from bulkredditdownloader.utils import GLOBAL
class Direct(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
post['EXTENSION'] = self.getExtension(post['CONTENTURL'])
if not os.path.exists(directory):
os.makedirs(directory)
self.download()
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
short_filename = post['POSTID'] + post['EXTENSION']
def download(self):
    """Download the post's direct media link into ``self.directory``.

    The extension guessed from the content URL is stored on the post under
    ``'EXTENSION'`` and appended to the configured filename template.
    """
    content_url = self.post['CONTENTURL']
    self.post['EXTENSION'] = self._get_extension(content_url)
    # parents=True also creates missing intermediate directories, matching
    # what os.makedirs() does
    self.directory.mkdir(parents=True, exist_ok=True)

    filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
    self._download_resource(pathlib.Path(filename), self.directory, content_url)

View file

@ -1,5 +1,8 @@
import os
#!/usr/bin/env python3
import logging
import pathlib
import re
import urllib.error
import urllib.request
from html.parser import HTMLParser
@ -7,70 +10,64 @@ from html.parser import HTMLParser
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class Erome(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
self.download()
def download(self):
try:
images = self.getLinks(post['CONTENTURL'])
images = self._get_links(self.post['CONTENTURL'])
except urllib.error.HTTPError:
raise NotADownloadableLinkError("Not a downloadable link")
images_length = len(images)
how_many_downloaded = images_length
how_many_downloaded = len(images)
duplicates = 0
if images_length == 1:
extension = self.getExtension(images[0])
"""Filenames are declared here"""
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
short_filename = post['POSTID'] + extension
filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
image_url = images[0]
if 'https://' not in image_url or 'http://' not in image_url:
image_url = "https://" + image_url
image = images[0]
if not re.match(r'https?://.*', image):
image = "https://" + image
self.getFile(filename, short_filename, directory, image_url)
self._download_resource(filename, self.directory, image)
else:
filename = GLOBAL.config['filename'].format(**post)
print(filename)
filename = GLOBAL.config['filename'].format(**self.post)
logger.info(filename)
folder_dir = directory / filename
folder_dir = self.directory / filename
try:
if not os.path.exists(folder_dir):
os.makedirs(folder_dir)
except FileNotFoundError:
folder_dir = directory / post['POSTID']
os.makedirs(folder_dir)
for i in range(images_length):
extension = self.getExtension(images[i])
folder_dir.mkdir(exist_ok=True)
for i, image in enumerate(images):
extension = self._get_extension(image)
filename = str(i + 1) + extension
image_url = images[i]
if 'https://' not in image_url and 'http://' not in image_url:
image_url = "https://" + image_url
print(" ({}/{})".format(i + 1, images_length))
print(" {}".format(filename))
if not re.match(r'https?://.*', image):
image = "https://" + image
logger.info(" ({}/{})".format(i + 1, images_length))
logger.info(" {}".format(filename))
try:
self.getFile(filename, filename, folder_dir, image_url, indent=2)
print()
self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
except FileAlreadyExistsError:
print(" The file already exists" + " " * 10, end="\n\n")
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
how_many_downloaded -= 1
except Exception as exception:
# raise exception
print("\n Could not get the file")
print(
logger.error("\n Could not get the file")
logger.error(
" "
+ "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))
+ "\n"
@ -82,10 +79,12 @@ class Erome(BaseDownloader):
elif how_many_downloaded + duplicates < images_length:
raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
def getLinks(self, url: str) -> list[str]:
@staticmethod
def _get_links(url: str) -> list[str]:
content = []
line_number = None
# TODO: move to bs4 and requests
class EromeParser(HTMLParser):
tag = None

View file

@ -1,7 +1,9 @@
#!/usr/bin/env python3
import json
import os
import pathlib
import urllib
import logging
import urllib.parse
import requests
@ -9,15 +11,18 @@ from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
NotADownloadableLinkError, TypeInSkip)
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class Gallery(BaseDownloader):
def __init__(self, directory: pathlib.Path, post):
super().__init__(directory, post)
link = post['CONTENTURL']
self.raw_data = self.getData(link)
link = self.post['CONTENTURL']
self.raw_data = self._get_data(link)
self.download()
def download(self):
images = {}
count = 0
for model in self.raw_data['posts']['models']:
@ -27,15 +32,15 @@ class Gallery(BaseDownloader):
images[count] = {'id': item['mediaId'], 'url': self.raw_data['posts']
['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
count += 1
except Exception:
except KeyError:
continue
except Exception:
except KeyError:
continue
self.downloadAlbum(images, count)
self._download_album(images, count)
@staticmethod
def getData(link: str) -> dict:
def _get_data(link: str) -> dict:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
@ -58,50 +63,42 @@ class Gallery(BaseDownloader):
data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
return data
def downloadAlbum(self, images: dict, count: int):
def _download_album(self, images: dict, count: int):
folder_name = GLOBAL.config['filename'].format(**self.post)
folder_dir = self.directory / folder_name
how_many_downloaded = 0
duplicates = 0
try:
if not os.path.exists(folder_dir):
os.makedirs(folder_dir)
except FileNotFoundError:
folder_dir = self.directory / self.post['POSTID']
os.makedirs(folder_dir)
folder_dir.mkdir(exist_ok=True)
logger.info(folder_name)
print(folder_name)
for i, image in enumerate(images):
path = urllib.parse.urlparse(image['url']).path
extension = pathlib.Path(path).suffix
for i in range(count):
path = urllib.parse.urlparse(images[i]['url']).path
extension = os.path.splitext(path)[1]
filename = pathlib.Path("_".join([str(i + 1), image['id']]) + extension)
filename = "_".join([str(i + 1), images[i]['id']]) + extension
short_filename = str(i + 1) + "_" + images[i]['id']
print("\n ({}/{})".format(i + 1, count))
logger.info("\n ({}/{})".format(i + 1, count))
try:
self.getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
self._download_resource(filename, folder_dir, image['url'], indent=2)
how_many_downloaded += 1
print()
except FileAlreadyExistsError:
print(" The file already exists" + " " * 10, end="\n\n")
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
print(" Skipping...")
logger.info(" Skipping...")
how_many_downloaded += 1
except Exception as exception:
print("\n Could not get the file")
print(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
logger.info("\n Could not get the file")
logger.info(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__, info=str(exception)) + "\n"
)
print(GLOBAL.log_stream.getvalue(), no_print=True)
logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
if duplicates == count:
raise FileAlreadyExistsError

View file

@ -1,43 +1,32 @@
#!/usr/bin/env python3
import json
import os
import pathlib
import re
import urllib.request
from bs4 import BeautifulSoup
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.errors import NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
import pathlib
class Gfycat(BaseDownloader):
class Gfycat(GifDeliveryNetwork):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
try:
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
self.download()
post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
short_filename = post['POSTID'] + post['EXTENSION']
self.getFile(filename, short_filename, directory, post['MEDIAURL'])
def download(self):
super().download()
@staticmethod
def getLink(url: str) -> str:
def _get_link(url: str) -> str:
"""Extract direct link to the video from page's source
and return it
"""
if '.webm' in url or '.mp4' in url or '.gif' in url:
if re.match(r'\.(webm|mp4|gif)$', url):
return url
if url[-1:] == '/':
if url.endswith('/'):
url = url[:-1]
url = "https://gfycat.com/" + url.split('/')[-1]
@ -49,6 +38,6 @@ class Gfycat(BaseDownloader):
content = soup.find("script", attrs=attributes)
if content is None:
return GifDeliveryNetwork.getLink(url)
return super()._get_link(url)
return json.loads(content.contents[0])["video"]["contentUrl"]

View file

@ -1,4 +1,5 @@
import os
#!/usr/bin/env python3
import pathlib
import urllib.request
@ -12,23 +13,23 @@ from bulkredditdownloader.utils import GLOBAL
class GifDeliveryNetwork(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
self.download()
def download(self):
    """Resolve the direct media URL for the post and download it.

    The resolved link is stored on the post under ``'MEDIAURL'`` and its
    guessed extension under ``'EXTENSION'``.

    Raises:
        NotADownloadableLinkError: if ``_get_link`` raises ``IndexError``
            while extracting the media URL.
    """
    try:
        self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
    except IndexError as error:
        # keep the original failure attached for debugging
        raise NotADownloadableLinkError("Could not read the page source") from error

    self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
    # parents=True also creates missing intermediate directories, matching
    # what os.makedirs() does
    self.directory.mkdir(parents=True, exist_ok=True)

    filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
    self._download_resource(pathlib.Path(filename), self.directory, self.post['MEDIAURL'])
@staticmethod
def getLink(url: str) -> str:
def _get_link(url: str) -> str:
"""Extract direct link to the video from page's source
and return it
"""

View file

@ -1,6 +1,8 @@
#!/usr/bin/env python3
import json
import os
import pathlib
import logging
import requests
@ -9,7 +11,8 @@ from bulkredditdownloader.downloaders.direct import Direct
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
ImageNotFound, NotADownloadableLinkError, TypeInSkip)
from bulkredditdownloader.utils import GLOBAL, nameCorrector
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class Imgur(BaseDownloader):
@ -18,24 +21,28 @@ class Imgur(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
link = post['CONTENTURL']
self.raw_data = {}
self.download()
def download(self):
link = self.post['CONTENTURL']
if link.endswith(".gifv"):
link = link.replace(".gifv", ".mp4")
Direct(directory, {**post, 'CONTENTURL': link})
Direct(self.directory, {**self.post, 'CONTENTURL': link})
return
self.raw_data = self.getData(link)
self.raw_data = self._get_data(link)
if self.isAlbum:
if self._is_album:
if self.raw_data["album_images"]["count"] != 1:
self.downloadAlbum(self.raw_data["album_images"])
self._download_album(self.raw_data["album_images"])
else:
self.download(self.raw_data["album_images"]["images"][0])
self._download_image(self.raw_data["album_images"]["images"][0])
else:
self.download(self.raw_data)
self._download_image(self.raw_data)
def downloadAlbum(self, images: dict):
def _download_album(self, images: dict):
folder_name = GLOBAL.config['filename'].format(**self.post)
folder_dir = self.directory / folder_name
@ -43,70 +50,60 @@ class Imgur(BaseDownloader):
how_many_downloaded = 0
duplicates = 0
try:
if not os.path.exists(folder_dir):
os.makedirs(folder_dir)
except FileNotFoundError:
folder_dir = self.directory / self.post['POSTID']
os.makedirs(folder_dir)
print(folder_name)
folder_dir.mkdir(exist_ok=True)
logger.info(folder_name)
for i in range(images_length):
extension = self.validateExtension(images["images"][i]["ext"])
extension = self._validate_extension(images["images"][i]["ext"])
image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
filename = "_".join([str(i + 1),
nameCorrector(images["images"][i]['title']),
images["images"][i]['hash']]) + extension
short_filename = str(i + 1) + "_" + images["images"][i]['hash']
filename = pathlib.Path("_".join([str(i + 1),
nameCorrector(images["images"][i]['title']),
images["images"][i]['hash']]) + extension)
print("\n ({}/{})".format(i + 1, images_length))
logger.info("\n ({}/{})".format(i + 1, images_length))
try:
self.getFile(filename, short_filename, folder_dir, image_url, indent=2)
self._download_resource(filename, folder_dir, image_url, indent=2)
how_many_downloaded += 1
print()
except FileAlreadyExistsError:
print(" The file already exists" + " " * 10, end="\n\n")
logger.info(" The file already exists" + " " * 10, end="\n\n")
duplicates += 1
except TypeInSkip:
print(" Skipping...")
logger.info(" Skipping...")
how_many_downloaded += 1
except Exception as exception:
print("\n Could not get the file")
print(
" " +
"{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
logger.info("\n Could not get the file")
logger.info(
" "
+ "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exception.__class__.__name__,
info=str(exception)
)
+ "\n"
)
print(GLOBAL.log_stream.getvalue(), no_print=True)
logger.info(GLOBAL.log_stream.getvalue(), no_print=True)
if duplicates == images_length:
raise FileAlreadyExistsError
elif how_many_downloaded + duplicates < images_length:
raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")
def download(self, image: dict):
extension = self.validateExtension(image["ext"])
def _download_image(self, image: dict):
extension = self._validate_extension(image["ext"])
image_url = self.imgur_image_domain + image["hash"] + extension
filename = GLOBAL.config['filename'].format(**self.post) + extension
short_filename = self.post['POSTID'] + extension
self.getFile(filename, short_filename, self.directory, image_url)
self._download_resource(filename, self.directory, image_url)
@property
def isAlbum(self) -> bool:
def _is_album(self) -> bool:
return "album_images" in self.raw_data
@staticmethod
def getData(link: str) -> dict:
def _get_data(link: str) -> dict:
cookies = {"over18": "1", "postpagebeta": "0"}
res = requests.get(link, cookies=cookies)
if res.status_code != 200:
@ -128,18 +125,18 @@ class Imgur(BaseDownloader):
end_index -= 1
try:
data = page_source[start_index:end_index + 2].strip()[:-1]
except Exception:
except IndexError:
page_source[end_index + 1] = '}'
data = page_source[start_index:end_index + 3].strip()[:-1]
return json.loads(data)
@staticmethod
def validateExtension(string: str) -> str:
def _validate_extension(extension_suffix: str) -> str:
possible_extensions = [".jpg", ".png", ".mp4", ".gif"]
for extension in possible_extensions:
if extension in string:
if extension in extension_suffix:
return extension
else:
raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
raise ExtensionError(f"\"{extension_suffix}\" is not recognized as a valid extension.")

View file

@ -1,35 +1,25 @@
#!/usr/bin/env python3
import json
import os
import pathlib
import urllib.request
from bs4 import BeautifulSoup
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.errors import NotADownloadableLinkError
from bulkredditdownloader.utils import GLOBAL
class Redgifs(BaseDownloader):
class Redgifs(GifDeliveryNetwork):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
try:
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
except IndexError:
raise NotADownloadableLinkError("Could not read the page source")
self.download()
post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
if not os.path.exists(directory):
os.makedirs(directory)
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
short_filename = post['POSTID'] + post['EXTENSION']
self.getFile(filename, short_filename, directory, post['MEDIAURL'])
def download(self):
super().download()
@staticmethod
def getLink(url: str) -> str:
def _get_link(url: str) -> str:
"""Extract direct link to the video from page's source
and return it
"""

View file

@ -1,45 +1,46 @@
from src.utils import printToFile as print
#!/usr/bin/env python3
import io
import os
import logging
import pathlib
from pathlib import Path
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
VanillaPrint = print
logger = logging.getLogger(__name__)
class SelfPost(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
self.download()
def download(self):
    """Write the self post to a Markdown file inside ``self.directory``.

    Raises:
        TypeInSkip: if ``"self"`` is listed in ``GLOBAL.arguments.skip``.
        FileAlreadyExistsError: if the target ``.md`` file already exists.
    """
    if "self" in GLOBAL.arguments.skip:
        raise TypeInSkip

    # parents=True also creates missing intermediate directories, matching
    # what os.makedirs() does
    self.directory.mkdir(parents=True, exist_ok=True)
    filename = GLOBAL.config['filename'].format(**self.post)

    file_dir = self.directory / (filename + ".md")
    logger.info(file_dir)
    logger.info(filename + ".md")

    if file_dir.is_file():
        raise FileAlreadyExistsError

    try:
        self._write_to_file(file_dir, self.post)
    except FileNotFoundError:
        # The templated name could not be opened (presumably it is not a
        # usable path on this filesystem); fall back to the post id.
        file_dir = self.directory / (self.post['POSTID'] + ".md")
        self._write_to_file(file_dir, self.post)
@staticmethod
def writeToFile(directory: pathlib.Path, post: dict):
def _write_to_file(directory: pathlib.Path, post: dict):
"""Self posts are formatted here"""
content = ("## ["
+ post["TITLE"]
@ -59,5 +60,5 @@ class SelfPost(BaseDownloader):
+ ")")
with io.open(directory, "w", encoding="utf-8") as FILE:
VanillaPrint(content, file=FILE)
print("Downloaded")
print(content, file=FILE)
logger.info("Downloaded")

View file

@ -1,51 +1,56 @@
#!/usr/bin/env python3
import logging
import os
import pathlib
import subprocess
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class VReddit(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
extension = ".mp4"
if not os.path.exists(directory):
os.makedirs(directory)
self.download()
filename = GLOBAL.config['filename'].format(**post) + extension
short_filename = post['POSTID'] + extension
def download(self):
    """Download a v.redd.it video, merging the audio track when possible.

    ffmpeg is probed by launching it once. If it cannot be started, only
    the resource at the post's content URL is downloaded. Otherwise the
    video and the ``DASH_audio.mp4`` track next to it are fetched
    separately and handed to ``_merge_audio``.
    """
    extension = ".mp4"
    # parents=True also creates missing intermediate directories, matching
    # what os.makedirs() does
    self.directory.mkdir(parents=True, exist_ok=True)

    filename = GLOBAL.config['filename'].format(**self.post) + extension

    try:
        # DEVNULL avoids opening (and never closing) a handle on os.devnull
        subprocess.call("ffmpeg", stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)
    except OSError:
        # ffmpeg could not be launched: fall back to the plain resource
        self._download_resource(filename, self.directory, self.post['CONTENTURL'])
        logger.info("FFMPEG library not found, skipping merging video and audio")
        return

    video_name = self.post['POSTID'] + "_video"
    video_url = self.post['CONTENTURL']
    audio_name = self.post['POSTID'] + "_audio"
    audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4'

    # Logger methods take no print()-style ``sep`` keyword; use %-formatting
    logger.info("%s\n%s", self.directory, filename)

    self._download_resource(video_name, self.directory, video_url, silent=True)
    self._download_resource(audio_name, self.directory, audio_url, silent=True)
    try:
        self._merge_audio(video_name, audio_name, filename, self.directory)
    except KeyboardInterrupt:
        # Interrupted mid-merge: remove whatever partial files exist. Each
        # path is removed once and a missing file is not an error.
        for leftover in (filename, audio_name, video_name):
            (self.directory / leftover).unlink(missing_ok=True)
@staticmethod
def _mergeAudio(
def _merge_audio(
video: pathlib.Path,
audio: pathlib.Path,
filename: pathlib.Path,
short_filename,
directory: pathlib.Path):
input_video = str(directory / video)
input_audio = str(directory / audio)
@ -55,5 +60,5 @@ class VReddit(BaseDownloader):
input_audio, input_video, str(directory / filename))
subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT)
os.remove(directory / video)
os.remove(directory / audio)
(directory / video).unlink()
(directory / audio).unlink()

View file

@ -1,3 +1,6 @@
#!/usr/bin/env python3
import logging
import os
import pathlib
import sys
@ -7,21 +10,24 @@ import youtube_dl
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
from bulkredditdownloader.errors import FileAlreadyExistsError
from bulkredditdownloader.utils import GLOBAL
from bulkredditdownloader.utils import printToFile as print
logger = logging.getLogger(__name__)
class Youtube(BaseDownloader):
def __init__(self, directory: pathlib.Path, post: dict):
super().__init__(directory, post)
if not os.path.exists(directory):
os.makedirs(directory)
self.download()
filename = GLOBAL.config['filename'].format(**post)
print(filename)
def download(self):
self.directory.mkdir(exist_ok=True)
self.download(filename, directory, post['CONTENTURL'])
filename = GLOBAL.config['filename'].format(**self.post)
logger.info(filename)
def download(self, filename: str, directory: pathlib.Path, url: str):
self._download_video(filename, self.directory, self.post['CONTENTURL'])
def _download_video(self, filename: str, directory: pathlib.Path, url: str):
ydl_opts = {
"format": "best",
"outtmpl": str(directory / (filename + ".%(ext)s")),
@ -35,9 +41,12 @@ class Youtube(BaseDownloader):
location = directory / (filename + ".mp4")
with open(location, 'rb') as file:
content = file.read()
if GLOBAL.arguments.no_dupes:
try:
file_hash = self.createHash(str(location))
file_hash = self._create_hash(content)
except FileNotFoundError:
return None
if file_hash in GLOBAL.downloadedPosts():
@ -48,7 +57,7 @@ class Youtube(BaseDownloader):
@staticmethod
def _hook(d):
if d['status'] == 'finished':
return print("Downloaded")
return logger.info("Downloaded")
downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6)))
file_size = int(d['total_bytes'] * (10**(-6)))
sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size))