Move to inheritance system for downloaders

parent 69e21e46a2
commit f573038a21

11 changed files with 253 additions and 280 deletions
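The diff below replaces the old procedural helpers with a small class hierarchy: BaseDownloader becomes an abstract base class that stores the target directory and post dict, and each site-specific downloader implements download() and invokes it from its own constructor. A minimal sketch of that contract, condensed from the diff (the real subclasses resolve an extension and call _download_resource(); the print call here is only a stand-in for that work):

from abc import ABC, abstractmethod
from pathlib import Path


class BaseDownloader(ABC):
    def __init__(self, directory: Path, post: dict):
        self.directory = directory
        self.post = post

    @abstractmethod
    def download(self):
        raise NotImplementedError


class Direct(BaseDownloader):
    def __init__(self, directory: Path, post: dict):
        super().__init__(directory, post)
        self.download()  # every concrete downloader kicks off its work from __init__

    def download(self):
        # stand-in body; the real Direct.download() resolves the file extension
        # and hands the URL to BaseDownloader._download_resource()
        print(f"would download {self.post['CONTENTURL']} into {self.directory}")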
@@ -1,15 +1,18 @@
 #!/usr/bin/env python3
 # coding=utf-8

 import hashlib
-import os
-import sys
-import urllib.request
-from abc import ABC
+import logging
+import re
+from abc import ABC, abstractmethod
 from pathlib import Path

+import requests

 from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class BaseDownloader(ABC):
@@ -17,22 +20,17 @@ class BaseDownloader(ABC):
 self.directory = directory
 self.post = post

+@abstractmethod
+def download(self):
+raise NotImplementedError

 @staticmethod
-def createHash(filename: str) -> str:
-hash_md5 = hashlib.md5()
-with open(filename, "rb") as f:
-for chunk in iter(lambda: f.read(4096), b""):
-hash_md5.update(chunk)
+def _create_hash(content: bytes) -> str:
+hash_md5 = hashlib.md5(content)
 return hash_md5.hexdigest()

 @staticmethod
-def getFile(
-filename: str,
-short_filename: str,
-folder_dir: Path,
-image_url: str,
-indent: int = 0,
-silent: bool = False):
+def _download_resource(filename: Path, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
 formats = {
 "videos": [".mp4", ".webm"],
 "images": [".jpg", ".jpeg", ".png", ".bmp"],
@@ -52,69 +50,55 @@ class BaseDownloader(ABC):
 ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
 "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
 "Safari/537.36 OPR/54.0.2952.64"),
-("Accept", "text/html,application/xhtml+xml,application/xml;"
-"q=0.9,image/webp,image/apng,*/*;q=0.8"),
+("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"),
 ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
 ("Accept-Encoding", "none"),
 ("Accept-Language", "en-US,en;q=0.8"),
 ("Connection", "keep-alive")
 ]

-if not os.path.exists(folder_dir):
-os.makedirs(folder_dir)
+folder_dir.mkdir(exist_ok=True)

-opener = urllib.request.build_opener()
 if "imgur" not in image_url:
-opener.addheaders = headers
-urllib.request.install_opener(opener)
+addheaders = headers
+else:
+addheaders = None

 if not silent:
-print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
+logger.info(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")

-def dlProgress(count: int, block_size: int, total_size: int):
-"""Function for writing download progress to console """
-download_mbs = int(count * block_size * (10 ** (-6)))
-file_size = int(total_size * (10 ** (-6)))
-sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
-sys.stdout.flush()

+# Loop to attempt download 3 times
 for i in range(3):
-file_dir = Path(folder_dir) / filename
-temp_dir = Path(folder_dir) / (filename + ".tmp")
+file_path = Path(folder_dir) / filename

-if not (os.path.isfile(file_dir)):
+if file_path.is_file():
+raise FileAlreadyExistsError
+else:
 try:
-urllib.request.urlretrieve(image_url, temp_dir, reporthook=dlProgress)
+download_content = requests.get(image_url, headers=addheaders).content
+except ConnectionResetError:
+raise FailedToDownload

-file_hash = BaseDownloader.createHash(temp_dir)
+file_hash = BaseDownloader._create_hash(download_content)
 if GLOBAL.arguments.no_dupes:
 if file_hash in GLOBAL.downloadedPosts():
-os.remove(temp_dir)
 raise FileAlreadyExistsError
 GLOBAL.downloadedPosts.add(file_hash)

-os.rename(temp_dir, file_dir)
+with open(file_path, 'wb') as file:
+file.write(download_content)
 if not silent:
-print(" " * indent + "Downloaded" + " " * 10)
-return None
-except ConnectionResetError:
-raise FailedToDownload
-except FileNotFoundError:
-filename = short_filename
-else:
-raise FileAlreadyExistsError
+logger.info(" " * indent + "Downloaded" + " " * 10)
+return
 raise FailedToDownload

 @staticmethod
-def getExtension(link: str):
-"""Extract file extension from image link. If didn't find any, return '.jpg' """
-image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']
-parsed = link.split('.')
-for fileType in image_types:
-if fileType in parsed:
-return "." + parsed[-1]
-else:
-if "v.redd.it" not in link:
+def _get_extension(url: str) -> str:
+pattern = re.compile(r'(\.(jpg|jpeg|png|mp4|webm|gif))')
+if len(results := re.search(pattern, url).groups()) > 1:
+return results[1]
+if "v.redd.it" not in url:
 return '.jpg'
 else:
 return '.mp4'
@@ -1,4 +1,5 @@
-import os
+#!/usr/bin/env python3

 import pathlib

 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
@@ -8,11 +9,11 @@ from bulkredditdownloader.utils import GLOBAL
 class Direct(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-post['EXTENSION'] = self.getExtension(post['CONTENTURL'])
-if not os.path.exists(directory):
-os.makedirs(directory)
+self.download()

-filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-short_filename = post['POSTID'] + post['EXTENSION']
+def download(self):
+self.post['EXTENSION'] = self._get_extension(self.post['CONTENTURL'])
+self.directory.mkdir(exist_ok=True)

-self.getFile(filename, short_filename, directory, post['CONTENTURL'])
+filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
+self._download_resource(pathlib.Path(filename), self.directory, self.post['CONTENTURL'])
@@ -1,5 +1,8 @@
-import os
+#!/usr/bin/env python3

+import logging
 import pathlib
+import re
 import urllib.error
 import urllib.request
 from html.parser import HTMLParser
@@ -7,70 +10,64 @@ from html.parser import HTMLParser
 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class Erome(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
+self.download()

+def download(self):
 try:
-images = self.getLinks(post['CONTENTURL'])
+images = self._get_links(self.post['CONTENTURL'])
 except urllib.error.HTTPError:
 raise NotADownloadableLinkError("Not a downloadable link")

 images_length = len(images)
-how_many_downloaded = images_length
+how_many_downloaded = len(images)
 duplicates = 0

 if images_length == 1:
-extension = self.getExtension(images[0])

 """Filenames are declared here"""
-filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-short_filename = post['POSTID'] + extension
+filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]

-image_url = images[0]
-if 'https://' not in image_url or 'http://' not in image_url:
-image_url = "https://" + image_url
+image = images[0]
+if not re.match(r'https?://.*', image):
+image = "https://" + image

-self.getFile(filename, short_filename, directory, image_url)
+self._download_resource(filename, self.directory, image)

 else:
-filename = GLOBAL.config['filename'].format(**post)
-print(filename)
+filename = GLOBAL.config['filename'].format(**self.post)
+logger.info(filename)

-folder_dir = directory / filename
+folder_dir = self.directory / filename

-try:
-if not os.path.exists(folder_dir):
-os.makedirs(folder_dir)
-except FileNotFoundError:
-folder_dir = directory / post['POSTID']
-os.makedirs(folder_dir)
+folder_dir.mkdir(exist_ok=True)

-for i in range(images_length):
-extension = self.getExtension(images[i])

+for i, image in enumerate(images):
+extension = self._get_extension(image)
 filename = str(i + 1) + extension
-image_url = images[i]
-if 'https://' not in image_url and 'http://' not in image_url:
-image_url = "https://" + image_url

-print(" ({}/{})".format(i + 1, images_length))
-print(" {}".format(filename))
+if not re.match(r'https?://.*', image):
+image = "https://" + image

+logger.info(" ({}/{})".format(i + 1, images_length))
+logger.info(" {}".format(filename))

 try:
-self.getFile(filename, filename, folder_dir, image_url, indent=2)
-print()
+self._download_resource(pathlib.Path(filename), folder_dir, image, indent=2)
 except FileAlreadyExistsError:
-print(" The file already exists" + " " * 10, end="\n\n")
+logger.info(" The file already exists" + " " * 10, end="\n\n")
 duplicates += 1
 how_many_downloaded -= 1

 except Exception as exception:
 # raise exception
-print("\n Could not get the file")
-print(
+logger.error("\n Could not get the file")
+logger.error(
 " "
 + "{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception))
 + "\n"
@@ -82,10 +79,12 @@ class Erome(BaseDownloader):
 elif how_many_downloaded + duplicates < images_length:
 raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")

-def getLinks(self, url: str) -> list[str]:
+@staticmethod
+def _get_links(url: str) -> list[str]:
 content = []
 line_number = None

+# TODO: move to bs4 and requests
 class EromeParser(HTMLParser):
 tag = None
@@ -1,7 +1,9 @@
+#!/usr/bin/env python3

 import json
-import os
 import pathlib
-import urllib
+import logging
+import urllib.parse

 import requests

@@ -9,15 +11,18 @@ from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
 NotADownloadableLinkError, TypeInSkip)
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class Gallery(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post):
 super().__init__(directory, post)
-link = post['CONTENTURL']
-self.raw_data = self.getData(link)
+link = self.post['CONTENTURL']
+self.raw_data = self._get_data(link)
+self.download()

+def download(self):
 images = {}
 count = 0
 for model in self.raw_data['posts']['models']:
@@ -27,15 +32,15 @@ class Gallery(BaseDownloader):
 images[count] = {'id': item['mediaId'], 'url': self.raw_data['posts']
 ['models'][model]['media']['mediaMetadata'][item['mediaId']]['s']['u']}
 count += 1
-except Exception:
+except KeyError:
 continue
-except Exception:
+except KeyError:
 continue

-self.downloadAlbum(images, count)
+self._download_album(images, count)

 @staticmethod
-def getData(link: str) -> dict:
+def _get_data(link: str) -> dict:
 headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64",
 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
@@ -58,50 +63,42 @@ class Gallery(BaseDownloader):
 data = json.loads(page_source[start_index - 1:end_index + 1].strip()[:-1])
 return data

-def downloadAlbum(self, images: dict, count: int):
+def _download_album(self, images: dict, count: int):
 folder_name = GLOBAL.config['filename'].format(**self.post)
 folder_dir = self.directory / folder_name

 how_many_downloaded = 0
 duplicates = 0

-try:
-if not os.path.exists(folder_dir):
-os.makedirs(folder_dir)
-except FileNotFoundError:
-folder_dir = self.directory / self.post['POSTID']
-os.makedirs(folder_dir)
+folder_dir.mkdir(exist_ok=True)
+logger.info(folder_name)

-print(folder_name)
+for i, image in enumerate(images):
+path = urllib.parse.urlparse(image['url']).path
+extension = pathlib.Path(path).suffix

-for i in range(count):
-path = urllib.parse.urlparse(images[i]['url']).path
-extension = os.path.splitext(path)[1]
+filename = pathlib.Path("_".join([str(i + 1), image['id']]) + extension)

-filename = "_".join([str(i + 1), images[i]['id']]) + extension
-short_filename = str(i + 1) + "_" + images[i]['id']
+logger.info("\n ({}/{})".format(i + 1, count))

-print("\n ({}/{})".format(i + 1, count))

 try:
-self.getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
+self._download_resource(filename, folder_dir, image['url'], indent=2)
 how_many_downloaded += 1
-print()

 except FileAlreadyExistsError:
-print(" The file already exists" + " " * 10, end="\n\n")
+logger.info(" The file already exists" + " " * 10, end="\n\n")
 duplicates += 1

 except TypeInSkip:
-print(" Skipping...")
+logger.info(" Skipping...")
 how_many_downloaded += 1

 except Exception as exception:
-print("\n Could not get the file")
-print(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+logger.info("\n Could not get the file")
+logger.info(" " + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
 class_name=exception.__class__.__name__, info=str(exception)) + "\n"
 )
-print(GLOBAL.log_stream.getvalue(), no_print=True)
+logger.info(GLOBAL.log_stream.getvalue(), no_print=True)

 if duplicates == count:
 raise FileAlreadyExistsError
@@ -1,43 +1,32 @@
+#!/usr/bin/env python3

 import json
-import os
+import pathlib
+import re
 import urllib.request

 from bs4 import BeautifulSoup

-from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
-from bulkredditdownloader.errors import NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL
-import pathlib


-class Gfycat(BaseDownloader):
+class Gfycat(GifDeliveryNetwork):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-try:
-post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
-except IndexError:
-raise NotADownloadableLinkError("Could not read the page source")
+self.download()

-post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
-if not os.path.exists(directory):
-os.makedirs(directory)
+def download(self):
+super().download()

-filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-short_filename = post['POSTID'] + post['EXTENSION']

-self.getFile(filename, short_filename, directory, post['MEDIAURL'])

 @staticmethod
-def getLink(url: str) -> str:
+def _get_link(url: str) -> str:
 """Extract direct link to the video from page's source
 and return it
 """
-if '.webm' in url or '.mp4' in url or '.gif' in url:
+if re.match(r'\.(webm|mp4|gif)$', url):
 return url

-if url[-1:] == '/':
+if url.endswith('/'):
 url = url[:-1]

 url = "https://gfycat.com/" + url.split('/')[-1]
@@ -49,6 +38,6 @@ class Gfycat(BaseDownloader):
 content = soup.find("script", attrs=attributes)

 if content is None:
-return GifDeliveryNetwork.getLink(url)
+return super()._get_link(url)

 return json.loads(content.contents[0])["video"]["contentUrl"]
@@ -1,4 +1,5 @@
-import os
+#!/usr/bin/env python3

 import pathlib
 import urllib.request

@@ -12,23 +13,23 @@ from bulkredditdownloader.utils import GLOBAL
 class GifDeliveryNetwork(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
+self.download()

+def download(self):
 try:
-post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
+self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
 except IndexError:
 raise NotADownloadableLinkError("Could not read the page source")

-post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
+self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
+self.directory.mkdir(exist_ok=True)

-if not os.path.exists(directory):
-os.makedirs(directory)
+filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]

-filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-short_filename = post['POSTID'] + post['EXTENSION']
+self._download_resource(filename, self.directory, self.post['MEDIAURL'])

-self.getFile(filename, short_filename, directory, post['MEDIAURL'])

 @staticmethod
-def getLink(url: str) -> str:
+def _get_link(url: str) -> str:
 """Extract direct link to the video from page's source
 and return it
 """
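A second layer of the hierarchy appears in the Gfycat and Redgifs diffs: both now subclass GifDeliveryNetwork instead of BaseDownloader, reuse its download() flow via super().download(), and only override the static _get_link() resolver, falling back to the parent resolver when their own page scrape finds nothing. Roughly, and assuming the helpers defined in base_downloader.py above, the relationship looks like this (the _get_link bodies are placeholders, not the real parsing code):

class GifDeliveryNetwork(BaseDownloader):
    def download(self):
        # shared flow: resolve the direct media URL, pick an extension, fetch the file
        self.post['MEDIAURL'] = self._get_link(self.post['CONTENTURL'])
        self.post['EXTENSION'] = self._get_extension(self.post['MEDIAURL'])
        self.directory.mkdir(exist_ok=True)
        filename = GLOBAL.config['filename'].format(**self.post) + self.post["EXTENSION"]
        self._download_resource(filename, self.directory, self.post['MEDIAURL'])

    @staticmethod
    def _get_link(url: str) -> str:
        ...  # scrape the gifdeliverynetwork page for the direct video URL


class Gfycat(GifDeliveryNetwork):
    def download(self):
        super().download()  # reuse the whole GifDeliveryNetwork flow

    @staticmethod
    def _get_link(url: str) -> str:
        ...  # try the gfycat page first; fall back to GifDeliveryNetwork._get_link(url)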
@@ -1,6 +1,8 @@
+#!/usr/bin/env python3

 import json
-import os
 import pathlib
+import logging

 import requests

@@ -9,7 +11,8 @@ from bulkredditdownloader.downloaders.direct import Direct
 from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
 ImageNotFound, NotADownloadableLinkError, TypeInSkip)
 from bulkredditdownloader.utils import GLOBAL, nameCorrector
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class Imgur(BaseDownloader):
@@ -18,24 +21,28 @@ class Imgur(BaseDownloader):

 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-link = post['CONTENTURL']
+self.raw_data = {}
+self.download()

+def download(self):
+link = self.post['CONTENTURL']

 if link.endswith(".gifv"):
 link = link.replace(".gifv", ".mp4")
-Direct(directory, {**post, 'CONTENTURL': link})
+Direct(self.directory, {**self.post, 'CONTENTURL': link})
 return

-self.raw_data = self.getData(link)
+self.raw_data = self._get_data(link)

-if self.isAlbum:
+if self._is_album:
 if self.raw_data["album_images"]["count"] != 1:
-self.downloadAlbum(self.raw_data["album_images"])
+self._download_album(self.raw_data["album_images"])
 else:
-self.download(self.raw_data["album_images"]["images"][0])
+self._download_image(self.raw_data["album_images"]["images"][0])
 else:
-self.download(self.raw_data)
+self._download_image(self.raw_data)

-def downloadAlbum(self, images: dict):
+def _download_album(self, images: dict):
 folder_name = GLOBAL.config['filename'].format(**self.post)
 folder_dir = self.directory / folder_name

@@ -43,70 +50,60 @@ class Imgur(BaseDownloader):
 how_many_downloaded = 0
 duplicates = 0

-try:
-if not os.path.exists(folder_dir):
-os.makedirs(folder_dir)
-except FileNotFoundError:
-folder_dir = self.directory / self.post['POSTID']
-os.makedirs(folder_dir)
+folder_dir.mkdir(exist_ok=True)
+logger.info(folder_name)

-print(folder_name)

 for i in range(images_length):
-extension = self.validateExtension(images["images"][i]["ext"])
+extension = self._validate_extension(images["images"][i]["ext"])
 image_url = self.imgur_image_domain + images["images"][i]["hash"] + extension
-filename = "_".join([str(i + 1),
+filename = pathlib.Path("_".join([str(i + 1),
 nameCorrector(images["images"][i]['title']),
-images["images"][i]['hash']]) + extension
-short_filename = str(i + 1) + "_" + images["images"][i]['hash']
+images["images"][i]['hash']]) + extension)

-print("\n ({}/{})".format(i + 1, images_length))
+logger.info("\n ({}/{})".format(i + 1, images_length))

 try:
-self.getFile(filename, short_filename, folder_dir, image_url, indent=2)
+self._download_resource(filename, folder_dir, image_url, indent=2)
 how_many_downloaded += 1
-print()

 except FileAlreadyExistsError:
-print(" The file already exists" + " " * 10, end="\n\n")
+logger.info(" The file already exists" + " " * 10, end="\n\n")
 duplicates += 1

 except TypeInSkip:
-print(" Skipping...")
+logger.info(" Skipping...")
 how_many_downloaded += 1

 except Exception as exception:
-print("\n Could not get the file")
-print(
-" " +
-"{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
+logger.info("\n Could not get the file")
+logger.info(
+" "
++ "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
 class_name=exception.__class__.__name__,
 info=str(exception)
 )
 + "\n"
 )
-print(GLOBAL.log_stream.getvalue(), no_print=True)
+logger.info(GLOBAL.log_stream.getvalue(), no_print=True)

 if duplicates == images_length:
 raise FileAlreadyExistsError
 elif how_many_downloaded + duplicates < images_length:
 raise AlbumNotDownloadedCompletely("Album Not Downloaded Completely")

-def download(self, image: dict):
-extension = self.validateExtension(image["ext"])
+def _download_image(self, image: dict):
+extension = self._validate_extension(image["ext"])
 image_url = self.imgur_image_domain + image["hash"] + extension

 filename = GLOBAL.config['filename'].format(**self.post) + extension
-short_filename = self.post['POSTID'] + extension

-self.getFile(filename, short_filename, self.directory, image_url)
+self._download_resource(filename, self.directory, image_url)

-@property
-def isAlbum(self) -> bool:
+def _is_album(self) -> bool:
 return "album_images" in self.raw_data

 @staticmethod
-def getData(link: str) -> dict:
+def _get_data(link: str) -> dict:
 cookies = {"over18": "1", "postpagebeta": "0"}
 res = requests.get(link, cookies=cookies)
 if res.status_code != 200:
@@ -128,18 +125,18 @@ class Imgur(BaseDownloader):
 end_index -= 1
 try:
 data = page_source[start_index:end_index + 2].strip()[:-1]
-except Exception:
+except IndexError:
 page_source[end_index + 1] = '}'
 data = page_source[start_index:end_index + 3].strip()[:-1]

 return json.loads(data)

 @staticmethod
-def validateExtension(string: str) -> str:
+def _validate_extension(extension_suffix: str) -> str:
 possible_extensions = [".jpg", ".png", ".mp4", ".gif"]

 for extension in possible_extensions:
-if extension in string:
+if extension in extension_suffix:
 return extension
 else:
-raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
+raise ExtensionError(f"\"{extension_suffix}\" is not recognized as a valid extension.")
@@ -1,35 +1,25 @@
+#!/usr/bin/env python3

 import json
-import os
 import pathlib
 import urllib.request

 from bs4 import BeautifulSoup

-from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
+from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
 from bulkredditdownloader.errors import NotADownloadableLinkError
-from bulkredditdownloader.utils import GLOBAL


-class Redgifs(BaseDownloader):
+class Redgifs(GifDeliveryNetwork):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-try:
-post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
-except IndexError:
-raise NotADownloadableLinkError("Could not read the page source")
+self.download()

-post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
-if not os.path.exists(directory):
-os.makedirs(directory)
+def download(self):
+super().download()

-filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
-short_filename = post['POSTID'] + post['EXTENSION']

-self.getFile(filename, short_filename, directory, post['MEDIAURL'])

 @staticmethod
-def getLink(url: str) -> str:
+def _get_link(url: str) -> str:
 """Extract direct link to the video from page's source
 and return it
 """
@@ -1,45 +1,46 @@
-from src.utils import printToFile as print
+#!/usr/bin/env python3

 import io
-import os
+import logging
 import pathlib
 from pathlib import Path

 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print

-VanillaPrint = print
+logger = logging.getLogger(__name__)


 class SelfPost(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
+self.download()

+def download(self):
 if "self" in GLOBAL.arguments.skip:
 raise TypeInSkip

-if not os.path.exists(directory):
-os.makedirs(directory)
+self.directory.mkdir(exist_ok=True)
+filename = GLOBAL.config['filename'].format(**self.post)

-filename = GLOBAL.config['filename'].format(**post)
-file_dir = directory / (filename + ".md")
-print(file_dir)
-print(filename + ".md")
+file_dir = self.directory / (filename + ".md")
+logger.info(file_dir)
+logger.info(filename + ".md")

 if Path.is_file(file_dir):
 raise FileAlreadyExistsError

 try:
-self.writeToFile(file_dir, post)
+self._write_to_file(file_dir, self.post)
 except FileNotFoundError:
-file_dir = post['POSTID'] + ".md"
-file_dir = directory / file_dir
+file_dir = self.post['POSTID'] + ".md"
+file_dir = self.directory / file_dir

-self.writeToFile(file_dir, post)
+self._write_to_file(file_dir, self.post)

 @staticmethod
-def writeToFile(directory: pathlib.Path, post: dict):
+def _write_to_file(directory: pathlib.Path, post: dict):
 """Self posts are formatted here"""
 content = ("## ["
 + post["TITLE"]
@@ -59,5 +60,5 @@ class SelfPost(BaseDownloader):
 + ")")

 with io.open(directory, "w", encoding="utf-8") as FILE:
-VanillaPrint(content, file=FILE)
-print("Downloaded")
+print(content, file=FILE)
+logger.info("Downloaded")
@@ -1,51 +1,56 @@
+#!/usr/bin/env python3

+import logging
 import os
 import pathlib
 import subprocess

 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class VReddit(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-extension = ".mp4"
-if not os.path.exists(directory):
-os.makedirs(directory)
+self.download()

-filename = GLOBAL.config['filename'].format(**post) + extension
-short_filename = post['POSTID'] + extension
+def download(self):
+extension = ".mp4"
+self.directory.mkdir(exist_ok=True)

+filename = GLOBAL.config['filename'].format(**self.post) + extension

 try:
 fnull = open(os.devnull, 'w')
 subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT)
 except Exception:
-self.getFile(filename, short_filename, directory, post['CONTENTURL'])
-print("FFMPEG library not found, skipping merging video and audio")
+self._download_resource(filename, self.directory, self.post['CONTENTURL'])
+logger.info("FFMPEG library not found, skipping merging video and audio")
 else:
-video_name = post['POSTID'] + "_video"
-video_url = post['CONTENTURL']
-audio_name = post['POSTID'] + "_audio"
+video_name = self.post['POSTID'] + "_video"
+video_url = self.post['CONTENTURL']
+audio_name = self.post['POSTID'] + "_audio"
 audio_url = video_url[:video_url.rfind('/')] + '/DASH_audio.mp4'

-print(directory, filename, sep="\n")
+logger.info(self.directory, filename, sep="\n")

-self.getFile(video_name, video_name, directory, video_url, silent=True)
-self.getFile(audio_name, audio_name, directory, audio_url, silent=True)
+self._download_resource(video_name, self.directory, video_url, silent=True)
+self._download_resource(audio_name, self.directory, audio_url, silent=True)
 try:
-self._mergeAudio(video_name, audio_name, filename, short_filename, directory)
+self._merge_audio(video_name, audio_name, filename, self.directory)
 except KeyboardInterrupt:
-os.remove(directory / filename)
-os.remove(directory / audio_name)
-os.rename(directory / video_name, directory / filename)
+(self.directory / filename).unlink()
+(self.directory / audio_name).unlink()
+(self.directory / video_name).unlink()
+(self.directory / filename).unlink()

 @staticmethod
-def _mergeAudio(
+def _merge_audio(
 video: pathlib.Path,
 audio: pathlib.Path,
 filename: pathlib.Path,
-short_filename,
 directory: pathlib.Path):
 input_video = str(directory / video)
 input_audio = str(directory / audio)
@@ -55,5 +60,5 @@ class VReddit(BaseDownloader):
 input_audio, input_video, str(directory / filename))
 subprocess.call(cmd.split(), stdout=fnull, stderr=subprocess.STDOUT)

-os.remove(directory / video)
-os.remove(directory / audio)
+(directory / video).unlink()
+(directory / audio).unlink()
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3

+import logging
 import os
 import pathlib
 import sys
@@ -7,21 +10,24 @@ import youtube_dl
 from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
 from bulkredditdownloader.errors import FileAlreadyExistsError
 from bulkredditdownloader.utils import GLOBAL
-from bulkredditdownloader.utils import printToFile as print
+logger = logging.getLogger(__name__)


 class Youtube(BaseDownloader):
 def __init__(self, directory: pathlib.Path, post: dict):
 super().__init__(directory, post)
-if not os.path.exists(directory):
-os.makedirs(directory)
+self.download()

-filename = GLOBAL.config['filename'].format(**post)
-print(filename)
+def download(self):
+self.directory.mkdir(exist_ok=True)

-self.download(filename, directory, post['CONTENTURL'])
+filename = GLOBAL.config['filename'].format(**self.post)
+logger.info(filename)

-def download(self, filename: str, directory: pathlib.Path, url: str):
+self._download_video(filename, self.directory, self.post['CONTENTURL'])

+def _download_video(self, filename: str, directory: pathlib.Path, url: str):
 ydl_opts = {
 "format": "best",
 "outtmpl": str(directory / (filename + ".%(ext)s")),
@@ -35,9 +41,12 @@ class Youtube(BaseDownloader):

 location = directory / (filename + ".mp4")

+with open(location, 'rb') as file:
+content = file.read()

 if GLOBAL.arguments.no_dupes:
 try:
-file_hash = self.createHash(str(location))
+file_hash = self._create_hash(content)
 except FileNotFoundError:
 return None
 if file_hash in GLOBAL.downloadedPosts():
@@ -48,7 +57,7 @@ class Youtube(BaseDownloader):
 @staticmethod
 def _hook(d):
 if d['status'] == 'finished':
-return print("Downloaded")
+return logger.info("Downloaded")
 downloaded_mbs = int(d['downloaded_bytes'] * (10**(-6)))
 file_size = int(d['total_bytes'] * (10**(-6)))
 sys.stdout.write("{}Mb/{}Mb\r".format(downloaded_mbs, file_size))