Remove utils module for downloaders
This commit is contained in:
parent
be613949fe
commit
0d839329e5
121
bulkredditdownloader/downloaders/base_downloader.py
Normal file
121
bulkredditdownloader/downloaders/base_downloader.py
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# coding=utf-8
|
||||||
|
import hashlib
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import urllib.request
|
||||||
|
from abc import ABC
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
|
||||||
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
|
class BaseDownloader(ABC):
    """Common base class for the site-specific downloaders.

    Stores the destination directory and the post dictionary, and offers the
    static helpers shared by the downloaders: file hashing, progress output,
    file retrieval and extension detection.
    """

    def __init__(self, directory: Path, post: dict):
        """Remember where to download to and which post is being handled.

        Args:
            directory: Destination directory for the downloaded content.
            post: Dictionary describing the post being downloaded.
        """
        self.directory = directory
        self.post = post

    @staticmethod
    def createHash(filename: str) -> str:
        """Return the hexadecimal MD5 digest of the file at ``filename``.

        The file is read in 4096-byte chunks so that large downloads are not
        loaded into memory at once.
        """
        hash_md5 = hashlib.md5()
        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    @staticmethod
    def dlProgress(count: int, block_size: int, total_size: int):
        """Write the download progress to the console.

        Has the ``(count, block_size, total_size)`` signature expected of a
        ``urllib.request.urlretrieve`` report hook; ``getFile`` passes it as
        ``reporthook``.
        """
        download_mbs = int(count * block_size * (10 ** (-6)))
        file_size = int(total_size * (10 ** (-6)))
        # "\r" rewinds to the start of the line so each update overwrites
        # the previous one instead of scrolling the console.
        sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
        sys.stdout.flush()

    @staticmethod
    def getFile(
            filename: str,
            short_filename: str,
            folder_dir: Path,
            image_url: str,
            indent: int = 0,
            silent: bool = False):
        """Download ``image_url`` into ``folder_dir`` as ``filename``.

        The data is first written to ``<filename>.tmp`` and only renamed to
        its final name once the download (and the optional duplicate check)
        succeeded.

        Args:
            filename: Preferred name of the file inside ``folder_dir``.
            short_filename: Fallback name used for the next attempt when an
                attempt fails with ``FileNotFoundError``.
            folder_dir: Destination directory; created if it does not exist.
            image_url: URL to download.
            indent: Number of spaces printed in front of status messages.
            silent: When true, no status messages are printed.

        Returns:
            ``None`` on success.

        Raises:
            TypeInSkip: ``filename`` ends with an extension belonging to one
                of the types listed in ``GLOBAL.arguments.skip``.
            DomainInSkip: ``image_url`` contains one of the entries of
                ``GLOBAL.arguments.skip_domain``.
            FileAlreadyExistsError: The target file already exists, or
                ``no_dupes`` is set and the downloaded content's hash is
                already known.
            FailedToDownload: The connection was reset, or all three
                attempts failed.
        """
        formats = {
            "videos": [".mp4", ".webm"],
            "images": [".jpg", ".jpeg", ".png", ".bmp"],
            "gifs": [".gif"],
            "self": []
        }

        for file_type in GLOBAL.arguments.skip:
            # Compare against the end of the name only: a substring test
            # would also skip files whose title merely contains e.g. ".gif".
            if filename.endswith(tuple(formats[file_type])):
                raise TypeInSkip

        if any(domain in image_url for domain in GLOBAL.arguments.skip_domain):
            raise DomainInSkip

        headers = [
            ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                           "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
                           "Safari/537.36 OPR/54.0.2952.64"),
            ("Accept", "text/html,application/xhtml+xml,application/xml;"
                       "q=0.9,image/webp,image/apng,*/*;q=0.8"),
            ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
            ("Accept-Encoding", "none"),
            ("Accept-Language", "en-US,en;q=0.8"),
            ("Connection", "keep-alive")
        ]

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(folder_dir, exist_ok=True)

        # The browser-like headers are applied to every URL except those
        # containing "imgur"; the opener is installed process-wide.
        opener = urllib.request.build_opener()
        if "imgur" not in image_url:
            opener.addheaders = headers
        urllib.request.install_opener(opener)

        if not silent:
            print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")

        for _ in range(3):
            file_dir = Path(folder_dir) / filename
            temp_dir = Path(folder_dir) / (filename + ".tmp")

            if os.path.isfile(file_dir):
                raise FileAlreadyExistsError

            try:
                urllib.request.urlretrieve(image_url, temp_dir, reporthook=BaseDownloader.dlProgress)

                file_hash = BaseDownloader.createHash(temp_dir)
                if GLOBAL.arguments.no_dupes:
                    if file_hash in GLOBAL.downloadedPosts():
                        os.remove(temp_dir)
                        raise FileAlreadyExistsError
                # NOTE(review): the hash is recorded regardless of no_dupes;
                # confirm this nesting against the upstream file.
                GLOBAL.downloadedPosts.add(file_hash)

                os.rename(temp_dir, file_dir)
                if not silent:
                    print(" " * indent + "Downloaded" + " " * 10)
                return None
            except ConnectionResetError as err:
                # Do not leave a partially written ".tmp" file behind.
                if os.path.exists(temp_dir):
                    os.remove(temp_dir)
                raise FailedToDownload from err
            except FileNotFoundError:
                # Retry with the short name; presumably the long name was not
                # usable as a path on this system (TODO confirm).
                filename = short_filename

        raise FailedToDownload

    @staticmethod
    def getExtension(link: str):
        """Extract the file extension from a media link.

        The query string and fragment are ignored, so
        ``".../clip.mp4?source=fallback"`` yields ``".mp4"``. If the link does
        not end in one of the known types, ``".mp4"`` is returned for
        ``v.redd.it`` links and ``".jpg"`` for everything else.
        """
        image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']

        # Drop "?query" and "#fragment" so they cannot hide or fake the
        # extension.
        bare_link = link.split('?', 1)[0].split('#', 1)[0]
        candidate = bare_link.split('.')[-1]
        if candidate in image_types:
            return "." + candidate

        if "v.redd.it" not in link:
            return '.jpg'
        return '.mp4'
|
|
@ -1,17 +1,18 @@
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getExtension, getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
|
|
||||||
|
|
||||||
class Direct:
|
class Direct(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
post['EXTENSION'] = getExtension(post['CONTENTURL'])
|
super().__init__(directory, post)
|
||||||
|
post['EXTENSION'] = self.getExtension(post['CONTENTURL'])
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
||||||
short_filename = post['POSTID'] + post['EXTENSION']
|
short_filename = post['POSTID'] + post['EXTENSION']
|
||||||
|
|
||||||
getFile(filename, short_filename, directory, post['CONTENTURL'])
|
self.getFile(filename, short_filename, directory, post['CONTENTURL'])
|
||||||
|
|
|
@ -1,109 +0,0 @@
|
||||||
import hashlib
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import urllib.request
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from bulkredditdownloader.errors import DomainInSkip, FailedToDownload, FileAlreadyExistsError, TypeInSkip
|
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
|
||||||
|
|
||||||
|
|
||||||
def dlProgress(count: int, block_size: int, total_size: int):
|
|
||||||
"""Function for writing download progress to console
|
|
||||||
"""
|
|
||||||
download_mbs = int(count * block_size * (10 ** (-6)))
|
|
||||||
file_size = int(total_size * (10 ** (-6)))
|
|
||||||
sys.stdout.write("{}Mb/{}Mb\r".format(download_mbs, file_size))
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
|
|
||||||
def getExtension(link: str):
|
|
||||||
"""Extract file extension from image link.
|
|
||||||
If didn't find any, return '.jpg'
|
|
||||||
"""
|
|
||||||
image_types = ['jpg', 'png', 'mp4', 'webm', 'gif']
|
|
||||||
parsed = link.split('.')
|
|
||||||
for fileType in image_types:
|
|
||||||
if fileType in parsed:
|
|
||||||
return "." + parsed[-1]
|
|
||||||
else:
|
|
||||||
if "v.redd.it" not in link:
|
|
||||||
return '.jpg'
|
|
||||||
else:
|
|
||||||
return '.mp4'
|
|
||||||
|
|
||||||
|
|
||||||
def getFile(filename: str, short_filename: str, folder_dir: Path, image_url: str, indent: int = 0, silent: bool = False):
|
|
||||||
formats = {
|
|
||||||
"videos": [".mp4", ".webm"],
|
|
||||||
"images": [".jpg", ".jpeg", ".png", ".bmp"],
|
|
||||||
"gifs": [".gif"],
|
|
||||||
"self": []
|
|
||||||
}
|
|
||||||
|
|
||||||
for file_type in GLOBAL.arguments.skip:
|
|
||||||
for extension in formats[file_type]:
|
|
||||||
if extension in filename:
|
|
||||||
raise TypeInSkip
|
|
||||||
|
|
||||||
if any(domain in image_url for domain in GLOBAL.arguments.skip_domain):
|
|
||||||
raise DomainInSkip
|
|
||||||
|
|
||||||
headers = [
|
|
||||||
("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
|
|
||||||
"Safari/537.36 OPR/54.0.2952.64"),
|
|
||||||
("Accept", "text/html,application/xhtml+xml,application/xml;"
|
|
||||||
"q=0.9,image/webp,image/apng,*/*;q=0.8"),
|
|
||||||
("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
|
|
||||||
("Accept-Encoding", "none"),
|
|
||||||
("Accept-Language", "en-US,en;q=0.8"),
|
|
||||||
("Connection", "keep-alive")
|
|
||||||
]
|
|
||||||
|
|
||||||
if not os.path.exists(folder_dir):
|
|
||||||
os.makedirs(folder_dir)
|
|
||||||
|
|
||||||
opener = urllib.request.build_opener()
|
|
||||||
if "imgur" not in image_url:
|
|
||||||
opener.addheaders = headers
|
|
||||||
urllib.request.install_opener(opener)
|
|
||||||
|
|
||||||
if not silent:
|
|
||||||
print(" " * indent + str(folder_dir), " " * indent + str(filename), sep="\n")
|
|
||||||
|
|
||||||
for i in range(3):
|
|
||||||
file_dir = Path(folder_dir) / filename
|
|
||||||
temp_dir = Path(folder_dir) / (filename + ".tmp")
|
|
||||||
|
|
||||||
if not (os.path.isfile(file_dir)):
|
|
||||||
try:
|
|
||||||
urllib.request.urlretrieve(image_url, temp_dir, reporthook=dlProgress)
|
|
||||||
|
|
||||||
file_hash = createHash(temp_dir)
|
|
||||||
if GLOBAL.arguments.no_dupes:
|
|
||||||
if file_hash in GLOBAL.downloadedPosts():
|
|
||||||
os.remove(temp_dir)
|
|
||||||
raise FileAlreadyExistsError
|
|
||||||
GLOBAL.downloadedPosts.add(file_hash)
|
|
||||||
|
|
||||||
os.rename(temp_dir, file_dir)
|
|
||||||
if not silent:
|
|
||||||
print(" " * indent + "Downloaded" + " " * 10)
|
|
||||||
return None
|
|
||||||
except ConnectionResetError:
|
|
||||||
raise FailedToDownload
|
|
||||||
except FileNotFoundError:
|
|
||||||
filename = short_filename
|
|
||||||
else:
|
|
||||||
raise FileAlreadyExistsError
|
|
||||||
raise FailedToDownload
|
|
||||||
|
|
||||||
|
|
||||||
def createHash(filename: str) -> str:
|
|
||||||
hash_md5 = hashlib.md5()
|
|
||||||
with open(filename, "rb") as f:
|
|
||||||
for chunk in iter(lambda: f.read(4096), b""):
|
|
||||||
hash_md5.update(chunk)
|
|
||||||
return hash_md5.hexdigest()
|
|
|
@ -4,14 +4,15 @@ import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getExtension, getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
|
from bulkredditdownloader.errors import AlbumNotDownloadedCompletely, FileAlreadyExistsError, NotADownloadableLinkError
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
class Erome:
|
class Erome(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
try:
|
try:
|
||||||
images = self.getLinks(post['CONTENTURL'])
|
images = self.getLinks(post['CONTENTURL'])
|
||||||
except urllib.error.HTTPError:
|
except urllib.error.HTTPError:
|
||||||
|
@ -22,7 +23,7 @@ class Erome:
|
||||||
duplicates = 0
|
duplicates = 0
|
||||||
|
|
||||||
if images_length == 1:
|
if images_length == 1:
|
||||||
extension = getExtension(images[0])
|
extension = self.getExtension(images[0])
|
||||||
|
|
||||||
"""Filenames are declared here"""
|
"""Filenames are declared here"""
|
||||||
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
||||||
|
@ -32,7 +33,7 @@ class Erome:
|
||||||
if 'https://' not in image_url or 'http://' not in image_url:
|
if 'https://' not in image_url or 'http://' not in image_url:
|
||||||
image_url = "https://" + image_url
|
image_url = "https://" + image_url
|
||||||
|
|
||||||
getFile(filename, short_filename, directory, image_url)
|
self.getFile(filename, short_filename, directory, image_url)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
filename = GLOBAL.config['filename'].format(**post)
|
filename = GLOBAL.config['filename'].format(**post)
|
||||||
|
@ -48,7 +49,7 @@ class Erome:
|
||||||
os.makedirs(folder_dir)
|
os.makedirs(folder_dir)
|
||||||
|
|
||||||
for i in range(images_length):
|
for i in range(images_length):
|
||||||
extension = getExtension(images[i])
|
extension = self.getExtension(images[i])
|
||||||
|
|
||||||
filename = str(i + 1) + extension
|
filename = str(i + 1) + extension
|
||||||
image_url = images[i]
|
image_url = images[i]
|
||||||
|
@ -59,7 +60,7 @@ class Erome:
|
||||||
print(" {}".format(filename))
|
print(" {}".format(filename))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
getFile(filename, filename, folder_dir, image_url, indent=2)
|
self.getFile(filename, filename, folder_dir, image_url, indent=2)
|
||||||
print()
|
print()
|
||||||
except FileAlreadyExistsError:
|
except FileAlreadyExistsError:
|
||||||
print(" The file already exists" + " " * 10, end="\n\n")
|
print(" The file already exists" + " " * 10, end="\n\n")
|
||||||
|
|
|
@ -1,25 +1,23 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import pathlib
|
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound, NotADownloadableLinkError,
|
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError, ImageNotFound,
|
||||||
TypeInSkip)
|
NotADownloadableLinkError, TypeInSkip)
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
class Gallery:
|
class Gallery(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post):
|
def __init__(self, directory: pathlib.Path, post):
|
||||||
|
super().__init__(directory, post)
|
||||||
link = post['CONTENTURL']
|
link = post['CONTENTURL']
|
||||||
self.raw_data = self.getData(link)
|
self.raw_data = self.getData(link)
|
||||||
|
|
||||||
self.directory = directory
|
|
||||||
self.post = post
|
|
||||||
|
|
||||||
images = {}
|
images = {}
|
||||||
count = 0
|
count = 0
|
||||||
for model in self.raw_data['posts']['models']:
|
for model in self.raw_data['posts']['models']:
|
||||||
|
@ -86,7 +84,7 @@ class Gallery:
|
||||||
print("\n ({}/{})".format(i + 1, count))
|
print("\n ({}/{})".format(i + 1, count))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
|
self.getFile(filename, short_filename, folder_dir, images[i]['url'], indent=2)
|
||||||
how_many_downloaded += 1
|
how_many_downloaded += 1
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
|
@ -4,22 +4,22 @@ import urllib.request
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getExtension, getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
|
from bulkredditdownloader.downloaders.gif_delivery_network import GifDeliveryNetwork
|
||||||
from bulkredditdownloader.errors import NotADownloadableLinkError
|
from bulkredditdownloader.errors import NotADownloadableLinkError
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
|
|
||||||
|
class Gfycat(BaseDownloader):
|
||||||
class Gfycat:
|
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
try:
|
try:
|
||||||
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise NotADownloadableLinkError("Could not read the page source")
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
|
|
||||||
post['EXTENSION'] = getExtension(post['MEDIAURL'])
|
post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
|
||||||
|
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
@ -27,7 +27,7 @@ class Gfycat:
|
||||||
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
||||||
short_filename = post['POSTID'] + post['EXTENSION']
|
short_filename = post['POSTID'] + post['EXTENSION']
|
||||||
|
|
||||||
getFile(filename, short_filename, directory, post['MEDIAURL'])
|
self.getFile(filename, short_filename, directory, post['MEDIAURL'])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getLink(url: str) -> str:
|
def getLink(url: str) -> str:
|
||||||
|
|
|
@ -4,19 +4,20 @@ import urllib.request
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getExtension, getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import NotADownloadableLinkError
|
from bulkredditdownloader.errors import NotADownloadableLinkError
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
|
|
||||||
|
|
||||||
class GifDeliveryNetwork:
|
class GifDeliveryNetwork(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
try:
|
try:
|
||||||
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise NotADownloadableLinkError("Could not read the page source")
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
|
|
||||||
post['EXTENSION'] = getExtension(post['MEDIAURL'])
|
post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
|
||||||
|
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
@ -24,7 +25,7 @@ class GifDeliveryNetwork:
|
||||||
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
||||||
short_filename = post['POSTID'] + post['EXTENSION']
|
short_filename = post['POSTID'] + post['EXTENSION']
|
||||||
|
|
||||||
getFile(filename, short_filename, directory, post['MEDIAURL'])
|
self.getFile(filename, short_filename, directory, post['MEDIAURL'])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getLink(url: str) -> str:
|
def getLink(url: str) -> str:
|
||||||
|
|
|
@ -4,19 +4,20 @@ import pathlib
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.downloaders.direct import Direct
|
from bulkredditdownloader.downloaders.direct import Direct
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getFile
|
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError,
|
||||||
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, ExtensionError, FileAlreadyExistsError, ImageNotFound,
|
ImageNotFound, NotADownloadableLinkError, TypeInSkip)
|
||||||
NotADownloadableLinkError, TypeInSkip)
|
|
||||||
from bulkredditdownloader.utils import GLOBAL, nameCorrector
|
from bulkredditdownloader.utils import GLOBAL, nameCorrector
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
class Imgur:
|
class Imgur(BaseDownloader):
|
||||||
|
|
||||||
imgur_image_domain = "https://i.imgur.com/"
|
imgur_image_domain = "https://i.imgur.com/"
|
||||||
|
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
link = post['CONTENTURL']
|
link = post['CONTENTURL']
|
||||||
|
|
||||||
if link.endswith(".gifv"):
|
if link.endswith(".gifv"):
|
||||||
|
@ -26,9 +27,6 @@ class Imgur:
|
||||||
|
|
||||||
self.raw_data = self.getData(link)
|
self.raw_data = self.getData(link)
|
||||||
|
|
||||||
self.directory = directory
|
|
||||||
self.post = post
|
|
||||||
|
|
||||||
if self.isAlbum:
|
if self.isAlbum:
|
||||||
if self.raw_data["album_images"]["count"] != 1:
|
if self.raw_data["album_images"]["count"] != 1:
|
||||||
self.downloadAlbum(self.raw_data["album_images"])
|
self.downloadAlbum(self.raw_data["album_images"])
|
||||||
|
@ -65,7 +63,7 @@ class Imgur:
|
||||||
print("\n ({}/{})".format(i + 1, images_length))
|
print("\n ({}/{})".format(i + 1, images_length))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
getFile(filename, short_filename, folder_dir, image_url, indent=2)
|
self.getFile(filename, short_filename, folder_dir, image_url, indent=2)
|
||||||
how_many_downloaded += 1
|
how_many_downloaded += 1
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -101,7 +99,7 @@ class Imgur:
|
||||||
filename = GLOBAL.config['filename'].format(**self.post) + extension
|
filename = GLOBAL.config['filename'].format(**self.post) + extension
|
||||||
short_filename = self.post['POSTID'] + extension
|
short_filename = self.post['POSTID'] + extension
|
||||||
|
|
||||||
getFile(filename, short_filename, self.directory, image_url)
|
self.getFile(filename, short_filename, self.directory, image_url)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def isAlbum(self) -> bool:
|
def isAlbum(self) -> bool:
|
||||||
|
|
|
@ -5,19 +5,20 @@ import urllib.request
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getExtension, getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import NotADownloadableLinkError
|
from bulkredditdownloader.errors import NotADownloadableLinkError
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
|
|
||||||
|
|
||||||
class Redgifs:
|
class Redgifs(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
try:
|
try:
|
||||||
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
post['MEDIAURL'] = self.getLink(post['CONTENTURL'])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise NotADownloadableLinkError("Could not read the page source")
|
raise NotADownloadableLinkError("Could not read the page source")
|
||||||
|
|
||||||
post['EXTENSION'] = getExtension(post['MEDIAURL'])
|
post['EXTENSION'] = self.getExtension(post['MEDIAURL'])
|
||||||
|
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
@ -25,7 +26,7 @@ class Redgifs:
|
||||||
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
filename = GLOBAL.config['filename'].format(**post) + post["EXTENSION"]
|
||||||
short_filename = post['POSTID'] + post['EXTENSION']
|
short_filename = post['POSTID'] + post['EXTENSION']
|
||||||
|
|
||||||
getFile(filename, short_filename, directory, post['MEDIAURL'])
|
self.getFile(filename, short_filename, directory, post['MEDIAURL'])
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def getLink(url: str) -> str:
|
def getLink(url: str) -> str:
|
||||||
|
|
|
@ -4,6 +4,7 @@ import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
|
from bulkredditdownloader.errors import FileAlreadyExistsError, TypeInSkip
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
@ -11,8 +12,9 @@ from bulkredditdownloader.utils import printToFile as print
|
||||||
VanillaPrint = print
|
VanillaPrint = print
|
||||||
|
|
||||||
|
|
||||||
class SelfPost:
|
class SelfPost(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
if "self" in GLOBAL.arguments.skip:
|
if "self" in GLOBAL.arguments.skip:
|
||||||
raise TypeInSkip
|
raise TypeInSkip
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,14 @@ import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import getFile
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
class VReddit:
|
class VReddit(BaseDownloader):
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
extension = ".mp4"
|
extension = ".mp4"
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
@ -20,7 +21,7 @@ class VReddit:
|
||||||
fnull = open(os.devnull, 'w')
|
fnull = open(os.devnull, 'w')
|
||||||
subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT)
|
subprocess.call("ffmpeg", stdout=fnull, stderr=subprocess.STDOUT)
|
||||||
except Exception:
|
except Exception:
|
||||||
getFile(filename, short_filename, directory, post['CONTENTURL'])
|
self.getFile(filename, short_filename, directory, post['CONTENTURL'])
|
||||||
print("FFMPEG library not found, skipping merging video and audio")
|
print("FFMPEG library not found, skipping merging video and audio")
|
||||||
else:
|
else:
|
||||||
video_name = post['POSTID'] + "_video"
|
video_name = post['POSTID'] + "_video"
|
||||||
|
@ -30,8 +31,8 @@ class VReddit:
|
||||||
|
|
||||||
print(directory, filename, sep="\n")
|
print(directory, filename, sep="\n")
|
||||||
|
|
||||||
getFile(video_name, video_name, directory, video_url, silent=True)
|
self.getFile(video_name, video_name, directory, video_url, silent=True)
|
||||||
getFile(audio_name, audio_name, directory, audio_url, silent=True)
|
self.getFile(audio_name, audio_name, directory, audio_url, silent=True)
|
||||||
try:
|
try:
|
||||||
self._mergeAudio(video_name, audio_name, filename, short_filename, directory)
|
self._mergeAudio(video_name, audio_name, filename, short_filename, directory)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
|
|
@ -4,15 +4,15 @@ import sys
|
||||||
|
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
|
||||||
from bulkredditdownloader.downloaders.downloader_utils import createHash
|
from bulkredditdownloader.downloaders.base_downloader import BaseDownloader
|
||||||
from bulkredditdownloader.errors import FileAlreadyExistsError
|
from bulkredditdownloader.errors import FileAlreadyExistsError
|
||||||
from bulkredditdownloader.utils import GLOBAL
|
from bulkredditdownloader.utils import GLOBAL
|
||||||
from bulkredditdownloader.utils import printToFile as print
|
from bulkredditdownloader.utils import printToFile as print
|
||||||
|
|
||||||
|
|
||||||
|
class Youtube(BaseDownloader):
|
||||||
class Youtube:
|
|
||||||
def __init__(self, directory: pathlib.Path, post: dict):
|
def __init__(self, directory: pathlib.Path, post: dict):
|
||||||
|
super().__init__(directory, post)
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ class Youtube:
|
||||||
|
|
||||||
if GLOBAL.arguments.no_dupes:
|
if GLOBAL.arguments.no_dupes:
|
||||||
try:
|
try:
|
||||||
file_hash = createHash(str(location))
|
file_hash = self.createHash(str(location))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return None
|
return None
|
||||||
if file_hash in GLOBAL.downloadedPosts():
|
if file_hash in GLOBAL.downloadedPosts():
|
||||||
|
|
Loading…
Reference in a new issue