import sys
import os
import time
from urllib.error import HTTPError
import urllib.request
from pathlib import Path
import hashlib

from src.utils import nameCorrector, GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError, FileNameTooLong, FailedToDownload, DomainInSkip

|
|
def dlProgress(count, blockSize, totalSize):
    """urllib reporthook: write cumulative download progress to the console.

    count     -- number of blocks transferred so far
    blockSize -- size of one transfer block in bytes
    totalSize -- total size of the download in bytes
    """
    # Same 1e-06 scaling as bytes->Mb conversion; trailing \r rewrites the
    # same console line on every call instead of scrolling.
    transferred = int(count * blockSize * 1e-06)
    total = int(totalSize * 1e-06)
    sys.stdout.write("{}Mb/{}Mb\r".format(transferred, total))
    sys.stdout.flush()
def getExtension(link):
    """Guess the file extension for a direct media link.

    If any known media type appears between the dots of *link*, the text
    after the final dot is returned as the extension. Otherwise fall back
    to '.jpg', or '.mp4' for v.redd.it hosted links.
    """
    knownTypes = ('jpg', 'png', 'mp4', 'webm', 'gif')
    segments = link.split('.')

    if any(mediaType in segments for mediaType in knownTypes):
        # A recognized type is present somewhere in the link, so trust
        # whatever follows the last dot as the real extension.
        return "." + segments[-1]

    # No recognizable type: v.redd.it serves video, everything else is
    # assumed to be a plain image.
    return '.mp4' if "v.redd.it" in link else '.jpg'
def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
    """Download imageURL into folderDir under filename.

    Makes up to three attempts. The payload is written to a ".tmp" file
    first and renamed into place only after a successful transfer, so a
    partial download never masquerades as a finished file.

    filename      -- preferred (long) file name; sanitized via nameCorrector
    shortFilename -- fallback name used if the long one triggers
                     FileNotFoundError (presumably an over-long or invalid
                     path -- TODO confirm against nameCorrector/caller)
    folderDir     -- destination directory
    imageURL      -- direct URL of the file to fetch
    indent        -- number of spaces prefixed to console output
    silent        -- suppress console output when True

    Raises DomainInSkip when the URL matches a user-skipped domain,
    FileAlreadyExistsError when the target file (or, with --no-dupes, an
    identical hash) already exists, and FailedToDownload after three
    failed attempts.
    """

    # Honor the user's skip list before doing any network work.
    if any(domain in imageURL for domain in GLOBAL.arguments.skip):
        raise DomainInSkip

    # Browser-like request headers; some hosts refuse urllib's default agent.
    headers = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
            "Safari/537.36 OPR/54.0.2952.64"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;" \
            "q=0.9,image/webp,image/apng,*/*;q=0.8"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # NOTE(review): the opener is installed process-wide either way; only the
    # custom headers are withheld for imgur links -- presumably imgur rejects
    # them, confirm before changing.
    opener = urllib.request.build_opener()
    if not "imgur" in imageURL:
        opener.addheaders = headers
    urllib.request.install_opener(opener)

    # Strip characters that are not legal in file names.
    filename = nameCorrector(filename)

    if not silent: print(" "*indent + str(folderDir),
                         " "*indent + str(filename),
                         sep="\n")

    # Up to three attempts at the transfer.
    for i in range(3):
        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename+".tmp")

        if not (os.path.isfile(fileDir)):
            try:
                # Download into the ".tmp" name; promoted only on success.
                urllib.request.urlretrieve(imageURL,
                                           tempDir,
                                           reporthook=dlProgress)

                # --no-dupes: discard the file if its MD5 was already seen
                # this session, otherwise record it.
                if GLOBAL.arguments.no_dupes:
                    fileHash = createHash(tempDir)
                    if fileHash in GLOBAL.hashList:
                        os.remove(tempDir)
                        raise FileAlreadyExistsError
                    GLOBAL.hashList.add(fileHash)

                # Success: move the temp file to its final name.
                os.rename(tempDir,fileDir)
                if not silent: print(" "*indent+"Downloaded"+" "*10)
                return None

            except ConnectionResetError as exception:
                # Transient network failure: report and let the loop retry.
                if not silent: print(" "*indent + str(exception))
                if not silent: print(" "*indent + "Trying again\n")

            except FileNotFoundError:
                # Presumably the long name is invalid on this filesystem;
                # retry with the shorter fallback name -- TODO confirm.
                filename = shortFilename

        else:
            raise FileAlreadyExistsError

    raise FailedToDownload
def createHash(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    Reads in 4 KB chunks so arbitrarily large downloads can be hashed
    without loading them fully into memory.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|