bulk-downloader-for-reddit/src/downloader.py

import io
import os
import sys
import urllib.request
from html.parser import HTMLParser
from pathlib import Path
from urllib.error import HTTPError

import imgurpython
from multiprocessing import Queue

from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
                        FileNameTooLong, ImgurLoginError,
                        NotADownloadableLinkError)
from src.tools import GLOBAL, nameCorrector, printToFile

# Keep a reference to the built-in print: Self.writeToFile needs it to write
# into an arbitrary file object through the `file=` keyword.
VanillaPrint = print
# From here on, every print() call in this module is routed through
# printToFile (imported from src.tools).
print = printToFile

def dlProgress(count, blockSize, totalSize):
    """Report hook for urlretrieve: show "<done>Mb/<total>Mb" on the console.

    count -- number of blocks transferred so far
    blockSize -- size of one block in bytes
    totalSize -- total size of the file in bytes

    The line ends with a carriage return so that the next report overwrites
    the previous one instead of scrolling the console.
    """

    bytesToMb = 10**(-6)
    doneMbs = int(count*blockSize*bytesToMb)
    totalMbs = int(totalSize*bytesToMb)

    sys.stdout.write("%dMb/%dMb" % (doneMbs, totalMbs) + "\r")
    sys.stdout.flush()

def getExtension(link):
    """Extract the file extension (with the leading dot) from a media link.

    Only the last path segment of the link is inspected, after the query
    string and the fragment have been removed, so 'a.mp4?token=1' yields
    '.mp4' and a known type that merely appears in the host name is ignored.

    If no known extension is found, return '.mp4' for v.redd.it links and
    '.jpg' for everything else.
    """

    imageTypes = ('jpg','png','mp4','webm','gif')

    # Neither "?query" nor "#fragment" is part of the file name
    path = link.split('?', 1)[0].split('#', 1)[0]
    lastSegment = path.rstrip('/').rsplit('/', 1)[-1]

    _, dot, suffix = lastSegment.rpartition('.')
    suffix = suffix.lower()
    if dot and suffix in imageTypes:
        return "." + suffix

    if "v.redd.it" in link:
        return '.mp4'
    return '.jpg'

def getFile(fileDir,tempDir,imageURL,indent=0):
    """Download a file to the given path, retrying on connection resets.

    The data is written to tempDir first and renamed to fileDir once the
    transfer has finished.

    fileDir -- Full path of the file to be created
    tempDir -- Full path of the temporary file ('.tmp' extension)
    imageURL -- URL to the file to be downloaded
    indent -- Number of spaces printed in front of the status messages

    Raises:
    FileAlreadyExistsError -- fileDir already exists
    FileNameTooLong -- downloading or renaming raised FileNotFoundError;
                       callers react to it by retrying with a shorter name
    ConnectionResetError -- the connection was reset on every attempt
    """

    headers = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
            "Safari/537.36 OPR/54.0.2952.64"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;" \
            "q=0.9,image/webp,image/apng,*/*;q=0.8"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # urlretrieve has no headers argument, so the headers are attached to a
    # globally installed opener instead.
    opener = urllib.request.build_opener()
    opener.addheaders = headers
    urllib.request.install_opener(opener)

    if os.path.isfile(fileDir):
        raise FileAlreadyExistsError

    maxAttempts = 3
    lastReset = None

    for attempt in range(1, maxAttempts + 1):
        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)
            os.rename(tempDir,fileDir)
        except ConnectionResetError as exception:
            # The name bound by "as" is deleted when the handler ends, so
            # keep our own reference for the final raise.
            lastReset = exception
            print(" "*indent + str(exception))
            if attempt < maxAttempts:
                print(" "*indent + "Trying again\n")
        except FileNotFoundError:
            raise FileNameTooLong
        else:
            print(" "*indent+"Downloaded"+" "*10)
            return

    # Every attempt was reset: report the failure instead of returning as if
    # the file had been downloaded.
    raise lastReset

class Erome:
    """Downloader for erome posts.

    Instantiating the class performs the download: a post with a single
    media link is saved as one file inside `directory`, any other post is
    saved as numbered files inside a folder named after the post.
    """

    def __init__(self,directory,post):
        """Download every media file linked from the post's page.

        directory -- destination; must support the `/` join operator
                     (presumably a pathlib.Path)
        post -- dict providing 'postURL', 'postTitle', 'postSubmitter'
                and 'postId'

        Raises:
        NotADownloadableLinkError -- the page could not be fetched or it
                                     contains no media link
        FileAlreadyExistsError -- every file is already on disk
        AlbumNotDownloadedCompletely -- at least one album file failed
        """
        try:
            IMAGES = self.getLinks(post['postURL'])
        except urllib.error.HTTPError:
            raise NotADownloadableLinkError("Not a downloadable link")

        # Without this check an empty album would satisfy
        # "duplicates == imagesLenght" and be reported as already existing.
        if not IMAGES:
            raise NotADownloadableLinkError("Not a downloadable link")

        imagesLenght = len(IMAGES)

        if not os.path.exists(directory): os.makedirs(directory)

        # Filenames are declared here
        title = nameCorrector(post['postTitle'])
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']

        if imagesLenght == 1:
            extension = getExtension(IMAGES[0])
            print(baseName+extension)

            fileDir = directory / (baseName+extension)
            tempDir = directory / (baseName+".tmp")
            imageURL = self._absoluteURL(IMAGES[0])

            try:
                getFile(fileDir,tempDir,imageURL)
            except FileNameTooLong:
                # Fall back to the shortest unique name: the post id
                fileDir = directory / (post['postId'] + extension)
                tempDir = directory / (post['postId'] + '.tmp')
                getFile(fileDir,tempDir,imageURL)
            return

        print(baseName,end="\n\n")

        folderDir = directory / baseName

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # Folder name rejected by the file system: use the post id.
            # exist_ok keeps a second run on the same post from crashing.
            folderDir = directory / post['postId']
            os.makedirs(folderDir, exist_ok=True)

        duplicates = 0
        failed = 0

        for index, link in enumerate(IMAGES, start=1):
            extension = getExtension(link)

            fileName = str(index)
            imageURL = self._absoluteURL(link)

            fileDir = folderDir / (fileName + extension)
            tempDir = folderDir / (fileName + ".tmp")

            print("  ({}/{})".format(index,imagesLenght))
            print("  {}".format(fileName+extension))

            try:
                getFile(fileDir,tempDir,imageURL,indent=2)
                print()
            except FileAlreadyExistsError:
                print("  The file already exists" + " "*10,end="\n\n")
                duplicates += 1
            except Exception as exception:
                # Deliberately broad: one broken file must not stop the
                # rest of the album; the failure is reported after the loop.
                print("\n  Could not get the file")
                print(
                    "  "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                failed += 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif failed:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    @staticmethod
    def _absoluteURL(link):
        """Return link with a scheme, prepending 'https:' when it has none.

        The links taken from the page are expected to be protocol-relative
        ('//host/path'); links that already carry a scheme are left alone.
        """
        if link.startswith(("http://", "https://")):
            return link
        return "https:" + link

    def getLinks(self,url,lineNumber=129):
        """Collect the media links of the album found at url.

        url -- address of the page
        lineNumber -- ignored, kept for backward compatibility; the start of
                      the album is located by searching the page

        The page is processed line by line and only the last start tag of
        each line is inspected. From the '<div id="album">' line onwards
        (or from the top of the page if there is none), the 'src' of
        '<img class="img-front">' and '<source>' tags is collected.
        Links ending in '.mp4' are kept only in their '_480p.mp4' variant.
        """

        class EromeParser(HTMLParser):
            tag = None
            def handle_starttag(self, tag, attrs):
                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}

        def lastStartTag(line):
            parser = EromeParser()
            parser.feed(line)
            return parser.tag

        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        # Find where the album starts in order not to get wrong links
        albumStart = None
        for i, line in enumerate(pageSource):
            tag = lastStartTag(line)
            if tag is not None and tag.get("div", {}).get("id") == "album":
                albumStart = i
                break

        content = []
        for line in pageSource[albumStart:]:
            tag = lastStartTag(line)
            if tag is None:
                continue

            src = None
            if "img" in tag:
                if tag["img"].get("class") == "img-front":
                    src = tag["img"].get("src")
            elif "source" in tag:
                src = tag["source"].get("src")

            # Tags without a usable src are skipped instead of raising
            if src:
                content.append(src)

        return [
            link for link in content
            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
        ]

class Imgur:
    """Downloader for imgur images and albums, using the imgur API.

    Instantiating the class performs the download: a single image is saved
    as one file inside `directory`, an album is saved as numbered files
    inside a folder named after the post.
    """

    def __init__(self,directory,post):
        """Download the image or album the post links to.

        directory -- destination; must support the `/` join operator
                     (presumably a pathlib.Path)
        post -- dict providing 'postURL', 'postTitle', 'postSubmitter'
                and 'postId'; 'mediaURL' and 'postExt' are written into it
                for single images

        Raises:
        NotADownloadableLinkError -- the album contains no image
        FileAlreadyExistsError -- every file is already on disk
        AlbumNotDownloadedCompletely -- at least one album file failed
        """
        self.imgurClient = self.initImgur()

        imgurID = self.getId(post['postURL'])
        content = self.getLink(imgurID)

        if not os.path.exists(directory): os.makedirs(directory)

        if content['type'] == 'image':
            self._downloadImage(directory,post,content['object'])
        elif content['type'] == 'album':
            self._downloadAlbum(directory,post,content['object'])

    def _downloadImage(self,directory,post,image):
        """Download a single imgur image, preferring its mp4 version."""
        try:
            post['mediaURL'] = image.mp4
        except AttributeError:
            post['mediaURL'] = image.link

        post['postExt'] = getExtension(post['mediaURL'])

        # Filenames are declared here
        title = nameCorrector(post['postTitle'])
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']

        print(baseName+post['postExt'])

        fileDir = directory / (baseName + post['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir,tempDir,post['mediaURL'])
        except FileNameTooLong:
            # The parentheses matter: "/" binds tighter than "+", and a
            # path object cannot be added to a str.
            fileDir = directory / (post['postId'] + post['postExt'])
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir,tempDir,post['mediaURL'])

    def _downloadAlbum(self,directory,post,album):
        """Download every image of an imgur album into its own folder."""
        images = album.images
        imagesLenght = len(images)

        # Without this check an empty album would satisfy
        # "duplicates == imagesLenght" and be reported as already existing.
        if imagesLenght == 0:
            raise NotADownloadableLinkError("Not a downloadable link")

        title = nameCorrector(post['postTitle'])
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']
        print(baseName,end="\n\n")

        folderDir = directory / baseName

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # Folder name rejected by the file system: use the post id.
            # exist_ok keeps a second run on the same post from crashing.
            folderDir = directory / post['postId']
            os.makedirs(folderDir, exist_ok=True)

        duplicates = 0
        failed = 0

        for index, image in enumerate(images, start=1):
            try:
                imageURL = image['mp4']
            except KeyError:
                imageURL = image['link']

            image['Ext'] = getExtension(imageURL)

            # Filenames are declared here, from the most descriptive one to
            # the shortest; the next one is used when a name is too long.
            fileNames = [
                (str(index)
                 + "_"
                 + nameCorrector(str(image['title']))
                 + "_"
                 + image['id']),
                str(index) + "_" + image['id'],
                str(index)
            ]

            print("  ({}/{})".format(index,imagesLenght))
            print("  {}".format(fileNames[0]+image['Ext']))

            try:
                self._getFileWithFallbackNames(
                    folderDir,fileNames,image['Ext'],imageURL
                )
                print()
            except FileAlreadyExistsError:
                print("  The file already exists" + " "*10,end="\n\n")
                duplicates += 1
            except Exception as exception:
                # Deliberately broad: one broken file must not stop the
                # rest of the album; the failure is reported after the loop.
                print("\n  Could not get the file")
                print(
                    "  "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                failed += 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif failed:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    @staticmethod
    def _getFileWithFallbackNames(folderDir,fileNames,extension,imageURL):
        """Download imageURL under the first name that is not too long.

        Every attempt runs inside the caller's try block, so an error raised
        during a retry is handled like an error of the first attempt.
        FileNameTooLong is re-raised only when the last name fails as well.
        """
        lastPosition = len(fileNames) - 1
        for position, fileName in enumerate(fileNames):
            fileDir = folderDir / (fileName + extension)
            tempDir = folderDir / (fileName + ".tmp")
            try:
                getFile(fileDir,tempDir,imageURL,indent=2)
                return
            except FileNameTooLong:
                if position == lastPosition:
                    raise

    @staticmethod
    def initImgur():
        """Initialize imgur api with the credentials in GLOBAL.config"""

        config = GLOBAL.config
        return imgurpython.ImgurClient(
            config['imgur_client_id'],
            config['imgur_client_secret']
        )

    def getId(self,submissionURL):
        """Extract imgur post id
        and determine if its a single image or album

        Returns a dict with the keys 'id' and 'type', where 'type' is
        either 'album' or 'image'. The query string, the fragment and one
        trailing slash are ignored. Raises ValueError for a non-album link
        that has no '.com' part.
        """

        # "?query" and "#fragment" are not part of the id
        cleanURL = submissionURL.split('?', 1)[0].split('#', 1)[0]
        if cleanURL.endswith("/"):
            cleanURL = cleanURL[:-1]

        # Albums live under the path segments "a" or "gallery"; comparing
        # whole segments avoids matching names that merely end with "a".
        segments = cleanURL.split("/")
        if "a" in segments or "gallery" in segments:
            return {'id':segments[-1], 'type':'album'}

        # Image links: the id is whatever follows "com", with the file
        # extension (if any) split off together with the dots.
        url = cleanURL.replace('.','/').split('/')
        imageId = url[url.index('com')+1]
        return {'id':imageId, 'type':'image'}

    def getLink(self,identity):
        """Request imgur object from imgur api

        identity -- dict as returned by getId

        Returns a dict with the keys 'object' (the API response) and 'type',
        or None when the type is neither 'image' nor 'album'.
        """

        if identity['type'] == 'image':
            return {'object':self.imgurClient.get_image(identity['id']),
                    'type':'image'}
        elif identity['type'] == 'album':
            return {'object':self.imgurClient.get_album(identity['id']),
                    'type':'album'}

    @staticmethod
    def get_credits():
        """Return the result of get_credits() on a freshly created client"""
        return Imgur.initImgur().get_credits()

class Gfycat:
    """Downloader for gfycat posts.

    Instantiating the class resolves the direct video link and downloads it
    as one file inside `directory`.
    """

    def __init__(self,directory,POST):
        """Download the video the post links to.

        directory -- destination; must support the `/` join operator
                     (presumably a pathlib.Path)
        POST -- dict providing 'postURL', 'postTitle', 'postSubmitter' and
                'postId'; 'mediaURL' and 'postExt' are written into it

        Raises NotADownloadableLinkError when no direct link can be found.
        Any other error raised while fetching the page is propagated
        unchanged.
        """
        try:
            POST['mediaURL'] = self.getLink(POST['postURL'])
        except IndexError:
            raise NotADownloadableLinkError("Could not read the page source")

        POST['postExt'] = getExtension(POST['mediaURL'])

        if not os.path.exists(directory): os.makedirs(directory)

        # Filenames are declared here
        title = nameCorrector(POST['postTitle'])
        baseName = POST["postSubmitter"]+"_"+title+"_"+POST['postId']

        print(baseName+POST['postExt'])

        fileDir = directory / (baseName+POST['postExt'])
        tempDir = directory / (baseName+".tmp")

        try:
            getFile(fileDir,tempDir,POST['mediaURL'])
        except FileNameTooLong:
            # Fall back to the shortest unique name: the post id
            fileDir = directory / (POST['postId']+POST['postExt'])
            tempDir = directory / (POST['postId']+".tmp")

            getFile(fileDir,tempDir,POST['mediaURL'])

    @staticmethod
    def _extractLink(line, query):
        """Return the double-quoted value following query in line, or None.

        The character right after query is taken to be the opening quote and
        is skipped. None is returned when query is missing, the closing
        quote is missing or the value is empty.
        """
        start = line.find(query)
        if start == -1:
            return None

        begin = start + len(query) + 1
        end = line.find('"', begin)
        if end == -1:
            return None

        return line[begin:end] or None

    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
        """Extract direct link to the video from page's source
        and return it

        url -- link of the post; returned unchanged when it already contains
               '.webm', '.mp4' or '.gif'
        query -- text that directly precedes the quoted link in the page
        lineNumber -- index of the line expected to hold the link; it is
                      checked first, then the rest of the page is searched

        Raises NotADownloadableLinkError when the link is not found.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        url = "https://gfycat.com/" + url.split('/')[-1]

        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        # The slice is empty (no IndexError) when the page is shorter than
        # lineNumber; the whole page is the fallback for a changed layout.
        candidates = pageSource[lineNumber:lineNumber+1] + pageSource

        for line in candidates:
            link = Gfycat._extractLink(line, query)
            if link:
                return link

        raise NotADownloadableLinkError("Could not read the page source")

class Direct:
    """Downloader for posts that link straight to a media file."""

    def __init__(self,directory,POST):
        """Download POST['postURL'] into directory.

        The extension derived from the URL is stored in POST['postExt'].
        The file is named '<submitter>_<title>_<id><ext>'; when getFile
        reports FileNameTooLong, '<id><ext>' is used instead.
        """
        extension = getExtension(POST['postURL'])
        POST['postExt'] = extension

        if not os.path.exists(directory):
            os.makedirs(directory)

        # Filenames are declared here
        title = nameCorrector(POST['postTitle'])
        longName = "_".join((POST["postSubmitter"], title, POST['postId']))

        print(longName + extension)

        mediaURL = POST['postURL']
        target = directory / (longName + extension)
        partial = directory / (longName + ".tmp")

        try:
            getFile(target, partial, mediaURL)
        except FileNameTooLong:
            shortName = POST['postId']
            target = directory / (shortName + extension)
            partial = directory / (shortName + ".tmp")
            getFile(target, partial, mediaURL)

class Self:
    """Saver for self (text) posts: the post is written as a markdown file."""

    def __init__(self,directory,post):
        """Write the post to '<submitter>_<title>_<id>.md' inside directory.

        Raises FileAlreadyExistsError when that file is already there. When
        writing fails with FileNotFoundError, '<id>.md' is used instead.
        """
        if not os.path.exists(directory):
            os.makedirs(directory)

        safeTitle = nameCorrector(post['postTitle'])

        # Filenames are declared here
        markdownName = "_".join(
            (post["postSubmitter"], safeTitle, post['postId'])
        ) + ".md"

        print(markdownName)

        target = directory / markdownName

        if target.is_file():
            raise FileAlreadyExistsError

        try:
            self.writeToFile(target,post)
        except FileNotFoundError:
            target = directory / (post['postId']+".md")
            self.writeToFile(target,post)

    @staticmethod
    def writeToFile(directory,post):
        """Render the post as markdown and write it to the file `directory`.

        Despite its name, `directory` is the path of the file to create.
        """

        # Self posts are formatted here
        subreddit = post["postSubreddit"]
        submitter = post["postSubmitter"]

        heading = "## [" + post["postTitle"] + "](" + post["postURL"] + ")\n"
        footer = "".join([
            "submitted to [r/",
            subreddit,
            "](https://www.reddit.com/r/",
            subreddit,
            ") by [u/",
            submitter,
            "](https://www.reddit.com/user/",
            submitter,
            ")",
        ])
        document = heading + post["postContent"] + "\n\n---\n\n" + footer

        with io.open(directory,"w",encoding="utf-8") as output:
            VanillaPrint(document,file=output)

        print("Downloaded")
Added Self class 2018-07-10 10:30:50 +12:00			`import io`
Initial commit 2018-07-10 07:58:11 +12:00			`import os`
			`import sys`
			`import urllib.request`
Added erome support 2018-07-24 08:16:56 +12:00			`from html.parser import HTMLParser`
Initial commit 2018-07-10 07:58:11 +12:00			`from pathlib import Path`
Improve error handling 2018-07-24 08:33:11 +12:00			`from urllib.error import HTTPError`
Initial commit 2018-07-10 07:58:11 +12:00
Removed installing packages in the runtime 2018-07-12 08:59:14 +12:00			`import imgurpython`
Bug fix 2018-07-12 09:09:20 +12:00			`from multiprocessing import Queue`
Removed installing packages in the runtime 2018-07-12 08:59:14 +12:00
Initial commit 2018-07-10 07:58:11 +12:00			`from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,`
			`FileNameTooLong, ImgurLoginError,`
			`NotADownloadableLinkError)`
			`from src.tools import GLOBAL, nameCorrector, printToFile`

Added Self class 2018-07-10 10:30:50 +12:00			`VanillaPrint = print`
Initial commit 2018-07-10 07:58:11 +12:00			`print = printToFile`

			`def dlProgress(count, blockSize, totalSize):`
			`"""Function for writing download progress to console`
			`"""`

			`downloadedMbs = int(countblockSize(10**(-6)))`
			`fileSize = int(totalSize(10*(-6)))`
Fixed console prints for Linux 2018-08-14 00:55:37 +12:00			`sys.stdout.write("{}Mb/{}Mb".format(downloadedMbs,fileSize))`
			`sys.stdout.write("\r")`
			`# sys.stdout.write("\b"*len("\r{}Mb/{}Mb".format(downloadedMbs,fileSize)))`
Initial commit 2018-07-10 07:58:11 +12:00			`sys.stdout.flush()`

			`def getExtension(link):`
			`"""Extract file extension from image link.`
			`If didn't find any, return '.jpg'`
			`"""`

			`imageTypes = ['jpg','png','mp4','webm','gif']`
			`parsed = link.split('.')`
			`for TYPE in imageTypes:`
			`if TYPE in parsed:`
			`return "."+parsed[-1]`
			`else:`
Added v.redd.it support (#36) 2018-07-19 23:57:16 +12:00			`if not "v.redd.it" in link:`
			`return '.jpg'`
			`else:`
			`return '.mp4'`
Initial commit 2018-07-10 07:58:11 +12:00
			`def getFile(fileDir,tempDir,imageURL,indent=0):`
			`"""Downloads given file to given directory.`

			`fileDir -- Full file directory`
			`tempDir -- Full file directory with the extension of '.tmp'`
			`imageURL -- URL to the file to be downloaded`

			`redditID -- Post's reddit id if renaming the file is necessary.`
			`As too long file names seem not working.`
			`"""`

Sending header when requesting a file 2018-08-06 16:35:43 +12:00			`headers = [`
Update request headers 2018-08-06 18:33:07 +12:00			`("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \`
			`"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\`
			`"Safari/537.36 OPR/54.0.2952.64"),`
Sending header when requesting a file 2018-08-06 16:35:43 +12:00			`("Accept", "text/html,application/xhtml+xml,application/xml;" \`
Update request headers 2018-08-06 18:33:07 +12:00			`"q=0.9,image/webp,image/apng,/;q=0.8"),`
Sending header when requesting a file 2018-08-06 16:35:43 +12:00			`("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),`
Fixed request headers 2018-08-08 09:47:34 +12:00			`("Accept-Encoding", "none"),`
Sending header when requesting a file 2018-08-06 16:35:43 +12:00			`("Accept-Language", "en-US,en;q=0.8"),`
			`("Connection", "keep-alive")`
			`]`

			`opener = urllib.request.build_opener()`
			`opener.addheaders = headers`
			`urllib.request.install_opener(opener)`

Initial commit 2018-07-10 07:58:11 +12:00			`if not (os.path.isfile(fileDir)):`
			`for i in range(3):`
			`try:`
			`urllib.request.urlretrieve(imageURL,`
			`tempDir,`
			`reporthook=dlProgress)`
			`os.rename(tempDir,fileDir)`
			`except ConnectionResetError as exception:`
			`print(" "*indent + str(exception))`
			`print(" "*indent + "Trying again\n")`
			`except FileNotFoundError:`
			`raise FileNameTooLong`
Use else in try blocks 2018-07-25 07:11:12 +12:00			`else:`
			`print(" "indent+"Downloaded"+" "10)`
			`break`
Initial commit 2018-07-10 07:58:11 +12:00			`else:`
			`raise FileAlreadyExistsError`

Added erome support 2018-07-24 08:16:56 +12:00			`class Erome:`
			`def __init__(self,directory,post):`
Improve error handling 2018-07-24 08:33:11 +12:00			`try:`
			`IMAGES = self.getLinks(post['postURL'])`
			`except urllib.error.HTTPError:`
			`raise NotADownloadableLinkError("Not a downloadable link")`
Added erome support 2018-07-24 08:16:56 +12:00
			`imagesLenght = len(IMAGES)`
			`howManyDownloaded = imagesLenght`
			`duplicates = 0`

			`if imagesLenght == 1:`

			`extension = getExtension(IMAGES[0])`

Added docstrings for the ease of modification 2018-08-06 17:13:07 +12:00			`"""Filenames are declared here"""`

Added erome support 2018-07-24 08:16:56 +12:00			`title = nameCorrector(post['postTitle'])`
Add OP's name first 2018-07-25 03:55:33 +12:00			`print(post["postSubmitter"]+"_"+title+"_"+post['postId']+extension)`
Added erome support 2018-07-24 08:16:56 +12:00
Add submitter to file name 2018-07-24 21:44:53 +12:00			`fileDir = directory / (`
Bug fix 2018-07-25 07:13:11 +12:00			`post["postSubmitter"]+"_"+title+"_"+post['postId']+extension`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`)`
			`tempDir = directory / (`
Bug fix 2018-07-25 07:13:11 +12:00			`post["postSubmitter"]+"_"+title+"_"+post['postId']+".tmp"`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`)`
Added erome support 2018-07-24 08:16:56 +12:00
			`imageURL = "https:" + IMAGES[0]`

			`try:`
			`getFile(fileDir,tempDir,imageURL)`
			`except FileNameTooLong:`
			`fileDir = directory / (post['postId'] + extension)`
			`tempDir = directory / (post['postId'] + '.tmp')`
			`getFile(fileDir,tempDir,imageURL)`

			`else:`
			`title = nameCorrector(post['postTitle'])`
Add OP's name first 2018-07-25 03:55:33 +12:00			`print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")`
Added erome support 2018-07-24 08:16:56 +12:00
Add submitter to file name 2018-07-24 21:44:53 +12:00			`folderDir = directory / (`
Add OP's name first 2018-07-25 03:55:33 +12:00			`post["postSubmitter"] + "_" + title + "_" + post['postId']`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`)`
Added erome support 2018-07-24 08:16:56 +12:00
			`try:`
			`if not os.path.exists(folderDir):`
			`os.makedirs(folderDir)`
			`except FileNotFoundError:`
			`folderDir = directory / post['postId']`
			`os.makedirs(folderDir)`

			`for i in range(imagesLenght):`

			`extension = getExtension(IMAGES[i])`

			`fileName = str(i+1)`
			`imageURL = "https:" + IMAGES[i]`

			`fileDir = folderDir / (fileName + extension)`
			`tempDir = folderDir / (fileName + ".tmp")`

			`print(" ({}/{})".format(i+1,imagesLenght))`
			`print(" {}".format(fileName+extension))`

			`try:`
			`getFile(fileDir,tempDir,imageURL,indent=2)`
			`print()`
			`except FileAlreadyExistsError:`
			`print(" The file already exists" + " "*10,end="\n\n")`
			`duplicates += 1`
			`howManyDownloaded -= 1`

			`except Exception as exception:`
Improved exception handling 2018-07-25 04:27:52 +12:00			`# raise exception`
Added erome support 2018-07-24 08:16:56 +12:00			`print("\n Could not get the file")`
Improved exception handling 2018-07-25 04:27:52 +12:00			`print(`
			`" "`
			`+ "{class_name}: {info}".format(`
			`class_name=exception.__class__.__name__,`
			`info=str(exception)`
			`)`
			`+ "\n"`
			`)`
Added erome support 2018-07-24 08:16:56 +12:00			`exceptionType = exception`
			`howManyDownloaded -= 1`

			`if duplicates == imagesLenght:`
			`raise FileAlreadyExistsError`
			`elif howManyDownloaded + duplicates < imagesLenght:`
			`raise AlbumNotDownloadedCompletely(`
			`"Album Not Downloaded Completely"`
			`)`

			`def getLinks(self,url,lineNumber=129):`

			`content = []`
			`lineNumber = None`

			`class EromeParser(HTMLParser):`
			`tag = None`
			`def handle_starttag(self, tag, attrs):`
			`self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}`

			`pageSource = (urllib.request.urlopen(url).read().decode().split('\n'))`

			`""" FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS"""`
			`for i in range(len(pageSource)):`
			`obj = EromeParser()`
			`obj.feed(pageSource[i])`
			`tag = obj.tag`

			`if tag is not None:`
			`if "div" in tag:`
			`if "id" in tag["div"]:`
			`if tag["div"]["id"] == "album":`
			`lineNumber = i`
			`break`

			`for line in pageSource[lineNumber:]:`
			`obj = EromeParser()`
			`obj.feed(line)`
			`tag = obj.tag`
			`if tag is not None:`
			`if "img" in tag:`
			`if "class" in tag["img"]:`
			`if tag["img"]["class"]=="img-front":`
			`content.append(tag["img"]["src"])`
			`elif "source" in tag:`
			`content.append(tag["source"]["src"])`

			`return [`
			`link for link in content \`
			`if link.endswith("_480p.mp4") or not link.endswith(".mp4")`
			`]`

Initial commit 2018-07-10 07:58:11 +12:00			`class Imgur:`
			`def __init__(self,directory,post):`
			`self.imgurClient = self.initImgur()`

			`imgurID = self.getId(post['postURL'])`
			`content = self.getLink(imgurID)`

			`if not os.path.exists(directory): os.makedirs(directory)`

			`if content['type'] == 'image':`

			`try:`
			`post['mediaURL'] = content['object'].mp4`
			`except AttributeError:`
			`post['mediaURL'] = content['object'].link`

			`post['postExt'] = getExtension(post['mediaURL'])`
Added docstrings for the ease of modification 2018-08-06 17:13:07 +12:00
Initial commit 2018-07-10 07:58:11 +12:00			`title = nameCorrector(post['postTitle'])`
Added docstrings for the ease of modification 2018-08-06 17:13:07 +12:00
			`"""Filenames are declared here"""`

Add OP's name first 2018-07-25 03:55:33 +12:00			`print(post["postSubmitter"]+"_"+title+"_"+post['postId']+post['postExt'])`
Initial commit 2018-07-10 07:58:11 +12:00
Add submitter to file name 2018-07-24 21:44:53 +12:00			`fileDir = directory / (`
Add OP's name first 2018-07-25 03:55:33 +12:00			`post["postSubmitter"]`
			`+ "_" + title`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`+ "_" + post['postId']`
			`+ post['postExt']`
			`)`

			`tempDir = directory / (`
Add OP's name first 2018-07-25 03:55:33 +12:00			`post["postSubmitter"]`
			`+ "_" + title`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`+ "_" + post['postId']`
			`+ ".tmp"`
			`)`
Initial commit 2018-07-10 07:58:11 +12:00
			`try:`
			`getFile(fileDir,tempDir,post['mediaURL'])`
			`except FileNameTooLong:`
			`fileDir = directory / post['postId'] + post['postExt']`
			`tempDir = directory / post['postId'] + '.tmp'`
			`getFile(fileDir,tempDir,post['mediaURL'])`

			`elif content['type'] == 'album':`
			`exceptionType = ""`
			`images = content['object'].images`
			`imagesLenght = len(images)`
			`howManyDownloaded = imagesLenght`
			`duplicates = 0`

			`title = nameCorrector(post['postTitle'])`
Add OP's name first 2018-07-25 03:55:33 +12:00			`print(post["postSubmitter"]+"_"+title+"_"+post['postId'],end="\n\n")`
Initial commit 2018-07-10 07:58:11 +12:00
Add submitter to file name 2018-07-24 21:44:53 +12:00			`folderDir = directory / (`
Add OP's name first 2018-07-25 03:55:33 +12:00			`post["postSubmitter"] + "_" + title + "_" + post['postId']`
Add submitter to file name 2018-07-24 21:44:53 +12:00			`)`
Initial commit 2018-07-10 07:58:11 +12:00
			`try:`
			`if not os.path.exists(folderDir):`
			`os.makedirs(folderDir)`
			`except FileNotFoundError:`
			`folderDir = directory / post['postId']`
			`os.makedirs(folderDir)`

			`for i in range(imagesLenght):`
			`try:`
			`imageURL = images[i]['mp4']`
			`except KeyError:`
			`imageURL = images[i]['link']`

			`images[i]['Ext'] = getExtension(imageURL)`

			`fileName = (str(i+1)`
			`+ "_"`
			`+ nameCorrector(str(images[i]['title']))`
			`+ "_"`
			`+ images[i]['id'])`

Added docstrings for the ease of modification 2018-08-06 17:13:07 +12:00			`"""Filenames are declared here"""`

Initial commit 2018-07-10 07:58:11 +12:00			`fileDir = folderDir / (fileName + images[i]['Ext'])`
			`tempDir = folderDir / (fileName + ".tmp")`

			`print(" ({}/{})".format(i+1,imagesLenght))`
			`print(" {}".format(fileName+images[i]['Ext']))`

			`try:`
			`getFile(fileDir,tempDir,imageURL,indent=2)`
			`print()`
			`except FileAlreadyExistsError:`
			`print(" The file already exists" + " "*10,end="\n\n")`
			`duplicates += 1`
			`howManyDownloaded -= 1`

			`# IF FILE NAME IS TOO LONG, IT WONT REGISTER`
			`except FileNameTooLong:`
			`fileName = (str(i+1) + "_" + images[i]['id'])`
			`fileDir = folderDir / (fileName + images[i]['Ext'])`
			`tempDir = folderDir / (fileName + ".tmp")`
			`try:`
			`getFile(fileDir,tempDir,imageURL,indent=2)`
			`# IF STILL TOO LONG`
			`except FileNameTooLong:`
			`fileName = str(i+1)`
			`fileDir = folderDir / (fileName + images[i]['Ext'])`
			`tempDir = folderDir / (fileName + ".tmp")`
			`getFile(fileDir,tempDir,imageURL,indent=2)`

			`except Exception as exception:`
			`print("\n Could not get the file")`
Improved exception handling 2018-07-25 04:27:52 +12:00			`print(`
			`" "`
			`+ "{class_name}: {info}".format(`
			`class_name=exception.__class__.__name__,`
			`info=str(exception)`
			`)`
			`+ "\n"`
			`)`
Initial commit 2018-07-10 07:58:11 +12:00			`exceptionType = exception`
			`howManyDownloaded -= 1`

			`if duplicates == imagesLenght:`
			`raise FileAlreadyExistsError`
Added erome support 2018-07-24 08:16:56 +12:00			`elif howManyDownloaded + duplicates < imagesLenght:`
Added custom exception descriptions to FAILED.json file 2018-07-19 23:56:00 +12:00			`raise AlbumNotDownloadedCompletely(`
			`"Album Not Downloaded Completely"`
			`)`
Initial commit 2018-07-10 07:58:11 +12:00
			@staticmethod
			def initImgur():
			    """Initialize imgur api

			    Reads 'imgur_client_id' and 'imgur_client_secret' from GLOBAL.config
			    and returns an imgurpython.ImgurClient built from that pair.
			    """

			    settings = GLOBAL.config
			    clientId = settings['imgur_client_id']
			    clientSecret = settings['imgur_client_secret']
			    return imgurpython.ImgurClient(clientId, clientSecret)
			def getId(self,submissionURL):
			    """Extract imgur post id
			    and determine if its a single image or album

			    submissionURL -- Link to an imgur image, album or gallery

			    Returns a dict of the form {'id': <id>, 'type': 'image' or 'album'}.

			    Raises IndexError for an empty link or when nothing follows the
			    domain, and ValueError when the link has no 'com' piece to anchor
			    the image id on.
			    """

			    if submissionURL[-1] == "/":
			        submissionURL = submissionURL[:-1]

			    segments = submissionURL.split("/")

			    # Compare whole path segments: a plain substring test for "a/" also
			    # fires on any segment that merely ends with "a"
			    # (e.g. imgur.com/r/pizza/<id>) and mislabels images as albums.
			    # The last segment is the id itself, so it is left out of the test.
			    if "a" in segments[:-1] or "gallery" in segments[:-1]:
			        return {'id':segments[-1], 'type':'album'}

			    # Dots are treated as separators too, so both the sub-domain and
			    # the file extension end up as their own pieces.
			    pieces = submissionURL.replace('.','/').split('/')
			    afterDomain = pieces[pieces.index('com')+1:]

			    # Subreddit-scoped links look like imgur.com/r/<subreddit>/<id>,
			    # the id is the third piece there instead of the first one.
			    if len(afterDomain) >= 3 and afterDomain[0] == 'r':
			        imageId = afterDomain[2]
			    else:
			        imageId = afterDomain[0]

			    return {'id':imageId, 'type':'image'}

			def getLink(self,identity):
			    """Request imgur object from imgur api

			    identity -- dict with the keys 'id' and 'type', 'type' being
			                either 'image' or 'album'

			    Returns {'object': <what the imgur client returned>, 'type': <type>}.
			    Any other 'type' yields None.
			    """

			    kind = identity['type']

			    if kind == 'image':
			        fetched = self.imgurClient.get_image(identity['id'])
			        return {'object':fetched, 'type':'image'}

			    if kind == 'album':
			        fetched = self.imgurClient.get_album(identity['id'])
			        return {'object':fetched, 'type':'album'}

			    return None
@staticmethod
def get_credits():
    """Return whatever get_credits() of a freshly initialized imgur client
    reports.
    """

    client = Imgur.initImgur()
    return client.get_credits()
Initial commit 2018-07-10 07:58:11 +12:00
			class Gfycat:
			    """Downloader for gfycat posts"""

			    def __init__(self,directory,POST):
			        """Resolve the direct media link of the post and download it.

			        directory -- Path object of the folder to download into,
			                     created if it does not exist
			        POST -- dict describing the post. 'postURL', 'postTitle',
			                'postSubmitter' and 'postId' are read, 'mediaURL' and
			                'postExt' are written to it.

			        Raises NotADownloadableLinkError if the page source could not
			        be read. Any other error of getLink() or getFile() is passed on
			        to the caller untouched.
			        """

			        try:
			            POST['mediaURL'] = self.getLink(POST['postURL'])
			        except IndexError:
			            # The page has fewer lines than getLink() expected
			            raise NotADownloadableLinkError("Could not read the page source")

			        POST['postExt'] = getExtension(POST['mediaURL'])

			        if not os.path.exists(directory): os.makedirs(directory)
			        title = nameCorrector(POST['postTitle'])

			        # Filenames are declared here
			        fileName = POST["postSubmitter"]+"_"+title+"_"+POST['postId']

			        print(fileName+POST['postExt'])

			        fileDir = directory / (fileName+POST['postExt'])
			        tempDir = directory / (fileName+".tmp")

			        try:
			            getFile(fileDir,tempDir,POST['mediaURL'])
			        except FileNameTooLong:
			            # Retry with the post id alone as the file name
			            fileDir = directory / (POST['postId']+POST['postExt'])
			            tempDir = directory / (POST['postId']+".tmp")

			            getFile(fileDir,tempDir,POST['mediaURL'])

			    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
			        """Extract direct link to the video from page's source
			        and return it

			        url -- Link to the gfycat post. Returned unchanged if it already
			               contains '.webm', '.mp4' or '.gif'.
			        query -- Text that directly precedes the quoted link in the
			                 page's source
			        lineNumber -- Index of the page source line (0-based) that is
			                      searched for the query

			        Raises IndexError if the page has no such line and
			        NotADownloadableLinkError if no link could be read from it.
			        """

			        if '.webm' in url or '.mp4' in url or '.gif' in url:
			            return url

			        if url[-1:] == '/':
			            url = url[:-1]

			        url = "https://gfycat.com/" + url.split('/')[-1]

			        # Close the connection as soon as the page has been read
			        with urllib.request.urlopen(url) as response:
			            pageSource = response.read().decode().split('\n')

			        theLine = pageSource[lineNumber]

			        link = ""
			        queryStart = theLine.find(query)
			        if queryStart != -1:
			            # +1 skips the quote that opens the attribute value
			            linkStart = queryStart + len(query) + 1
			            linkEnd = theLine.find('"', linkStart)
			            # An unterminated value is treated as "no link found"
			            if linkEnd != -1:
			                link = theLine[linkStart:linkEnd]

			        if link == "":
			            raise NotADownloadableLinkError("Could not read the page source")

			        return link

			class Direct:
			    """Downloader for posts that link straight to a file"""

			    def __init__(self,directory,POST):
			        """Download the file behind POST['postURL'] into directory.

			        directory -- Path object of the folder to download into,
			                     created if it does not exist
			        POST -- dict describing the post. 'postURL', 'postTitle',
			                'postSubmitter' and 'postId' are read, 'postExt' is
			                written to it.
			        """

			        extension = getExtension(POST['postURL'])
			        POST['postExt'] = extension

			        if not os.path.exists(directory):
			            os.makedirs(directory)

			        title = nameCorrector(POST['postTitle'])

			        # Filenames are declared here
			        stem = "_".join((POST["postSubmitter"], title, POST['postId']))

			        print(stem + POST['postExt'])

			        fileDir = directory / (stem + POST['postExt'])
			        tempDir = directory / (stem + ".tmp")

			        try:
			            getFile(fileDir,tempDir,POST['postURL'])
			        except FileNameTooLong:
			            # Retry with the post id alone as the file name
			            shortStem = POST['postId']
			            fileDir = directory / (shortStem + POST['postExt'])
			            tempDir = directory / (shortStem + ".tmp")

			            getFile(fileDir,tempDir,POST['postURL'])

			class Self:
			    """Downloader for self (text) posts, saved as markdown files"""

			    def __init__(self,directory,post):
			        """Write the post to '<submitter>_<title>_<id>.md' in directory.

			        directory -- Path object of the folder to write into, created
			                     if it does not exist
			        post -- dict describing the post, see writeToFile() for the
			                keys that end up in the file. 'postId' is read too.

			        Raises FileAlreadyExistsError if the file is already there.
			        If the name is rejected as too long, '<id>.md' is used instead.
			        """

			        import errno

			        if not os.path.exists(directory): os.makedirs(directory)

			        title = nameCorrector(post['postTitle'])

			        # Filenames are declared here
			        fileName = post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"

			        print(fileName)

			        fileDir = directory / fileName

			        # os.path.isfile() answers False instead of raising when the
			        # path can not be inspected (an over-long name, for example),
			        # so such names still reach the fallback below.
			        if os.path.isfile(str(fileDir)):
			            raise FileAlreadyExistsError

			        try:
			            self.writeToFile(fileDir,post)
			        except OSError as exception:
			            # A too long name shows up as FileNotFoundError on some
			            # platforms and as OSError with ENAMETOOLONG on others.
			            nameTooLong = (
			                isinstance(exception, FileNotFoundError)
			                or exception.errno == errno.ENAMETOOLONG
			            )
			            if not nameTooLong:
			                raise

			            fileDir = directory / (post['postId']+".md")

			            self.writeToFile(fileDir,post)

			    @staticmethod
			    def writeToFile(directory,post):
			        """Self posts are formatted here

			        directory -- Full path of the file to write (despite its name)
			        post -- dict with the keys 'postTitle', 'postURL',
			                'postContent', 'postSubreddit' and 'postSubmitter',
			                all of them strings

			        The file is written as UTF-8 and ends with a newline.
			        """

			        content = ("## ["
			                   + post["postTitle"]
			                   + "]("
			                   + post["postURL"]
			                   + ")\n"
			                   + post["postContent"]
			                   + "\n\n---\n\n"
			                   + "submitted to [r/"
			                   + post["postSubreddit"]
			                   + "](https://www.reddit.com/r/"
			                   + post["postSubreddit"]
			                   + ") by [u/"
			                   + post["postSubmitter"]
			                   + "](https://www.reddit.com/user/"
			                   + post["postSubmitter"]
			                   + ")")

			        # Written directly so this does not depend on which function
			        # the name print is bound to in this module.
			        with io.open(directory,"w",encoding="utf-8") as FILE:
			            FILE.write(content + "\n")

			        print("Downloaded")