Added erome support

2024-09-29 08:41:56 +13:00 · 2018-07-23 23:16:56 +03:00 · 2018-07-23 23:16:56 +03:00 · 7314e17125
commit 7314e17125
parent 2d334d56bf
3 changed files with 158 additions and 19 deletions
--- a/script.py
+++ b/script.py
@ -13,7 +13,7 @@ import time
 from io import StringIO
 from pathlib import Path, PurePath

-from src.downloader import Direct, Gfycat, Imgur, Self
+from src.downloader import Direct, Gfycat, Imgur, Self, Erome
 from src.errors import *
 from src.parser import LinkDesigner
 from src.searcher import getPosts
@ -322,7 +322,6 @@ class PromptUser:
                GLOBAL.arguments.log = input("\nlog file directory:")
                if Path(GLOBAL.arguments.log ).is_file():
                    break 
-
        while True:
            try:
                GLOBAL.arguments.limit = int(input("\nlimit (0 for none): "))
@ -447,7 +446,9 @@ def downloadPost(SUBMISSION):

    global lastRequestTime

-    downloaders = {"imgur":Imgur,"gfycat":Gfycat,"direct":Direct,"self":Self}
+    downloaders = {
+        "imgur":Imgur,"gfycat":Gfycat,"erome":Erome,"direct":Direct,"self":Self
+    }

    if SUBMISSION['postType'] in downloaders:

@ -572,8 +573,6 @@ def download(submissions):
    else:
        print(" Total of {} links downloaded!".format(downloadedCount))

-    return None
-
 def main():
    GLOBAL.arguments = parseArguments()

--- a/src/downloader.py
+++ b/src/downloader.py
@ -2,6 +2,7 @@ import io
 import os
 import sys
 import urllib.request
+from html.parser import HTMLParser
 from pathlib import Path

 import imgurpython
@ -69,6 +70,134 @@ def getFile(fileDir,tempDir,imageURL,indent=0):
    else:
        raise FileAlreadyExistsError

+class Erome:
+    """Download an erome album as one file or a folder of numbered files."""
+
+    def __init__(self,directory,post):
+        """Fetch the links on ``post['postURL']`` and download all of them.
+
+        Reads the 'postURL', 'postTitle' and 'postId' keys of ``post``.
+
+        Raises FileAlreadyExistsError when every album file already exists
+        and AlbumNotDownloadedCompletely when at least one file failed.
+        """
+        IMAGES = self.getLinks(post['postURL'])
+
+        imagesLenght = len(IMAGES)
+        howManyDownloaded = imagesLenght
+        duplicates = 0
+
+        if imagesLenght == 1:
+            extension = getExtension(IMAGES[0])
+            title = nameCorrector(post['postTitle'])
+            print(title+"_" +post['postId']+extension)
+
+            fileDir = directory / (title + "_" + post['postId'] + extension)
+            tempDir = directory / (title + "_" + post['postId'] + '.tmp')
+            imageURL = "https:" + IMAGES[0]
+
+            try:
+                getFile(fileDir,tempDir,imageURL)
+            except FileNameTooLong:
+                # Retry with the post id only, which keeps the name short.
+                fileDir = directory / (post['postId'] + extension)
+                tempDir = directory / (post['postId'] + '.tmp')
+                getFile(fileDir,tempDir,imageURL)
+
+        else:
+            # NOTE(review): an album with no links also ends up here and is
+            # reported as FileAlreadyExistsError (0 duplicates == 0 images).
+            title = nameCorrector(post['postTitle'])
+            print(title+"_"+post['postId'],end="\n\n")
+
+            folderDir = directory / (title+"_"+post['postId'])
+
+            try:
+                if not os.path.exists(folderDir):
+                    os.makedirs(folderDir)
+            except FileNotFoundError:
+                # Fall back to a folder named after the post id alone.
+                folderDir = directory / post['postId']
+                os.makedirs(folderDir,exist_ok=True)
+
+            for index, link in enumerate(IMAGES, start=1):
+                extension = getExtension(link)
+                fileName = str(index)
+                imageURL = "https:" + link
+
+                fileDir = folderDir / (fileName + extension)
+                tempDir = folderDir / (fileName + ".tmp")
+
+                print("  ({}/{})".format(index,imagesLenght))
+                print("  {}".format(fileName+extension))
+
+                try:
+                    getFile(fileDir,tempDir,imageURL,indent=2)
+                    print()
+                except FileAlreadyExistsError:
+                    print("  The file already exists" + " "*10,end="\n\n")
+                    duplicates += 1
+                    howManyDownloaded -= 1
+                except Exception as exception:
+                    # One broken file must not abort the rest of the album;
+                    # the failure is reported after the loop instead.
+                    print("\n  Could not get the file")
+                    print("  " + str(exception) + "\n")
+                    howManyDownloaded -= 1
+
+            if duplicates == imagesLenght:
+                raise FileAlreadyExistsError
+            elif howManyDownloaded + duplicates < imagesLenght:
+                raise AlbumNotDownloadedCompletely(
+                    "Album Not Downloaded Completely"
+                )
+
+    def getLinks(self,url,lineNumber=129):
+        """Return the downloadable media links found on an erome album page.
+
+        ``lineNumber`` is kept for backward compatibility only and is
+        ignored; the album start is located in the page itself.
+
+        Links are returned as written in the page source. Of the ".mp4"
+        links only the "_480p.mp4" variants are kept.
+        """
+        class EromeParser(HTMLParser):
+            """Record candidate links in page order and the album start."""
+            def __init__(self):
+                super().__init__()
+                self.links = []
+                self.albumStart = None
+
+            def handle_starttag(self, tag, attrs):
+                attributes = dict(attrs)
+                if tag == "div" and attributes.get("id") == "album":
+                    if self.albumStart is None:
+                        self.albumStart = len(self.links)
+                elif tag == "img" and attributes.get("class") == "img-front":
+                    self.links.append(attributes.get("src"))
+                elif tag == "source":
+                    self.links.append(attributes.get("src"))
+
+        with urllib.request.urlopen(url) as response:
+            pageSource = response.read().decode()
+
+        # Parse the page in one pass so that several tags on one line, or a
+        # tag spread over several lines, are all seen.
+        parser = EromeParser()
+        parser.feed(pageSource)
+        parser.close()
+
+        # Only keep what comes after <div id="album"> in order not to get
+        # wrong links; without that div the whole page is used.
+        content = parser.links[parser.albumStart:]
+
+        return [
+            link for link in content
+            if link and (
+                link.endswith("_480p.mp4") or not link.endswith(".mp4")
+            )
+        ]
+
 class Imgur:
    def __init__(self,directory,post):
        self.imgurClient = self.initImgur()
@ -171,7 +300,7 @@ class Imgur:

            if duplicates == imagesLenght:
                raise FileAlreadyExistsError
-            elif howManyDownloaded < imagesLenght:
+            elif howManyDownloaded + duplicates < imagesLenght:
                raise AlbumNotDownloadedCompletely(
                    "Album Not Downloaded Completely"
                )
--- a/src/searcher.py
+++ b/src/searcher.py
@ -299,6 +299,8 @@ def redditSearcher(posts,SINGLE_POST=False):
    gfycatCount = 0
    global imgurCount
    imgurCount = 0
+    global eromeCount
+    eromeCount = 0
    global directCount
    directCount = 0
    global selfCount
@ -360,8 +362,15 @@ def redditSearcher(posts,SINGLE_POST=False):
    if not len(subList) == 0:    
        print(
            "\nTotal of {} submissions found!\n"\
-            "{} GFYCATs, {} IMGURs, {} DIRECTs and {} SELF POSTS\n"
-            .format(len(subList),gfycatCount,imgurCount,directCount,selfCount)
+            "{} GFYCATs, {} IMGURs, {} EROMEs, {} DIRECTs and {} SELF POSTS\n"
+            .format(
+                len(subList),
+                gfycatCount,
+                imgurCount,
+                eromeCount,
+                directCount,
+                selfCount
+            )
        )
        return subList
    else:
@ -370,6 +379,7 @@ def redditSearcher(posts,SINGLE_POST=False):
 def checkIfMatching(submission):
    global gfycatCount
    global imgurCount
+    global eromeCount
    global directCount
    global selfCount

@ -383,19 +393,20 @@ def checkIfMatching(submission):
    except AttributeError:
        return None

-    if ('gfycat' in submission.domain) or \
-        ('imgur' in submission.domain):
+    if 'gfycat' in submission.domain:
+        details['postType'] = 'gfycat'
+        gfycatCount += 1
+        return details

-        if 'gfycat' in submission.domain:
-            details['postType'] = 'gfycat'
-            gfycatCount += 1
-            return details
+    elif 'imgur' in submission.domain:
+        details['postType'] = 'imgur'
+        imgurCount += 1
+        return details

-        elif 'imgur' in submission.domain:
-            details['postType'] = 'imgur'
-            
-            imgurCount += 1
-            return details
+    elif 'erome' in submission.domain:
+        details['postType'] = 'erome'
+        eromeCount += 1
+        return details

    elif isDirectLink(submission.url) is not False:
        details['postType'] = 'direct'