bulk-downloader-for-reddit/src/downloaders/redgifs.py

import json
import os
import urllib.request
from bs4 import BeautifulSoup

from src.downloaders.downloaderUtils import getFile, getExtension
from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely, 
                        NotADownloadableLinkError, FileAlreadyExistsError)
from src.utils import GLOBAL
from src.utils import printToFile as print

class Redgifs:
    """Downloader for a single Redgifs post.

    Creating an instance does all the work: the direct media URL is resolved
    from the post's content URL, the URL and its extension are written back
    into the POST dict, the target directory is created when missing and the
    transfer itself is delegated to getFile.
    """

    def __init__(self, directory, POST):
        """Resolve the media URL of POST and pass it to getFile.

        directory -- folder handed to getFile; created if it does not exist
        POST      -- dict describing the post. 'CONTENTURL' and 'POSTID' are
                     read, 'MEDIAURL' and 'EXTENSION' are written, and the
                     whole dict is fed to the GLOBAL.config['filename']
                     format template.

        Raises NotADownloadableLinkError when no media URL can be extracted
        from the page.
        """
        try:
            POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])
        except IndexError:
            # Kept for callers/subclasses whose getLink may still surface an
            # IndexError for an unreadable page.
            raise NotADownloadableLinkError("Could not read the page source")

        POST['EXTENSION'] = getExtension(POST['MEDIAURL'])

        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(directory, exist_ok=True)

        filename = GLOBAL.config['filename'].format(**POST) + POST["EXTENSION"]
        shortFilename = POST['POSTID'] + POST['EXTENSION']

        getFile(filename, shortFilename, directory, POST['MEDIAURL'])

    def getLink(self, url):
        """Extract direct link to the video from page's source
        and return it

        A url that already contains '.webm', '.mp4' or '.gif' is returned
        unchanged. Otherwise the last path segment of url is looked up under
        https://redgifs.com/watch/ and the "video" -> "contentUrl" entry of
        the page's application/ld+json script is returned.

        Raises NotADownloadableLinkError when that script is missing, empty
        or does not contain the expected JSON structure.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        request = urllib.request.Request(
            "https://redgifs.com/watch/" + url.split('/')[-1]
        )

        # Browser-like User-Agent header sent with the page request.
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')

        # The context manager closes the HTTP response even if reading or
        # decoding fails.
        with urllib.request.urlopen(request) as response:
            pageSource = response.read().decode()

        soup = BeautifulSoup(pageSource, "html.parser")
        attributes = {"data-react-helmet": "true", "type": "application/ld+json"}
        content = soup.find("script", attrs=attributes)

        if content is None:
            raise NotADownloadableLinkError("Could not read the page source")

        try:
            return json.loads(content.contents[0])["video"]["contentUrl"]
        except (IndexError, KeyError, TypeError, ValueError) as err:
            # Empty script tag, invalid JSON (JSONDecodeError is a ValueError)
            # or a payload without video/contentUrl: report it the same way
            # as any other unreadable page.
            raise NotADownloadableLinkError(
                "Could not read the page source"
            ) from err
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00			`import json`
			`import os`
			`import urllib.request`
			`from bs4 import BeautifulSoup`

			`from src.downloaders.downloaderUtils import getFile, getExtension`
			`from src.errors import (FileNameTooLong, AlbumNotDownloadedCompletely,`
			`NotADownloadableLinkError, FileAlreadyExistsError)`
v1.8.0 (#105) ## Change log - Youtube support added - Custom filenames feature added - Custom folder structure feature added - Unsaving downloaded posts option added - Remove duplicate posts on different subreddits option added - Skipping given domains option added - Keeping track of already downloaded posts on a separate file option added (See --dowloaded-posts in README) - No audio on v.redd.it videos bug fixed (see README for details about ffmpeg) - --default-directory option is added - --default-options is added - --use-local-config option is added - Bug fixes 2020-06-02 00:05:02 +12:00			`from src.utils import GLOBAL`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00			`from src.utils import printToFile as print`

			`class Redgifs:`
			`def __init__(self,directory,POST):`
			`try:`
v1.8.0 (#105) ## Change log - Youtube support added - Custom filenames feature added - Custom folder structure feature added - Unsaving downloaded posts option added - Remove duplicate posts on different subreddits option added - Skipping given domains option added - Keeping track of already downloaded posts on a separate file option added (See --dowloaded-posts in README) - No audio on v.redd.it videos bug fixed (see README for details about ffmpeg) - --default-directory option is added - --default-options is added - --use-local-config option is added - Bug fixes 2020-06-02 00:05:02 +12:00			`POST['MEDIAURL'] = self.getLink(POST['CONTENTURL'])`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00			`except IndexError:`
			`raise NotADownloadableLinkError("Could not read the page source")`

v1.8.0 (#105) ## Change log - Youtube support added - Custom filenames feature added - Custom folder structure feature added - Unsaving downloaded posts option added - Remove duplicate posts on different subreddits option added - Skipping given domains option added - Keeping track of already downloaded posts on a separate file option added (See --dowloaded-posts in README) - No audio on v.redd.it videos bug fixed (see README for details about ffmpeg) - --default-directory option is added - --default-options is added - --use-local-config option is added - Bug fixes 2020-06-02 00:05:02 +12:00			`POST['EXTENSION'] = getExtension(POST['MEDIAURL'])`

v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00			`if not os.path.exists(directory): os.makedirs(directory)`

v1.8.0 (#105) ## Change log - Youtube support added - Custom filenames feature added - Custom folder structure feature added - Unsaving downloaded posts option added - Remove duplicate posts on different subreddits option added - Skipping given domains option added - Keeping track of already downloaded posts on a separate file option added (See --dowloaded-posts in README) - No audio on v.redd.it videos bug fixed (see README for details about ffmpeg) - --default-directory option is added - --default-options is added - --use-local-config option is added - Bug fixes 2020-06-02 00:05:02 +12:00			`filename = GLOBAL.config['filename'].format(**POST)+POST["EXTENSION"]`
			`shortFilename = POST['POSTID']+POST['EXTENSION']`

			`getFile(filename,shortFilename,directory,POST['MEDIAURL'])`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
			`def getLink(self, url):`
			`"""Extract direct link to the video from page's source`
			`and return it`
			`"""`

			`if '.webm' in url or '.mp4' in url or '.gif' in url:`
			`return url`

			`if url[-1:] == '/':`
			`url = url[:-1]`

Fix for access denied 2021-01-18 10:58:51 +13:00			`url = urllib.request.Request("https://redgifs.com/watch/" + url.split('/')[-1])`

			`url.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.64')`
v1.7.0 (#97) * tools file name change to utils * Seperate downloaders (#94) * Seperated the downloaders * Remove redundant code * Changed file names * refactor * Redgifs (#95) * Init commit * Init commit * GifDeliveryNetwork (#96) * Initial commit * Gfycat forwarding to GDN bug fixed 2020-05-29 06:42:11 +12:00
			`pageSource = (urllib.request.urlopen(url).read().decode())`

			`soup = BeautifulSoup(pageSource, "html.parser")`
			`attributes = {"data-react-helmet":"true","type":"application/ld+json"}`
			`content = soup.find("script",attrs=attributes)`

			`if content is None:`
			`raise NotADownloadableLinkError("Could not read the page source")`

Fix for access denied 2021-01-18 10:58:51 +13:00			`return json.loads(content.contents[0])["video"]["contentUrl"]`