1
0
Fork 0
mirror of synced 2024-05-14 09:12:42 +12:00

Merge pull request #516 from Serene-Arc/enahcnement_515

This commit is contained in:
Serene 2021-09-11 12:18:03 +10:00 committed by GitHub
commit ee2075697b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 119 additions and 2 deletions

View file

@ -16,6 +16,7 @@ from bdfr.site_downloaders.imgur import Imgur
from bdfr.site_downloaders.pornhub import PornHub
from bdfr.site_downloaders.redgifs import Redgifs
from bdfr.site_downloaders.self_post import SelfPost
from bdfr.site_downloaders.vidble import Vidble
from bdfr.site_downloaders.youtube import Youtube
@ -46,11 +47,12 @@ class DownloadFactory:
return Direct
elif re.match(r'pornhub\.com.*', sanitised_url):
return PornHub
elif re.match(r'vidble\.com', sanitised_url):
return Vidble
elif YoutubeDlFallback.can_handle_link(sanitised_url):
return YoutubeDlFallback
else:
raise NotADownloadableLinkError(
f'No downloader module exists for url {url}')
raise NotADownloadableLinkError(f'No downloader module exists for url {url}')
@staticmethod
def sanitise_url(url: str) -> str:

View file

@ -0,0 +1,48 @@
#!/usr/bin/env python3
# coding=utf-8
import itertools
import logging
import re
from typing import Optional
import bs4
import requests
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class Vidble(BaseDownloader):
    """Downloader for submissions that link to a vidble.com page.

    The linked page is fetched and scraped for image and mp4 video sources,
    each of which is wrapped in a ``Resource``.
    """

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Return one ``Resource`` per media link found on the post's page.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Raises:
            SiteDownloaderError: If no media links could be found at the
                post's URL.
        """
        links = self.get_links(self.post.url)
        if not links:
            raise SiteDownloaderError(f'No resources found at {self.post.url}')
        return [Resource(self.post, link, Resource.retry_download(link)) for link in links]

    @staticmethod
    def get_links(url: str) -> set[str]:
        """Scrape the page at ``url`` and return the absolute media URLs on it.

        Image ``src`` attributes and ``video/mp4`` source ``src`` attributes
        inside the ``ContentPlaceHolder1_divContent`` div are collected,
        prefixed with the vidble host, and passed through ``change_med_url``.

        Returns:
            The set of media URLs; empty when the page has no content div or
            the div holds no usable sources.
        """
        page = requests.get(url)
        soup = bs4.BeautifulSoup(page.text, 'html.parser')
        content_div = soup.find('div', attrs={'id': 'ContentPlaceHolder1_divContent'})
        if content_div is None:
            # soup.find() returns None when the element is absent (removed
            # post, error page, changed layout). Report "nothing found" so the
            # caller raises its own SiteDownloaderError instead of crashing
            # with an AttributeError here.
            return set()

        image_sources = (img.get('src') for img in content_div.find_all('img'))
        video_sources = (
            source.get('src')
            for source in content_div.find_all('source', attrs={'type': 'video/mp4'})
        )
        # Tags without a (non-empty) src attribute are skipped.
        # NOTE(review): the prefixing assumes src values are site-relative
        # paths such as '/abc.jpg' - confirm against live pages.
        return {
            Vidble.change_med_url('https://www.vidble.com' + src)
            for src in itertools.chain(image_sources, video_sources)
            if src
        }

    @staticmethod
    def change_med_url(url: str) -> str:
        """Strip a trailing ``_med`` marker that precedes the file extension.

        For example ``/RDFbznUvcN_med.jpg`` becomes ``/RDFbznUvcN.jpg``. URLs
        that do not end in ``_med`` plus a 3-4 character extension are
        returned unchanged.
        """
        return re.sub(r'_med(\..{3,4})$', r'\1', url)

View file

@ -0,0 +1,67 @@
#!/usr/bin/env python3
# coding=utf-8
from unittest.mock import Mock
import pytest
from bdfr.resource import Resource
from bdfr.site_downloaders.vidble import Vidble
@pytest.mark.parametrize(('test_url', 'expected'), (
    ('/RDFbznUvcN_med.jpg', '/RDFbznUvcN.jpg'),
))
def test_change_med_url(test_url: str, expected: str):
    """Check that the ``_med`` marker is removed from a vidble media path."""
    assert Vidble.change_med_url(test_url) == expected
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected'), (
    ('https://www.vidble.com/show/UxsvAssYe5', {
        'https://www.vidble.com/UxsvAssYe5.gif',
    }),
    ('https://vidble.com/show/RDFbznUvcN', {
        'https://www.vidble.com/RDFbznUvcN.jpg',
    }),
    ('https://vidble.com/album/h0jTLs6B', {
        'https://www.vidble.com/XG4eAoJ5JZ.jpg',
        'https://www.vidble.com/IqF5UdH6Uq.jpg',
        'https://www.vidble.com/VWuNsnLJMD.jpg',
        'https://www.vidble.com/sMmM8O650W.jpg',
    }),
    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
        'https://www.vidble.com/0q4nWakqM6kzQWxlePD8N62Dsflev0N9.mp4',
    }),
))
def test_get_links(test_url: str, expected: set[str]):
    """Check that scraping a live vidble page yields exactly the expected media URLs."""
    found_links = Vidble.get_links(test_url)
    assert found_links == expected
# This test fetches pages and downloads media from vidble.com, so it carries
# the same `online` marker as test_get_links and can be deselected offline.
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected_hashes'), (
    ('https://www.vidble.com/show/UxsvAssYe5', {
        '0ef2f8e0e0b45936d2fb3e6fbdf67e28',
    }),
    ('https://vidble.com/show/RDFbznUvcN', {
        'c2dd30a71e32369c50eed86f86efff58',
    }),
    ('https://vidble.com/album/h0jTLs6B', {
        '3b3cba02e01c91f9858a95240b942c71',
        'dd6ecf5fc9e936f9fb614eb6a0537f99',
        'b31a942cd8cdda218ed547bbc04c3a27',
        '6f77c570b451eef4222804bd52267481',
    }),
    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
        'cebe9d5f24dba3b0443e5097f160ca83',
    }),
))
def test_find_resources(test_url: str, expected_hashes: set[str]):
    """Download every resource found for a vidble URL and compare content hashes."""
    # Only the `url` attribute of the submission is needed by the downloader.
    mock_download = Mock()
    mock_download.url = test_url
    downloader = Vidble(mock_download)

    results = downloader.find_resources()
    assert all(isinstance(res, Resource) for res in results)

    # Plain loop: download() is called for its side effect, not its result.
    for res in results:
        res.download()

    hashes = {res.hash.hexdigest() for res in results}
    assert hashes == expected_hashes