#!/usr/bin/env python3
# coding=utf-8
import itertools
import logging
import re
from typing import Optional

import bs4
import requests
from praw.models import Submission

from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader

logger = logging.getLogger(__name__)


class Vidble(BaseDownloader):
    """Site downloader that scrapes image and video links from a Vidble page."""

    def __init__(self, post: Submission):
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Build one Resource per media link found at the post's URL.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Returns:
            A list of Resource objects, one for each link from ``get_links``.

        Raises:
            SiteDownloaderError: If the page yields no links or its content
                container cannot be found.
        """
        links = self.get_links(self.post.url)
        if not links:
            raise SiteDownloaderError(rf'No resources found at {self.post.url}')
        return [Resource(self.post, link, Resource.retry_download(link)) for link in links]

    @staticmethod
    def get_links(url: str) -> set[str]:
        """Fetch a Vidble page and return the absolute URLs of its media.

        Args:
            url: Address of the Vidble page to scrape.

        Returns:
            The de-duplicated set of image and mp4 video URLs, each prefixed
            with the Vidble host and with any ``_med`` suffix removed.

        Raises:
            SiteDownloaderError: If the page has no content container.
        """
        page = requests.get(url)
        soup = bs4.BeautifulSoup(page.text, 'html.parser')
        content_div = soup.find('div', attrs={'id': 'ContentPlaceHolder1_divContent'})
        # find() returns None when the div is absent; without this guard the
        # find_all() calls below would fail with a bare AttributeError.
        if content_div is None:
            raise SiteDownloaderError(f'Could not find content container at {url}')

        image_sources = (tag.get('src') for tag in content_div.find_all('img'))
        video_sources = (tag.get('src') for tag in content_div.find_all('source', attrs={'type': 'video/mp4'}))
        # Tags without a src attribute yield None (or an empty string); skip them.
        sources = filter(None, itertools.chain(image_sources, video_sources))
        return {Vidble.change_med_url('https://www.vidble.com' + source) for source in sources}

    @staticmethod
    def change_med_url(url: str) -> str:
        """Strip a trailing ``_med`` marker that precedes the file extension.

        Only a ``_med`` directly before a final 3-4 character extension is
        removed; any other URL is returned unchanged.
        """
        # NOTE(review): `_med` presumably marks a medium-size preview - confirm.
        return re.sub(r'_med(\..{3,4})$', r'\1', url)
#!/usr/bin/env python3
# coding=utf-8
from unittest.mock import Mock

import pytest

from bdfr.resource import Resource
from bdfr.site_downloaders.vidble import Vidble


@pytest.mark.parametrize(('test_url', 'expected'), (
    ('/RDFbznUvcN_med.jpg', '/RDFbznUvcN.jpg'),
))
def test_change_med_url(test_url: str, expected: str):
    """The `_med` marker before the extension is stripped from the URL."""
    result = Vidble.change_med_url(test_url)
    assert result == expected


@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected'), (
    ('https://www.vidble.com/show/UxsvAssYe5', {
        'https://www.vidble.com/UxsvAssYe5.gif',
    }),
    ('https://vidble.com/show/RDFbznUvcN', {
        'https://www.vidble.com/RDFbznUvcN.jpg',
    }),
    ('https://vidble.com/album/h0jTLs6B', {
        'https://www.vidble.com/XG4eAoJ5JZ.jpg',
        'https://www.vidble.com/IqF5UdH6Uq.jpg',
        'https://www.vidble.com/VWuNsnLJMD.jpg',
        'https://www.vidble.com/sMmM8O650W.jpg',
    }),
    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
        'https://www.vidble.com/0q4nWakqM6kzQWxlePD8N62Dsflev0N9.mp4',
    }),
))
def test_get_links(test_url: str, expected: set[str]):
    """Scraping a live Vidble page yields exactly the expected media URLs."""
    results = Vidble.get_links(test_url)
    assert results == expected


# Marked online like test_get_links: this test fetches the page and then
# downloads every resource, so it cannot run without network access.
@pytest.mark.online
@pytest.mark.parametrize(('test_url', 'expected_hashes'), (
    ('https://www.vidble.com/show/UxsvAssYe5', {
        '0ef2f8e0e0b45936d2fb3e6fbdf67e28',
    }),
    ('https://vidble.com/show/RDFbznUvcN', {
        'c2dd30a71e32369c50eed86f86efff58',
    }),
    ('https://vidble.com/album/h0jTLs6B', {
        '3b3cba02e01c91f9858a95240b942c71',
        'dd6ecf5fc9e936f9fb614eb6a0537f99',
        'b31a942cd8cdda218ed547bbc04c3a27',
        '6f77c570b451eef4222804bd52267481',
    }),
    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
        'cebe9d5f24dba3b0443e5097f160ca83',
    }),
))
def test_find_resources(test_url: str, expected_hashes: set[str]):
    """Resources found for a post download to content with the expected hashes."""
    mock_download = Mock()
    mock_download.url = test_url
    downloader = Vidble(mock_download)
    results = downloader.find_resources()
    assert all(isinstance(res, Resource) for res in results)
    # Plain loop: download() is called for its side effect only.
    for res in results:
        res.download()
    hashes = {res.hash.hexdigest() for res in results}
    assert hashes == set(expected_hashes)