From 540b237da6bd0b9c04489d109355d774715e93bc Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Thu, 18 Mar 2021 19:10:27 +1000 Subject: [PATCH] Fix Erome downloader --- .../site_downloaders/erome.py | 22 +++++++++---------- .../tests/downloaders/test_erome.py | 21 +++++++++++++----- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/bulkredditdownloader/site_downloaders/erome.py b/bulkredditdownloader/site_downloaders/erome.py index 8dab973..bb4373b 100644 --- a/bulkredditdownloader/site_downloaders/erome.py +++ b/bulkredditdownloader/site_downloaders/erome.py @@ -21,20 +21,20 @@ class Erome(BaseDownloader): super().__init__(post) def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: - images = self._get_links(self.post.url) - if not images: + links = self._get_links(self.post.url) + if not links: raise NotADownloadableLinkError('Erome parser could not find any links') - if len(images) == 1: - image = images.pop() - image = self._validate_url(image) - return [Resource(self.post, image)] + if len(links) == 1: + link = links.pop() + link = self._validate_url(link) + return [Resource(self.post, link)] else: out = [] - for i, image in enumerate(images): - image = self._validate_url(image) - out.append(Resource(self.post, image)) + for i, link in enumerate(links): + link = self._validate_url(link) + out.append(Resource(self.post, link)) return out @staticmethod @@ -47,8 +47,8 @@ class Erome(BaseDownloader): def _get_links(url: str) -> set[str]: page = requests.get(url) soup = bs4.BeautifulSoup(page.text) - front_images = soup.find_all('img', attrs={'class': 'img-front'}) - out = [im.get('src') for im in front_images] + front_images = soup.find_all('img', attrs={'class': 'lasyload'}) + out = [im.get('data-src') for im in front_images] videos = soup.find_all('source') out.extend([vid.get('src') for vid in videos]) diff --git a/bulkredditdownloader/tests/downloaders/test_erome.py b/bulkredditdownloader/tests/downloaders/test_erome.py index 6c8d5e0..b6d1d54 100644 --- a/bulkredditdownloader/tests/downloaders/test_erome.py +++ b/bulkredditdownloader/tests/downloaders/test_erome.py @@ -14,7 +14,7 @@ from bulkredditdownloader.site_downloaders.erome import Erome 'https://s6.erome.com/365/vqtPuLXh/KH2qBT99.jpg', 'https://s6.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4', ) - ), + ), ('https://www.erome.com/a/ORhX0FZz', ('https://s4.erome.com/355/ORhX0FZz/9IYQocM9.jpg', 'https://s4.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4', @@ -39,15 +39,24 @@ def test_get_link(test_url: str, expected_urls: tuple[str]): @pytest.mark.online @pytest.mark.slow -@pytest.mark.parametrize(('test_url', 'expected_number_of_resources', 'expected_hashes'), ( - ('https://www.erome.com/a/vqtPuLXh', 2, ('5da2a8d60d87bed279431fdec8e7d72f', '243d17b52a728911b022829badbc524e')), +@pytest.mark.parametrize(('test_url', 'expected_hashes'), ( + ('https://www.erome.com/a/vqtPuLXh', {'5da2a8d60d87bed279431fdec8e7d72f'}), + ('https://www.erome.com/i/ItASD33e', {'b0d73fedc9ce6995c2f2c4fdb6f11eff'}), + ('https://www.erome.com/a/lGrcFxmb', { + '0e98f9f527a911dcedde4f846bb5b69f', + '25696ae364750a5303fc7d7dc78b35c1', + '63775689f438bd393cde7db6d46187de', + 'a1abf398cfd4ef9cfaf093ceb10c746a', + 'bd9e1a4ea5ef0d6ba47fb90e337c2d14' + }), )) -def test_download_resource(test_url: str, expected_number_of_resources: int, expected_hashes: tuple[str]): +def test_download_resource(test_url: str, expected_hashes: tuple[str]): + # Can't compare hashes for this test, Erome doesn't return the exact same file from request to request so the hash + # will change back and forth randomly mock_submission = MagicMock() mock_submission.url = test_url test_site = Erome(mock_submission) resources = test_site.find_resources() - assert len(resources) == expected_number_of_resources [res.download() for res in resources] resource_hashes = [res.hash.hexdigest() for res in resources] - assert set(resource_hashes) == set(expected_hashes) + assert len(resource_hashes) == len(expected_hashes)