1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00

Move Imgur to API

Moves the Imgur downloader to use the Imgur API with a public Client-ID.
This commit is contained in:
OMEGARAZER 2023-01-21 17:36:56 -05:00
parent 8b3b5a73e8
commit 5fbe64dc71
No known key found for this signature in database
GPG key ID: D89925310D306E35
2 changed files with 24 additions and 150 deletions

View file

@@ -5,7 +5,6 @@ import json
import re import re
from typing import Optional from typing import Optional
import bs4
from praw.models import Submission from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError from bdfr.exceptions import SiteDownloaderError
@@ -23,73 +22,42 @@ class Imgur(BaseDownloader):
self.raw_data = self._get_data(self.post.url) self.raw_data = self._get_data(self.post.url)
out = [] out = []
if "album_images" in self.raw_data: if "is_album" in self.raw_data:
images = self.raw_data["album_images"] for image in self.raw_data["images"]:
for image in images["images"]: if "mp4" in image:
out.append(self._compute_image_url(image)) out.append(Resource(self.post, image["mp4"], Resource.retry_download(image["mp4"])))
else:
out.append(Resource(self.post, image["link"], Resource.retry_download(image["link"])))
else: else:
out.append(self._compute_image_url(self.raw_data)) if "mp4" in self.raw_data:
out.append(Resource(self.post, self.raw_data["mp4"], Resource.retry_download(self.raw_data["mp4"])))
else:
out.append(Resource(self.post, self.raw_data["link"], Resource.retry_download(self.raw_data["link"])))
return out return out
def _compute_image_url(self, image: dict) -> Resource:
ext = self._validate_extension(image["ext"])
if image.get("prefer_video", False):
ext = ".mp4"
image_url = "https://i.imgur.com/" + image["hash"] + ext
return Resource(self.post, image_url, Resource.retry_download(image_url))
@staticmethod @staticmethod
def _get_data(link: str) -> dict: def _get_data(link: str) -> dict:
try: try:
if re.search(r".*/(.*?)(gallery/|a/)", link): if re.search(r".*/(.*?)(gallery/|a/)", link):
imgur_id = re.match(r".*/(?:gallery/|a/)(.*?)(?:/.*)?$", link).group(1) imgur_id = re.match(r".*/(?:gallery/|a/)(.*?)(?:/.*)?$", link).group(1)
link = f"https://api.imgur.com/3/album/{imgur_id}"
else: else:
imgur_id = re.match(r".*/(.*?)(?:_d)?(?:\..{0,})?$", link).group(1) imgur_id = re.match(r".*/(.*?)(?:_d)?(?:\..{0,})?$", link).group(1)
gallery = "a/" if re.search(r".*/(.*?)(gallery/|a/)", link) else "" link = f"https://api.imgur.com/3/image/{imgur_id}"
if len(imgur_id) > 7:
if imgur_id.endswith(("s", "b", "t", "m", "l", "h")):
imgur_id = imgur_id[:7]
else:
raise SiteDownloaderError(f"Imgur ID error in link {link}")
link = f"https://imgur.com/{gallery}{imgur_id}"
except AttributeError: except AttributeError:
raise SiteDownloaderError(f"Could not extract Imgur ID from {link}") raise SiteDownloaderError(f"Could not extract Imgur ID from {link}")
res = Imgur.retrieve_url(link, cookies={"over18": "1", "postpagebeta": "0"}) headers = {
"referer": "https://imgur.com/",
soup = bs4.BeautifulSoup(res.text, "html.parser") "origin": "https://imgur.com",
scripts = soup.find_all("script", attrs={"type": "text/javascript"}) "content-type": "application/json",
scripts = [script.string.replace("\n", "") for script in scripts if script.string] "Authorization": "Client-ID 546c25a59c58ad7",
}
script_regex = re.compile(r"\s*\(function\(widgetFactory\)\s*{\s*widgetFactory\.mergeConfig\(\'gallery\'") res = Imgur.retrieve_url(link, headers=headers)
chosen_script = list(filter(lambda s: re.search(script_regex, s), scripts))
if len(chosen_script) != 1:
raise SiteDownloaderError(f"Could not read page source from {link}")
chosen_script = chosen_script[0]
outer_regex = re.compile(r"widgetFactory\.mergeConfig\(\'gallery\', ({.*})\);")
inner_regex = re.compile(r"image\s*:(.*),\s*group")
try:
image_dict = re.search(outer_regex, chosen_script).group(1)
image_dict = re.search(inner_regex, image_dict).group(1)
except AttributeError:
raise SiteDownloaderError("Could not find image dictionary in page source")
try: try:
image_dict = json.loads(image_dict) image_dict = json.loads(res.text)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Could not parse received dict as JSON: {e}") raise SiteDownloaderError(f"Could not parse received response as JSON: {e}")
return image_dict return image_dict["data"]
@staticmethod
def _validate_extension(extension_suffix: str) -> str:
extension_suffix = re.sub(r"\?.*", "", extension_suffix)
possible_extensions = (".jpg", ".png", ".mp4", ".gif")
selection = [ext for ext in possible_extensions if ext == extension_suffix]
if len(selection) == 1:
return selection[0]
else:
raise SiteDownloaderError(f'"{extension_suffix}" is not recognized as a valid extension for Imgur')

View file

@@ -5,105 +5,10 @@ from unittest.mock import Mock
import pytest import pytest
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource from bdfr.resource import Resource
from bdfr.site_downloaders.imgur import Imgur from bdfr.site_downloaders.imgur import Imgur
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected_gen_dict", "expected_image_dict"),
(
(
"https://imgur.com/a/xWZsDDP",
{"num_images": "1", "id": "xWZsDDP", "hash": "xWZsDDP"},
[{"hash": "ypa8YfS", "title": "", "ext": ".png", "animated": False}],
),
(
"https://imgur.com/gallery/IjJJdlC",
{"num_images": 1, "id": 384898055, "hash": "IjJJdlC"},
[
{
"hash": "CbbScDt",
"description": "watch when he gets it",
"ext": ".gif",
"animated": True,
"has_sound": False,
}
],
),
(
"https://imgur.com/a/dcc84Gt",
{"num_images": "4", "id": "dcc84Gt", "hash": "dcc84Gt"},
[
{"hash": "ylx0Kle", "ext": ".jpg", "title": ""},
{"hash": "TdYfKbK", "ext": ".jpg", "title": ""},
{"hash": "pCxGbe8", "ext": ".jpg", "title": ""},
{"hash": "TSAkikk", "ext": ".jpg", "title": ""},
],
),
(
"https://m.imgur.com/a/py3RW0j",
{
"num_images": "1",
"id": "py3RW0j",
"hash": "py3RW0j",
},
[{"hash": "K24eQmK", "has_sound": False, "ext": ".jpg"}],
),
),
)
def test_get_data_album(test_url: str, expected_gen_dict: dict, expected_image_dict: list[dict]):
result = Imgur._get_data(test_url)
assert all([result.get(key) == expected_gen_dict[key] for key in expected_gen_dict.keys()])
# Check if all the keys from the test dict are correct in at least one of the album entries
assert any(
[
all([image.get(key) == image_dict[key] for key in image_dict.keys()])
for image_dict in expected_image_dict
for image in result["album_images"]["images"]
]
)
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected_image_dict"),
(
("https://i.imgur.com/dLk3FGY.gifv", {"hash": "dLk3FGY", "title": "", "ext": ".mp4", "animated": True}),
(
"https://imgur.com/65FqTpT.gifv",
{"hash": "65FqTpT", "title": "", "description": "", "animated": True, "mimetype": "video/mp4"},
),
),
)
def test_get_data_gif(test_url: str, expected_image_dict: dict):
result = Imgur._get_data(test_url)
assert all([result.get(key) == expected_image_dict[key] for key in expected_image_dict.keys()])
@pytest.mark.parametrize("test_extension", (".gif", ".png", ".jpg", ".mp4"))
def test_imgur_extension_validation_good(test_extension: str):
result = Imgur._validate_extension(test_extension)
assert result == test_extension
@pytest.mark.parametrize(
"test_extension",
(
".jpeg",
"bad",
".avi",
".test",
".flac",
),
)
def test_imgur_extension_validation_bad(test_extension: str):
with pytest.raises(SiteDownloaderError):
Imgur._validate_extension(test_extension)
@pytest.mark.online @pytest.mark.online
@pytest.mark.parametrize( @pytest.mark.parametrize(
("test_url", "expected_hashes"), ("test_url", "expected_hashes"),
@@ -130,7 +35,7 @@ def test_imgur_extension_validation_bad(test_extension: str):
("https://i.imgur.com/lFJai6i.gifv", ("01a6e79a30bec0e644e5da12365d5071",)), ("https://i.imgur.com/lFJai6i.gifv", ("01a6e79a30bec0e644e5da12365d5071",)),
("https://i.imgur.com/ywSyILa.gifv?", ("56d4afc32d2966017c38d98568709b45",)), ("https://i.imgur.com/ywSyILa.gifv?", ("56d4afc32d2966017c38d98568709b45",)),
("https://imgur.com/ubYwpbk.GIFV", ("d4a774aac1667783f9ed3a1bd02fac0c",)), ("https://imgur.com/ubYwpbk.GIFV", ("d4a774aac1667783f9ed3a1bd02fac0c",)),
("https://i.imgur.com/j1CNCZY.gifv", ("58e7e6d972058c18b7ecde910ca147e3",)), ("https://i.imgur.com/j1CNCZY.gifv", ("ed63d7062bc32edaeea8b53f876a307c",)),
("https://i.imgur.com/uTvtQsw.gifv", ("46c86533aa60fc0e09f2a758513e3ac2",)), ("https://i.imgur.com/uTvtQsw.gifv", ("46c86533aa60fc0e09f2a758513e3ac2",)),
("https://i.imgur.com/OGeVuAe.giff", ("77389679084d381336f168538793f218",)), ("https://i.imgur.com/OGeVuAe.giff", ("77389679084d381336f168538793f218",)),
("https://i.imgur.com/OGeVuAe.gift", ("77389679084d381336f168538793f218",)), ("https://i.imgur.com/OGeVuAe.gift", ("77389679084d381336f168538793f218",)),
@@ -142,6 +47,7 @@ def test_imgur_extension_validation_bad(test_extension: str):
("https://imgur.com/a/1qzfWtY/gifv", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)), ("https://imgur.com/a/1qzfWtY/gifv", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://imgur.com/a/1qzfWtY/mp4", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)), ("https://imgur.com/a/1qzfWtY/mp4", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://imgur.com/a/1qzfWtY/spqr", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)), ("https://imgur.com/a/1qzfWtY/spqr", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://i.imgur.com/expO7Rc.gifv", ("e309f98158fc98072eb2ae68f947f421",)),
), ),
) )
def test_find_resources(test_url: str, expected_hashes: list[str]): def test_find_resources(test_url: str, expected_hashes: list[str]):