From 77aaee96f3dbfbbd042948d285ebca7ff1055424 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Mon, 19 Jul 2021 18:44:54 +1000
Subject: [PATCH 01/15] Fix bug with deleted galleries

---
 bdfr/site_downloaders/gallery.py       |  2 +-
 tests/site_downloaders/test_gallery.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/bdfr/site_downloaders/gallery.py b/bdfr/site_downloaders/gallery.py
index 62fec60..cd34416 100644
--- a/bdfr/site_downloaders/gallery.py
+++ b/bdfr/site_downloaders/gallery.py
@@ -21,7 +21,7 @@ class Gallery(BaseDownloader):
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         try:
             image_urls = self._get_links(self.post.gallery_data['items'])
-        except AttributeError:
+        except (AttributeError, TypeError):
             try:
                 image_urls = self._get_links(self.post.crosspost_parent_list[0]['gallery_data']['items'])
             except (AttributeError, IndexError, TypeError):
diff --git a/tests/site_downloaders/test_gallery.py b/tests/site_downloaders/test_gallery.py
index 51045f8..f84650d 100644
--- a/tests/site_downloaders/test_gallery.py
+++ b/tests/site_downloaders/test_gallery.py
@@ -4,6 +4,7 @@
 import praw
 import pytest
 
+from bdfr.exceptions import SiteDownloaderError
 from bdfr.site_downloaders.gallery import Gallery
 
 
@@ -68,3 +69,13 @@ def test_gallery_download(test_submission_id: str, expected_hashes: set[str], re
     [res.download(120) for res in results]
     hashes = [res.hash.hexdigest() for res in results]
     assert set(hashes) == expected_hashes
+
+
+@pytest.mark.parametrize('test_id', (
+    'n0pyzp',
+))
+def test_gallery_download_raises_right_error(test_id: str, reddit_instance: praw.Reddit):
+    test_submission = reddit_instance.submission(id=test_id)
+    gallery = Gallery(test_submission)
+    with pytest.raises(SiteDownloaderError):
+        gallery.find_resources()

From 1a4ff07f78f51dfbbe70c089b440e1c0f169be08 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Mon, 5 Jul 2021 16:58:33 +1000
Subject: [PATCH 02/15] Add ability to read IDs from files

---
 bdfr/__main__.py                                 | 11 ++++++-----
 bdfr/configuration.py                            |  1 +
 bdfr/connector.py                                | 16 +++++++++++-----
 .../test_download_integration.py                 | 14 ++++++++++++++
 tests/test_connector.py                          |  5 ++---
 5 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/bdfr/__main__.py b/bdfr/__main__.py
index 67e4f99..367f8c6 100644
--- a/bdfr/__main__.py
+++ b/bdfr/__main__.py
@@ -6,9 +6,9 @@ import sys
 import click
 
 from bdfr.archiver import Archiver
+from bdfr.cloner import RedditCloner
 from bdfr.configuration import Configuration
 from bdfr.downloader import RedditDownloader
-from bdfr.cloner import RedditCloner
 
 logger = logging.getLogger()
 
@@ -17,6 +17,7 @@ _common_options = [
     click.option('--authenticate', is_flag=True, default=None),
     click.option('--config', type=str, default=None),
     click.option('--disable-module', multiple=True, default=None, type=str),
+    click.option('--include-id-file', multiple=True, default=None),
     click.option('--log', type=str, default=None),
     click.option('--saved', is_flag=True, default=None),
     click.option('--search', default=None, type=str),
@@ -26,12 +27,12 @@ _common_options = [
     click.option('-L', '--limit', default=None, type=int),
     click.option('-l', '--link', multiple=True, default=None, type=str),
     click.option('-m', '--multireddit', multiple=True, default=None, type=str),
+    click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
+                 default=None),
     click.option('-s', '--subreddit', multiple=True, default=None, type=str),
-    click.option('-v', '--verbose', default=None, count=True),
-    click.option('-u', '--user', type=str, multiple=True, default=None),
     click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None),
-    click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',
-                                                    'controversial', 'rising', 'relevance')), default=None),
+    click.option('-u', '--user', type=str, multiple=True, default=None),
+    click.option('-v', '--verbose', default=None, count=True),
 ]
 
 _downloader_options = [
diff --git a/bdfr/configuration.py b/bdfr/configuration.py
index 36a1860..bc4c541 100644
--- a/bdfr/configuration.py
+++ b/bdfr/configuration.py
@@ -18,6 +18,7 @@ class Configuration(Namespace):
         self.exclude_id_file = []
         self.file_scheme: str = '{REDDITOR}_{TITLE}_{POSTID}'
         self.folder_scheme: str = '{SUBREDDIT}'
+        self.include_id_file = []
         self.limit: Optional[int] = None
         self.link: list[str] = []
         self.log: Optional[str] = None
diff --git a/bdfr/connector.py b/bdfr/connector.py
index 0e78c8c..a379847 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -3,6 +3,7 @@
 
 import configparser
 import importlib.resources
+import itertools
 import logging
 import logging.handlers
 import re
@@ -78,7 +79,12 @@ class RedditConnector(metaclass=ABCMeta):
         self.create_reddit_instance()
         self.args.user = list(filter(None, [self.resolve_user_name(user) for user in self.args.user]))
 
-        self.excluded_submission_ids = self.read_excluded_ids()
+        self.excluded_submission_ids = set.union(
+            self.read_id_files(self.args.exclude_id_file),
+            set(self.args.exclude_id),
+        )
+
+        self.args.link = list(itertools.chain(self.args.link, self.read_id_files(self.args.include_id_file)))
 
         self.master_hash_list = {}
         self.authenticator = self.create_authenticator()
@@ -403,13 +409,13 @@ class RedditConnector(metaclass=ABCMeta):
         except prawcore.Forbidden:
             raise errors.BulkDownloaderException(f'Source {subreddit.display_name} is private and cannot be scraped')
 
-    def read_excluded_ids(self) -> set[str]:
+    @staticmethod
+    def read_id_files(file_locations: list[str]) -> set[str]:
         out = []
-        out.extend(self.args.exclude_id)
-        for id_file in self.args.exclude_id_file:
+        for id_file in file_locations:
             id_file = Path(id_file).resolve().expanduser()
             if not id_file.exists():
-                logger.warning(f'ID exclusion file at {id_file} does not exist')
+                logger.warning(f'ID file at {id_file} does not exist')
                 continue
             with open(id_file, 'r') as file:
                 for line in file:
diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 305fe99..cb4a273 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -306,3 +306,17 @@ def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path):
     assert result.exit_code == 0
     assert 'skipped due to disabled module' in result.output
     assert 'Downloaded submission' not in result.output
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
+def test_cli_download_include_id_file(tmp_path: Path):
+    test_file = Path(tmp_path, 'include.txt')
+    test_args = ['--include-id-file', str(test_file)]
+    test_file.write_text('odr9wg\nody576')
+    runner = CliRunner()
+    test_args = create_basic_args_for_download_runner(test_args, tmp_path)
+    result = runner.invoke(cli, test_args)
+    assert result.exit_code == 0
+    assert 'Downloaded submission' in result.output
diff --git a/tests/test_connector.py b/tests/test_connector.py
index 15eede1..2dd76f9 100644
--- a/tests/test_connector.py
+++ b/tests/test_connector.py
@@ -339,11 +339,10 @@ def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: se
     assert results == expected
 
 
-def test_read_excluded_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Path):
+def test_read_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Path):
     test_file = tmp_path / 'test.txt'
     test_file.write_text('aaaaaa\nbbbbbb')
-    downloader_mock.args.exclude_id_file = [test_file]
-    results = RedditConnector.read_excluded_ids(downloader_mock)
+    results = RedditConnector.read_id_files([str(test_file)])
     assert results == {'aaaaaa', 'bbbbbb'}
 
 

From 7a1663db51895e849aa112ec51b6b2c8a4301da7 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Mon, 5 Jul 2021 17:02:19 +1000
Subject: [PATCH 03/15] Update README

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index be4f455..89a4e90 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,10 @@ The following options are common between both the `archive` and `download` comma
   - Can be specified multiple times
   - Disables certain modules from being used
   - See [Disabling Modules](#disabling-modules) for more information and a list of module names
+- `--include-id-file`
+  - This will add any submission with the IDs in the files provided
+  - Can be specified multiple times
+  - Format is one ID per line
 - `--log`
   - This allows one to specify the location of the logfile
   - This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below

From 44453b1707abc6559b3d9bc05d4ad53c8ffc7fbe Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Tue, 27 Jul 2021 13:12:50 +1000
Subject: [PATCH 04/15] Update tests

---
 tests/site_downloaders/test_gallery.py | 5 +----
 tests/site_downloaders/test_gfycat.py  | 2 --
 tests/site_downloaders/test_redgifs.py | 9 +++------
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/tests/site_downloaders/test_gallery.py b/tests/site_downloaders/test_gallery.py
index f84650d..4e5d9f1 100644
--- a/tests/site_downloaders/test_gallery.py
+++ b/tests/site_downloaders/test_gallery.py
@@ -53,10 +53,6 @@ def test_gallery_get_links(test_ids: list[dict], expected: set[str]):
         '808c35267f44acb523ce03bfa5687404',
         'ec8b65bdb7f1279c4b3af0ea2bbb30c3',
     }),
-    ('nxyahw', {
-        'b89a3f41feb73ec1136ec4ffa7353eb1',
-        'cabb76fd6fd11ae6e115a2039eb09f04',
-    }),
     ('obkflw', {
         '65163f685fb28c5b776e0e77122718be',
         '2a337eb5b13c34d3ca3f51b5db7c13e9',
@@ -73,6 +69,7 @@ def test_gallery_download(test_submission_id: str, expected_hashes: set[str], re
 
 @pytest.mark.parametrize('test_id', (
     'n0pyzp',
+    'nxyahw',
 ))
 def test_gallery_download_raises_right_error(test_id: str, reddit_instance: praw.Reddit):
     test_submission = reddit_instance.submission(id=test_id)
diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py
index 56aa2d0..3a405f8 100644
--- a/tests/site_downloaders/test_gfycat.py
+++ b/tests/site_downloaders/test_gfycat.py
@@ -13,8 +13,6 @@ from bdfr.site_downloaders.gfycat import Gfycat
 @pytest.mark.parametrize(('test_url', 'expected_url'), (
     ('https://gfycat.com/definitivecaninecrayfish', 'https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4'),
     ('https://gfycat.com/dazzlingsilkyiguana', 'https://giant.gfycat.com/DazzlingSilkyIguana.mp4'),
-    ('https://gfycat.com/webbedimpurebutterfly', 'https://thumbs2.redgifs.com/WebbedImpureButterfly.mp4'),
-    ('https://gfycat.com/CornyLoathsomeHarrierhawk', 'https://thumbs2.redgifs.com/CornyLoathsomeHarrierhawk.mp4')
 ))
 def test_get_link(test_url: str, expected_url: str):
     result = Gfycat._get_link(test_url)
diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py
index 476149f..097fbf4 100644
--- a/tests/site_downloaders/test_redgifs.py
+++ b/tests/site_downloaders/test_redgifs.py
@@ -15,10 +15,8 @@ from bdfr.site_downloaders.redgifs import Redgifs
      'https://thumbs2.redgifs.com/FrighteningVictoriousSalamander.mp4'),
     ('https://redgifs.com/watch/springgreendecisivetaruca',
      'https://thumbs2.redgifs.com/SpringgreenDecisiveTaruca.mp4'),
-    ('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer',
-     'https://thumbs2.redgifs.com/RegalShoddyHorsechestnutleafminer.mp4'),
-    ('https://www.gifdeliverynetwork.com/maturenexthippopotamus',
-     'https://thumbs2.redgifs.com/MatureNextHippopotamus.mp4'),
+    ('https://www.redgifs.com/watch/palegoldenrodrawhalibut',
+     'https://thumbs2.redgifs.com/PalegoldenrodRawHalibut.mp4'),
 ))
 def test_get_link(test_url: str, expected: str):
     result = Redgifs._get_link(test_url)
@@ -29,9 +27,8 @@ def test_get_link(test_url: str, expected: str):
 @pytest.mark.parametrize(('test_url', 'expected_hash'), (
     ('https://redgifs.com/watch/frighteningvictorioussalamander', '4007c35d9e1f4b67091b5f12cffda00a'),
     ('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'),
-    ('https://www.gifdeliverynetwork.com/maturenexthippopotamus', '9bec0a9e4163a43781368ed5d70471df'),
-    ('https://www.gifdeliverynetwork.com/regalshoddyhorsechestnutleafminer', '8afb4e2c090a87140230f2352bf8beba'),
     ('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'),
+    ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', '46d5aa77fe80c6407de1ecc92801c10e'),
 ))
 def test_download_resource(test_url: str, expected_hash: str):
     mock_submission = Mock()

From 3cdae99490e54bc6eb0da452cce2b3048da10786 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Tue, 27 Jul 2021 13:39:49 +1000
Subject: [PATCH 05/15] Implement callbacks for downloading

---
 bdfr/archiver.py                              |  6 +-
 bdfr/downloader.py                            |  2 +-
 bdfr/resource.py                              | 50 +++++++-------
 bdfr/site_downloaders/direct.py               |  2 +-
 bdfr/site_downloaders/erome.py                |  2 +-
 .../youtubedl_fallback.py                     | 26 ++++----
 bdfr/site_downloaders/gallery.py              |  2 +-
 bdfr/site_downloaders/imgur.py                |  2 +-
 bdfr/site_downloaders/pornhub.py              |  7 +-
 bdfr/site_downloaders/redgifs.py              |  2 +-
 bdfr/site_downloaders/self_post.py            |  2 +-
 bdfr/site_downloaders/youtube.py              | 65 ++++++++++++-------
 tests/site_downloaders/test_direct.py         |  2 +-
 tests/site_downloaders/test_erome.py          |  2 +-
 tests/site_downloaders/test_gallery.py        |  2 +-
 tests/site_downloaders/test_gfycat.py         |  2 +-
 tests/site_downloaders/test_imgur.py          |  2 +-
 tests/site_downloaders/test_pornhub.py        |  2 +-
 tests/site_downloaders/test_redgifs.py        |  2 +-
 tests/site_downloaders/test_youtube.py        |  2 +-
 tests/test_download_filter.py                 |  4 +-
 tests/test_file_name_formatter.py             | 10 +--
 tests/test_resource.py                        |  6 +-
 23 files changed, 112 insertions(+), 92 deletions(-)

diff --git a/bdfr/archiver.py b/bdfr/archiver.py
index 74b92e8..d445e8d 100644
--- a/bdfr/archiver.py
+++ b/bdfr/archiver.py
@@ -76,17 +76,17 @@ class Archiver(RedditConnector):
         logger.info(f'Record for entry item {praw_item.id} written to disk')
 
     def _write_entry_json(self, entry: BaseArchiveEntry):
-        resource = Resource(entry.source, '', '.json')
+        resource = Resource(entry.source, '', lambda: None, '.json')
         content = json.dumps(entry.compile())
         self._write_content_to_disk(resource, content)
 
     def _write_entry_xml(self, entry: BaseArchiveEntry):
-        resource = Resource(entry.source, '', '.xml')
+        resource = Resource(entry.source, '', lambda: None, '.xml')
         content = dict2xml.dict2xml(entry.compile(), wrap='root')
         self._write_content_to_disk(resource, content)
 
     def _write_entry_yaml(self, entry: BaseArchiveEntry):
-        resource = Resource(entry.source, '', '.yaml')
+        resource = Resource(entry.source, '', lambda: None, '.yaml')
         content = yaml.dump(entry.compile())
         self._write_content_to_disk(resource, content)
 
diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index f4220db..69aa818 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -82,7 +82,7 @@ class RedditDownloader(RedditConnector):
                 logger.debug(f'Download filter removed {submission.id} file with URL {submission.url}')
                 continue
             try:
-                res.download(self.args.max_wait_time)
+                res.download()
             except errors.BulkDownloaderException as e:
                 logger.error(f'Failed to download resource {res.url} in submission {submission.id} '
                              f'with downloader {downloader_class.__name__}: {e}')
diff --git a/bdfr/resource.py b/bdfr/resource.py
index e8f9fd1..8f874ef 100644
--- a/bdfr/resource.py
+++ b/bdfr/resource.py
@@ -6,7 +6,7 @@ import logging
 import re
 import time
 import urllib.parse
-from typing import Optional
+from typing import Callable, Optional
 
 import _hashlib
 import requests
@@ -18,40 +18,44 @@ logger = logging.getLogger(__name__)
 
 
 class Resource:
-    def __init__(self, source_submission: Submission, url: str, extension: str = None):
+    def __init__(self, source_submission: Submission, url: str, download_function: Callable, extension: str = None):
         self.source_submission = source_submission
         self.content: Optional[bytes] = None
         self.url = url
         self.hash: Optional[_hashlib.HASH] = None
         self.extension = extension
+        self.download_function = download_function
         if not self.extension:
             self.extension = self._determine_extension()
 
     @staticmethod
-    def retry_download(url: str, max_wait_time: int, current_wait_time: int = 60) -> Optional[bytes]:
-        try:
-            response = requests.get(url)
-            if re.match(r'^2\d{2}', str(response.status_code)) and response.content:
-                return response.content
-            elif response.status_code in (408, 429):
-                raise requests.exceptions.ConnectionError(f'Response code {response.status_code}')
-            else:
-                raise BulkDownloaderException(
-                    f'Unrecoverable error requesting resource: HTTP Code {response.status_code}')
-        except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as e:
-            logger.warning(f'Error occured downloading from {url}, waiting {current_wait_time} seconds: {e}')
-            time.sleep(current_wait_time)
-            if current_wait_time < max_wait_time:
-                current_wait_time += 60
-                return Resource.retry_download(url, max_wait_time, current_wait_time)
-            else:
-                logger.error(f'Max wait time exceeded for resource at url {url}')
-                raise
+    def retry_download(url: str, max_wait_time: int) -> Callable:
+        def http_download() -> Optional[bytes]:
+            current_wait_time = 60
+            while True:
+                try:
+                    response = requests.get(url)
+                    if re.match(r'^2\d{2}', str(response.status_code)) and response.content:
+                        return response.content
+                    elif response.status_code in (408, 429):
+                        raise requests.exceptions.ConnectionError(f'Response code {response.status_code}')
+                    else:
+                        raise BulkDownloaderException(
+                            f'Unrecoverable error requesting resource: HTTP Code {response.status_code}')
+                except (requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as e:
+                    logger.warning(f'Error occured downloading from {url}, waiting {current_wait_time} seconds: {e}')
+                    time.sleep(current_wait_time)
+                    if current_wait_time < max_wait_time:
+                        current_wait_time += 60
+                    else:
+                        logger.error(f'Max wait time exceeded for resource at url {url}')
+                        raise
+        return http_download
 
-    def download(self, max_wait_time: int):
+    def download(self):
         if not self.content:
             try:
-                content = self.retry_download(self.url, max_wait_time)
+                content = self.download_function()
             except requests.exceptions.ConnectionError as e:
                 raise BulkDownloaderException(f'Could not download resource: {e}')
             except BulkDownloaderException:
diff --git a/bdfr/site_downloaders/direct.py b/bdfr/site_downloaders/direct.py
index 106f251..df1a469 100644
--- a/bdfr/site_downloaders/direct.py
+++ b/bdfr/site_downloaders/direct.py
@@ -14,4 +14,4 @@ class Direct(BaseDownloader):
         super().__init__(post)
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        return [Resource(self.post, self.post.url)]
+        return [Resource(self.post, self.post.url, Resource.retry_download(self.post.url, 300))]
diff --git a/bdfr/site_downloaders/erome.py b/bdfr/site_downloaders/erome.py
index bd29ea4..69b9ae3 100644
--- a/bdfr/site_downloaders/erome.py
+++ b/bdfr/site_downloaders/erome.py
@@ -29,7 +29,7 @@ class Erome(BaseDownloader):
         for link in links:
             if not re.match(r'https?://.*', link):
                 link = 'https://' + link
-            out.append(Resource(self.post, link))
+            out.append(Resource(self.post, link, Resource.retry_download(link, 300)))
         return out
 
     @staticmethod
diff --git a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py b/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py
index 281182a..6ede405 100644
--- a/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py
+++ b/bdfr/site_downloaders/fallback_downloaders/youtubedl_fallback.py
@@ -4,7 +4,6 @@
 import logging
 from typing import Optional
 
-import youtube_dl
 from praw.models import Submission
 
 from bdfr.resource import Resource
@@ -20,21 +19,18 @@ class YoutubeDlFallback(BaseFallbackDownloader, Youtube):
         super(YoutubeDlFallback, self).__init__(post)
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        out = super()._download_video({})
+        out = Resource(
+            self.post,
+            self.post.url,
+            super()._download_video({}),
+            super().get_video_attributes(self.post.url)['ext'],
+        )
         return [out]
 
     @staticmethod
     def can_handle_link(url: str) -> bool:
-        yt_logger = logging.getLogger('youtube-dl')
-        yt_logger.setLevel(logging.CRITICAL)
-        with youtube_dl.YoutubeDL({
-            'logger': yt_logger,
-        }) as ydl:
-            try:
-                result = ydl.extract_info(url, download=False)
-                if result:
-                    return True
-            except Exception as e:
-                logger.exception(e)
-                return False
-        return False
+        attributes = YoutubeDlFallback.get_video_attributes(url)
+        if attributes:
+            return True
+        else:
+            return False
diff --git a/bdfr/site_downloaders/gallery.py b/bdfr/site_downloaders/gallery.py
index cd34416..c016d28 100644
--- a/bdfr/site_downloaders/gallery.py
+++ b/bdfr/site_downloaders/gallery.py
@@ -31,7 +31,7 @@ class Gallery(BaseDownloader):
 
         if not image_urls:
             raise SiteDownloaderError('No images found in Reddit gallery')
-        return [Resource(self.post, url) for url in image_urls]
+        return [Resource(self.post, url, Resource.retry_download(url, 300)) for url in image_urls]
 
     @ staticmethod
     def _get_links(id_dict: list[dict]) -> list[str]:
diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py
index 44a62f1..79a1115 100644
--- a/bdfr/site_downloaders/imgur.py
+++ b/bdfr/site_downloaders/imgur.py
@@ -33,7 +33,7 @@ class Imgur(BaseDownloader):
 
     def _compute_image_url(self, image: dict) -> Resource:
         image_url = 'https://i.imgur.com/' + image['hash'] + self._validate_extension(image['ext'])
-        return Resource(self.post, image_url)
+        return Resource(self.post, image_url, Resource.retry_download(image_url, 300))
 
     @staticmethod
     def _get_data(link: str) -> dict:
diff --git a/bdfr/site_downloaders/pornhub.py b/bdfr/site_downloaders/pornhub.py
index 6658d7e..c2bc0ad 100644
--- a/bdfr/site_downloaders/pornhub.py
+++ b/bdfr/site_downloaders/pornhub.py
@@ -22,5 +22,10 @@ class PornHub(Youtube):
             'format': 'best',
             'nooverwrites': True,
         }
-        out = self._download_video(ytdl_options)
+        out = Resource(
+            self.post,
+            self.post.url,
+            super()._download_video(ytdl_options),
+            super().get_video_attributes(self.post.url)['ext'],
+        )
         return [out]
diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py
index 9cfec02..d4989e7 100644
--- a/bdfr/site_downloaders/redgifs.py
+++ b/bdfr/site_downloaders/redgifs.py
@@ -18,7 +18,7 @@ class Redgifs(BaseDownloader):
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         media_url = self._get_link(self.post.url)
-        return [Resource(self.post, media_url, '.mp4')]
+        return [Resource(self.post, media_url, Resource.retry_download(media_url, 300), '.mp4')]
 
     @staticmethod
     def _get_link(url: str) -> str:
diff --git a/bdfr/site_downloaders/self_post.py b/bdfr/site_downloaders/self_post.py
index cb922ee..6e4ce0e 100644
--- a/bdfr/site_downloaders/self_post.py
+++ b/bdfr/site_downloaders/self_post.py
@@ -17,7 +17,7 @@ class SelfPost(BaseDownloader):
         super().__init__(post)
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        out = Resource(self.post, self.post.url, '.txt')
+        out = Resource(self.post, self.post.url, lambda: None, '.txt')
         out.content = self.export_to_string().encode('utf-8')
         out.create_hash()
         return [out]
diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py
index 8b93b23..126cb6a 100644
--- a/bdfr/site_downloaders/youtube.py
+++ b/bdfr/site_downloaders/youtube.py
@@ -3,12 +3,12 @@
 import logging
 import tempfile
 from pathlib import Path
-from typing import Optional
+from typing import Callable, Optional
 
 import youtube_dl
 from praw.models import Submission
 
-from bdfr.exceptions import (NotADownloadableLinkError, SiteDownloaderError)
+from bdfr.exceptions import NotADownloadableLinkError, SiteDownloaderError
 from bdfr.resource import Resource
 from bdfr.site_authenticator import SiteAuthenticator
 from bdfr.site_downloaders.base_downloader import BaseDownloader
@@ -26,32 +26,47 @@ class Youtube(BaseDownloader):
             'playlistend': 1,
             'nooverwrites': True,
         }
-        out = self._download_video(ytdl_options)
-        return [out]
+        download_function = self._download_video(ytdl_options)
+        try:
+            extension = self.get_video_attributes(self.post.url)['ext']
+        except KeyError:
+            raise NotADownloadableLinkError(f'Youtube-DL cannot download URL {self.post.url}')
+        res = Resource(self.post, self.post.url, download_function, extension)
+        return [res]
 
-    def _download_video(self, ytdl_options: dict) -> Resource:
+    def _download_video(self, ytdl_options: dict) -> Callable:
         yt_logger = logging.getLogger('youtube-dl')
         yt_logger.setLevel(logging.CRITICAL)
         ytdl_options['quiet'] = True
         ytdl_options['logger'] = yt_logger
-        with tempfile.TemporaryDirectory() as temp_dir:
-            download_path = Path(temp_dir).resolve()
-            ytdl_options['outtmpl'] = str(download_path) + '/' + 'test.%(ext)s'
-            try:
-                with youtube_dl.YoutubeDL(ytdl_options) as ydl:
-                    ydl.download([self.post.url])
-            except youtube_dl.DownloadError as e:
-                raise SiteDownloaderError(f'Youtube download failed: {e}')
 
-            downloaded_files = list(download_path.iterdir())
-            if len(downloaded_files) > 0:
-                downloaded_file = downloaded_files[0]
-            else:
-                raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")
-            extension = downloaded_file.suffix
-            with open(downloaded_file, 'rb') as file:
-                content = file.read()
-        out = Resource(self.post, self.post.url, extension)
-        out.content = content
-        out.create_hash()
-        return out
+        def download() -> bytes:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                download_path = Path(temp_dir).resolve()
+                ytdl_options['outtmpl'] = str(download_path) + '/' + 'test.%(ext)s'
+                try:
+                    with youtube_dl.YoutubeDL(ytdl_options) as ydl:
+                        ydl.download([self.post.url])
+                except youtube_dl.DownloadError as e:
+                    raise SiteDownloaderError(f'Youtube download failed: {e}')
+
+                downloaded_files = list(download_path.iterdir())
+                if len(downloaded_files) > 0:
+                    downloaded_file = downloaded_files[0]
+                else:
+                    raise NotADownloadableLinkError(f"No media exists in the URL {self.post.url}")
+                with open(downloaded_file, 'rb') as file:
+                    content = file.read()
+                return content
+        return download
+
+    @staticmethod
+    def get_video_attributes(url: str) -> dict:  # youtube-dl info dict for url; {} when extraction fails
+        yt_logger = logging.getLogger('youtube-dl')
+        yt_logger.setLevel(logging.CRITICAL)  # keep youtube-dl's own messages below CRITICAL out of the log
+        with youtube_dl.YoutubeDL({'logger': yt_logger, }) as ydl:
+            try:
+                return ydl.extract_info(url, download=False) or {}  # metadata probe only, nothing is downloaded
+            except Exception as e:
+                logger.exception(e)
+                return {}  # never None: callers index ['ext'] (KeyError is handled) or test truthiness
diff --git a/tests/site_downloaders/test_direct.py b/tests/site_downloaders/test_direct.py
index 790f4c3..56f90fc 100644
--- a/tests/site_downloaders/test_direct.py
+++ b/tests/site_downloaders/test_direct.py
@@ -21,5 +21,5 @@ def test_download_resource(test_url: str, expected_hash: str):
     resources = test_site.find_resources()
     assert len(resources) == 1
     assert isinstance(resources[0], Resource)
-    resources[0].download(120)
+    resources[0].download()
     assert resources[0].hash.hexdigest() == expected_hash
diff --git a/tests/site_downloaders/test_erome.py b/tests/site_downloaders/test_erome.py
index 84546c4..2918bef 100644
--- a/tests/site_downloaders/test_erome.py
+++ b/tests/site_downloaders/test_erome.py
@@ -49,6 +49,6 @@ def test_download_resource(test_url: str, expected_hashes: tuple[str]):
     mock_submission.url = test_url
     test_site = Erome(mock_submission)
     resources = test_site.find_resources()
-    [res.download(120) for res in resources]
+    [res.download() for res in resources]
     resource_hashes = [res.hash.hexdigest() for res in resources]
     assert len(resource_hashes) == len(expected_hashes)
diff --git a/tests/site_downloaders/test_gallery.py b/tests/site_downloaders/test_gallery.py
index 4e5d9f1..08eea91 100644
--- a/tests/site_downloaders/test_gallery.py
+++ b/tests/site_downloaders/test_gallery.py
@@ -62,7 +62,7 @@ def test_gallery_download(test_submission_id: str, expected_hashes: set[str], re
     test_submission = reddit_instance.submission(id=test_submission_id)
     gallery = Gallery(test_submission)
     results = gallery.find_resources()
-    [res.download(120) for res in results]
+    [res.download() for res in results]
     hashes = [res.hash.hexdigest() for res in results]
     assert set(hashes) == expected_hashes
 
diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py
index 3a405f8..981d01d 100644
--- a/tests/site_downloaders/test_gfycat.py
+++ b/tests/site_downloaders/test_gfycat.py
@@ -31,5 +31,5 @@ def test_download_resource(test_url: str, expected_hash: str):
     resources = test_site.find_resources()
     assert len(resources) == 1
     assert isinstance(resources[0], Resource)
-    resources[0].download(120)
+    resources[0].download()
     assert resources[0].hash.hexdigest() == expected_hash
diff --git a/tests/site_downloaders/test_imgur.py b/tests/site_downloaders/test_imgur.py
index 94bd240..bfb7405 100644
--- a/tests/site_downloaders/test_imgur.py
+++ b/tests/site_downloaders/test_imgur.py
@@ -149,6 +149,6 @@ def test_find_resources(test_url: str, expected_hashes: list[str]):
     downloader = Imgur(mock_download)
     results = downloader.find_resources()
     assert all([isinstance(res, Resource) for res in results])
-    [res.download(120) for res in results]
+    [res.download() for res in results]
     hashes = set([res.hash.hexdigest() for res in results])
     assert hashes == set(expected_hashes)
diff --git a/tests/site_downloaders/test_pornhub.py b/tests/site_downloaders/test_pornhub.py
index 12144dd..e07da45 100644
--- a/tests/site_downloaders/test_pornhub.py
+++ b/tests/site_downloaders/test_pornhub.py
@@ -21,5 +21,5 @@ def test_find_resources_good(test_url: str, expected_hash: str):
     resources = downloader.find_resources()
     assert len(resources) == 1
     assert isinstance(resources[0], Resource)
-    resources[0].download(120)
+    resources[0].download()
     assert resources[0].hash.hexdigest() == expected_hash
diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py
index 097fbf4..571f044 100644
--- a/tests/site_downloaders/test_redgifs.py
+++ b/tests/site_downloaders/test_redgifs.py
@@ -37,5 +37,5 @@ def test_download_resource(test_url: str, expected_hash: str):
     resources = test_site.find_resources()
     assert len(resources) == 1
     assert isinstance(resources[0], Resource)
-    resources[0].download(120)
+    resources[0].download()
     assert resources[0].hash.hexdigest() == expected_hash
diff --git a/tests/site_downloaders/test_youtube.py b/tests/site_downloaders/test_youtube.py
index f3a97e1..1f6b81a 100644
--- a/tests/site_downloaders/test_youtube.py
+++ b/tests/site_downloaders/test_youtube.py
@@ -23,7 +23,7 @@ def test_find_resources_good(test_url: str, expected_hash: str):
     resources = downloader.find_resources()
     assert len(resources) == 1
     assert isinstance(resources[0], Resource)
-    resources[0].download(120)
+    resources[0].download()
     assert resources[0].hash.hexdigest() == expected_hash
 
 
diff --git a/tests/test_download_filter.py b/tests/test_download_filter.py
index ead2b2f..5def10c 100644
--- a/tests/test_download_filter.py
+++ b/tests/test_download_filter.py
@@ -46,7 +46,7 @@ def test_filter_domain(test_url: str, expected: bool, download_filter: DownloadF
     ('http://reddit.com/test.gif', False),
 ))
 def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilter):
-    test_resource = Resource(MagicMock(), test_url)
+    test_resource = Resource(MagicMock(), test_url, lambda: None)
     result = download_filter.check_resource(test_resource)
     assert result == expected
 
@@ -59,6 +59,6 @@ def test_filter_all(test_url: str, expected: bool, download_filter: DownloadFilt
 ))
 def test_filter_empty_filter(test_url: str):
     download_filter = DownloadFilter()
-    test_resource = Resource(MagicMock(), test_url)
+    test_resource = Resource(MagicMock(), test_url, lambda: None)
     result = download_filter.check_resource(test_resource)
     assert result is True
diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py
index e4c82ac..f596d89 100644
--- a/tests/test_file_name_formatter.py
+++ b/tests/test_file_name_formatter.py
@@ -119,7 +119,7 @@ def test_format_full(
         format_string_file: str,
         expected: str,
         reddit_submission: praw.models.Submission):
-    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
+    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
     test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
     result = test_formatter.format_path(test_resource, Path('test'))
     assert do_test_path_equality(result, expected)
@@ -136,7 +136,7 @@ def test_format_full_conform(
         format_string_directory: str,
         format_string_file: str,
         reddit_submission: praw.models.Submission):
-    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
+    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
     test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
     test_formatter.format_path(test_resource, Path('test'))
 
@@ -156,7 +156,7 @@ def test_format_full_with_index_suffix(
         expected: str,
         reddit_submission: praw.models.Submission,
 ):
-    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
+    test_resource = Resource(reddit_submission, 'i.reddit.com/blabla.png', lambda: None)
     test_formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
     result = test_formatter.format_path(test_resource, Path('test'), index)
     assert do_test_path_equality(result, expected)
@@ -216,7 +216,7 @@ def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
     submission.author.name = 'test'
     submission.subreddit.display_name = 'test'
     submission.id = 'BBBBBB'
-    test_resource = Resource(submission, 'www.example.com/empty', '.jpeg')
+    test_resource = Resource(submission, 'www.example.com/empty', lambda: None, '.jpeg')
     test_formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
     result = test_formatter.format_path(test_resource, tmp_path)
     result.parent.mkdir(parents=True)
@@ -296,7 +296,7 @@ def test_format_archive_entry_comment(
 ):
     test_comment = reddit_instance.comment(id=test_comment_id)
     test_formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
-    test_entry = Resource(test_comment, '', '.json')
+    test_entry = Resource(test_comment, '', lambda: None, '.json')
     result = test_formatter.format_path(test_entry, tmp_path)
     assert do_test_string_equality(result, expected_name)
 
diff --git a/tests/test_resource.py b/tests/test_resource.py
index 272c457..db9a6cc 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -21,7 +21,7 @@ from bdfr.resource import Resource
     ('https://www.test.com/test/test2/example.png?random=test#thing', '.png'),
 ))
 def test_resource_get_extension(test_url: str, expected: str):
-    test_resource = Resource(MagicMock(), test_url)
+    test_resource = Resource(MagicMock(), test_url, lambda: None)
     result = test_resource._determine_extension()
     assert result == expected
 
@@ -31,6 +31,6 @@ def test_resource_get_extension(test_url: str, expected: str):
     ('https://www.iana.org/_img/2013.1/iana-logo-header.svg', '426b3ac01d3584c820f3b7f5985d6623'),
 ))
 def test_download_online_resource(test_url: str, expected_hash: str):
-    test_resource = Resource(MagicMock(), test_url)
-    test_resource.download(120)
+    test_resource = Resource(MagicMock(), test_url, Resource.retry_download(test_url, 60))
+    test_resource.download()
     assert test_resource.hash.hexdigest() == expected_hash

From dbe8733fd44cb1b3055faa072c801e73e18d7865 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Tue, 27 Jul 2021 14:02:30 +1000
Subject: [PATCH 06/15] Refactor method to remove max wait time

---
 bdfr/resource.py                 | 5 ++++-
 bdfr/site_downloaders/direct.py  | 2 +-
 bdfr/site_downloaders/erome.py   | 2 +-
 bdfr/site_downloaders/gallery.py | 2 +-
 bdfr/site_downloaders/imgur.py   | 2 +-
 bdfr/site_downloaders/redgifs.py | 2 +-
 tests/test_resource.py           | 2 +-
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/bdfr/resource.py b/bdfr/resource.py
index 8f874ef..a1c90de 100644
--- a/bdfr/resource.py
+++ b/bdfr/resource.py
@@ -6,6 +6,7 @@ import logging
 import re
 import time
 import urllib.parse
+from collections import namedtuple
 from typing import Callable, Optional
 
 import _hashlib
@@ -29,7 +30,9 @@ class Resource:
             self.extension = self._determine_extension()
 
     @staticmethod
-    def retry_download(url: str, max_wait_time: int) -> Callable:
+    def retry_download(url: str) -> Callable:
+        max_wait_time = 300
+
         def http_download() -> Optional[bytes]:
             current_wait_time = 60
             while True:
diff --git a/bdfr/site_downloaders/direct.py b/bdfr/site_downloaders/direct.py
index df1a469..833acae 100644
--- a/bdfr/site_downloaders/direct.py
+++ b/bdfr/site_downloaders/direct.py
@@ -14,4 +14,4 @@ class Direct(BaseDownloader):
         super().__init__(post)
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
-        return [Resource(self.post, self.post.url, Resource.retry_download(self.post.url, 300))]
+        return [Resource(self.post, self.post.url, Resource.retry_download(self.post.url))]
diff --git a/bdfr/site_downloaders/erome.py b/bdfr/site_downloaders/erome.py
index 69b9ae3..6130560 100644
--- a/bdfr/site_downloaders/erome.py
+++ b/bdfr/site_downloaders/erome.py
@@ -29,7 +29,7 @@ class Erome(BaseDownloader):
         for link in links:
             if not re.match(r'https?://.*', link):
                 link = 'https://' + link
-            out.append(Resource(self.post, link, Resource.retry_download(link, 300)))
+            out.append(Resource(self.post, link, Resource.retry_download(link)))
         return out
 
     @staticmethod
diff --git a/bdfr/site_downloaders/gallery.py b/bdfr/site_downloaders/gallery.py
index c016d28..158e338 100644
--- a/bdfr/site_downloaders/gallery.py
+++ b/bdfr/site_downloaders/gallery.py
@@ -31,7 +31,7 @@ class Gallery(BaseDownloader):
 
         if not image_urls:
             raise SiteDownloaderError('No images found in Reddit gallery')
-        return [Resource(self.post, url, Resource.retry_download(url, 300)) for url in image_urls]
+        return [Resource(self.post, url, Resource.retry_download(url)) for url in image_urls]
 
     @ staticmethod
     def _get_links(id_dict: list[dict]) -> list[str]:
diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py
index 79a1115..f0b7012 100644
--- a/bdfr/site_downloaders/imgur.py
+++ b/bdfr/site_downloaders/imgur.py
@@ -33,7 +33,7 @@ class Imgur(BaseDownloader):
 
     def _compute_image_url(self, image: dict) -> Resource:
         image_url = 'https://i.imgur.com/' + image['hash'] + self._validate_extension(image['ext'])
-        return Resource(self.post, image_url, Resource.retry_download(image_url, 300))
+        return Resource(self.post, image_url, Resource.retry_download(image_url))
 
     @staticmethod
     def _get_data(link: str) -> dict:
diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py
index d4989e7..a62fedb 100644
--- a/bdfr/site_downloaders/redgifs.py
+++ b/bdfr/site_downloaders/redgifs.py
@@ -18,7 +18,7 @@ class Redgifs(BaseDownloader):
 
     def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
         media_url = self._get_link(self.post.url)
-        return [Resource(self.post, media_url, Resource.retry_download(media_url, 300), '.mp4')]
+        return [Resource(self.post, media_url, Resource.retry_download(media_url), '.mp4')]
 
     @staticmethod
     def _get_link(url: str) -> str:
diff --git a/tests/test_resource.py b/tests/test_resource.py
index db9a6cc..f3bbc9a 100644
--- a/tests/test_resource.py
+++ b/tests/test_resource.py
@@ -31,6 +31,6 @@ def test_resource_get_extension(test_url: str, expected: str):
     ('https://www.iana.org/_img/2013.1/iana-logo-header.svg', '426b3ac01d3584c820f3b7f5985d6623'),
 ))
 def test_download_online_resource(test_url: str, expected_hash: str):
-    test_resource = Resource(MagicMock(), test_url, Resource.retry_download(test_url, 60))
+    test_resource = Resource(MagicMock(), test_url, Resource.retry_download(test_url))
     test_resource.download()
     assert test_resource.hash.hexdigest() == expected_hash

From 7bca303b1b663848c5081fd9fa0543291a05396a Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Thu, 29 Jul 2021 19:10:10 +1000
Subject: [PATCH 07/15] Add in downloader parameters

---
 bdfr/downloader.py               |  2 +-
 bdfr/resource.py                 | 13 +++++++++----
 bdfr/site_downloaders/youtube.py |  2 +-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/bdfr/downloader.py b/bdfr/downloader.py
index 69aa818..70052b2 100644
--- a/bdfr/downloader.py
+++ b/bdfr/downloader.py
@@ -82,7 +82,7 @@ class RedditDownloader(RedditConnector):
                 logger.debug(f'Download filter removed {submission.id} file with URL {submission.url}')
                 continue
             try:
-                res.download()
+                res.download({'max_wait_time': self.args.max_wait_time})
             except errors.BulkDownloaderException as e:
                 logger.error(f'Failed to download resource {res.url} in submission {submission.id} '
                              f'with downloader {downloader_class.__name__}: {e}')
diff --git a/bdfr/resource.py b/bdfr/resource.py
index a1c90de..27ba84b 100644
--- a/bdfr/resource.py
+++ b/bdfr/resource.py
@@ -6,7 +6,6 @@ import logging
 import re
 import time
 import urllib.parse
-from collections import namedtuple
 from typing import Callable, Optional
 
 import _hashlib
@@ -33,8 +32,12 @@ class Resource:
     def retry_download(url: str) -> Callable:
         max_wait_time = 300
 
-        def http_download() -> Optional[bytes]:
+        def http_download(download_parameters: dict) -> Optional[bytes]:
             current_wait_time = 60
+            if 'max_wait_time' in download_parameters:
+                max_wait_time = download_parameters['max_wait_time']
+            else:
+                max_wait_time = 300
             while True:
                 try:
                     response = requests.get(url)
@@ -55,10 +58,12 @@ class Resource:
                         raise
         return http_download
 
-    def download(self):
+    def download(self, download_parameters: Optional[dict] = None):
+        if download_parameters is None:
+            download_parameters = {}
         if not self.content:
             try:
-                content = self.download_function()
+                content = self.download_function(download_parameters)
             except requests.exceptions.ConnectionError as e:
                 raise BulkDownloaderException(f'Could not download resource: {e}')
             except BulkDownloaderException:
diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py
index 126cb6a..a870c2e 100644
--- a/bdfr/site_downloaders/youtube.py
+++ b/bdfr/site_downloaders/youtube.py
@@ -40,7 +40,7 @@ class Youtube(BaseDownloader):
         ytdl_options['quiet'] = True
         ytdl_options['logger'] = yt_logger
 
-        def download() -> bytes:
+        def download(_: dict) -> bytes:
             with tempfile.TemporaryDirectory() as temp_dir:
                 download_path = Path(temp_dir).resolve()
                 ytdl_options['outtmpl'] = str(download_path) + '/' + 'test.%(ext)s'

From 87f283cc98ccb7743cfefd54b063d23142040431 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Fri, 3 Sep 2021 19:24:28 +1000
Subject: [PATCH 08/15] Fix backup config location

---
 bdfr/connector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bdfr/connector.py b/bdfr/connector.py
index 0e78c8c..78ddc4f 100644
--- a/bdfr/connector.py
+++ b/bdfr/connector.py
@@ -184,8 +184,9 @@ class RedditConnector(metaclass=ABCMeta):
                 logger.debug(f'Loading configuration from {path}')
                 break
         if not self.config_location:
-            self.config_location = list(importlib.resources.path('bdfr', 'default_config.cfg').gen)[0]
-            shutil.copy(self.config_location, Path(self.config_directory, 'default_config.cfg'))
+            with importlib.resources.path('bdfr', 'default_config.cfg') as path:
+                self.config_location = path
+                shutil.copy(self.config_location, Path(self.config_directory, 'default_config.cfg'))
         if not self.config_location:
             raise errors.BulkDownloaderException('Could not find a configuration file to load')
         self.cfg_parser.read(self.config_location)

From afc2a6416bc08b6009e7f4d27af132cf65705259 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Fri, 3 Sep 2021 16:39:00 +1000
Subject: [PATCH 09/15] Add integration test

---
 tests/integration_tests/test_download_integration.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 305fe99..6fecd73 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -45,6 +45,7 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
     ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new'],
     ['-s', 'trollxchromosomes', '-L', 1, '--search', 'women'],
+    ['-s', 'hentai', '-L', 10, '--search', 'red'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--search', 'women'],
     ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new', '--search', 'women'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new', '--search', 'women'],
@@ -55,6 +56,7 @@ def test_cli_download_subreddits(test_args: list[str], tmp_path: Path):
     result = runner.invoke(cli, test_args)
     assert result.exit_code == 0
     assert 'Added submissions from subreddit ' in result.output
+    assert 'Downloaded submission' in result.output
 
 
 @pytest.mark.online

From defd6bca77ff2b56e91b289307d12fe422cda524 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Thu, 9 Sep 2021 13:42:18 +1000
Subject: [PATCH 10/15] Tweak test conditions

---
 tests/test_connector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_connector.py b/tests/test_connector.py
index 15eede1..a275d9f 100644
--- a/tests/test_connector.py
+++ b/tests/test_connector.py
@@ -199,10 +199,9 @@ def test_get_subreddit_normal(
 @pytest.mark.reddit
 @pytest.mark.parametrize(('test_subreddits', 'search_term', 'limit', 'time_filter', 'max_expected_len'), (
     (('Python',), 'scraper', 10, 'all', 10),
-    (('Python',), '', 10, 'all', 10),
+    (('Python',), '', 10, 'all', 0),
     (('Python',), 'djsdsgewef', 10, 'all', 0),
     (('Python',), 'scraper', 10, 'year', 10),
-    (('Python',), 'scraper', 10, 'hour', 1),
 ))
 def test_get_subreddit_search(
         test_subreddits: list[str],
@@ -226,6 +225,8 @@ def test_get_subreddit_search(
     assert all([isinstance(res, praw.models.Submission) for res in results])
     assert all([res.subreddit.display_name in test_subreddits for res in results])
     assert len(results) <= max_expected_len
+    if max_expected_len != 0:
+        assert len(results) > 0
     assert not any([isinstance(m, MagicMock) for m in results])
 
 

From 56575dc390fbefcbcbadb390e950fdda38561030 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Thu, 9 Sep 2021 13:43:11 +1000
Subject: [PATCH 11/15] Add NSFW search test

---
 .../test_download_integration.py                | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py
index 6fecd73..57f39bf 100644
--- a/tests/integration_tests/test_download_integration.py
+++ b/tests/integration_tests/test_download_integration.py
@@ -45,7 +45,6 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
     ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new'],
     ['-s', 'trollxchromosomes', '-L', 1, '--search', 'women'],
-    ['-s', 'hentai', '-L', 10, '--search', 'red'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--search', 'women'],
     ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new', '--search', 'women'],
     ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new', '--search', 'women'],
@@ -59,6 +58,22 @@ def test_cli_download_subreddits(test_args: list[str], tmp_path: Path):
     assert 'Downloaded submission' in result.output
 
 
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.authenticated
+@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests')
+@pytest.mark.parametrize('test_args', (
+    ['-s', 'hentai', '-L', 10, '--search', 'red', '--authenticate'],
+))
+def test_cli_download_search_subreddits_authenticated(test_args: list[str], tmp_path: Path):
+    """An authenticated subreddit search must exit cleanly, add submissions and download at least one."""
+    cli_args = create_basic_args_for_download_runner(test_args, tmp_path)
+    outcome = CliRunner().invoke(cli, cli_args)
+    assert outcome.exit_code == 0
+    for expected_text in ('Added submissions from subreddit ', 'Downloaded submission'):
+        assert expected_text in outcome.output
+
+
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.authenticated

From edc2db0ded1222b4b050f99421d939fc369ff104 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Thu, 9 Sep 2021 13:50:03 +1000
Subject: [PATCH 12/15] Update test

---
 tests/site_downloaders/test_erome.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/site_downloaders/test_erome.py b/tests/site_downloaders/test_erome.py
index 2918bef..bab34bb 100644
--- a/tests/site_downloaders/test_erome.py
+++ b/tests/site_downloaders/test_erome.py
@@ -14,13 +14,13 @@ from bdfr.site_downloaders.erome import Erome
         'https://s11.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4',
     )),
     ('https://www.erome.com/a/ORhX0FZz', (
-        'https://s4.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/EvApC7Rp_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/LruobtMs_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/TJNmSUU5_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/X11Skh6Z_480p.mp4',
-        'https://s4.erome.com/355/ORhX0FZz/bjlTkpn7_480p.mp4'
+        'https://s15.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/EvApC7Rp_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/LruobtMs_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/TJNmSUU5_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/X11Skh6Z_480p.mp4',
+        'https://s15.erome.com/355/ORhX0FZz/bjlTkpn7_480p.mp4'
     )),
 ))
 def test_get_link(test_url: str, expected_urls: tuple[str]):

From 940d646d30299747b6d0a0c3b25ea3fbafed0875 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Sat, 11 Sep 2021 12:13:21 +1000
Subject: [PATCH 13/15] Add Vidble module

---
 bdfr/site_downloaders/vidble.py       | 48 +++++++++++++++++++
 tests/site_downloaders/test_vidble.py | 67 +++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 bdfr/site_downloaders/vidble.py
 create mode 100644 tests/site_downloaders/test_vidble.py

diff --git a/bdfr/site_downloaders/vidble.py b/bdfr/site_downloaders/vidble.py
new file mode 100644
index 0000000..2f8f4f4
--- /dev/null
+++ b/bdfr/site_downloaders/vidble.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+# coding=utf-8
+import itertools
+import logging
+import re
+from typing import Optional
+
+import bs4
+import requests
+from praw.models import Submission
+
+from bdfr.exceptions import SiteDownloaderError
+from bdfr.resource import Resource
+from bdfr.site_authenticator import SiteAuthenticator
+from bdfr.site_downloaders.base_downloader import BaseDownloader
+
+logger = logging.getLogger(__name__)
+
+
+class Vidble(BaseDownloader):
+    """Downloader for media linked from vidble.com show, album and watch pages."""
+
+    def __init__(self, post: Submission):
+        super().__init__(post)
+
+    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
+        """Build one Resource per scraped link; raise SiteDownloaderError when the page yields none."""
+        links = self.get_links(self.post.url)
+        if not links:
+            raise SiteDownloaderError(rf'No resources found at {self.post.url}')
+        return [Resource(self.post, link, Resource.retry_download(link)) for link in links]
+
+    @staticmethod
+    def get_links(url: str) -> set[str]:
+        """Scrape url and return absolute media URLs; raise SiteDownloaderError if the content div is absent."""
+        soup = bs4.BeautifulSoup(requests.get(url).text, 'html.parser')
+        content_div = soup.find('div', attrs={'id': 'ContentPlaceHolder1_divContent'})
+        if content_div is None:  # find() returns None when the div is missing; avoid an opaque AttributeError
+            raise SiteDownloaderError(f'Could not find the content section of Vidble page {url}')
+        images = [i.get('src') for i in content_div.find_all('img')]
+        videos = [v.get('src') for v in content_div.find_all('source', attrs={'type': 'video/mp4'})]
+        sources = filter(None, itertools.chain(images, videos))  # drop tags with a missing or empty src
+        return {Vidble.change_med_url('https://www.vidble.com' + src) for src in sources}
+
+    @staticmethod
+    def change_med_url(url: str) -> str:
+        """Remove a trailing '_med' marker before the file extension, e.g. '/a_med.jpg' -> '/a.jpg'."""
+        return re.sub(r'_med(\..{3,4})$', r'\1', url)
diff --git a/tests/site_downloaders/test_vidble.py b/tests/site_downloaders/test_vidble.py
new file mode 100644
index 0000000..1617bf1
--- /dev/null
+++ b/tests/site_downloaders/test_vidble.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# coding=utf-8
+from unittest.mock import Mock
+
+import pytest
+
+from bdfr.resource import Resource
+from bdfr.site_downloaders.vidble import Vidble
+
+
+@pytest.mark.parametrize(('test_url', 'expected'), (
+    ('/RDFbznUvcN_med.jpg', '/RDFbznUvcN.jpg'),
+))
+def test_change_med_url(test_url: str, expected: str):
+    """The '_med' marker must be stripped from the file name."""
+    assert Vidble.change_med_url(test_url) == expected
+
+
+@pytest.mark.online
+@pytest.mark.parametrize(('test_url', 'expected'), (
+    ('https://www.vidble.com/show/UxsvAssYe5', {
+        'https://www.vidble.com/UxsvAssYe5.gif',
+    }),
+    ('https://vidble.com/show/RDFbznUvcN', {
+        'https://www.vidble.com/RDFbznUvcN.jpg',
+    }),
+    ('https://vidble.com/album/h0jTLs6B', {
+        'https://www.vidble.com/XG4eAoJ5JZ.jpg',
+        'https://www.vidble.com/IqF5UdH6Uq.jpg',
+        'https://www.vidble.com/VWuNsnLJMD.jpg',
+        'https://www.vidble.com/sMmM8O650W.jpg',
+    }),
+    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
+        'https://www.vidble.com/0q4nWakqM6kzQWxlePD8N62Dsflev0N9.mp4',
+    }),
+))
+def test_get_links(test_url: str, expected: set[str]):
+    """The links scraped from each page must equal the expected set of media URLs."""
+    assert Vidble.get_links(test_url) == expected
+
+
+@pytest.mark.online
+@pytest.mark.parametrize(('test_url', 'expected_hashes'), (
+    ('https://www.vidble.com/show/UxsvAssYe5', {
+        '0ef2f8e0e0b45936d2fb3e6fbdf67e28',
+    }),
+    ('https://vidble.com/show/RDFbznUvcN', {
+        'c2dd30a71e32369c50eed86f86efff58',
+    }),
+    ('https://vidble.com/album/h0jTLs6B', {
+        '3b3cba02e01c91f9858a95240b942c71',
+        'dd6ecf5fc9e936f9fb614eb6a0537f99',
+        'b31a942cd8cdda218ed547bbc04c3a27',
+        '6f77c570b451eef4222804bd52267481',
+    }),
+    ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', {
+        'cebe9d5f24dba3b0443e5097f160ca83',
+    }),
+))
+def test_find_resources(test_url: str, expected_hashes: set[str]):
+    """Resources found on each page must download and match the expected hashes (needs network access)."""
+    downloader = Vidble(Mock(url=test_url))
+    results = downloader.find_resources()
+    assert all(isinstance(res, Resource) for res in results)
+    for res in results:
+        res.download()
+    assert {res.hash.hexdigest() for res in results} == expected_hashes

From aee6f4add9a0e89686c194ff8be3723bb3ce24e6 Mon Sep 17 00:00:00 2001
From: Serene-Arc <serenical@gmail.com>
Date: Sat, 11 Sep 2021 12:15:35 +1000
Subject: [PATCH 14/15] Add Vidble to download factory

---
 bdfr/site_downloaders/download_factory.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py
index 911e8fb..a4e9a6a 100644
--- a/bdfr/site_downloaders/download_factory.py
+++ b/bdfr/site_downloaders/download_factory.py
@@ -16,6 +16,7 @@ from bdfr.site_downloaders.imgur import Imgur
 from bdfr.site_downloaders.pornhub import PornHub
 from bdfr.site_downloaders.redgifs import Redgifs
 from bdfr.site_downloaders.self_post import SelfPost
+from bdfr.site_downloaders.vidble import Vidble
 from bdfr.site_downloaders.youtube import Youtube
 
 
@@ -46,11 +47,12 @@ class DownloadFactory:
             return Direct
         elif re.match(r'pornhub\.com.*', sanitised_url):
             return PornHub
+        elif re.match(r'vidble\.com', sanitised_url):
+            return Vidble
         elif YoutubeDlFallback.can_handle_link(sanitised_url):
             return YoutubeDlFallback
         else:
-            raise NotADownloadableLinkError(
-                f'No downloader module exists for url {url}')
+            raise NotADownloadableLinkError(f'No downloader module exists for url {url}')
 
     @staticmethod
     def sanitise_url(url: str) -> str:

From 89e24eca62bd7cf5fd6e9e8854f87a03d76f1309 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ali=20Parlak=C3=A7=C4=B1?= <parlakciali@gmail.com>
Date: Sun, 12 Sep 2021 20:06:51 +0300
Subject: [PATCH 15/15] Bump version to v2.4

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index 2969fe0..196bd9e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,7 +4,7 @@ description_file = README.md
 description_content_type = text/markdown
 home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit
 keywords = reddit, download, archive
-version = 2.2.0
+version = 2.4.0
 author = Ali Parlakci
 author_email = parlakciali@gmail.com
 maintainer = Serene Arc