diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c16e947 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +# Declare files that will always have CRLF line endings on checkout. +*.ps1 text eol=crlf \ No newline at end of file diff --git a/.github/workflows/protect_master.yml b/.github/workflows/protect_master.yml new file mode 100644 index 0000000..6267b77 --- /dev/null +++ b/.github/workflows/protect_master.yml @@ -0,0 +1,13 @@ +name: Protect master branch + +on: + pull_request: + branches: + - master +jobs: + merge_check: + runs-on: ubuntu-latest + steps: + - name: Check if the pull request is mergeable to master + run: | + if [[ "$GITHUB_HEAD_REF" == 'development' && "$GITHUB_REPOSITORY" == 'aliparlakci/bulk-downloader-for-reddit' ]]; then exit 0; else exit 1; fi; diff --git a/README.md b/README.md index b84aa3d..a539331 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,12 @@ However, these commands are not enough. You should chain parameters in [Options] python3 -m bdfr download ./path/to/output --subreddit Python -L 10 ``` ```bash +python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100 +``` +```bash +python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context +``` +```bash python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}' ``` ```bash @@ -62,6 +68,31 @@ python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' - python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme '' ``` +Alternatively, you can pass options through a YAML file. + +```bash +python3 -m bdfr download ./path/to/output --opts my_opts.yaml +``` + +For example, running it with the following file + +```yaml +skip: [mp4, avi] +file_scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" +limit: 10 +sort: top +subreddit: + - EarthPorn + - CityPorn +``` + +would be equivalent to the following command (note that the YAML file uses `file_scheme` rather than `file-scheme`): +```bash +python3 -m bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn +``` + +If the same option is specified both in the YAML file and as a command-line argument, the command-line argument takes precedence. +
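+For instance, since `my_opts.yaml` above sets `limit: 10`, the hypothetical invocation below would download up to 50 submissions, because the explicit `-L 50` on the command line overrides the YAML `limit`: + +```bash +python3 -m bdfr download ./path/to/output --opts my_opts.yaml -L 50 +``` +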
## Options The following options are common between both the `archive` and `download` commands of the BDFR. @@ -74,6 +105,10 @@ The following options are common between both the `archive` and `download` comma - `--config` - If the path to a configuration file is supplied with this option, the BDFR will use the specified config - See [Configuration Files](#configuration) for more details +- `--opts` + - Load options from a YAML file. + - Has higher priority than the global config file but lower than command-line arguments. + - See [opts_example.yaml](./opts_example.yaml) for an example file. - `--disable-module` - Can be specified multiple times - Disables certain modules from being used @@ -92,8 +127,8 @@ The following options are common between both the `archive` and `download` comma - This option will make the BDFR use the supplied user's saved posts list as a download source - This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` - `--search` - - This will apply the specified search term to specific lists when scraping submissions - - A search term can only be applied to subreddits and multireddits, supplied with the `- s` and `-m` flags respectively + - This will apply the input search term to specific lists when scraping submissions + - A search term can only be applied when using the `--subreddit` and `--multireddit` flags - `--submitted` - This will use a user's submissions as a source - A user must be specified with `--user` @@ -192,6 +227,15 @@ The following options apply only to the `download` command. This command downloa - This skips all submissions from the specified subreddit - Can be specified multiple times - Also accepts CSV subreddit names +- `--min-score` + - This skips all submissions with fewer upvotes than the specified minimum +- `--max-score` + - This skips all submissions with more upvotes than the specified maximum +- `--min-score-ratio` + - This skips all submissions with an upvote ratio lower than the specified minimum +- `--max-score-ratio` + - This skips all submissions with an upvote ratio higher than the specified maximum + ### Archiver Options @@ -215,7 +259,10 @@ The `clone` command can take all the options listed above for both the `archive` ## Common Command Tricks -A common use case is for subreddits/users to be loaded from a file. The BDFR doesn't support this directly but it is simple enough to do through the command-line. Consider a list of usernames to download; they can be passed through to the BDFR with the following command, assuming that the usernames are in a text file: +A common use case is for subreddits/users to be loaded from a file. The BDFR supports this via YAML file options (`--opts my_opts.yaml`). + +Alternatively, you can use the command-line [xargs](https://en.wikipedia.org/wiki/Xargs) utility. +For a list of users `users.txt` (one user per line), type: ```bash cat users.txt | xargs -L 1 echo --user | xargs -L 50 python3 -m bdfr download ```
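+The same trick works for other repeatable flags. For example, with a hypothetical `subreddits.txt` containing one subreddit name per line: + +```bash +cat subreddits.txt | xargs -L 1 echo --subreddit | xargs -L 50 python3 -m bdfr download +```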
diff --git a/bdfr/__main__.py b/bdfr/__main__.py index de658de..1117a70 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -16,13 +16,19 @@ _common_options = [ click.argument('directory', type=str), click.option('--authenticate', is_flag=True, default=None), click.option('--config', type=str, default=None), + click.option('--opts', type=str, default=None), click.option('--disable-module', multiple=True, default=None, type=str), + click.option('--exclude-id', default=None, multiple=True), + click.option('--exclude-id-file', default=None, multiple=True), + click.option('--file-scheme', default=None, type=str), + click.option('--folder-scheme', default=None, type=str), click.option('--ignore-user', type=str, multiple=True, default=None), click.option('--include-id-file', multiple=True, default=None), click.option('--log', type=str, default=None), click.option('--saved', is_flag=True, default=None), click.option('--search', default=None, type=str), click.option('--submitted', is_flag=True, default=None), + click.option('--subscribed', is_flag=True, default=None), click.option('--time-format', type=str, default=None), click.option('--upvoted', is_flag=True, default=None), click.option('-L', '--limit', default=None, type=int), @@ -37,17 +43,17 @@ _common_options = [ ] _downloader_options = [ - click.option('--file-scheme', default=None, type=str), - click.option('--folder-scheme', default=None, type=str), click.option('--make-hard-links', is_flag=True, default=None), click.option('--max-wait-time', type=int, default=None), click.option('--no-dupes', is_flag=True, default=None), click.option('--search-existing', is_flag=True, default=None), - click.option('--exclude-id', default=None, multiple=True), - click.option('--exclude-id-file', default=None, multiple=True), click.option('--skip', default=None, multiple=True), click.option('--skip-domain', default=None, multiple=True), click.option('--skip-subreddit', default=None, multiple=True), + click.option('--min-score', type=int, default=None), + click.option('--max-score', type=int, default=None), + click.option('--min-score-ratio', type=float, default=None), + click.option('--max-score-ratio', type=float, default=None), ] _archiver_options = [ diff --git a/bdfr/archiver.py b/bdfr/archiver.py index 559dcc1..214111f 100644 --- a/bdfr/archiver.py +++ b/bdfr/archiver.py @@ -34,6 +34,9 @@ class Archiver(RedditConnector): f'Submission {submission.id} in {submission.subreddit.display_name} skipped' f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user') continue + if submission.id in self.excluded_submission_ids: + logger.debug(f'Object {submission.id} in exclusion list, skipping') + continue logger.debug(f'Attempting to archive submission {submission.id}') self.write_entry(submission) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 81fa3e4..46c4cf0 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -2,16 +2,21 @@ # coding=utf-8 from argparse import Namespace +from pathlib import Path from typing import Optional +import logging import click +import yaml +logger = logging.getLogger(__name__) class Configuration(Namespace): def __init__(self): super(Configuration, self).__init__() self.authenticate = False self.config = None + self.opts: Optional[str] = None self.directory: str = '.'
self.disable_module: list[str] = [] self.exclude_id = [] @@ -33,8 +38,13 @@ class Configuration(Namespace): self.skip: list[str] = [] self.skip_domain: list[str] = [] self.skip_subreddit: list[str] = [] + self.min_score = None + self.max_score = None + self.min_score_ratio = None + self.max_score_ratio = None self.sort: str = 'hot' self.submitted: bool = False + self.subscribed: bool = False self.subreddit: list[str] = [] self.time: str = 'all' self.time_format = None @@ -48,6 +58,31 @@ class Configuration(Namespace): self.comment_context: bool = False def process_click_arguments(self, context: click.Context): + if context.params.get('opts') is not None: + self.parse_yaml_options(context.params['opts']) for arg_key in context.params.keys(): - if arg_key in vars(self) and context.params[arg_key] is not None: - vars(self)[arg_key] = context.params[arg_key] + if not hasattr(self, arg_key): + logger.warning(f'Ignoring an unknown CLI argument: {arg_key}') + continue + val = context.params[arg_key] + if val is None or val == (): + # don't overwrite with an empty value + continue + setattr(self, arg_key, val) + + def parse_yaml_options(self, file_path: str): + yaml_file_loc = Path(file_path) + if not yaml_file_loc.exists(): + logger.error(f'No YAML file found at {yaml_file_loc}') + return + with open(yaml_file_loc) as file: + try: + opts = yaml.load(file, Loader=yaml.FullLoader) + except yaml.YAMLError as e: + logger.error(f'Could not parse YAML options file: {e}') + return + for arg_key, val in opts.items(): + if not hasattr(self, arg_key): + logger.warning(f'Ignoring an unknown YAML argument: {arg_key}') + continue + setattr(self, arg_key, val) diff --git a/bdfr/connector.py b/bdfr/connector.py index 506e23f..61ed8f4 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -243,9 +243,19 @@ class RedditConnector(metaclass=ABCMeta): return set(all_entries) def get_subreddits(self) -> list[praw.models.ListingGenerator]: - if self.args.subreddit: - out = [] - for reddit in self.split_args_input(self.args.subreddit): + out = [] + subscribed_subreddits = set() + if self.args.subscribed: + if self.args.authenticate: + try: + subscribed_subreddits = list(self.reddit_instance.user.subreddits(limit=None)) + subscribed_subreddits = set([s.display_name for s in subscribed_subreddits]) + except prawcore.InsufficientScope: + logger.error('BDFR has insufficient scope to access subreddit lists') + else: + logger.error('Cannot find subscribed subreddits without an authenticated instance') + if self.args.subreddit or subscribed_subreddits: + for reddit in self.split_args_input(self.args.subreddit) | subscribed_subreddits: if reddit == 'friends' and self.authenticated is False: logger.error('Cannot read friends subreddit without an authenticated instance') continue @@ -270,9 +280,7 @@ class RedditConnector(metaclass=ABCMeta): logger.debug(f'Added submissions from subreddit {reddit}') except (errors.BulkDownloaderException, praw.exceptions.PRAWException) as e: logger.error(f'Failed to get submissions for subreddit {reddit}: {e}') - return out - else: - return [] + return out def resolve_user_name(self, in_name: str) -> str: if in_name == 'me': @@ -406,7 +414,9 @@ class RedditConnector(metaclass=ABCMeta): try: assert subreddit.id except prawcore.NotFound: - raise errors.BulkDownloaderException(f'Source {subreddit.display_name} does not exist or cannot be found') + raise errors.BulkDownloaderException(f"Source {subreddit.display_name} cannot be found") + except prawcore.Redirect: + raise 
errors.BulkDownloaderException(f"Source {subreddit.display_name} does not exist") except prawcore.Forbidden: raise errors.BulkDownloaderException(f'Source {subreddit.display_name} is private and cannot be scraped') diff --git a/bdfr/default_config.cfg b/bdfr/default_config.cfg index b8039a9..c601152 100644 --- a/bdfr/default_config.cfg +++ b/bdfr/default_config.cfg @@ -1,7 +1,7 @@ [DEFAULT] client_id = U-6gk4ZCh3IeNQ client_secret = 7CZHY6AmKweZME5s50SfDGylaPg -scopes = identity, history, read, save +scopes = identity, history, read, save, mysubreddits backup_log_count = 3 max_wait_time = 120 time_format = ISO \ No newline at end of file diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 02f5c68..3b5a7e1 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -57,6 +57,19 @@ class RedditDownloader(RedditConnector): f'Submission {submission.id} in {submission.subreddit.display_name} skipped' f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user') return + elif self.args.min_score and submission.score < self.args.min_score: + logger.debug( + f"Submission {submission.id} filtered due to score {submission.score} < [{self.args.min_score}]") + return + elif self.args.max_score and self.args.max_score < submission.score: + logger.debug( + f"Submission {submission.id} filtered due to score {submission.score} > [{self.args.max_score}]") + return + elif (self.args.min_score_ratio and submission.upvote_ratio < self.args.min_score_ratio) or ( + self.args.max_score_ratio and self.args.max_score_ratio < submission.upvote_ratio + ): + logger.debug(f"Submission {submission.id} filtered due to score ratio ({submission.upvote_ratio})") + return elif not isinstance(submission, praw.models.Submission): logger.warning(f'{submission.id} is not a submission') return diff --git a/bdfr/file_name_formatter.py b/bdfr/file_name_formatter.py index 3e8832b..1dabd34 100644 --- a/bdfr/file_name_formatter.py +++ b/bdfr/file_name_formatter.py @@ -111,6 +111,9 @@ class FileNameFormatter: if not resource.extension: raise BulkDownloaderException(f'Resource from {resource.url} has no extension') file_name = str(self._format_name(resource.source_submission, self.file_format_string)) + + file_name = re.sub(r'\n', ' ', file_name) + if not re.match(r'.*\.$', file_name) and not re.match(r'^\..*', resource.extension): ending = index + '.' 
+ resource.extension else: diff --git a/bdfr/site_downloaders/download_factory.py b/bdfr/site_downloaders/download_factory.py index 49dba5f..f5e8d99 100644 --- a/bdfr/site_downloaders/download_factory.py +++ b/bdfr/site_downloaders/download_factory.py @@ -17,6 +17,7 @@ from bdfr.site_downloaders.pornhub import PornHub from bdfr.site_downloaders.redgifs import Redgifs from bdfr.site_downloaders.self_post import SelfPost from bdfr.site_downloaders.vidble import Vidble +from bdfr.site_downloaders.vreddit import VReddit from bdfr.site_downloaders.youtube import Youtube @@ -24,8 +25,10 @@ class DownloadFactory: @staticmethod def pull_lever(url: str) -> Type[BaseDownloader]: sanitised_url = DownloadFactory.sanitise_url(url) - if re.match(r'(i\.)?imgur.*\.gif.+$', sanitised_url): + if re.match(r'(i\.|m\.)?imgur', sanitised_url): return Imgur + elif re.match(r'(i\.)?(redgifs|gifdeliverynetwork)', sanitised_url): + return Redgifs elif re.match(r'.*/.*\.\w{3,4}(\?[\w;&=]*)?$', sanitised_url) and \ not DownloadFactory.is_web_resource(sanitised_url): return Direct @@ -37,16 +40,14 @@ class DownloadFactory: return Gallery elif re.match(r'gfycat\.', sanitised_url): return Gfycat - elif re.match(r'(m\.)?imgur.*', sanitised_url): - return Imgur - elif re.match(r'(redgifs|gifdeliverynetwork)', sanitised_url): - return Redgifs elif re.match(r'reddit\.com/r/', sanitised_url): return SelfPost elif re.match(r'(m\.)?youtu\.?be', sanitised_url): return Youtube elif re.match(r'i\.redd\.it.*', sanitised_url): return Direct + elif re.match(r'v\.redd\.it.*', sanitised_url): + return VReddit elif re.match(r'pornhub\.com.*', sanitised_url): return PornHub elif re.match(r'vidble\.com', sanitised_url): diff --git a/bdfr/site_downloaders/gfycat.py b/bdfr/site_downloaders/gfycat.py index 6accaab..c8da9df 100644 --- a/bdfr/site_downloaders/gfycat.py +++ b/bdfr/site_downloaders/gfycat.py @@ -21,7 +21,7 @@ class Gfycat(Redgifs): return super().find_resources(authenticator) @staticmethod - def _get_link(url: str) -> str: + def _get_link(url: str) -> set[str]: gfycat_id = re.match(r'.*/(.*?)/?$', url).group(1) url = 'https://gfycat.com/' + gfycat_id @@ -39,4 +39,4 @@ class Gfycat(Redgifs): raise SiteDownloaderError(f'Failed to download Gfycat link {url}: {e}') except json.JSONDecodeError as e: raise SiteDownloaderError(f'Did not receive valid JSON data: {e}') - return out + return {out,} diff --git a/bdfr/site_downloaders/imgur.py b/bdfr/site_downloaders/imgur.py index 1f669d0..f895785 100644 --- a/bdfr/site_downloaders/imgur.py +++ b/bdfr/site_downloaders/imgur.py @@ -41,10 +41,12 @@ class Imgur(BaseDownloader): @staticmethod def _get_data(link: str) -> dict: - link = link.rstrip('?') - if re.match(r'(?i).*\.gif.+$', link): - link = link.replace('i.imgur', 'imgur') - link = re.sub('(?i)\\.gif.+$', '', link) + try: + imgur_id = re.match(r'.*/(.*?)(\..{0,})?$', link).group(1) + gallery = 'a/' if re.search(r'.*/(.*?)(gallery/|a/)', link) else '' + link = f'https://imgur.com/{gallery}{imgur_id}' + except AttributeError: + raise SiteDownloaderError(f'Could not extract Imgur ID from {link}') res = Imgur.retrieve_url(link, cookies={'over18': '1', 'postpagebeta': '0'}) diff --git a/bdfr/site_downloaders/redgifs.py b/bdfr/site_downloaders/redgifs.py index a62fedb..2134aa3 100644 --- a/bdfr/site_downloaders/redgifs.py +++ b/bdfr/site_downloaders/redgifs.py @@ -2,6 +2,7 @@ import json import re +import urllib.parse from typing import Optional from praw.models import Submission @@ -17,31 +18,44 @@ class Redgifs(BaseDownloader): 
super().__init__(post) def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: - media_url = self._get_link(self.post.url) - return [Resource(self.post, media_url, Resource.retry_download(media_url), '.mp4')] + media_urls = self._get_link(self.post.url) + return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls] @staticmethod - def _get_link(url: str) -> str: + def _get_link(url: str) -> set[str]: try: - redgif_id = re.match(r'.*/(.*?)/?$', url).group(1) + redgif_id = re.match(r'.*/(.*?)(\..{0,})?$', url).group(1) except AttributeError: raise SiteDownloaderError(f'Could not extract Redgifs ID from {url}') - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) ' - 'Chrome/90.0.4430.93 Safari/537.36', - } - - content = Redgifs.retrieve_url(f'https://api.redgifs.com/v1/gfycats/{redgif_id}', headers=headers) + content = Redgifs.retrieve_url(f'https://api.redgifs.com/v2/gifs/{redgif_id}') if content is None: raise SiteDownloaderError('Could not read the page source') try: - out = json.loads(content.text)['gfyItem']['mp4Url'] - except (KeyError, AttributeError): - raise SiteDownloaderError('Failed to find JSON data in page') + response_json = json.loads(content.text) except json.JSONDecodeError as e: raise SiteDownloaderError(f'Received data was not valid JSON: {e}') + out = set() + try: + if response_json['gif']['type'] == 1: # type 1 is a video + out.add(response_json['gif']['urls']['hd']) + elif response_json['gif']['type'] == 2: # type 2 is an image + if response_json['gif']['gallery']: + content = Redgifs.retrieve_url( + f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}') + response_json = json.loads(content.text) + out = {p['urls']['hd'] for p in response_json['gifs']} + else: + out.add(response_json['gif']['urls']['hd']) + else: + raise KeyError + except (KeyError, AttributeError): + raise SiteDownloaderError('Failed to find JSON data in page') + + # Update subdomain if old one is returned + out = {re.sub('thumbs2', 'thumbs3', link) for link in out} + out = {re.sub('thumbs3', 'thumbs4', link) for link in out} return out diff --git a/bdfr/site_downloaders/vreddit.py b/bdfr/site_downloaders/vreddit.py new file mode 100644 index 0000000..ad526b4 --- /dev/null +++ b/bdfr/site_downloaders/vreddit.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import logging +import tempfile +from pathlib import Path +from typing import Callable, Optional + +import yt_dlp +from praw.models import Submission + +from bdfr.exceptions import NotADownloadableLinkError, SiteDownloaderError +from bdfr.resource import Resource +from bdfr.site_authenticator import SiteAuthenticator +from bdfr.site_downloaders.youtube import Youtube + +logger = logging.getLogger(__name__) + + +class VReddit(Youtube): + def __init__(self, post: Submission): + super().__init__(post) + + def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]: + ytdl_options = { + 'playlistend': 1, + 'nooverwrites': True, + } + download_function = self._download_video(ytdl_options) + extension = self.get_video_attributes(self.post.url)['ext'] + res = Resource(self.post, self.post.url, download_function, extension) + return [res] + + @staticmethod + def get_video_attributes(url: str) -> dict: + result = VReddit.get_video_data(url) + if 'ext' in result: + return result + else: + try: + result = result["entries"][0] + return result + except Exception as e: + logger.exception(e) + 
raise NotADownloadableLinkError(f'Video info extraction failed for {url}') diff --git a/bdfr/site_downloaders/youtube.py b/bdfr/site_downloaders/youtube.py index f18f405..70c35ae 100644 --- a/bdfr/site_downloaders/youtube.py +++ b/bdfr/site_downloaders/youtube.py @@ -58,7 +58,7 @@ class Youtube(BaseDownloader): return download @staticmethod - def get_video_attributes(url: str) -> dict: + def get_video_data(url: str) -> dict: yt_logger = logging.getLogger('youtube-dl') yt_logger.setLevel(logging.CRITICAL) with yt_dlp.YoutubeDL({'logger': yt_logger, }) as ydl: @@ -67,6 +67,11 @@ class Youtube(BaseDownloader): except Exception as e: logger.exception(e) raise NotADownloadableLinkError(f'Video info extraction failed for {url}') + return result + + @staticmethod + def get_video_attributes(url: str) -> dict: + result = Youtube.get_video_data(url) if 'ext' in result: return result else: diff --git a/dev_requirements.txt b/dev_requirements.txt new file mode 100644 index 0000000..e079f8a --- /dev/null +++ b/dev_requirements.txt @@ -0,0 +1 @@ +pytest diff --git a/opts_example.yaml b/opts_example.yaml new file mode 100644 index 0000000..22fca7d --- /dev/null +++ b/opts_example.yaml @@ -0,0 +1,9 @@ +skip: [mp4, avi, mov] +file_scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" +limit: 10 +sort: top +time: all +no_dupes: true +subreddit: + - EarthPorn + - CityPorn diff --git a/requirements.txt b/requirements.txt index 8ceffdb..83378f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ ffmpeg-python>=0.2.0 praw>=7.2.0 pyyaml>=5.4.1 requests>=2.25.1 -yt-dlp>=2021.9.25 \ No newline at end of file +yt-dlp>=2022.9.1 \ No newline at end of file diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1 new file mode 100644 index 0000000..be2d2cb --- /dev/null +++ b/scripts/extract_failed_ids.ps1 @@ -0,0 +1,21 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./failed.txt" +} + +Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output +Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output +Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output diff --git a/scripts/extract_failed_ids.sh b/scripts/extract_failed_ids.sh index f96bd9a..64d1e72 100755 --- a/scripts/extract_failed_ids.sh +++ b/scripts/extract_failed_ids.sh @@ -7,17 +7,10 @@ else exit 1 fi -if [ -n "$2" ]; then - output="$2" - echo "Outputting IDs to $output" -else - output="./failed.txt" -fi - { grep 'Could not download submission' "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ; grep 'Failed to download resource' "$file" | awk '{ print $15 }' ; grep 'failed to download submission' "$file" | awk '{ print $14 }' 
| rev | cut -c 2- | rev ; - grep 'Failed to write file' "$file" | awk '{ print $13 }' | rev | cut -c 2- | rev ; + grep 'Failed to write file' "$file" | awk '{ print $14 }' ; grep 'skipped due to disabled module' "$file" | awk '{ print $9 }' ; -} >>"$output" +} diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1 new file mode 100644 index 0000000..00722f1 --- /dev/null +++ b/scripts/extract_successful_ids.ps1 @@ -0,0 +1,21 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./successful.txt" +} + +Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output +Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output +Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output +Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output diff --git a/scripts/extract_successful_ids.sh b/scripts/extract_successful_ids.sh index 011ba6c..f2128e5 100755 --- a/scripts/extract_successful_ids.sh +++ b/scripts/extract_successful_ids.sh @@ -7,17 +7,11 @@ else exit 1 fi -if [ -n "$2" ]; then - output="$2" - echo "Outputting IDs to $output" -else - output="./successful.txt" -fi - { grep 'Downloaded submission' "$file" | awk '{ print $(NF-2) }' ; grep 'Resource hash' "$file" | awk '{ print $(NF-2) }' ; grep 'Download filter' "$file" | awk '{ print $(NF-3) }' ; grep 'already exists, continuing' "$file" | awk '{ print $(NF-3) }' ; grep 'Hard link made' "$file" | awk '{ print $(NF) }' ; -} >> "$output" + grep 'filtered due to score' "$file" | awk '{ print $9 }' +} diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1 new file mode 100644 index 0000000..5d85b09 --- /dev/null +++ b/scripts/print_summary.ps1 @@ -0,0 +1,30 @@ +if (Test-Path -Path $args[0] -PathType Leaf) { + $file=$args[0] +} +else { + Write-Host "CANNOT FIND LOG FILE" + Exit 1 +} + +if ($args[1] -ne $null) { + $output=$args[1] + Write-Host "Outputting IDs to $output" +} +else { + $output="./successful.txt" +} + +Write-Host -NoNewline "Downloaded submissions: " +Write-Host (Select-String -Path $file -Pattern "Downloaded submission" -AllMatches).Matches.Count +Write-Host -NoNewline "Failed downloads: " +Write-Host (Select-String -Path $file -Pattern "failed to download submission" -AllMatches).Matches.Count +Write-Host -NoNewline "Files already downloaded: " +Write-Host (Select-String -Path $file -Pattern "already exists, continuing" -AllMatches).Matches.Count +Write-Host -NoNewline "Hard linked submissions: " +Write-Host (Select-String -Path $file -Pattern "Hard link made" -AllMatches).Matches.Count +Write-Host -NoNewline "Excluded submissions: " +Write-Host (Select-String -Path $file -Pattern "in exclusion list" -AllMatches).Matches.Count +Write-Host -NoNewline "Files with existing hash skipped: " +Write-Host (Select-String -Path $file -Pattern "downloaded elsewhere" -AllMatches).Matches.Count +Write-Host -NoNewline 
"Submissions from excluded subreddits: " +Write-Host (Select-String -Path $file -Pattern "in skip list" -AllMatches).Matches.Count diff --git a/scripts/tests/example_logfiles/succeed_score_filter.txt b/scripts/tests/example_logfiles/succeed_score_filter.txt new file mode 100644 index 0000000..8f31ef7 --- /dev/null +++ b/scripts/tests/example_logfiles/succeed_score_filter.txt @@ -0,0 +1,2 @@ +[2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50] +[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1] \ No newline at end of file diff --git a/scripts/tests/test_extract_failed_ids.bats b/scripts/tests/test_extract_failed_ids.bats index 75b9bff..04eada6 100644 --- a/scripts/tests/test_extract_failed_ids.bats +++ b/scripts/tests/test_extract_failed_ids.bats @@ -14,30 +14,35 @@ teardown() { @test "fail no downloader module" { run ../extract_failed_ids.sh ./example_logfiles/failed_no_downloader.txt + echo "$output" > failed.txt assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "3" ]; assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ]; } @test "fail resource error" { run ../extract_failed_ids.sh ./example_logfiles/failed_resource_error.txt + echo "$output" > failed.txt assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ]; assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ]; } @test "fail site downloader error" { run ../extract_failed_ids.sh ./example_logfiles/failed_sitedownloader_error.txt + echo "$output" > failed.txt assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "2" ]; assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ]; } @test "fail failed file write" { run ../extract_failed_ids.sh ./example_logfiles/failed_write_error.txt + echo "$output" > failed.txt assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ]; assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ]; } @test "fail disabled module" { run ../extract_failed_ids.sh ./example_logfiles/failed_disabled_module.txt + echo "$output" > failed.txt assert [ "$( wc -l 'failed.txt' | awk '{ print $1 }' )" -eq "1" ]; assert [ "$( grep -Ecv '\w{6,7}' 'failed.txt' )" -eq "0" ]; } diff --git a/scripts/tests/test_extract_successful_ids.bats b/scripts/tests/test_extract_successful_ids.bats index 364bedb..6ff54bc 100644 --- a/scripts/tests/test_extract_successful_ids.bats +++ b/scripts/tests/test_extract_successful_ids.bats @@ -9,30 +9,42 @@ teardown() { @test "success downloaded submission" { run ../extract_successful_ids.sh ./example_logfiles/succeed_downloaded_submission.txt + echo "$output" > successful.txt assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "7" ]; assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; } @test "success resource hash" { run ../extract_successful_ids.sh ./example_logfiles/succeed_resource_hash.txt + echo "$output" > successful.txt assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ]; assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; } @test "success download filter" { run ../extract_successful_ids.sh ./example_logfiles/succeed_download_filter.txt + echo "$output" > successful.txt assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "3" ]; assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; } @test "success already exists" { run ../extract_successful_ids.sh ./example_logfiles/succeed_already_exists.txt + echo "$output" > successful.txt assert [ "$( wc -l 'successful.txt' | awk 
'{ print $1 }' )" -eq "3" ]; assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; } @test "success hard link" { run ../extract_successful_ids.sh ./example_logfiles/succeed_hard_link.txt + echo "$output" > successful.txt assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "1" ]; assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; } + +@test "success score filter" { + run ../extract_successful_ids.sh ./example_logfiles/succeed_score_filter.txt + echo "$output" > successful.txt + assert [ "$( wc -l 'successful.txt' | awk '{ print $1 }' )" -eq "2" ]; + assert [ "$( grep -Ecv '\w{6,7}' 'successful.txt' )" -eq "0" ]; +} diff --git a/setup.cfg b/setup.cfg index 198ebe7..67a1deb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,7 +4,7 @@ description_file = README.md description_content_type = text/markdown home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit keywords = reddit, download, archive -version = 2.5.2 +version = 2.6.0 author = Ali Parlakci author_email = parlakciali@gmail.com maintainer = Serene Arc diff --git a/tests/archive_entry/test_submission_archive_entry.py b/tests/archive_entry/test_submission_archive_entry.py index 60f47b5..045eabd 100644 --- a/tests/archive_entry/test_submission_archive_entry.py +++ b/tests/archive_entry/test_submission_archive_entry.py @@ -34,7 +34,7 @@ def test_get_comments(test_submission_id: str, min_comments: int, reddit_instanc 'created_utc': 1615583837, 'permalink': '/r/australia/comments/m3reby/this_little_guy_fell_out_of_a_tree_and_in_front/' }), - ('m3kua3', {'author': 'DELETED'}), + # TODO: add deleted user test case )) def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit): test_submission = reddit_instance.submission(id=test_submission_id) diff --git a/tests/conftest.py b/tests/conftest.py index da02948..a61d8d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,7 +23,7 @@ def reddit_instance(): @pytest.fixture(scope='session') def authenticated_reddit_instance(): - test_config_path = Path('test_config.cfg') + test_config_path = Path('./tests/test_config.cfg') if not test_config_path.exists(): pytest.skip('Refresh token must be provided to authenticate with OAuth2') cfg_parser = configparser.ConfigParser() diff --git a/tests/integration_tests/test_archive_integration.py b/tests/integration_tests/test_archive_integration.py index 5ef04a6..7b9a48d 100644 --- a/tests/integration_tests/test_archive_integration.py +++ b/tests/integration_tests/test_archive_integration.py @@ -10,11 +10,11 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('../test_config.cfg').exists() +does_test_config_exist = Path('./tests/test_config.cfg').exists() def copy_test_config(run_path: Path): - shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg')) + shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, 'test_config.cfg')) def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path): @@ -23,7 +23,7 @@ def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path): 'archive', str(run_path), '-v', - '--config', str(Path(run_path, '../test_config.cfg')), + '--config', str(Path(run_path, 'test_config.cfg')), '--log', str(Path(run_path, 'test_log.txt')), ] + test_args return out @@ -121,3 +121,33 @@ def test_cli_archive_ignore_user(test_args: list[str], tmp_path: Path): assert result.exit_code == 0 assert 'being an ignored user' in result.output assert 'Attempting to 
archive submission' not in result.output + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['--file-scheme', '{TITLE}', '-l', 'suy011'], +)) +def test_cli_archive_file_format(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_archive_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'Attempting to archive submission' in result.output + assert re.search('format at /.+?/Judge says Trump and two adult', result.output) + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize('test_args', ( + ['-l', 'm2601g', '--exclude-id', 'm2601g'], +)) +def test_cli_archive_links_exclusion(test_args: list[str], tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_archive_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert 'in exclusion list' in result.output + assert 'Attempting to archive' not in result.output diff --git a/tests/integration_tests/test_clone_integration.py b/tests/integration_tests/test_clone_integration.py index 343b2d3..f9bf91a 100644 --- a/tests/integration_tests/test_clone_integration.py +++ b/tests/integration_tests/test_clone_integration.py @@ -9,19 +9,20 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('../test_config.cfg').exists() +does_test_config_exist = Path('./tests/test_config.cfg').exists() def copy_test_config(run_path: Path): - shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg')) + shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, 'test_config.cfg')) def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path): + copy_test_config(tmp_path) out = [ 'clone', str(tmp_path), '-v', - '--config', 'test_config.cfg', + '--config', str(Path(tmp_path, 'test_config.cfg')), '--log', str(Path(tmp_path, 'test_log.txt')), ] + test_args return out @@ -31,8 +32,10 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g'], + ['-l', '6l7778'], ['-s', 'TrollXChromosomes/', '-L', 1], + ['-l', 'eiajjw'], + ['-l', 'xl0lhi'], )) def test_cli_scrape_general(test_args: list[str], tmp_path: Path): runner = CliRunner() diff --git a/tests/integration_tests/test_download_integration.py b/tests/integration_tests/test_download_integration.py index bd53382..a9f0e0e 100644 --- a/tests/integration_tests/test_download_integration.py +++ b/tests/integration_tests/test_download_integration.py @@ -9,11 +9,11 @@ from click.testing import CliRunner from bdfr.__main__ import cli -does_test_config_exist = Path('../test_config.cfg').exists() +does_test_config_exist = Path('./tests/test_config.cfg').exists() def copy_test_config(run_path: Path): - shutil.copy(Path('../test_config.cfg'), Path(run_path, '../test_config.cfg')) + shutil.copy(Path('./tests/test_config.cfg'), Path(run_path, './test_config.cfg')) def create_basic_args_for_download_runner(test_args: list[str], run_path: Path): @@ -21,7 +21,7 @@ def 
create_basic_args_for_download_runner(test_args: list[str], run_path: Path): out = [ 'download', str(run_path), '-v', - '--config', str(Path(run_path, '../test_config.cfg')), + '--config', str(Path(run_path, './test_config.cfg')), '--log', str(Path(run_path, 'test_log.txt')), ] + test_args return out @@ -31,23 +31,23 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-s', 'Mindustry', '-L', 1], - ['-s', 'r/Mindustry', '-L', 1], - ['-s', 'r/mindustry', '-L', 1], - ['-s', 'mindustry', '-L', 1], - ['-s', 'https://www.reddit.com/r/TrollXChromosomes/', '-L', 1], - ['-s', 'r/TrollXChromosomes/', '-L', 1], - ['-s', 'TrollXChromosomes/', '-L', 1], - ['-s', 'trollxchromosomes', '-L', 1], - ['-s', 'trollxchromosomes,mindustry,python', '-L', 1], - ['-s', 'trollxchromosomes, mindustry, python', '-L', 1], - ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day'], - ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new'], - ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new'], - ['-s', 'trollxchromosomes', '-L', 1, '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 1, '--sort', 'new', '--search', 'women'], - ['-s', 'trollxchromosomes', '-L', 1, '--time', 'day', '--sort', 'new', '--search', 'women'], + ['-s', 'Mindustry', '-L', 3], + ['-s', 'r/Mindustry', '-L', 3], + ['-s', 'r/mindustry', '-L', 3], + ['-s', 'mindustry', '-L', 3], + ['-s', 'https://www.reddit.com/r/TrollXChromosomes/', '-L', 3], + ['-s', 'r/TrollXChromosomes/', '-L', 3], + ['-s', 'TrollXChromosomes/', '-L', 3], + ['-s', 'trollxchromosomes', '-L', 3], + ['-s', 'trollxchromosomes,mindustry,python', '-L', 3], + ['-s', 'trollxchromosomes, mindustry, python', '-L', 3], + ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day'], + ['-s', 'trollxchromosomes', '-L', 3, '--sort', 'new'], + ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--sort', 'new'], + ['-s', 'trollxchromosomes', '-L', 3, '--search', 'women'], + ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--search', 'women'], + ['-s', 'trollxchromosomes', '-L', 3, '--sort', 'new', '--search', 'women'], + ['-s', 'trollxchromosomes', '-L', 3, '--time', 'day', '--sort', 'new', '--search', 'women'], )) def test_cli_download_subreddits(test_args: list[str], tmp_path: Path): runner = CliRunner() @@ -60,10 +60,12 @@ def test_cli_download_subreddits(test_args: list[str], tmp_path: Path): @pytest.mark.online @pytest.mark.reddit +@pytest.mark.slow @pytest.mark.authenticated @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( ['-s', 'hentai', '-L', 10, '--search', 'red', '--authenticate'], + ['--authenticate', '--subscribed', '-L', 10], )) def test_cli_download_search_subreddits_authenticated(test_args: list[str], tmp_path: Path): runner = CliRunner() @@ -93,10 +95,9 @@ def test_cli_download_user_specific_subreddits(test_args: list[str], tmp_path: P @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g'], - ['-l', 'https://www.reddit.com/r/TrollXChromosomes/comments/m2601g/its_a_step_in_the_right_direction/'], + ['-l', '6l7778'], + ['-l', 
'https://reddit.com/r/EmpireDidNothingWrong/comments/6l7778/technically_true/'], ['-l', 'm3hxzd'], # Really long title used to overflow filename limit - ['-l', 'm3kua3'], # Has a deleted user ['-l', 'm5bqkf'], # Resource leading to a 404 )) def test_cli_download_links(test_args: list[str], tmp_path: Path): @@ -266,7 +267,7 @@ def test_cli_download_use_default_config(tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g', '--exclude-id', 'm2601g'], + ['-l', '6l7778', '--exclude-id', '6l7778'], )) def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path): runner = CliRunner() @@ -281,7 +282,7 @@ def test_cli_download_links_exclusion(test_args: list[str], tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g', '--skip-subreddit', 'trollxchromosomes'], + ['-l', '6l7778', '--skip-subreddit', 'EmpireDidNothingWrong'], ['-s', 'trollxchromosomes', '--skip-subreddit', 'trollxchromosomes', '-L', '3'], )) def test_cli_download_subreddit_exclusion(test_args: list[str], tmp_path: Path): @@ -312,9 +313,8 @@ def test_cli_download_file_scheme_warning(test_args: list[str], tmp_path: Path): @pytest.mark.reddit @pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') @pytest.mark.parametrize('test_args', ( - ['-l', 'm2601g', '--disable-module', 'Direct'], - ['-l', 'nnb9vs', '--disable-module', 'YoutubeDlFallback'], - ['-l', 'nnb9vs', '--disable-module', 'youtubedlfallback'], + ['-l', 'n9w9fo', '--disable-module', 'SelfPost'], + ['-l', 'nnb9vs', '--disable-module', 'VReddit'], )) def test_cli_download_disable_modules(test_args: list[str], tmp_path: Path): runner = CliRunner() @@ -352,3 +352,20 @@ def test_cli_download_ignore_user(test_args: list[str], tmp_path: Path): assert result.exit_code == 0 assert 'Downloaded submission' not in result.output assert 'being an ignored user' in result.output + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.skipif(not does_test_config_exist, reason='A test config file is required for integration tests') +@pytest.mark.parametrize(('test_args', 'was_filtered'), ( + (['-l', 'ljyy27', '--min-score', '50'], True), + (['-l', 'ljyy27', '--min-score', '1'], False), + (['-l', 'ljyy27', '--max-score', '1'], True), + (['-l', 'ljyy27', '--max-score', '100'], False), +)) +def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp_path: Path): + runner = CliRunner() + test_args = create_basic_args_for_download_runner(test_args, tmp_path) + result = runner.invoke(cli, test_args) + assert result.exit_code == 0 + assert ('filtered due to score' in result.output) == was_filtered diff --git a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py index 2c4a4f6..92ba27d 100644 --- a/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py +++ b/tests/site_downloaders/fallback_downloaders/test_ytdlp_fallback.py @@ -15,6 +15,7 @@ from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallb ('https://www.youtube.com/watch?v=P19nvJOmqCc', True), ('https://www.example.com/test', False), ('https://milesmatrix.bandcamp.com/album/la-boum/', False), + ('https://v.redd.it/54i8fvzev3u81', True), 
)) def test_can_handle_link(test_url: str, expected: bool): result = YtdlpFallback.can_handle_link(test_url) assert result == expected @@ -35,8 +36,8 @@ def test_info_extraction_bad(test_url: str): @pytest.mark.parametrize(('test_url', 'expected_hash'), ( ('https://streamable.com/dt46y', 'b7e465adaade5f2b6d8c2b4b7d0a2878'), ('https://streamable.com/t8sem', '49b2d1220c485455548f1edbc05d4ecf'), - ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', '21968d3d92161ea5e0abdcaf6311b06c'), - ('https://v.redd.it/9z1dnk3xr5k61', '351a2b57e888df5ccbc508056511f38d'), + ('https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/', '03087ce64f88f438bad6849858c9b7f0'), + ('https://v.redd.it/9z1dnk3xr5k61', '9ce39c8e46b6534a0b3f164a792d51c8'), )) def test_find_resources(test_url: str, expected_hash: str): test_submission = MagicMock() diff --git a/tests/site_downloaders/test_download_factory.py b/tests/site_downloaders/test_download_factory.py index 134396c..d3fec6f 100644 --- a/tests/site_downloaders/test_download_factory.py +++ b/tests/site_downloaders/test_download_factory.py @@ -16,6 +16,7 @@ from bdfr.site_downloaders.imgur import Imgur from bdfr.site_downloaders.pornhub import PornHub from bdfr.site_downloaders.redgifs import Redgifs from bdfr.site_downloaders.self_post import SelfPost +from bdfr.site_downloaders.vreddit import VReddit from bdfr.site_downloaders.youtube import Youtube @@ -23,12 +24,12 @@ from bdfr.site_downloaders.youtube import Youtube @pytest.mark.parametrize(('test_submission_url', 'expected_class'), ( ('https://www.reddit.com/r/TwoXChromosomes/comments/lu29zn/i_refuse_to_live_my_life' '_in_anything_but_comfort/', SelfPost), - ('https://i.imgur.com/bZx1SJQ.jpg', Direct), + ('https://i.imgur.com/bZx1SJQ.jpg', Imgur), ('https://i.redd.it/affyv0axd5k61.png', Direct), - ('https://imgur.com/3ls94yv.jpeg', Direct), + ('https://imgur.com/3ls94yv.jpeg', Imgur), ('https://i.imgur.com/BuzvZwb.gifv', Imgur), ('https://imgur.com/BuzvZwb.gifv', Imgur), - ('https://i.imgur.com/6fNdLst.gif', Direct), + ('https://i.imgur.com/6fNdLst.gif', Imgur), ('https://imgur.com/a/MkxAzeg', Imgur), ('https://i.imgur.com/OGeVuAe.giff', Imgur), ('https://www.reddit.com/gallery/lu93m7', Gallery), @@ -39,15 +40,15 @@ from bdfr.site_downloaders.youtube import Youtube ('https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse', Redgifs), ('https://youtu.be/DevfjHOhuFc', Youtube), ('https://m.youtube.com/watch?v=kr-FeojxzUM', Youtube), - ('https://i.imgur.com/3SKrQfK.jpg?1', Direct), + ('https://i.imgur.com/3SKrQfK.jpg?1', Imgur), ('https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781', Direct), ('https://m.imgur.com/a/py3RW0j', Imgur), - ('https://v.redd.it/9z1dnk3xr5k61', YtdlpFallback), + ('https://v.redd.it/9z1dnk3xr5k61', VReddit), ('https://streamable.com/dt46y', YtdlpFallback), ('https://vimeo.com/channels/31259/53576664', YtdlpFallback), ('http://video.pbs.org/viralplayer/2365173446/', YtdlpFallback), ('https://www.pornhub.com/view_video.php?viewkey=ph5a2ee0461a8d0', PornHub), ('https://www.patreon.com/posts/minecart-track-59346560', Gallery), )) def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownloader, reddit_instance: praw.Reddit): result = DownloadFactory.pull_lever(test_submission_url) diff --git a/tests/site_downloaders/test_erome.py index e06fab5..2f3701d 100644 --- a/tests/site_downloaders/test_erome.py +++
b/tests/site_downloaders/test_erome.py @@ -11,16 +11,16 @@ from bdfr.site_downloaders.erome import Erome @pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected_urls'), ( ('https://www.erome.com/a/vqtPuLXh', ( - r'https://s\d+.erome.com/365/vqtPuLXh/KH2qBT99_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/vqtPuLXh/KH2qBT99_480p.mp4', )), ('https://www.erome.com/a/ORhX0FZz', ( - r'https://s\d+.erome.com/355/ORhX0FZz/9IYQocM9_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/9eEDc8xm_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/EvApC7Rp_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/LruobtMs_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/TJNmSUU5_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/X11Skh6Z_480p.mp4', - r'https://s\d+.erome.com/355/ORhX0FZz/bjlTkpn7_480p.mp4' + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9IYQocM9_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9eEDc8xm_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/EvApC7Rp_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/LruobtMs_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/TJNmSUU5_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/X11Skh6Z_480p.mp4', + r'https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/bjlTkpn7_480p.mp4' )), )) def test_get_link(test_url: str, expected_urls: tuple[str]): diff --git a/tests/site_downloaders/test_gfycat.py b/tests/site_downloaders/test_gfycat.py index 981d01d..3b40840 100644 --- a/tests/site_downloaders/test_gfycat.py +++ b/tests/site_downloaders/test_gfycat.py @@ -16,7 +16,7 @@ from bdfr.site_downloaders.gfycat import Gfycat )) def test_get_link(test_url: str, expected_url: str): result = Gfycat._get_link(test_url) - assert result == expected_url + assert result.pop() == expected_url @pytest.mark.online diff --git a/tests/site_downloaders/test_redgifs.py b/tests/site_downloaders/test_redgifs.py index 571f044..b73ee95 100644 --- a/tests/site_downloaders/test_redgifs.py +++ b/tests/site_downloaders/test_redgifs.py @@ -2,6 +2,7 @@ # coding=utf-8 from unittest.mock import Mock +import re import pytest @@ -12,30 +13,50 @@ from bdfr.site_downloaders.redgifs import Redgifs @pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected'), ( ('https://redgifs.com/watch/frighteningvictorioussalamander', - 'https://thumbs2.redgifs.com/FrighteningVictoriousSalamander.mp4'), + {'FrighteningVictoriousSalamander.mp4'}), ('https://redgifs.com/watch/springgreendecisivetaruca', - 'https://thumbs2.redgifs.com/SpringgreenDecisiveTaruca.mp4'), + {'SpringgreenDecisiveTaruca.mp4'}), ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', - 'https://thumbs2.redgifs.com/PalegoldenrodRawHalibut.mp4'), + {'PalegoldenrodRawHalibut.mp4'}), + ('https://redgifs.com/watch/hollowintentsnowyowl', + {'HollowIntentSnowyowl-large.jpg'}), + ('https://www.redgifs.com/watch/lustrousstickywaxwing', + {'EntireEnchantingHypsilophodon-large.jpg', + 'FancyMagnificentAdamsstaghornedbeetle-large.jpg', + 'LustrousStickyWaxwing-large.jpg', + 'ParchedWindyArmyworm-large.jpg', + 'ThunderousColorlessErmine-large.jpg', + 'UnripeUnkemptWoodpecker-large.jpg'}), )) -def test_get_link(test_url: str, expected: str): +def test_get_link(test_url: str, expected: set[str]): result = Redgifs._get_link(test_url) - assert result == expected + result = list(result) + patterns = [r'https://thumbs\d\.redgifs\.com/' + e + r'.*' for e in expected] + assert all(any(re.match(p, r) for p in patterns) for r in result) @pytest.mark.online -@pytest.mark.parametrize(('test_url', 'expected_hash'), (
- ('https://redgifs.com/watch/frighteningvictorioussalamander', '4007c35d9e1f4b67091b5f12cffda00a'), - ('https://redgifs.com/watch/springgreendecisivetaruca', '8dac487ac49a1f18cc1b4dabe23f0869'), - ('https://redgifs.com/watch/leafysaltydungbeetle', '076792c660b9c024c0471ef4759af8bd'), - ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', '46d5aa77fe80c6407de1ecc92801c10e'), +@pytest.mark.parametrize(('test_url', 'expected_hashes'), ( + ('https://redgifs.com/watch/frighteningvictorioussalamander', {'4007c35d9e1f4b67091b5f12cffda00a'}), + ('https://redgifs.com/watch/springgreendecisivetaruca', {'8dac487ac49a1f18cc1b4dabe23f0869'}), + ('https://redgifs.com/watch/leafysaltydungbeetle', {'076792c660b9c024c0471ef4759af8bd'}), + ('https://www.redgifs.com/watch/palegoldenrodrawhalibut', {'46d5aa77fe80c6407de1ecc92801c10e'}), + ('https://redgifs.com/watch/hollowintentsnowyowl', + {'5ee51fa15e0a58e98f11dea6a6cca771'}), + ('https://www.redgifs.com/watch/lustrousstickywaxwing', + {'b461e55664f07bed8d2f41d8586728fa', + '30ba079a8ed7d7adf17929dc3064c10f', + '0d4f149d170d29fc2f015c1121bab18b', + '53987d99cfd77fd65b5fdade3718f9f1', + 'fb2e7d972846b83bf4016447d3060d60', + '44fb28f72ec9a5cca63fa4369ab4f672'}), )) -def test_download_resource(test_url: str, expected_hash: str): +def test_download_resource(test_url: str, expected_hashes: set[str]): mock_submission = Mock() mock_submission.url = test_url test_site = Redgifs(mock_submission) - resources = test_site.find_resources() - assert len(resources) == 1 - assert isinstance(resources[0], Resource) - resources[0].download() - assert resources[0].hash.hexdigest() == expected_hash + results = test_site.find_resources() + assert all([isinstance(res, Resource) for res in results]) + [res.download() for res in results] + hashes = set([res.hash.hexdigest() for res in results]) + assert hashes == set(expected_hashes) diff --git a/tests/site_downloaders/test_vidble.py b/tests/site_downloaders/test_vidble.py index 0c5ebb2..f6ddd56 100644 --- a/tests/site_downloaders/test_vidble.py +++ b/tests/site_downloaders/test_vidble.py @@ -30,9 +30,6 @@ def test_change_med_url(test_url: str, expected: str): 'https://www.vidble.com/VWuNsnLJMD.jpg', 'https://www.vidble.com/sMmM8O650W.jpg', }), - ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', { - 'https://www.vidble.com/0q4nWakqM6kzQWxlePD8N62Dsflev0N9.mp4', - }), ('https://www.vidble.com/pHuwWkOcEb', { 'https://www.vidble.com/pHuwWkOcEb.jpg', }), @@ -42,6 +39,7 @@ def test_get_links(test_url: str, expected: set[str]): assert results == expected +@pytest.mark.online @pytest.mark.parametrize(('test_url', 'expected_hashes'), ( ('https://www.vidble.com/show/UxsvAssYe5', { '0ef2f8e0e0b45936d2fb3e6fbdf67e28', @@ -55,9 +53,6 @@ def test_get_links(test_url: str, expected: set[str]): 'b31a942cd8cdda218ed547bbc04c3a27', '6f77c570b451eef4222804bd52267481', }), - ('https://vidble.com/watch?v=0q4nWakqM6kzQWxlePD8N62Dsflev0N9', { - 'cebe9d5f24dba3b0443e5097f160ca83', - }), ('https://www.vidble.com/pHuwWkOcEb', { '585f486dd0b2f23a57bddbd5bf185bc7', }), diff --git a/tests/site_downloaders/test_vreddit.py b/tests/site_downloaders/test_vreddit.py new file mode 100644 index 0000000..da05c1b --- /dev/null +++ b/tests/site_downloaders/test_vreddit.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# coding=utf-8 + +from unittest.mock import MagicMock + +import pytest + +from bdfr.exceptions import NotADownloadableLinkError +from bdfr.resource import Resource +from bdfr.site_downloaders.vreddit import VReddit + + +@pytest.mark.online 
+@pytest.mark.slow +@pytest.mark.parametrize(('test_url', 'expected_hash'), ( + ('https://www.reddit.com/user/Xomb_Forever/comments/u5p2kj/hold_up/', '379ef5cd87203544d51caee31e72d210'), +)) +def test_find_resources_good(test_url: str, expected_hash: str): + test_submission = MagicMock() + test_submission.url = test_url + downloader = VReddit(test_submission) + resources = downloader.find_resources() + assert len(resources) == 1 + assert isinstance(resources[0], Resource) + resources[0].download() + assert resources[0].hash.hexdigest() == expected_hash + + +@pytest.mark.online +@pytest.mark.parametrize('test_url', ( + 'https://www.polygon.com/disney-plus/2020/5/14/21249881/gargoyles-animated-series-disney-plus-greg-weisman' + '-interview-oj-simpson-goliath-chronicles', +)) +def test_find_resources_bad(test_url: str): + test_submission = MagicMock() + test_submission.url = test_url + downloader = VReddit(test_submission) + with pytest.raises(NotADownloadableLinkError): + downloader.find_resources() diff --git a/tests/site_downloaders/test_youtube.py b/tests/site_downloaders/test_youtube.py index 684eb20..14c6648 100644 --- a/tests/site_downloaders/test_youtube.py +++ b/tests/site_downloaders/test_youtube.py @@ -15,7 +15,6 @@ from bdfr.site_downloaders.youtube import Youtube @pytest.mark.parametrize(('test_url', 'expected_hash'), ( ('https://www.youtube.com/watch?v=uSm2VDgRIUs', '2d60b54582df5b95ec72bb00b580d2ff'), ('https://www.youtube.com/watch?v=GcI7nxQj7HA', '5db0fc92a0a7fb9ac91e63505eea9cf0'), - ('https://youtu.be/TMqPOlp4tNo', 'f68c00b018162857f3df4844c45302e7'), # Age restricted )) def test_find_resources_good(test_url: str, expected_hash: str): test_submission = MagicMock() diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 8ad1663..060f145 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -22,3 +22,12 @@ def test_process_click_context(arg_dict: dict): test_config.process_click_arguments(test_context) test_config = vars(test_config) assert all([test_config[arg] == arg_dict[arg] for arg in arg_dict.keys()]) + + +def test_yaml_file_read(): + file = './tests/yaml_test_configuration.yaml' + test_config = Configuration() + test_config.parse_yaml_options(file) + assert test_config.subreddit == ['EarthPorn', 'TwoXChromosomes', 'Mindustry'] + assert test_config.sort == 'new' + assert test_config.limit == 10 diff --git a/tests/test_connector.py b/tests/test_connector.py index 9fe58f2..142baa6 100644 --- a/tests/test_connector.py +++ b/tests/test_connector.py @@ -336,13 +336,29 @@ def test_get_user_authenticated_lists( downloader_mock.args.__dict__[test_flag] = True downloader_mock.reddit_instance = authenticated_reddit_instance downloader_mock.args.limit = 10 - downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot + downloader_mock.determine_sort_function.return_value = praw.models.Subreddit.hot downloader_mock.sort_filter = RedditTypes.SortType.HOT downloader_mock.args.user = [RedditConnector.resolve_user_name(downloader_mock, 'me')] results = RedditConnector.get_user_data(downloader_mock) assert_all_results_are_submissions_or_comments(10, results) +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.authenticated +def test_get_subscribed_subreddits(downloader_mock: MagicMock, authenticated_reddit_instance: praw.Reddit): + downloader_mock.reddit_instance = authenticated_reddit_instance + downloader_mock.args.limit = 10 + downloader_mock.args.authenticate = True + downloader_mock.args.subscribed = True + 
+    downloader_mock.determine_sort_function.return_value = praw.models.Subreddit.hot
+    downloader_mock.sort_filter = RedditTypes.SortType.HOT
+    results = RedditConnector.get_subreddits(downloader_mock)
+    assert all([isinstance(s, praw.models.ListingGenerator) for s in results])
+    assert len(results) > 0
+
+
 @pytest.mark.parametrize(('test_name', 'expected'), (
     ('Mindustry', 'Mindustry'),
     ('Futurology', 'Futurology'),
@@ -383,7 +399,7 @@ def test_read_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Pat
 @pytest.mark.online
 @pytest.mark.reddit
 @pytest.mark.parametrize('test_redditor_name', (
-    'Paracortex',
+    'nasa',
     'crowdstrike',
     'HannibalGoddamnit',
 ))
@@ -431,7 +447,8 @@ def test_check_user_existence_banned(
 @pytest.mark.reddit
 @pytest.mark.parametrize(('test_subreddit_name', 'expected_message'), (
     ('donaldtrump', 'cannot be found'),
-    ('submitters', 'private and cannot be scraped')
+    ('submitters', 'private and cannot be scraped'),
+    ('lhnhfkuhwreolo', 'does not exist')
 ))
 def test_check_subreddit_status_bad(test_subreddit_name: str, expected_message: str, reddit_instance: praw.Reddit):
     test_subreddit = reddit_instance.subreddit(test_subreddit_name)
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index e5f0a31..e2e9e82 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -200,3 +200,107 @@ def test_download_submission(
     RedditDownloader._download_submission(downloader_mock, submission)
     folder_contents = list(tmp_path.iterdir())
     assert len(folder_contents) == expected_files_len
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_submission_id', 'min_score'), (
+    ('ljyy27', 1),
+))
+def test_download_submission_min_score_above(
+    test_submission_id: str,
+    min_score: int,
+    downloader_mock: MagicMock,
+    reddit_instance: praw.Reddit,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture,
+):
+    setup_logging(3)
+    downloader_mock.reddit_instance = reddit_instance
+    downloader_mock.download_filter.check_url.return_value = True
+    downloader_mock.args.folder_scheme = ''
+    downloader_mock.args.min_score = min_score
+    downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
+    downloader_mock.download_directory = tmp_path
+    submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
+    RedditDownloader._download_submission(downloader_mock, submission)
+    output = capsys.readouterr()
+    assert 'filtered due to score' not in output.out
+
+
+@pytest.mark.online
+@pytest.mark.reddit
+@pytest.mark.parametrize(('test_submission_id', 'min_score'), (
+    ('ljyy27', 25),
+))
+def test_download_submission_min_score_below(
+    test_submission_id: str,
+    min_score: int,
+    downloader_mock: MagicMock,
+    reddit_instance: praw.Reddit,
+    tmp_path: Path,
+    capsys: pytest.CaptureFixture,
+):
+    setup_logging(3)
+    downloader_mock.reddit_instance = reddit_instance
+    downloader_mock.download_filter.check_url.return_value = True
+    downloader_mock.args.folder_scheme = ''
+    downloader_mock.args.min_score = min_score
+    downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock)
+    downloader_mock.download_directory = tmp_path
+    submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
+    RedditDownloader._download_submission(downloader_mock, submission)
+    output = capsys.readouterr()
+    assert 'filtered due to score' in output.out
+
+
+@pytest.mark.online
+@pytest.mark.reddit +@pytest.mark.parametrize(('test_submission_id', 'max_score'), ( + ('ljyy27', 25), +)) +def test_download_submission_max_score_below( + test_submission_id: str, + max_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, +): + setup_logging(3) + downloader_mock.reddit_instance = reddit_instance + downloader_mock.download_filter.check_url.return_value = True + downloader_mock.args.folder_scheme = '' + downloader_mock.args.max_score = max_score + downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) + downloader_mock.download_directory = tmp_path + submission = downloader_mock.reddit_instance.submission(id=test_submission_id) + RedditDownloader._download_submission(downloader_mock, submission) + output = capsys.readouterr() + assert 'filtered due to score' not in output.out + + +@pytest.mark.online +@pytest.mark.reddit +@pytest.mark.parametrize(('test_submission_id', 'max_score'), ( + ('ljyy27', 1), +)) +def test_download_submission_max_score_above( + test_submission_id: str, + max_score: int, + downloader_mock: MagicMock, + reddit_instance: praw.Reddit, + tmp_path: Path, + capsys: pytest.CaptureFixture, +): + setup_logging(3) + downloader_mock.reddit_instance = reddit_instance + downloader_mock.download_filter.check_url.return_value = True + downloader_mock.args.folder_scheme = '' + downloader_mock.args.max_score = max_score + downloader_mock.file_name_formatter = RedditConnector.create_file_name_formatter(downloader_mock) + downloader_mock.download_directory = tmp_path + submission = downloader_mock.reddit_instance.submission(id=test_submission_id) + RedditDownloader._download_submission(downloader_mock, submission) + output = capsys.readouterr() + assert 'filtered due to score' in output.out diff --git a/tests/test_file_name_formatter.py b/tests/test_file_name_formatter.py index 30fac77..21cb8a6 100644 --- a/tests/test_file_name_formatter.py +++ b/tests/test_file_name_formatter.py @@ -16,6 +16,7 @@ from bdfr.file_name_formatter import FileNameFormatter from bdfr.resource import Resource from bdfr.site_downloaders.base_downloader import BaseDownloader from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback +from bdfr.site_downloaders.self_post import SelfPost @pytest.fixture() @@ -406,6 +407,7 @@ def test_windows_max_path(tmp_path: Path): @pytest.mark.parametrize(('test_reddit_id', 'test_downloader', 'expected_names'), ( ('gphmnr', YtdlpFallback, {'He has a lot to say today.mp4'}), ('d0oir2', YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}), + ('jiecu', SelfPost, {'[deleted by user].txt'}), )) def test_name_submission( test_reddit_id: str, @@ -418,4 +420,4 @@ def test_name_submission( test_formatter = FileNameFormatter('{TITLE}', '', '') results = test_formatter.format_resource_paths(test_resources, Path('.')) results = set([r[0].name for r in results]) - assert expected_names == results + assert results == expected_names diff --git a/tests/yaml_test_configuration.yaml b/tests/yaml_test_configuration.yaml new file mode 100644 index 0000000..5621721 --- /dev/null +++ b/tests/yaml_test_configuration.yaml @@ -0,0 +1,6 @@ +limit: 10 +sort: new +subreddit: + - EarthPorn + - TwoXChromosomes + - Mindustry \ No newline at end of file
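A note on the new `min_score`/`max_score` tests in `tests/test_downloader.py`: they assert only on the `filtered due to score` log line, so the shape of the filter itself is implied rather than shown. Below is a minimal sketch of the kind of check these tests exercise; the helper name `passes_score_filter` is hypothetical, and only the `args.min_score`/`args.max_score` attribute names and the log message are taken from the tests themselves.

```python
import logging

logger = logging.getLogger(__name__)


def passes_score_filter(submission, args) -> bool:
    # Hypothetical helper, not the actual BDFR code: reject any submission
    # whose score falls outside the configured bounds.
    if args.min_score is not None and submission.score < args.min_score:
        logger.debug(f'Submission {submission.id} filtered due to score {submission.score}')
        return False
    if args.max_score is not None and submission.score > args.max_score:
        logger.debug(f'Submission {submission.id} filtered due to score {submission.score}')
        return False
    return True
```

Presumably `_download_submission` runs such a check before downloading anything, which is why `test_download_submission_min_score_below` and `test_download_submission_max_score_above` expect the message in the captured output while the other two tests expect its absence.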
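Similarly, `test_yaml_file_read` together with the new `tests/yaml_test_configuration.yaml` fixture pins down `Configuration.parse_yaml_options` only by its observable effect: keys in the YAML file end up as attributes on the configuration object. A sketch of that behaviour, assuming PyYAML and using a trimmed-down stand-in for the real `Configuration` class:

```python
import yaml  # PyYAML


class Configuration:
    # Trimmed-down stand-in; the real bdfr.configuration.Configuration
    # carries many more attributes and default values.
    def __init__(self):
        self.limit = None
        self.sort = 'hot'
        self.subreddit = []

    def parse_yaml_options(self, file_path: str):
        # Copy every recognised key from the YAML file onto this object.
        # How unknown keys and precedence against CLI arguments are handled
        # is left to the real implementation.
        with open(file_path) as file:
            opts = yaml.safe_load(file) or {}
        for key, value in opts.items():
            if hasattr(self, key):
                setattr(self, key, value)
```

With the fixture above, `parse_yaml_options('./tests/yaml_test_configuration.yaml')` leaves `limit == 10`, `sort == 'new'`, and `subreddit == ['EarthPorn', 'TwoXChromosomes', 'Mindustry']`, which is exactly what the test asserts.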