
Merge branch 'development' into master

Armin Samii 2023-06-17 20:31:24 -04:00 committed by GitHub
commit ce4017db17
99 changed files with 1157 additions and 453 deletions


@ -7,9 +7,10 @@ assignees: ''
---
- [ ] I have read the [Opening an issue](https://github.com/aliparlakci/bulk-downloader-for-reddit/blob/master/docs/CONTRIBUTING.md#opening-an-issue)
- [ ] I am reporting a bug.
- [ ] I am running the latest version of BDfR
- [ ] I have read the [Opening an issue](https://github.com/aliparlakci/bulk-downloader-for-reddit/blob/master/docs/CONTRIBUTING.md#opening-an-issue)
- [ ] I am not asking a question about the BDFR (please use Discussions for this)
## Description

.github/workflows/scripts-test.yml (new file)

@ -0,0 +1,29 @@
name: Scripts Test
on:
push:
paths:
- "scripts/*.sh"
- "scripts/*.ps1"
pull_request:
paths:
- "scripts/*.sh"
- "scripts/*.ps1"
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: 'true'
- name: Bats tests
run: |
cd scripts/tests/
bats/bin/bats *.bats
- name: Pester tests
shell: pwsh
run: |
cd scripts/tests/
Invoke-Pester -CI -PassThru .


@ -7,12 +7,14 @@ on:
- "**.md"
- ".markdown_style.rb"
- ".mdlrc"
- "scripts/"
pull_request:
branches: [ master, development ]
paths-ignore:
- "**.md"
- ".markdown_style.rb"
- ".mdlrc"
- "scripts/"
jobs:
test:
@ -35,7 +37,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip Flake8-pyproject pytest pytest-cov
python -m pip install --upgrade pip pytest pytest-cov ruff
pip install .
- name: Make configuration for tests
@ -44,16 +46,20 @@ jobs:
run: |
./devscripts/configure${{ matrix.ext }}
- name: Lint with flake8
- name: Critical ruff lint
run: |
flake8 . --select=E9,F63,F7,F82
ruff check --format=github --select=E9,F63,F7,F82 .
- name: Test with pytest
run: |
pytest -m 'not slow' --verbose --cov=./bdfr/ --cov-report term:skip-covered --cov-report html
pytest -m "not slow" --verbose --cov=./bdfr/ --cov-report term:skip-covered --cov-report html
- name: Upload coverage report
uses: actions/upload-artifact@v3
with:
name: coverage_report
path: htmlcov/
- name: Full ruff lint
run: |
ruff check --format=github . --exit-zero
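
The same checks can be reproduced locally before pushing; a rough bash equivalent of the two lint steps above (assuming ruff is installed in the active environment):

```bash
pip install ruff
# Mirror the "Critical ruff lint" step: fail only on syntax errors and undefined names
ruff check --select=E9,F63,F7,F82 .
# Mirror the "Full ruff lint" step: report remaining findings without failing the run
ruff check --exit-zero .
```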

.gitignore

@ -128,6 +128,9 @@ venv.bak/
.dmypy.json
dmypy.json
# ruff
.ruff_cache/
# Pyre type checker
.pyre/
@ -141,4 +144,4 @@ cython_debug/
test_config.cfg
.vscode/
.idea/
.idea/


@ -2,24 +2,34 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.13
hooks:
- id: validate-pyproject
name: validate-pyproject
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.3.0
hooks:
- id: black
name: black
- repo: https://github.com/pycqa/isort
rev: 5.11.4
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.272
hooks:
- id: isort
name: isort (python)
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies: [Flake8-pyproject]
- id: ruff
name: ruff
args: ["--fixable=I","--fix"]
- repo: https://github.com/markdownlint/markdownlint
rev: v0.12.0
hooks:
- id: markdownlint
name: markdownlint
- repo: https://github.com/adamchainz/blacken-docs
rev: 1.13.0
hooks:
- id: blacken-docs
name: blacken-docs
additional_dependencies: [black>=23.1.0]
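
To try the updated hooks locally, the standard pre-commit workflow applies; a minimal sketch (assuming the tool is not yet installed):

```bash
pip install pre-commit
# Register the hooks from .pre-commit-config.yaml as a git pre-commit hook
pre-commit install
# Run every configured hook (validate-pyproject, black, ruff, markdownlint, blacken-docs) across the repository
pre-commit run --all-files
```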


@ -80,11 +80,11 @@ bdfr download ./path/to/output --user reddituser --submitted -L 100
```
```bash
bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}'
bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme "{POSTID}"
```
```bash
bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links
bdfr download ./path/to/output --subreddit "Python, all, mindustry" -L 10 --make-hard-links
```
```bash
@ -92,7 +92,7 @@ bdfr archive ./path/to/output --user reddituser --submitted --all-comments --com
```
```bash
bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ''
bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme ""
```
Alternatively, you can pass options through a YAML file.
@ -143,6 +143,9 @@ The following options are common between both the `archive` and `download` comma
- Can be specified multiple times
- Disables certain modules from being used
- See [Disabling Modules](#disabling-modules) for more information and a list of module names
- `--downvoted`
- This will use a user's downvoted posts as a source of posts to scrape
- This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me` (see the example below)
- `--filename-restriction-scheme`
- Can be: `windows`, `linux`
- Turns off the OS detection and specifies which system to use when making filenames
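
A hypothetical invocation of the new flag, combining it with the authentication requirements noted above (output path and limit are illustrative):

```bash
# --downvoted only works on an authenticated instance and with --user set to "me"
bdfr download ./path/to/output --user me --downvoted --authenticate -L 50
```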
@ -176,7 +179,7 @@ The following options are common between both the `archive` and `download` comma
- If it is not supplied, then the BDFR will default to the maximum allowed by Reddit, roughly 1000 posts. **We cannot bypass this.**
- `-S, --sort`
- This is the sort type for each applicable submission source supplied to the BDFR
- This option does not apply to upvoted or saved posts when scraping from these sources
- This option does not apply to upvoted, downvoted or saved posts when scraping from these sources
- The following options are available:
- `controversial`
- `hot` (default)
@ -191,16 +194,16 @@ The following options are common between both the `archive` and `download` comma
- This is the name of a multireddit to add as a source
- Can be specified multiple times
- This can be done by using `-m` multiple times
- Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
- Multireddits can also be used to provide CSV multireddits e.g. `-m "chess, favourites"`
- The specified multireddits must all belong to the user specified with the `--user` option
- `-s, --subreddit`
- This adds a subreddit as a source
- Can be used multiple times
- This can be done by using `-s` multiple times
- Subreddits can also be used to provide CSV subreddits e.g. `-m 'all, python, mindustry'`
- Subreddits can also be used to provide CSV subreddits e.g. `-s "all, python, mindustry"`
- `-t, --time`
- This is the time filter that will be applied to all applicable sources
- This option does not apply to upvoted or saved posts when scraping from these sources
- This option does not apply to upvoted, downvoted or saved posts when scraping from these sources
- This option only applies if sorting by top or controversial. See --sort for more detail.
- The following options are available:
- `all` (default)
@ -233,11 +236,12 @@ The following options apply only to the `download` command. This command downloa
- The default is 120 seconds
- See [Rate Limiting](#rate-limiting) for details
- `--no-dupes`
- This flag will not redownload files if they were already downloaded in the current run
- This flag will skip writing a file to disk if that file was already downloaded in the current run
- This is calculated by MD5 hash
- `--search-existing`
- This will make the BDFR compile the hashes for every file in `directory`
- The hashes are used to remove duplicates if `--no-dupes` is supplied or make hard links if `--make-hard-links` is supplied
- The hashes are used to skip duplicate files if `--no-dupes` is supplied or make hard links if `--make-hard-links` is supplied (see the example below)
- **The use of this option is highly discouraged due to inefficiency**
- `--file-scheme`
- Sets the scheme for files
- Default is `{REDDITOR}_{TITLE}_{POSTID}`
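
As referenced above, a hypothetical run that combines the two de-duplication options (subreddit and limit are illustrative):

```bash
# --search-existing pre-computes MD5 hashes for files already in the output directory;
# --no-dupes then skips writing any resource whose hash has already been seen this run
bdfr download ./path/to/output --subreddit Python -L 50 --search-existing --no-dupes
```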
@ -311,9 +315,9 @@ The part `-L 50` is to make sure that the character limit for a single line isn'
## Authentication and Security
The BDFR uses OAuth2 authentication to connect to Reddit if authentication is required. This means that it is a secure, token-based system for making requests. This also means that the BDFR only has access to specific parts of the account authenticated, by default only saved posts, upvoted posts, and the identity of the authenticated account. Note that authentication is not required unless accessing private things like upvoted posts, saved posts, and private multireddits.
The BDFR uses OAuth2 authentication to connect to Reddit if authentication is required. This means that it is a secure, token-based system for making requests. This also means that the BDFR only has access to specific parts of the account authenticated, by default only saved posts, upvoted posts, downvoted posts, and the identity of the authenticated account. Note that authentication is not required unless accessing private things like upvoted posts, downvoted posts, saved posts, and private multireddits.
To authenticate, the BDFR will first look for a token in the configuration file that signals that there's been a previous authentication. If this is not there, then the BDFR will attempt to register itself with your account. This is normal, and if you run the program, it will pause and show a Reddit URL. Click on this URL and it will take you to Reddit, where the permissions being requested will be shown. Read this and **confirm that there are no more permissions than needed to run the program**. You should not grant unneeded permissions; by default, the BDFR only requests permission to read your saved or upvoted submissions and identify as you.
To authenticate, the BDFR will first look for a token in the configuration file that signals that there's been a previous authentication. If this is not there, then the BDFR will attempt to register itself with your account. This is normal, and if you run the program, it will pause and show a Reddit URL. Click on this URL and it will take you to Reddit, where the permissions being requested will be shown. Read this and **confirm that there are no more permissions than needed to run the program**. You should not grant unneeded permissions; by default, the BDFR only requests permission to read your saved, upvoted, or downvoted submissions and identify as you.
If the permissions look safe, confirm it, and the BDFR will save a token that will allow it to authenticate with Reddit from then on.
@ -410,7 +414,7 @@ Modules can be disabled through the command line interface for the BDFR or more
- `Vidble`
- `VReddit` (Reddit Video Post)
- `Youtube`
- `YoutubeDlFallback`
- `YtdlpFallback` (Youtube DL Fallback)
### Rate Limiting


@ -1,4 +1,3 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__version__ = "2.6.2"


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import sys
@ -20,10 +19,11 @@ _common_options = [
click.argument("directory", type=str),
click.option("--authenticate", is_flag=True, default=None),
click.option("--config", type=str, default=None),
click.option("--disable-module", multiple=True, default=None, type=str),
click.option("--exclude-id", default=None, multiple=True),
click.option("--exclude-id-file", default=None, multiple=True),
click.option("--file-scheme", default=None, type=str),
click.option("--disable-module", type=str, multiple=True, default=None),
click.option("--downvoted", is_flag=True, default=None),
click.option("--exclude-id", type=str, multiple=True, default=None),
click.option("--exclude-id-file", type=str, multiple=True, default=None),
click.option("--file-scheme", type=str, default=None),
click.option("--filename-restriction-scheme", type=click.Choice(("linux", "windows")), default=None),
click.option("--folder-scheme", default=None, type=str),
click.option("--ignore-user", type=str, multiple=True, default=None),
@ -70,8 +70,8 @@ _archiver_options = [
]
def _add_options(opts: list):
def wrap(func):
def _add_options(opts: list): # noqa: ANN202
def wrap(func): # noqa: ANN001,ANN202
for opt in opts:
func = opt(func)
return func
@ -79,12 +79,16 @@ def _add_options(opts: list):
return wrap
def _check_version(context, param, value):
def _check_version(context: click.core.Context, _param, value: bool) -> None:
if not value or context.resilient_parsing:
return
current = __version__
latest = requests.get("https://pypi.org/pypi/bdfr/json").json()["info"]["version"]
print(f"You are currently using v{current} the latest is v{latest}")
try:
latest = requests.get("https://pypi.org/pypi/bdfr/json", timeout=10).json()["info"]["version"]
print(f"You are currently using v{current} the latest is v{latest}")
except TimeoutError:
logger.exception(f"Timeout reached fetching current version from Pypi - BDFR v{current}")
raise
context.exit()
@ -98,7 +102,7 @@ def _check_version(context, param, value):
callback=_check_version,
help="Check version and exit.",
)
def cli():
def cli() -> None:
"""BDFR is used to download and archive content from Reddit."""
pass
@ -108,7 +112,7 @@ def cli():
@_add_options(_downloader_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_download(context: click.Context, **_):
def cli_download(context: click.Context, **_) -> None:
"""Used to download content posted to Reddit."""
config = Configuration()
config.process_click_arguments(context)
@ -118,10 +122,10 @@ def cli_download(context: click.Context, **_):
reddit_downloader = RedditDownloader(config, [stream])
reddit_downloader.download()
except Exception:
logger.exception("Downloader exited unexpectedly")
logger.exception(f"Downloader exited unexpectedly - BDFR Downloader v{__version__}")
raise
else:
logger.info("Program complete")
logger.info(f"Program complete - BDFR Downloader v{__version__}")
@cli.command("archive")
@ -129,7 +133,7 @@ def cli_download(context: click.Context, **_):
@_add_options(_archiver_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_archive(context: click.Context, **_):
def cli_archive(context: click.Context, **_) -> None:
"""Used to archive post data from Reddit."""
config = Configuration()
config.process_click_arguments(context)
@ -139,10 +143,10 @@ def cli_archive(context: click.Context, **_):
reddit_archiver = Archiver(config, [stream])
reddit_archiver.download()
except Exception:
logger.exception("Archiver exited unexpectedly")
logger.exception(f"Archiver exited unexpectedly - BDFR Archiver v{__version__}")
raise
else:
logger.info("Program complete")
logger.info(f"Program complete - BDFR Archiver v{__version__}")
@cli.command("clone")
@ -151,7 +155,7 @@ def cli_archive(context: click.Context, **_):
@_add_options(_downloader_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_clone(context: click.Context, **_):
def cli_clone(context: click.Context, **_) -> None:
"""Combines archive and download commands."""
config = Configuration()
config.process_click_arguments(context)
@ -161,17 +165,17 @@ def cli_clone(context: click.Context, **_):
reddit_scraper = RedditCloner(config, [stream])
reddit_scraper.download()
except Exception:
logger.exception("Scraper exited unexpectedly")
logger.exception(f"Scraper exited unexpectedly - BDFR Scraper v{__version__}")
raise
else:
logger.info("Program complete")
logger.info(f"Program complete - BDFR Cloner v{__version__}")
@cli.command("completion")
@click.argument("shell", type=click.Choice(("all", "bash", "fish", "zsh"), case_sensitive=False), default="all")
@click.help_option("-h", "--help")
@click.option("-u", "--uninstall", is_flag=True, default=False, help="Uninstall completion")
def cli_completion(shell: str, uninstall: bool):
def cli_completion(shell: str, uninstall: bool) -> None:
"""\b
Installs shell completions for BDFR.
Options: all, bash, fish, zsh
@ -184,7 +188,7 @@ def cli_completion(shell: str, uninstall: bool):
Completion(shell).uninstall()
return
if shell not in ("all", "bash", "fish", "zsh"):
print(f"{shell} is not a valid option.")
print(f"{shell!r} is not a valid option.")
print("Options: all, bash, fish, zsh")
return
if click.confirm(f"Would you like to install {shell} completions for BDFR"):
@ -213,7 +217,7 @@ def make_console_logging_handler(verbosity: int) -> logging.StreamHandler:
return stream
def silence_module_loggers():
def silence_module_loggers() -> None:
logging.getLogger("praw").setLevel(logging.CRITICAL)
logging.getLogger("prawcore").setLevel(logging.CRITICAL)
logging.getLogger("urllib3").setLevel(logging.CRITICAL)


@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from abc import ABC, abstractmethod
from typing import Union
@ -8,7 +7,7 @@ from praw.models import Comment, Submission
class BaseArchiveEntry(ABC):
def __init__(self, source: Union[Comment, Submission]):
def __init__(self, source: Union[Comment, Submission]) -> None:
self.source = source
self.post_details: dict = {}


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
@ -11,8 +10,8 @@ logger = logging.getLogger(__name__)
class CommentArchiveEntry(BaseArchiveEntry):
def __init__(self, comment: praw.models.Comment):
super(CommentArchiveEntry, self).__init__(comment)
def __init__(self, comment: praw.models.Comment) -> None:
super().__init__(comment)
def compile(self) -> dict:
self.source.refresh()


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
@ -11,8 +10,8 @@ logger = logging.getLogger(__name__)
class SubmissionArchiveEntry(BaseArchiveEntry):
def __init__(self, submission: praw.models.Submission, include_comments: bool):
super(SubmissionArchiveEntry, self).__init__(submission)
def __init__(self, submission: praw.models.Submission, include_comments: bool) -> None:
super().__init__(submission)
self._include_comments = include_comments
def compile(self) -> dict:
@ -25,7 +24,7 @@ class SubmissionArchiveEntry(BaseArchiveEntry):
out["comments"] = comments
return out
def _get_post_details(self):
def _get_post_details(self) -> None:
self.post_details = {
"title": self.source.title,
"name": self.source.name,


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import logging
@ -26,10 +25,10 @@ logger = logging.getLogger(__name__)
class Archiver(RedditConnector):
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
super(Archiver, self).__init__(args, logging_handlers)
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()) -> None:
super().__init__(args, logging_handlers)
def download(self):
def download(self) -> None:
for generator in self.reddit_lists:
try:
for submission in generator:
@ -66,7 +65,7 @@ class Archiver(RedditConnector):
return [supplied_submissions]
def get_user_data(self) -> list[Iterator]:
results = super(Archiver, self).get_user_data()
results = super().get_user_data()
if self.args.user and self.args.all_comments:
sort = self.determine_sort_function()
for user in self.args.user:
@ -82,7 +81,7 @@ class Archiver(RedditConnector):
else:
raise ArchiverError(f"Factory failed to classify item of type {type(praw_item).__name__}")
def write_entry(self, praw_item: Union[praw.models.Submission, praw.models.Comment]):
def write_entry(self, praw_item: Union[praw.models.Submission, praw.models.Comment]) -> None:
if self.args.comment_context and isinstance(praw_item, praw.models.Comment):
logger.debug(f"Converting comment {praw_item.id} to submission {praw_item.submission.id}")
praw_item = praw_item.submission
@ -94,25 +93,25 @@ class Archiver(RedditConnector):
elif self.args.format == "yaml":
self._write_entry_yaml(archive_entry)
else:
raise ArchiverError(f"Unknown format {self.args.format} given")
raise ArchiverError(f"Unknown format {self.args.format!r} given")
logger.info(f"Record for entry item {praw_item.id} written to disk")
def _write_entry_json(self, entry: BaseArchiveEntry):
def _write_entry_json(self, entry: BaseArchiveEntry) -> None:
resource = Resource(entry.source, "", lambda: None, ".json")
content = json.dumps(entry.compile())
self._write_content_to_disk(resource, content)
def _write_entry_xml(self, entry: BaseArchiveEntry):
def _write_entry_xml(self, entry: BaseArchiveEntry) -> None:
resource = Resource(entry.source, "", lambda: None, ".xml")
content = dict2xml.dict2xml(entry.compile(), wrap="root")
self._write_content_to_disk(resource, content)
def _write_entry_yaml(self, entry: BaseArchiveEntry):
def _write_entry_yaml(self, entry: BaseArchiveEntry) -> None:
resource = Resource(entry.source, "", lambda: None, ".yaml")
content = yaml.safe_dump(entry.compile())
self._write_content_to_disk(resource, content)
def _write_content_to_disk(self, resource: Resource, content: str):
def _write_content_to_disk(self, resource: Resource, content: str) -> None:
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with Path(file_path).open(mode="w", encoding="utf-8") as file:


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from collections.abc import Iterable
@ -15,10 +14,10 @@ logger = logging.getLogger(__name__)
class RedditCloner(RedditDownloader, Archiver):
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
super(RedditCloner, self).__init__(args, logging_handlers)
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()) -> None:
super().__init__(args, logging_handlers)
def download(self):
def download(self) -> None:
for generator in self.reddit_lists:
try:
for submission in generator:

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import subprocess
from os import environ
@ -9,13 +8,13 @@ import appdirs
class Completion:
def __init__(self, shell: str):
def __init__(self, shell: str) -> None:
self.shell = shell
self.env = environ.copy()
self.share_dir = appdirs.user_data_dir()
self.entry_points = ["bdfr", "bdfr-archive", "bdfr-clone", "bdfr-download"]
def install(self):
def install(self) -> None:
if self.shell in ("all", "bash"):
comp_dir = self.share_dir + "/bash-completion/completions/"
if not Path(comp_dir).exists():
@ -24,7 +23,9 @@ class Completion:
for point in self.entry_points:
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "bash_source"
with Path(comp_dir + point).open(mode="w") as file:
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
file.write(
subprocess.run([point], env=self.env, capture_output=True, text=True).stdout, # noqa: S603
)
print(f"Bash completion for {point} written to {comp_dir}{point}")
if self.shell in ("all", "fish"):
comp_dir = self.share_dir + "/fish/vendor_completions.d/"
@ -34,7 +35,9 @@ class Completion:
for point in self.entry_points:
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source"
with Path(comp_dir + point + ".fish").open(mode="w") as file:
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
file.write(
subprocess.run([point], env=self.env, capture_output=True, text=True).stdout, # noqa: S603
)
print(f"Fish completion for {point} written to {comp_dir}{point}.fish")
if self.shell in ("all", "zsh"):
comp_dir = self.share_dir + "/zsh/site-functions/"
@ -44,10 +47,12 @@ class Completion:
for point in self.entry_points:
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source"
with Path(comp_dir + "_" + point).open(mode="w") as file:
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
file.write(
subprocess.run([point], env=self.env, capture_output=True, text=True).stdout, # noqa: S603
)
print(f"Zsh completion for {point} written to {comp_dir}_{point}")
def uninstall(self):
def uninstall(self) -> None:
if self.shell in ("all", "bash"):
comp_dir = self.share_dir + "/bash-completion/completions/"
for point in self.entry_points:


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from argparse import Namespace
@ -13,13 +12,14 @@ logger = logging.getLogger(__name__)
class Configuration(Namespace):
def __init__(self):
super(Configuration, self).__init__()
def __init__(self) -> None:
super().__init__()
self.authenticate = False
self.config = None
self.opts: Optional[str] = None
self.directory: str = "."
self.disable_module: list[str] = []
self.downvoted: bool = False
self.exclude_id = []
self.exclude_id_file = []
self.file_scheme: str = "{REDDITOR}_{TITLE}_{POSTID}"
@ -60,12 +60,12 @@ class Configuration(Namespace):
self.comment_context: bool = False
self.skip_comments = False
def process_click_arguments(self, context: click.Context):
def process_click_arguments(self, context: click.Context) -> None:
if context.params.get("opts") is not None:
self.parse_yaml_options(context.params["opts"])
for arg_key in context.params.keys():
if not hasattr(self, arg_key):
logger.warning(f"Ignoring an unknown CLI argument: {arg_key}")
logger.warning(f"Ignoring an unknown CLI argument: {arg_key!r}")
continue
val = context.params[arg_key]
if val is None or val == ():
@ -73,7 +73,7 @@ class Configuration(Namespace):
continue
setattr(self, arg_key, val)
def parse_yaml_options(self, file_path: str):
def parse_yaml_options(self, file_path: str) -> None:
yaml_file_loc = Path(file_path)
if not yaml_file_loc.exists():
logger.error(f"No YAML file found at {yaml_file_loc}")
@ -86,6 +86,6 @@ class Configuration(Namespace):
return
for arg_key, val in opts.items():
if not hasattr(self, arg_key):
logger.warning(f"Ignoring an unknown YAML argument: {arg_key}")
logger.warning(f"Ignoring an unknown YAML argument: {arg_key!r}")
continue
setattr(self, arg_key, val)


@ -1,20 +1,20 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import configparser
import importlib.resources
import itertools
import logging
import logging.handlers
import platform
import re
import shutil
import socket
from abc import ABCMeta, abstractmethod
from collections.abc import Callable, Iterable, Iterator
from datetime import datetime
from enum import Enum, auto
from pathlib import Path
from time import sleep
from typing import Union
import appdirs
import praw
@ -22,6 +22,7 @@ import praw.exceptions
import praw.models
import prawcore
from bdfr import __version__
from bdfr import exceptions as errors
from bdfr.configuration import Configuration
from bdfr.download_filter import DownloadFilter
@ -51,7 +52,7 @@ class RedditTypes:
class RedditConnector(metaclass=ABCMeta):
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()) -> None:
self.args = args
self.config_directories = appdirs.AppDirs("bdfr", "BDFR")
self.determine_directories()
@ -64,8 +65,7 @@ class RedditConnector(metaclass=ABCMeta):
self.reddit_lists = self.retrieve_reddit_lists()
def _setup_internal_objects(self):
def _setup_internal_objects(self) -> None:
self.parse_disabled_modules()
self.download_filter = self.create_download_filter()
@ -77,6 +77,7 @@ class RedditConnector(metaclass=ABCMeta):
self.file_name_formatter = self.create_file_name_formatter()
logger.log(9, "Create file name formatter")
self.user_agent = praw.const.USER_AGENT_FORMAT.format(":".join([platform.uname()[0], __package__, __version__]))
self.create_reddit_instance()
self.args.user = list(filter(None, [self.resolve_user_name(user) for user in self.args.user]))
@ -95,12 +96,12 @@ class RedditConnector(metaclass=ABCMeta):
self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit}
@staticmethod
def _apply_logging_handlers(handlers: Iterable[logging.Handler]):
def _apply_logging_handlers(handlers: Iterable[logging.Handler]) -> None:
main_logger = logging.getLogger()
for handler in handlers:
main_logger.addHandler(handler)
def read_config(self):
def read_config(self) -> None:
"""Read any cfg values that need to be processed"""
if self.args.max_wait_time is None:
self.args.max_wait_time = self.cfg_parser.getint("DEFAULT", "max_wait_time", fallback=120)
@ -117,29 +118,34 @@ class RedditConnector(metaclass=ABCMeta):
self.args.filename_restriction_scheme = self.cfg_parser.get(
"DEFAULT", "filename_restriction_scheme", fallback=None
)
logger.debug(f"Setting filename restriction scheme to '{self.args.filename_restriction_scheme}'")
logger.debug(f"Setting filename restriction scheme to {self.args.filename_restriction_scheme!r}")
# Update config on disk
with Path(self.config_location).open(mode="w") as file:
self.cfg_parser.write(file)
def parse_disabled_modules(self):
def parse_disabled_modules(self) -> None:
disabled_modules = self.args.disable_module
disabled_modules = self.split_args_input(disabled_modules)
disabled_modules = {name.strip().lower() for name in disabled_modules}
self.args.disable_module = disabled_modules
logger.debug(f'Disabling the following modules: {", ".join(self.args.disable_module)}')
logger.debug(f"Disabling the following modules: {', '.join(self.args.disable_module)}")
def create_reddit_instance(self):
def create_reddit_instance(self) -> None:
if self.args.authenticate:
logger.debug("Using authenticated Reddit instance")
client_id = self.cfg_parser.get("DEFAULT", "client_id")
client_secret = self.cfg_parser.get("DEFAULT", "client_secret", fallback=None)
if client_secret and client_secret.lower() == "none":
client_secret = None
if not self.cfg_parser.has_option("DEFAULT", "user_token"):
logger.log(9, "Commencing OAuth2 authentication")
scopes = self.cfg_parser.get("DEFAULT", "scopes", fallback="identity, history, read, save")
scopes = OAuth2Authenticator.split_scopes(scopes)
oauth2_authenticator = OAuth2Authenticator(
scopes,
self.cfg_parser.get("DEFAULT", "client_id"),
self.cfg_parser.get("DEFAULT", "client_secret"),
wanted_scopes=scopes,
client_id=client_id,
client_secret=client_secret,
user_agent=self.user_agent,
)
token = oauth2_authenticator.retrieve_new_token()
self.cfg_parser["DEFAULT"]["user_token"] = token
@ -149,18 +155,21 @@ class RedditConnector(metaclass=ABCMeta):
self.authenticated = True
self.reddit_instance = praw.Reddit(
client_id=self.cfg_parser.get("DEFAULT", "client_id"),
client_secret=self.cfg_parser.get("DEFAULT", "client_secret"),
user_agent=socket.gethostname(),
client_id=client_id,
client_secret=client_secret,
user_agent=self.user_agent,
token_manager=token_manager,
)
else:
logger.debug("Using unauthenticated Reddit instance")
self.authenticated = False
client_secret = self.cfg_parser.get("DEFAULT", "client_secret", fallback=None)
if client_secret and client_secret.lower() == "none":
client_secret = None
self.reddit_instance = praw.Reddit(
client_id=self.cfg_parser.get("DEFAULT", "client_id"),
client_secret=self.cfg_parser.get("DEFAULT", "client_secret"),
user_agent=socket.gethostname(),
client_secret=client_secret,
user_agent=self.user_agent,
)
def retrieve_reddit_lists(self) -> list[praw.models.ListingGenerator]:
@ -175,14 +184,14 @@ class RedditConnector(metaclass=ABCMeta):
logger.log(9, "Retrieved submissions for given links")
return master_list
def determine_directories(self):
def determine_directories(self) -> None:
self.download_directory = Path(self.args.directory).resolve().expanduser()
self.config_directory = Path(self.config_directories.user_config_dir)
self.download_directory.mkdir(exist_ok=True, parents=True)
self.config_directory.mkdir(exist_ok=True, parents=True)
def load_config(self):
def load_config(self) -> None:
self.cfg_parser = configparser.ConfigParser()
if self.args.config:
if (cfg_path := Path(self.args.config)).exists():
@ -241,7 +250,7 @@ class RedditConnector(metaclass=ABCMeta):
pattern = re.compile(r"^(?:https://www\.reddit\.com/)?(?:r/)?(.*?)/?$")
match = re.match(pattern, subreddit)
if not match:
raise errors.BulkDownloaderException(f"Could not find subreddit name in string {subreddit}")
raise errors.BulkDownloaderException(f"Could not find subreddit name in string {subreddit!r}")
return match.group(1)
@staticmethod
@ -287,7 +296,7 @@ class RedditConnector(metaclass=ABCMeta):
)
)
logger.debug(
f'Added submissions from subreddit {reddit} with the search term "{self.args.search}"'
f"Added submissions from subreddit {reddit} with the search term {self.args.search!r}"
)
else:
out.append(self.create_filtered_listing_generator(reddit))
@ -303,7 +312,7 @@ class RedditConnector(metaclass=ABCMeta):
logger.log(9, f"Resolved user to {resolved_name}")
return resolved_name
else:
logger.warning('To use "me" as a user, an authenticated Reddit instance must be used')
logger.warning("To use 'me' as a user, an authenticated Reddit instance must be used")
else:
return in_name
@ -348,7 +357,9 @@ class RedditConnector(metaclass=ABCMeta):
else:
return []
def create_filtered_listing_generator(self, reddit_source) -> Iterator:
def create_filtered_listing_generator(
self, reddit_source: Union[praw.models.Subreddit, praw.models.Multireddit, praw.models.Redditor.submissions]
) -> Iterator:
sort_function = self.determine_sort_function()
if self.sort_filter in (RedditTypes.SortType.TOP, RedditTypes.SortType.CONTROVERSIAL):
return sort_function(reddit_source, limit=self.args.limit, time_filter=self.time_filter.value)
@ -356,7 +367,7 @@ class RedditConnector(metaclass=ABCMeta):
return sort_function(reddit_source, limit=self.args.limit)
def get_user_data(self) -> list[Iterator]:
if any([self.args.submitted, self.args.upvoted, self.args.saved]):
if any([self.args.downvoted, self.args.saved, self.args.submitted, self.args.upvoted]):
if not self.args.user:
logger.warning("At least one user must be supplied to download user data")
return []
@ -375,7 +386,7 @@ class RedditConnector(metaclass=ABCMeta):
self.reddit_instance.redditor(user).submissions,
)
)
if not self.authenticated and any((self.args.upvoted, self.args.saved)):
if not self.authenticated and any((self.args.downvoted, self.args.saved, self.args.upvoted)):
logger.warning("Accessing user lists requires authentication")
else:
if self.args.upvoted:
@ -384,6 +395,9 @@ class RedditConnector(metaclass=ABCMeta):
if self.args.saved:
logger.debug(f"Retrieving saved posts of user {user}")
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
if self.args.downvoted:
logger.debug(f"Retrieving downvoted posts of user {user}")
generators.append(self.reddit_instance.redditor(user).downvoted(limit=self.args.limit))
except prawcore.PrawcoreException as e:
logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
logger.debug("Waiting 60 seconds to continue")
@ -392,7 +406,7 @@ class RedditConnector(metaclass=ABCMeta):
else:
return []
def check_user_existence(self, name: str):
def check_user_existence(self, name: str) -> None:
user = self.reddit_instance.redditor(name=name)
try:
if user.id:
@ -427,15 +441,16 @@ class RedditConnector(metaclass=ABCMeta):
return SiteAuthenticator(self.cfg_parser)
@abstractmethod
def download(self):
def download(self) -> None:
pass
@staticmethod
def check_subreddit_status(subreddit: praw.models.Subreddit):
def check_subreddit_status(subreddit: praw.models.Subreddit) -> None:
if subreddit.display_name in ("all", "friends"):
return
try:
assert subreddit.id
if subreddit.id:
return
except prawcore.NotFound:
raise errors.BulkDownloaderException(f"Source {subreddit.display_name} cannot be found")
except prawcore.Redirect:


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import re
@ -10,7 +9,7 @@ logger = logging.getLogger(__name__)
class DownloadFilter:
def __init__(self, excluded_extensions: list[str] = None, excluded_domains: list[str] = None):
def __init__(self, excluded_extensions: list[str] = None, excluded_domains: list[str] = None) -> None:
self.excluded_extensions = excluded_extensions
self.excluded_domains = excluded_domains
@ -34,9 +33,9 @@ class DownloadFilter:
if not self.excluded_extensions:
return True
combined_extensions = "|".join(self.excluded_extensions)
pattern = re.compile(r".*({})$".format(combined_extensions))
pattern = re.compile(rf".*({combined_extensions})$")
if re.match(pattern, resource_extension):
logger.log(9, f'Url "{resource_extension}" matched with "{pattern}"')
logger.log(9, f"Url {resource_extension!r} matched with {pattern!r}")
return False
else:
return True
@ -45,9 +44,9 @@ class DownloadFilter:
if not self.excluded_domains:
return True
combined_domains = "|".join(self.excluded_domains)
pattern = re.compile(r"https?://.*({}).*".format(combined_domains))
pattern = re.compile(rf"https?://.*({combined_domains}).*")
if re.match(pattern, url):
logger.log(9, f'Url "{url}" matched with "{pattern}"')
logger.log(9, f"Url {url!r} matched with {pattern!r}")
return False
else:
return True


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import logging.handlers
@ -24,9 +23,9 @@ from bdfr.site_downloaders.download_factory import DownloadFactory
logger = logging.getLogger(__name__)
def _calc_hash(existing_file: Path):
def _calc_hash(existing_file: Path) -> tuple[Path, str]:
chunk_size = 1024 * 1024
md5_hash = hashlib.md5()
md5_hash = hashlib.md5(usedforsecurity=False)
with existing_file.open("rb") as file:
chunk = file.read(chunk_size)
while chunk:
@ -37,12 +36,12 @@ def _calc_hash(existing_file: Path):
class RedditDownloader(RedditConnector):
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
super(RedditDownloader, self).__init__(args, logging_handlers)
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()) -> None:
super().__init__(args, logging_handlers)
if self.args.search_existing:
self.master_hash_list = self.scan_existing_files(self.download_directory)
def download(self):
def download(self) -> None:
for generator in self.reddit_lists:
try:
for submission in generator:
@ -55,7 +54,7 @@ class RedditDownloader(RedditConnector):
logger.debug("Waiting 60 seconds to continue")
sleep(60)
def _download_submission(self, submission: praw.models.Submission):
def _download_submission(self, submission: praw.models.Submission) -> None:
if submission.id in self.excluded_submission_ids:
logger.debug(f"Object {submission.id} in exclusion list, skipping")
return
@ -67,7 +66,7 @@ class RedditDownloader(RedditConnector):
):
logger.debug(
f"Submission {submission.id} in {submission.subreddit.display_name} skipped"
f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user'
f" due to {submission.author.name if submission.author else 'DELETED'} being an ignored user"
)
return
elif self.args.min_score and submission.score < self.args.min_score:
@ -124,12 +123,12 @@ class RedditDownloader(RedditConnector):
)
return
resource_hash = res.hash.hexdigest()
destination.parent.mkdir(parents=True, exist_ok=True)
if resource_hash in self.master_hash_list:
if self.args.no_dupes:
logger.info(f"Resource hash {resource_hash} from submission {submission.id} downloaded elsewhere")
return
elif self.args.make_hard_links:
destination.parent.mkdir(parents=True, exist_ok=True)
try:
destination.hardlink_to(self.master_hash_list[resource_hash])
except AttributeError:
@ -139,6 +138,7 @@ class RedditDownloader(RedditConnector):
f" in submission {submission.id}"
)
return
destination.parent.mkdir(parents=True, exist_ok=True)
try:
with destination.open("wb") as file:
file.write(res.content)
@ -156,7 +156,7 @@ class RedditDownloader(RedditConnector):
@staticmethod
def scan_existing_files(directory: Path) -> dict[str, Path]:
files = []
for (dirpath, _dirnames, filenames) in os.walk(directory):
for dirpath, _dirnames, filenames in os.walk(directory):
files.extend([Path(dirpath, file) for file in filenames])
logger.info(f"Calculating hashes for {len(files)} files")


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
class BulkDownloaderException(Exception):


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import logging
@ -36,9 +35,9 @@ class FileNameFormatter:
directory_format_string: str,
time_format_string: str,
restriction_scheme: Optional[str] = None,
):
) -> None:
if not self.validate_string(file_format_string):
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
raise BulkDownloaderException(f"{file_format_string!r} is not a valid format string")
self.file_format_string = file_format_string
self.directory_format_string: list[str] = directory_format_string.split("/")
self.time_format_string = time_format_string
@ -154,6 +153,7 @@ class FileNameFormatter:
max_path_length = max_path - len(ending) - len(str(root)) - 1
out = Path(root, filename + ending)
safe_ending = re.match(r".*\..*", ending)
while any(
[
len(filename) > max_file_part_length_chars,
@ -162,6 +162,8 @@ class FileNameFormatter:
]
):
filename = filename[:-1]
if not safe_ending and filename[-1] != ".":
filename = filename[:-1] + "."
out = Path(root, filename + ending)
return out
@ -169,7 +171,7 @@ class FileNameFormatter:
@staticmethod
def find_max_path_length() -> int:
try:
return int(subprocess.check_output(["getconf", "PATH_MAX", "/"]))
return int(subprocess.check_output(["getconf", "PATH_MAX", "/"])) # noqa: S603, S607
except (ValueError, subprocess.CalledProcessError, OSError):
if platform.system() == "Windows":
return FileNameFormatter.WINDOWS_MAX_PATH_LENGTH


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import configparser
import logging
@ -17,22 +16,27 @@ logger = logging.getLogger(__name__)
class OAuth2Authenticator:
def __init__(self, wanted_scopes: set[str], client_id: str, client_secret: str):
self._check_scopes(wanted_scopes)
def __init__(self, wanted_scopes: set[str], client_id: str, client_secret: str, user_agent: str) -> None:
self._check_scopes(wanted_scopes, user_agent)
self.scopes = wanted_scopes
self.client_id = client_id
self.client_secret = client_secret
@staticmethod
def _check_scopes(wanted_scopes: set[str]):
response = requests.get(
"https://www.reddit.com/api/v1/scopes.json", headers={"User-Agent": "fetch-scopes test"}
)
def _check_scopes(wanted_scopes: set[str], user_agent: str) -> None:
try:
response = requests.get(
"https://www.reddit.com/api/v1/scopes.json",
headers={"User-Agent": user_agent},
timeout=10,
)
except TimeoutError:
raise BulkDownloaderException("Reached timeout fetching scopes")
known_scopes = [scope for scope, data in response.json().items()]
known_scopes.append("*")
for scope in wanted_scopes:
if scope not in known_scopes:
raise BulkDownloaderException(f"Scope {scope} is not known to reddit")
raise BulkDownloaderException(f"Scope {scope!r} is not known to reddit")
@staticmethod
def split_scopes(scopes: str) -> set[str]:
@ -46,7 +50,7 @@ class OAuth2Authenticator:
client_id=self.client_id,
client_secret=self.client_secret,
)
state = str(random.randint(0, 65000))
state = str(random.randint(0, 65000)) # noqa: S311
url = reddit.auth.url(self.scopes, state, "permanent")
logger.warning("Authentication action required before the program can proceed")
logger.warning(f"Authenticate at {url}")
@ -58,10 +62,10 @@ class OAuth2Authenticator:
if state != params["state"]:
self.send_message(client)
raise RedditAuthenticationError(f'State mismatch in OAuth2. Expected: {state} Received: {params["state"]}')
raise RedditAuthenticationError(f"State mismatch in OAuth2. Expected: {state} Received: {params['state']}")
elif "error" in params:
self.send_message(client)
raise RedditAuthenticationError(f'Error in OAuth2: {params["error"]}')
raise RedditAuthenticationError(f"Error in OAuth2: {params['error']}")
self.send_message(client, "<script>alert('You can go back to terminal window now.')</script>")
refresh_token = reddit.auth.authorize(params["code"])
@ -71,7 +75,7 @@ class OAuth2Authenticator:
def receive_connection() -> socket.socket:
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("0.0.0.0", 7634))
server.bind(("0.0.0.0", 7634)) # noqa: S104
logger.log(9, "Server listening on 0.0.0.0:7634")
server.listen(1)
@ -82,18 +86,18 @@ class OAuth2Authenticator:
return client
@staticmethod
def send_message(client: socket.socket, message: str = ""):
client.send(f"HTTP/1.1 200 OK\r\n\r\n{message}".encode("utf-8"))
def send_message(client: socket.socket, message: str = "") -> None:
client.send(f"HTTP/1.1 200 OK\r\n\r\n{message}".encode())
client.close()
class OAuth2TokenManager(praw.reddit.BaseTokenManager):
def __init__(self, config: configparser.ConfigParser, config_location: Path):
super(OAuth2TokenManager, self).__init__()
def __init__(self, config: configparser.ConfigParser, config_location: Path) -> None:
super().__init__()
self.config = config
self.config_location = config_location
def pre_refresh_callback(self, authorizer: praw.reddit.Authorizer):
def pre_refresh_callback(self, authorizer: praw.reddit.Authorizer) -> None:
if authorizer.refresh_token is None:
if self.config.has_option("DEFAULT", "user_token"):
authorizer.refresh_token = self.config.get("DEFAULT", "user_token")
@ -101,7 +105,7 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager):
else:
raise RedditAuthenticationError("No auth token loaded in configuration")
def post_refresh_callback(self, authorizer: praw.reddit.Authorizer):
def post_refresh_callback(self, authorizer: praw.reddit.Authorizer) -> None:
self.config.set("DEFAULT", "user_token", authorizer.refresh_token)
with Path(self.config_location).open(mode="w") as file:
self.config.write(file, True)


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import hashlib
import logging
@ -19,7 +18,9 @@ logger = logging.getLogger(__name__)
class Resource:
def __init__(self, source_submission: Submission, url: str, download_function: Callable, extension: str = None):
def __init__(
self, source_submission: Submission, url: str, download_function: Callable, extension: str = None
) -> None:
self.source_submission = source_submission
self.content: Optional[bytes] = None
self.url = url
@ -33,7 +34,7 @@ class Resource:
def retry_download(url: str) -> Callable:
return lambda global_params: Resource.http_download(url, global_params)
def download(self, download_parameters: Optional[dict] = None):
def download(self, download_parameters: Optional[dict] = None) -> None:
if download_parameters is None:
download_parameters = {}
if not self.content:
@ -48,8 +49,8 @@ class Resource:
if not self.hash and self.content:
self.create_hash()
def create_hash(self):
self.hash = hashlib.md5(self.content)
def create_hash(self) -> None:
self.hash = hashlib.md5(self.content, usedforsecurity=False)
def _determine_extension(self) -> Optional[str]:
extension_pattern = re.compile(r".*(\..{3,5})$")
@ -68,7 +69,7 @@ class Resource:
max_wait_time = 300
while True:
try:
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=10)
if re.match(r"^2\d{2}", str(response.status_code)) and response.content:
return response.content
elif response.status_code in (408, 429):


@ -1,9 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import configparser
class SiteAuthenticator:
def __init__(self, cfg: configparser.ConfigParser):
def __init__(self, cfg: configparser.ConfigParser) -> None:
self.imgur_authentication = None


@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from abc import ABC, abstractmethod
@ -16,7 +15,7 @@ logger = logging.getLogger(__name__)
class BaseDownloader(ABC):
def __init__(self, post: Submission, typical_extension: Optional[str] = None):
def __init__(self, post: Submission, typical_extension: Optional[str] = None) -> None:
self.post = post
self.typical_extension = typical_extension
@ -28,10 +27,39 @@ class BaseDownloader(ABC):
@staticmethod
def retrieve_url(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
try:
res = requests.get(url, cookies=cookies, headers=headers)
res = requests.get(url, cookies=cookies, headers=headers, timeout=10)
except requests.exceptions.RequestException as e:
logger.exception(e)
raise SiteDownloaderError(f"Failed to get page {url}")
except TimeoutError as e:
logger.exception(e)
raise SiteDownloaderError(f"Timeout reached attempting to get page {url}")
if res.status_code != 200:
raise ResourceNotFound(f"Server responded with {res.status_code} at {url}")
return res
@staticmethod
def post_url(url: str, cookies: dict = None, headers: dict = None, payload: dict = None) -> requests.Response:
try:
res = requests.post(url, cookies=cookies, headers=headers, json=payload, timeout=10)
except requests.exceptions.RequestException as e:
logger.exception(e)
raise SiteDownloaderError(f"Failed to post to {url}")
except TimeoutError as e:
logger.exception(e)
raise SiteDownloaderError(f"Timeout reached attempting to post to page {url}")
if res.status_code != 200:
raise ResourceNotFound(f"Server responded with {res.status_code} to {url}")
return res
@staticmethod
def head_url(url: str, cookies: dict = None, headers: dict = None) -> requests.Response:
try:
res = requests.head(url, cookies=cookies, headers=headers, timeout=10)
except requests.exceptions.RequestException as e:
logger.exception(e)
raise SiteDownloaderError(f"Failed to check head at {url}")
except TimeoutError as e:
logger.exception(e)
raise SiteDownloaderError(f"Timeout reached attempting to check head at {url}")
return res


@ -0,0 +1,39 @@
import logging
from itertools import chain
from typing import Optional
import bs4
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
logger = logging.getLogger(__name__)
class Catbox(BaseDownloader):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
links = self.get_links(self.post.url)
if not links:
raise SiteDownloaderError("Catbox parser could not find any links")
links = [Resource(self.post, link, Resource.retry_download(link)) for link in links]
return links
@staticmethod
def get_links(url: str) -> set[str]:
content = Catbox.retrieve_url(url)
soup = bs4.BeautifulSoup(content.text, "html.parser")
collection_div = soup.find("div", attrs={"class": "imagecontainer"})
images = collection_div.find_all("a")
images = [link.get("href") for link in images]
videos = collection_div.find_all("video")
videos = [link.get("src") for link in videos]
audios = collection_div.find_all("audio")
audios = [link.get("src") for link in audios]
resources = chain(images, videos, audios)
return set(resources)


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
@ -14,7 +13,7 @@ logger = logging.getLogger(__name__)
class DelayForReddit(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Optional
@ -11,7 +10,7 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader
class Direct(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:


@ -1,17 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import urllib.parse
from bdfr.exceptions import NotADownloadableLinkError
from bdfr.site_downloaders.base_downloader import BaseDownloader
from bdfr.site_downloaders.catbox import Catbox
from bdfr.site_downloaders.delay_for_reddit import DelayForReddit
from bdfr.site_downloaders.direct import Direct
from bdfr.site_downloaders.erome import Erome
from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfr.site_downloaders.flickr import Flickr
from bdfr.site_downloaders.gallery import Gallery
from bdfr.site_downloaders.gfycat import Gfycat
from bdfr.site_downloaders.imgchest import Imgchest
from bdfr.site_downloaders.imgur import Imgur
from bdfr.site_downloaders.pornhub import PornHub
from bdfr.site_downloaders.redgifs import Redgifs
@ -27,7 +29,7 @@ class DownloadFactory:
sanitised_url = DownloadFactory.sanitise_url(url).lower()
if re.match(r"(i\.|m\.|o\.)?imgur", sanitised_url):
return Imgur
elif re.match(r"(i\.|thumbs\d\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
elif re.match(r"(i\.|thumbs\d{1,2}\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
return Redgifs
elif re.match(r"(thumbs\.|giant\.)?gfycat\.", sanitised_url):
return Gfycat
@ -37,12 +39,18 @@ class DownloadFactory:
return Direct
elif re.match(r"erome\.com.*", sanitised_url):
return Erome
elif re.match(r"catbox\.moe", sanitised_url):
return Catbox
elif re.match(r"delayforreddit\.com", sanitised_url):
return DelayForReddit
elif re.match(r"flickr\.com", sanitised_url) or re.match(r"flic\.kr", sanitised_url):
return Flickr
elif re.match(r"reddit\.com/gallery/.*", sanitised_url):
return Gallery
elif re.match(r"patreon\.com.*", sanitised_url):
return Gallery
elif re.match(r"imgchest\.com/p/", sanitised_url):
return Imgchest
elif re.match(r"reddit\.com/r/", sanitised_url):
return SelfPost
elif re.match(r"(m\.)?youtu\.?be", sanitised_url):
@ -83,7 +91,7 @@ class DownloadFactory:
"php3",
"xhtml",
)
if re.match(rf'(?i).*/.*\.({"|".join(web_extensions)})$', url):
if re.match(rf"(?i).*/.*\.({'|'.join(web_extensions)})$", url):
return True
else:
return False


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import re
@ -18,7 +17,7 @@ logger = logging.getLogger(__name__)
class Erome(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
@ -38,7 +37,7 @@ class Erome(BaseDownloader):
def _get_links(url: str) -> set[str]:
page = Erome.retrieve_url(url)
soup = bs4.BeautifulSoup(page.text, "html.parser")
front_images = soup.find_all("img", attrs={"class": "lasyload"})
front_images = soup.find_all("img", attrs={"class": "img-front"})
out = [im.get("data-src") for im in front_images]
videos = soup.find_all("source")


@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from abc import ABC, abstractmethod


@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
@ -16,8 +15,8 @@ logger = logging.getLogger(__name__)
class YtdlpFallback(BaseFallbackDownloader, Youtube):
def __init__(self, post: Submission):
super(YtdlpFallback, self).__init__(post)
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
out = Resource(


@ -0,0 +1,129 @@
import json
import re
from typing import Optional
from bs4 import BeautifulSoup
from cachetools import TTLCache, cached
from praw.models import Submission
from bdfr.exceptions import ResourceNotFound, SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
class Flickr(BaseDownloader):
def __init__(self, post: Submission) -> None:
super().__init__(post)
self.raw_data = {}
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
links = self._get_data(self.post.url)
if not links:
raise SiteDownloaderError("Flickr could not find any images to download")
return [Resource(self.post, link, Resource.retry_download(link)) for link in links]
@staticmethod
@cached(cache=TTLCache(maxsize=5, ttl=10260))
def _get_api_key() -> str:
key_regex = re.compile(r".*api_key=(\w*)(&.*)?")
res = Flickr.retrieve_url("https://www.flickr.com/services/api/response.json.html").text
elements = BeautifulSoup(res, "html.parser")
links = elements.find_all("a", href=True, string="here")
return key_regex.search(str(links[0])).group(1)
@staticmethod
def _get_ids(link: str) -> str:
flickr_regex = re.compile(r".*/photos/(?P<user>\d*@\D\d*|\w*)/(?:albums/(?P<album>\d*)|(?P<photo>\d*))")
try:
flickr_id = flickr_regex.search(link).group("photo")
if not flickr_id:
flickr_id = flickr_regex.search(link).group("album")
user = flickr_regex.search(link).group("user")
except AttributeError:
raise SiteDownloaderError(f"Could not extract Flickr ID from {link}")
return user, flickr_id
@staticmethod
def _construct_direct_link(image_dict: json) -> str:
image_id = image_dict["photo"]["id"]
secret = image_dict["photo"]["secret"]
server = image_dict["photo"]["server"]
user = image_dict["photo"]["owner"]["nsid"]
originalsecret = None
originalformat = None  # not every photo exposes the original secret/format
if "originalsecret" in image_dict["photo"]:
originalsecret = image_dict["photo"]["originalsecret"]
if "originalformat" in image_dict["photo"]:
originalformat = image_dict["photo"]["originalformat"]
if image_dict["photo"]["media"] == "video":
if originalsecret:
return Flickr.retrieve_url(
f"https://flickr.com/photos/{user}/{image_id}/play/orig/{originalsecret}/",
).url
try:
return Flickr.retrieve_url(f"https://flickr.com/photos/{user}/{image_id}/play/1080p/{secret}/").url
except ResourceNotFound:
try:
return Flickr.retrieve_url(f"https://flickr.com/photos/{user}/{image_id}/play/720p/{secret}/").url
except ResourceNotFound:
try:
return Flickr.retrieve_url(
f"https://flickr.com/photos/{user}/{image_id}/play/360p/{secret}/",
).url
except ResourceNotFound:
raise SiteDownloaderError("Could not find correct video from Flickr")
if originalsecret:
return f"https://live.staticflickr.com/{server}/{image_id}_{originalsecret}_o.{originalformat}"
return f"https://live.staticflickr.com/{server}/{image_id}_{secret}_b.jpg"
@staticmethod
def _get_album_links(album_dict: dict, api_string: str) -> list:
out = []
for photo in album_dict["photoset"]["photo"]:
res = Flickr.retrieve_url(f"{api_string}method=flickr.photos.getInfo&photo_id={photo['id']}")
image_dict = json.loads(res.text)
out.append(Flickr._construct_direct_link(image_dict))
return out
@staticmethod
def _get_user_id(user: str, api_string: str) -> str:
try:
res = Flickr.retrieve_url(
f"{api_string}method=flickr.urls.lookupUser&url=https://flickr.com/photos/{user}",
).text
return json.loads(res)["user"]["id"]
except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Could not parse flickr user ID from API: {e}")
@staticmethod
def _expand_link(link: str) -> str:
return Flickr.retrieve_url(link).url
@staticmethod
def _get_data(link: str) -> list:
if ("/gp/" in link) or ("flic.kr" in link):
link = Flickr._expand_link(link)
user, flickr_id = Flickr._get_ids(link)
api_key = Flickr._get_api_key()
api_string = f"https://www.flickr.com/services/rest/?api_key={api_key}&format=json&nojsoncallback=1&"
album = False
if "/albums/" in link:
if "@" not in user:
user = Flickr._get_user_id(user, api_string)
api = f"{api_string}method=flickr.photosets.getPhotos&photoset_id={flickr_id}&user_id={user}"
album = True
else:
api = f"{api_string}method=flickr.photos.getInfo&photo_id={flickr_id}"
res = Flickr.retrieve_url(api)
try:
image_dict = json.loads(res.text)
except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Could not parse received response as JSON: {e}")
image_dict = (
Flickr._get_album_links(image_dict, api_string) if album else [Flickr._construct_direct_link(image_dict)]
)
return image_dict

View file

@ -1,10 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
import requests
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
@ -16,7 +14,7 @@ logger = logging.getLogger(__name__)
class Gallery(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
@ -42,8 +40,7 @@ class Gallery(BaseDownloader):
possible_extensions = (".jpg", ".png", ".gif", ".gifv", ".jpeg")
for extension in possible_extensions:
test_url = f"https://i.redd.it/{image_id}{extension}"
response = requests.head(test_url)
if response.status_code == 200:
if Gallery.head_url(test_url).status_code == 200:
out.append(test_url)
break
return out

View file

@ -1,11 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from typing import Optional
from bs4 import BeautifulSoup
from cachetools import TTLCache, cached
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
@ -15,12 +14,26 @@ from bdfr.site_downloaders.redgifs import Redgifs
class Gfycat(Redgifs):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
return super().find_resources(authenticator)
@staticmethod
@cached(cache=TTLCache(maxsize=5, ttl=3420))
def _get_auth_token() -> str:
headers = {
"content-type": "text/plain;charset=UTF-8",
"host": "weblogin.gfycat.com",
"origin": "https://gfycat.com",
}
payload = {"access_key": "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"}
token = json.loads(
Gfycat.post_url("https://weblogin.gfycat.com/oauth/webtoken", headers=headers, payload=payload).text
)["access_token"]
return token
@staticmethod
def _get_link(url: str) -> set[str]:
gfycat_id = re.match(r".*/(.*?)(?:/?|-.*|\..{3,4})$", url).group(1)
@ -28,18 +41,33 @@ class Gfycat(Redgifs):
response = Gfycat.retrieve_url(url)
if re.search(r"(redgifs|gifdeliverynetwork)", response.url):
url = url.lower() # Fixes error with old gfycat/redgifs links
url = url.lower()
return Redgifs._get_link(url)
soup = BeautifulSoup(response.text, "html.parser")
content = soup.find("script", attrs={"data-react-helmet": "true", "type": "application/ld+json"})
auth_token = Gfycat._get_auth_token()
if not auth_token:
raise SiteDownloaderError("Unable to retrieve Gfycat API token")
headers = {
"referer": "https://gfycat.com/",
"origin": "https://gfycat.com",
"content-type": "application/json",
"Authorization": f"Bearer {auth_token}",
}
content = Gfycat.retrieve_url(f"https://api.gfycat.com/v1/gfycats/{gfycat_id}", headers=headers)
if content is None:
raise SiteDownloaderError("Could not read the API source")
try:
out = json.loads(content.contents[0])["video"]["contentUrl"]
response_json = json.loads(content.text)
except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Received data was not valid JSON: {e}")
try:
out = response_json["gfyItem"]["mp4Url"]
except (IndexError, KeyError, AttributeError) as e:
raise SiteDownloaderError(f"Failed to download Gfycat link {url}: {e}")
except json.JSONDecodeError as e:
raise SiteDownloaderError(f"Did not receive valid JSON data: {e}")
return {
out,
}

View file

@ -0,0 +1,35 @@
import logging
from typing import Optional
import bs4
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
from bdfr.resource import Resource
from bdfr.site_authenticator import SiteAuthenticator
from bdfr.site_downloaders.base_downloader import BaseDownloader
logger = logging.getLogger(__name__)
class Imgchest(BaseDownloader):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
links = self._get_links(self.post.url)
if not links:
raise SiteDownloaderError("Imgchest parser could not find any links")
links = [Resource(self.post, link, Resource.retry_download(link)) for link in links]
return links
@staticmethod
def _get_links(url: str) -> set[str]:
page = Imgchest.retrieve_url(url)
soup = bs4.BeautifulSoup(page.text, "html.parser")
album_div = soup.find("div", attrs={"id": "post-images"})
images = album_div.find_all("img")
out = [im.get("src") for im in images]
videos = album_div.find_all("source")
out.extend([vid.get("src") for vid in videos])
return set(out)

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
@ -14,7 +13,7 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader
class Imgur(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
self.raw_data = {}
@ -36,18 +35,20 @@ class Imgur(BaseDownloader):
return out
@staticmethod
def _get_data(link: str) -> dict:
def _get_id(link: str) -> str:
try:
if link.endswith("/"):
link = link.removesuffix("/")
if re.search(r".*/(.*?)(gallery/|a/)", link):
imgur_id = re.match(r".*/(?:gallery/|a/)(.*?)(?:/.*)?$", link).group(1)
link = f"https://api.imgur.com/3/album/{imgur_id}"
else:
imgur_id = re.match(r".*/(.*?)(?:_d)?(?:\..{0,})?$", link).group(1)
link = f"https://api.imgur.com/3/image/{imgur_id}"
imgur_id = re.search(r"imgur\.com/(?:a/|gallery/)?([a-zA-Z0-9]+)", link).group(1)
except AttributeError:
raise SiteDownloaderError(f"Could not extract Imgur ID from {link}")
return imgur_id
@staticmethod
def _get_data(link: str) -> dict:
imgur_id = Imgur._get_id(link)
if re.search(r"/(gallery|a)/", link):
api = f"https://api.imgur.com/3/album/{imgur_id}"
else:
api = f"https://api.imgur.com/3/image/{imgur_id}"
headers = {
"referer": "https://imgur.com/",
@ -55,7 +56,7 @@ class Imgur(BaseDownloader):
"content-type": "application/json",
"Authorization": "Client-ID 546c25a59c58ad7",
}
res = Imgur.retrieve_url(link, headers=headers)
res = Imgur.retrieve_url(api, headers=headers)
try:
image_dict = json.loads(res.text)

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
class PornHub(Youtube):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:

View file

@ -1,11 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import re
from typing import Optional
import requests
from cachetools import TTLCache, cached
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
@ -15,21 +14,26 @@ from bdfr.site_downloaders.base_downloader import BaseDownloader
class Redgifs(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
media_urls = self._get_link(self.post.url)
return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]
@staticmethod
@cached(cache=TTLCache(maxsize=5, ttl=82080))
def _get_auth_token() -> str:
token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
return token
@staticmethod
def _get_id(url: str) -> str:
try:
if url.endswith("/"):
url = url.removesuffix("/")
redgif_id = re.match(r".*/(.*?)(?:#.*|\?.*|\..{0,})?$", url).group(1).lower()
if redgif_id.endswith("-mobile"):
redgif_id = redgif_id.removesuffix("-mobile")
redgif_id = re.sub(r"(-.*)$", "", redgif_id)
except AttributeError:
raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}")
return redgif_id
@ -38,7 +42,7 @@ class Redgifs(BaseDownloader):
def _get_link(url: str) -> set[str]:
redgif_id = Redgifs._get_id(url)
auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
auth_token = Redgifs._get_auth_token()
if not auth_token:
raise SiteDownloaderError("Unable to retrieve Redgifs API token")
@ -48,7 +52,6 @@ class Redgifs(BaseDownloader):
"content-type": "application/json",
"Authorization": f"Bearer {auth_token}",
}
content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gifs/{redgif_id}", headers=headers)
if content is None:
@ -62,15 +65,13 @@ class Redgifs(BaseDownloader):
out = set()
try:
if response_json["gif"]["type"] == 1: # type 1 is a video
if requests.get(response_json["gif"]["urls"]["hd"], headers=headers).ok:
if Redgifs.head_url(response_json["gif"]["urls"]["hd"], headers=headers).status_code == 200:
out.add(response_json["gif"]["urls"]["hd"])
else:
out.add(response_json["gif"]["urls"]["sd"])
elif response_json["gif"]["type"] == 2: # type 2 is an image
if response_json["gif"]["gallery"]:
content = Redgifs.retrieve_url(
f'https://api.redgifs.com/v2/gallery/{response_json["gif"]["gallery"]}'
)
if gallery := response_json["gif"]["gallery"]:
content = Redgifs.retrieve_url(f"https://api.redgifs.com/v2/gallery/{gallery}")
response_json = json.loads(content.text)
out = {p["urls"]["hd"] for p in response_json["gifs"]}
else:
@ -80,7 +81,4 @@ class Redgifs(BaseDownloader):
except (KeyError, AttributeError):
raise SiteDownloaderError("Failed to find JSON data in page")
# Update subdomain if old one is returned
out = {re.sub("thumbs2", "thumbs3", link) for link in out}
out = {re.sub("thumbs3", "thumbs4", link) for link in out}
return out

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
@ -14,7 +13,7 @@ logger = logging.getLogger(__name__)
class SelfPost(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import itertools
import logging
@ -7,7 +6,6 @@ import re
from typing import Optional
import bs4
import requests
from praw.models import Submission
from bdfr.exceptions import SiteDownloaderError
@ -19,7 +17,7 @@ logger = logging.getLogger(__name__)
class Vidble(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
@ -37,7 +35,7 @@ class Vidble(BaseDownloader):
if not re.search(r"vidble.com/(show/|album/|watch\?v)", url):
url = re.sub(r"/(\w*?)$", r"/show/\1", url)
page = requests.get(url)
page = Vidble.retrieve_url(url)
soup = bs4.BeautifulSoup(page.text, "html.parser")
content_div = soup.find("div", attrs={"id": "ContentPlaceHolder1_divContent"})
images = content_div.find_all("img")

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from typing import Optional
@ -15,7 +14,7 @@ logger = logging.getLogger(__name__)
class VReddit(Youtube):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import tempfile
@ -19,7 +18,7 @@ logger = logging.getLogger(__name__)
class Youtube(BaseDownloader):
def __init__(self, post: Submission):
def __init__(self, post: Submission) -> None:
super().__init__(post)
def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:

View file

@ -1,5 +1,5 @@
if (-not ([string]::IsNullOrEmpty($env:REDDIT_TOKEN)))
{
Copy-Item .\\bdfr\\default_config.cfg .\\test_config.cfg
Write-Output "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg
Copy-Item .\\bdfr\\default_config.cfg .\\tests\\test_config.cfg
Write-Output "`nuser_token = $env:REDDIT_TOKEN" >> ./tests/test_config.cfg
}

View file

@ -2,6 +2,6 @@
if [ -n "$REDDIT_TOKEN" ]
then
cp ./bdfr/default_config.cfg ./test_config.cfg
echo -e "\nuser_token = $REDDIT_TOKEN" >> ./test_config.cfg
fi
cp ./bdfr/default_config.cfg ./tests/test_config.cfg
echo -e "\nuser_token = $REDDIT_TOKEN" >> ./tests/test_config.cfg
fi

View file

@ -10,12 +10,16 @@ All communication on GitHub, Discord, email, or any other medium must conform to
**Before opening a new issue**, be sure that no issues regarding your problem already exist. If a similar issue exists, try to contribute to the issue.
**If you are asking a question** about the functioning of the BDFR or the interface, please use the discussions page. Bug reports are not the right medium for asking and answering questions, and the discussions page makes it much easier to discuss, answer, and save questions and responses for others going forwards.
### Bugs
When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug.
If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug.
Adding this information is **not optional**. If a bug report is opened without it, the developers cannot replicate the bug. The logs will be requested once; if they are not supplied, the issue will be closed due to lack of information.
### Feature requests
When requesting a feature or an enhancement, there are fewer requirements. However, please be clear about what you would like the BDFR to do and how the feature or enhancement would be used or would be useful to more people. It is crucial that the feature is justified: any feature request without a concrete reason for implementing it has very little chance of being accepted. Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers.
@ -69,13 +73,12 @@ python3 -m pip install -e .[dev]
The BDFR project uses several tools to manage the code of the project. These include:
- [black](https://github.com/psf/black)
- [flake8](https://github.com/john-hen/Flake8-pyproject)
- [isort](https://github.com/PyCQA/isort)
- [markdownlint (mdl)](https://github.com/markdownlint/markdownlint)
- [ruff](https://github.com/charliermarsh/ruff)
- [tox](https://tox.wiki/en/latest/)
- [pre-commit](https://github.com/pre-commit/pre-commit)
The first four tools are formatters. These change the code to the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project.
The first three tools are formatters. These change the code to the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project.
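Once the development dependencies are installed, the simplest way to apply all of them at once is through `pre-commit`; this is a sketch of a typical invocation rather than a prescribed workflow:

```bash
# Register the git hooks so the configured formatters and linters run on every commit
pre-commit install

# Or run every configured hook (black, ruff, markdownlint) across the whole repository once
pre-commit run --all-files
```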
The tool `tox` is used to run tests and tools on demand and has the following environments:

View file

@ -25,27 +25,27 @@ classifiers = [
dependencies = [
"appdirs>=1.4.4",
"beautifulsoup4>=4.10.0",
"cachetools>=5.3.0",
"click>=8.0.0",
"dict2xml>=1.7.0",
"praw>=7.2.0",
"pyyaml>=5.4.1",
"requests>=2.25.1",
"yt-dlp>=2022.11.11",
"requests>=2.28.2",
"yt-dlp>=2023.2.17",
]
dynamic = ["version"]
[tool.setuptools]
dynamic = {"version" = {attr = 'bdfr.__version__'}}
dynamic = {"version" = {attr = "bdfr.__version__"}}
packages = ["bdfr", "bdfr.archive_entry", "bdfr.site_downloaders", "bdfr.site_downloaders.fallback_downloaders",]
data-files = {"config" = ["bdfr/default_config.cfg",]}
[project.optional-dependencies]
dev = [
"black>=22.12.0",
"Flake8-pyproject>=1.2.2",
"isort>=5.11.4",
"pre-commit>=2.20.0",
"pytest>=7.1.0",
"black>=23.3.0",
"pre-commit>=3.0.4",
"pytest>=7.2.1",
"ruff>=0.0.272",
"tox>=3.27.1",
]
@ -64,7 +64,7 @@ bdfr-download = "bdfr.__main__:cli_download"
line-length = 120
[tool.flake8]
exclude = ["scripts"]
exclude = ["scripts/tests"]
max-line-length = 120
show-source = true
statistics = true
@ -86,3 +86,17 @@ markers = [
"slow: test is slow to run",
"authenticated: test requires an authenticated Reddit instance",
]
[tool.ruff]
exclude = ["scripts/tests"]
external = ["FURB123"]
flake8-annotations = {"allow-star-arg-any" = true, "suppress-dummy-args" = true}
flake8-pytest-style = {"parametrize-values-type" = "tuple", "mark-parentheses" = false}
format = "grouped"
ignore = ["ANN101","B904","N818","RET505"]
line-length = 120
per-file-ignores={"tests/*"=["ANN","S101","S105","S106"], "scripts/*"=["INP","S105","S106"]}
select = ["ANN","B","BLE","E","ERA","F","I","ICN","INP","ISC","N","PT","PTH","Q","RUF","S","TID","UP","W","YTT"]
show-fixes = true
show-source = true
target-version = "py39"

View file

@ -2,11 +2,11 @@
Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems.
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
- [Script to extract all failed download IDs](#extract-all-failed-ids)
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
- [Printing summary statistics for a run](#printing-summary-statistics)
- [Unsaving posts from your account after downloading](#unsave-posts-after-downloading)
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
- [Script to extract all failed download IDs](#extract-all-failed-ids)
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
- [Printing summary statistics for a run](#printing-summary-statistics)
- [Unsaving posts from your account after downloading](#unsave-posts-after-downloading)
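As a minimal sketch of that approach (the field positions are taken from the bundled scripts and the current log format, so treat them as assumptions rather than a stable interface), a single `grep`/`awk` pipeline is enough to pull submission IDs out of a logfile:

```bash
# Select the log lines that record a completed download, then print the field
# holding the submission ID (third field from the end in the current log format)
grep "Downloaded submission" log_output.txt | awk '{ print $(NF-2) }'
```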
## Extract all Successfully Downloaded IDs
@ -15,7 +15,7 @@ This script is contained [here](extract_successful_ids.sh) and will result in a
The script can be used with the following signature:
```bash
./extract_successful_ids.sh LOGFILE_LOCATION <OUTPUT_FILE>
./extract_successful_ids.sh LOGFILE_LOCATION >> <OUTPUT_FILE>
```
If no output redirection is supplied, the script will write the results to the standard output.
@ -32,7 +32,7 @@ An example of the script being run on a Linux machine is the following:
The script can be used with the following signature:
```bash
./extract_failed_ids.sh LOGFILE_LOCATION <OUTPUT_FILE>
./extract_failed_ids.sh LOGFILE_LOCATION >> <OUTPUT_FILE>
```
If no output redirection is supplied, the script will write the results to the standard output.
@ -72,19 +72,20 @@ Submissions from excluded subreddits: 0
## Unsave Posts After Downloading
[This script](unsaveposts.py) takes a list of submission IDs from a file named `successfulids` created with the `extract_successful_ids.sh` script and unsaves them from your account. To make it work you will need to make a user script in your reddit profile like this:
- Fill in the username and password fields in the script. Make sure you keep the quotes around the fields.
- Go to https://old.reddit.com/prefs/apps/
- Click on `Develop an app` at the bottom.
- Make sure you select a `script` not a `web app`.
- Name it `Unsave Posts`.
- Fill in the `Redirect URI` field with `127.0.0.0`.
- Save it.
- Fill in the `client_id` and `client_secret` fields on the script. The client ID is the 14 character string under the name you gave your script. .It'll look like a bunch of random characters like this: pspYLwDoci9z_A. The client secret is the longer string next to "secret". Again keep the quotes around the fields.
- Fill in the username and password fields in the script. Make sure you keep the quotes around the fields.
- Go to <https://old.reddit.com/prefs/apps/>
- Click on `Develop an app` at the bottom.
- Make sure you select a `script` not a `web app`.
- Name it `Unsave Posts`.
- Fill in the `Redirect URI` field with `127.0.0.0`.
- Save it.
- Fill in the `client_id` and `client_secret` fields on the script. The client ID is the 14 character string under the name you gave your script. It'll look like a bunch of random characters like this: pspYLwDoci9z_A. The client secret is the longer string next to "secret". Again keep the quotes around the fields.
Now the script is ready to run. Just execute it like this:
```bash
python3.9 -m bdfr download DOWNLOAD_DIR --authenticate --user me --saved --log LOGFILE_LOCATION
bdfr download DOWNLOAD_DIR --authenticate --user me --saved --log LOGFILE_LOCATION
./extract_successful_ids.sh LOGFILE_LOCATION > successfulids
./unsaveposts.py
```

View file

@ -1,21 +1,13 @@
if (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
else {
Write-Host "CANNOT FIND LOG FILE"
if (($args[0] -eq $null) -or -Not (Test-Path -Path $args[0] -PathType Leaf)) {
Write-Output "CANNOT FIND LOG FILE"
Exit 1
}
if ($null -ne $args[1]) {
$output=$args[1]
Write-Host "Outputting IDs to $output"
}
else {
$output="./failed.txt"
elseif (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output
Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output
Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output
Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output
Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) }
Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 }
Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) }
Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 }
Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 }

View file

@ -1,16 +1,16 @@
#!/bin/bash
if [ -e "$1" ]; then
if [ -e "$1" ] && [ -f "$1" ]; then
file="$1"
else
echo 'CANNOT FIND LOG FILE'
echo "CANNOT FIND LOG FILE"
exit 1
fi
{
grep 'Could not download submission' "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ;
grep 'Failed to download resource' "$file" | awk '{ print $15 }' ;
grep 'failed to download submission' "$file" | awk '{ print $14 }' | rev | cut -c 2- | rev ;
grep 'Failed to write file' "$file" | awk '{ print $14 }' ;
grep 'skipped due to disabled module' "$file" | awk '{ print $9 }' ;
grep "Could not download submission" "$file" | awk '{ print $12 }' | rev | cut -c 2- | rev ;
grep "Failed to download resource" "$file" | awk '{ print $15 }' ;
grep "failed to download submission" "$file" | awk '{ print $14 }' | rev | cut -c 2- | rev ;
grep "Failed to write file" "$file" | awk '{ print $14 }' ;
grep "skipped due to disabled module" "$file" | awk '{ print $9 }' ;
}

View file

@ -1,21 +1,14 @@
if (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
else {
Write-Host "CANNOT FIND LOG FILE"
if (($args[0] -eq $null) -or -Not (Test-Path -Path $args[0] -PathType Leaf)) {
Write-Output "CANNOT FIND LOG FILE"
Exit 1
}
if ($null -ne $args[1]) {
$output=$args[1]
Write-Host "Outputting IDs to $output"
}
else {
$output="./successful.txt"
elseif (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output
Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 } >> $output
Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 } >> $output
Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 }
Select-String -Path $file -Pattern "Resource hash" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 }
Select-String -Path $file -Pattern "Download filter" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 }
Select-String -Path $file -Pattern "already exists, continuing" | ForEach-Object { -split $_.Line | Select-Object -Last 4 | Select-Object -SkipLast 3 }
Select-String -Path $file -Pattern "Hard link made" | ForEach-Object { -split $_.Line | Select-Object -Last 1 }
Select-String -Path $file -Pattern "filtered due to score" | ForEach-Object { -split $_.Line | Select-Object -Index 8 }

View file

@ -1,17 +1,17 @@
#!/bin/bash
if [ -e "$1" ]; then
if [ -e "$1" ] && [ -f "$1" ]; then
file="$1"
else
echo 'CANNOT FIND LOG FILE'
echo "CANNOT FIND LOG FILE"
exit 1
fi
{
grep 'Downloaded submission' "$file" | awk '{ print $(NF-2) }' ;
grep 'Resource hash' "$file" | awk '{ print $(NF-2) }' ;
grep 'Download filter' "$file" | awk '{ print $(NF-3) }' ;
grep 'already exists, continuing' "$file" | awk '{ print $(NF-3) }' ;
grep 'Hard link made' "$file" | awk '{ print $(NF) }' ;
grep 'filtered due to score' "$file" | awk '{ print $9 }'
grep "Downloaded submission" "$file" | awk '{ print $(NF-2) }' ;
grep "Resource hash" "$file" | awk '{ print $(NF-2) }' ;
grep "Download filter" "$file" | awk '{ print $(NF-3) }' ;
grep "already exists, continuing" "$file" | awk '{ print $(NF-3) }' ;
grep "Hard link made" "$file" | awk '{ print $(NF) }' ;
grep "filtered due to score" "$file" | awk '{ print $9 }' ;
}

View file

@ -1,17 +1,9 @@
if (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
else {
if (($args[0] -eq $null) -or -Not (Test-Path -Path $args[0] -PathType Leaf)) {
Write-Host "CANNOT FIND LOG FILE"
Exit 1
}
if ($null -ne $args[1]) {
$output=$args[1]
Write-Host "Outputting IDs to $output"
}
else {
$output="./successful.txt"
elseif (Test-Path -Path $args[0] -PathType Leaf) {
$file=$args[0]
}
Write-Host -NoNewline "Downloaded submissions: "

View file

@ -1,9 +1,9 @@
#!/bin/bash
if [ -e "$1" ]; then
if [ -e "$1" ] && [ -f "$1" ]; then
file="$1"
else
echo 'CANNOT FIND LOG FILE'
echo "CANNOT FIND LOG FILE"
exit 1
fi

View file

@ -1,2 +1,2 @@
[2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50]
[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1]
[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyz27 filtered due to score 16 > [1]

View file

@ -0,0 +1,39 @@
Describe "extract_failed_ids" {
It "fail run no args" {
(..\extract_failed_ids.ps1) | Should -Be "CANNOT FIND LOG FILE"
}
It "fail run no logfile" {
(..\extract_failed_ids.ps1 missing.txt) | Should -Be "CANNOT FIND LOG FILE"
}
It "fail no downloader module" {
$down_error = (..\extract_failed_ids.ps1 example_logfiles\failed_no_downloader.txt)
$down_error | Should -HaveCount 3
$down_error | Should -Contain "nxv3ea"
}
It "fail resource error" {
$res_error = (..\extract_failed_ids.ps1 example_logfiles\failed_resource_error.txt)
$res_error | Should -HaveCount 1
$res_error | Should -Contain "nxv3dt"
}
It "fail site downloader error" {
$site_error = (..\extract_failed_ids.ps1 example_logfiles\failed_sitedownloader_error.txt)
$site_error | Should -HaveCount 2
$site_error | Should -Contain "nxpn0h"
}
It "fail failed file write" {
$write_error = (..\extract_failed_ids.ps1 example_logfiles\failed_write_error.txt)
$write_error | Should -HaveCount 1
$write_error | Should -Contain "nnboza"
}
It "fail disabled module" {
$disabled = (..\extract_failed_ids.ps1 example_logfiles\failed_disabled_module.txt)
$disabled | Should -HaveCount 1
$disabled | Should -Contain "m2601g"
}
}

View file

@ -0,0 +1,45 @@
Describe "extract_successful_ids" {
It "fail run no args" {
(..\extract_successful_ids.ps1) | Should -Be "CANNOT FIND LOG FILE"
}
It "fail run no logfile" {
(..\extract_successful_ids.ps1 missing.txt) | Should -Be "CANNOT FIND LOG FILE"
}
It "success downloaded submission" {
$down_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_downloaded_submission.txt)
$down_success | Should -HaveCount 7
$down_success | Should -Contain "nn9cor"
}
It "success resource hash" {
$hash_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_resource_hash.txt)
$hash_success | Should -HaveCount 1
$hash_success | Should -Contain "n86jk8"
}
It "success download filter" {
$filt_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_download_filter.txt)
$filt_success | Should -HaveCount 3
$filt_success | Should -Contain "nxuxjy"
}
It "success already exists" {
$exist_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_already_exists.txt)
$exist_success | Should -HaveCount 3
$exist_success | Should -Contain "nxrq9g"
}
It "success hard link" {
$link_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_hard_link.txt)
$link_success | Should -HaveCount 1
$link_success | Should -Contain "nwnp2n"
}
It "success score filter" {
$score_success = (..\extract_successful_ids.ps1 example_logfiles\succeed_score_filter.txt)
$score_success | Should -HaveCount 2
$score_success | Should -Contain "ljyz27"
}
}

View file

@ -7,11 +7,16 @@ teardown() {
rm -f failed.txt
}
@test "fail run no logfile" {
@test "fail run no args" {
run ../extract_failed_ids.sh
assert_failure
}
@test "fail run no logfile" {
run ../extract_failed_ids.sh ./missing.txt
assert_failure
}
@test "fail no downloader module" {
run ../extract_failed_ids.sh ./example_logfiles/failed_no_downloader.txt
echo "$output" > failed.txt

View file

@ -7,6 +7,16 @@ teardown() {
rm -f successful.txt
}
@test "fail run no args" {
run ../extract_successful_ids.sh
assert_failure
}
@test "fail run no logfile" {
run ../extract_successful_ids.sh ./missing.txt
assert_failure
}
@test "success downloaded submission" {
run ../extract_successful_ids.sh ./example_logfiles/succeed_downloaded_submission.txt
echo "$output" > successful.txt

View file

@ -1,6 +1,6 @@
#! /usr/bin/env python3.9
'''
This script takes a list of submission IDs from a file named "successfulids" created with the
#!/usr/bin/env python3
"""
This script takes a list of submission IDs from a file named "successfulids" created with the
"extract_successful_ids.sh" script and unsaves them from your account. To make it work you must
fill in the username and password fields below. Make sure you keep the quotes around the fields.
You'll need to make a "user script" in your reddit profile to run this.
@ -14,12 +14,18 @@ The client ID is the 14 character string under the name you gave your script.
It'll look like a bunch of random characters like this: pspYLwDoci9z_A
The client secret is the longer string next to "secret".
Replace those two fields below. Again keep the quotes around the fields.
'''
"""
import praw
from pathlib import Path
try:
r= praw.Reddit(
import praw
import prawcore.exceptions
except ImportError:
print("Please install PRAW")
try:
reddit = praw.Reddit(
client_id="CLIENTID",
client_secret="CLIENTSECRET",
password="USERPASSWORD",
@ -27,14 +33,15 @@ try:
username="USERNAME",
)
with open("successfulids", "r") as f:
for item in f:
r.submission(id = item.strip()).unsave()
with Path("successfulids").open() as id_file:
for item in id_file:
reddit.submission(id=item.strip()).unsave()
except:
print("Something went wrong. Did you install PRAW? Did you change the user login fields?")
except FileNotFoundError:
print("ID file not found")
except prawcore.exceptions.ResponseException:
print("Something went wrong. Did you change the user login fields?")
else:
print("Done! Thanks for playing!")

View file

@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

View file

@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import praw
import pytest

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import praw
import pytest

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import configparser
import socket

View file

@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import shutil
@ -29,7 +28,8 @@ def create_basic_args_for_archive_runner(test_args: list[str], run_path: Path):
str(Path(run_path, "test_config.cfg")),
"--log",
str(Path(run_path, "test_log.txt")),
] + test_args
*test_args,
]
return out

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import shutil
from pathlib import Path
@ -28,7 +27,8 @@ def create_basic_args_for_cloner_runner(test_args: list[str], tmp_path: Path):
str(Path(tmp_path, "test_config.cfg")),
"--log",
str(Path(tmp_path, "test_log.txt")),
] + test_args
*test_args,
]
return out

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import shutil
from pathlib import Path
from sys import platform
from unittest.mock import MagicMock, patch
import prawcore
@ -28,7 +28,8 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
str(Path(run_path, "test_config.cfg")),
"--log",
str(Path(run_path, "test_log.txt")),
] + test_args
*test_args,
]
return out
@ -160,6 +161,7 @@ def test_cli_download_multireddit_nonexistent(test_args: list[str], tmp_path: Pa
"test_args",
(
["--user", "djnish", "--submitted", "--user", "FriesWithThat", "-L", 10],
["--user", "me", "--downvoted", "--authenticate", "-L", 10],
["--user", "me", "--upvoted", "--authenticate", "-L", 10],
["--user", "me", "--saved", "--authenticate", "-L", 10],
["--user", "me", "--submitted", "--authenticate", "-L", 10],
@ -186,7 +188,7 @@ def test_cli_download_user_data_bad_me_unauthenticated(test_args: list[str], tmp
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert 'To use "me" as a user, an authenticated Reddit instance must be used' in result.output
assert "To use 'me' as a user, an authenticated Reddit instance must be used" in result.output
@pytest.mark.online
@ -218,7 +220,7 @@ def test_cli_download_download_filters(test_args: list[str], tmp_path: Path):
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert any((string in result.output for string in ("Download filter removed ", "filtered due to URL")))
assert any(string in result.output for string in ("Download filter removed ", "filtered due to URL"))
@pytest.mark.online
@ -426,6 +428,7 @@ def test_cli_download_user_reddit_server_error(test_args: list[str], response: i
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
@pytest.mark.skipif(platform == "darwin", reason="Test hangs on macos github")
@pytest.mark.parametrize(
"test_args",
(
@ -440,3 +443,17 @@ def test_cli_download_explicit_filename_restriction_scheme(test_args: list[str],
assert result.exit_code == 0
assert "Downloaded submission" in result.output
assert "Forcing Windows-compatible filenames" in result.output
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
@pytest.mark.parametrize("test_args", (["--link", "ehqt2g", "--link", "ehtuv8", "--no-dupes"],))
def test_cli_download_no_empty_dirs(test_args: list[str], tmp_path: Path):
runner = CliRunner()
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
assert "downloaded elsewhere" in result.output
assert Path(tmp_path, "EmpireDidNothingWrong").exists()
assert not Path(tmp_path, "StarWarsEU").exists()

View file

@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

View file

@ -1,2 +1 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock
@ -15,7 +14,7 @@ from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallb
("test_url", "expected"),
(
("https://www.reddit.com/r/specializedtools/comments/n2nw5m/bamboo_splitter/", True),
("https://www.youtube.com/watch?v=P19nvJOmqCc", True),
("https://www.youtube.com/watch?v=DWUbA501CO4", True),
("https://www.example.com/test", False),
("https://milesmatrix.bandcamp.com/album/la-boum/", False),
("https://v.redd.it/dlr54z8p182a1", True),

View file

@ -0,0 +1,59 @@
from unittest.mock import Mock
import pytest
from bdfr.resource import Resource
from bdfr.site_downloaders.catbox import Catbox
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected"),
(
(
"https://catbox.moe/c/vel5eg",
{
"https://files.catbox.moe/h2dx9k.gif",
"https://files.catbox.moe/bc83lg.png",
"https://files.catbox.moe/aq3m2a.jpeg",
"https://files.catbox.moe/yfk8r7.jpeg",
"https://files.catbox.moe/34ofbz.png",
"https://files.catbox.moe/xx4lcw.mp4",
"https://files.catbox.moe/xocd6t.mp3",
},
),
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Catbox.get_links(test_url)
assert results == expected
@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
(
"https://catbox.moe/c/vel5eg",
{
"014762b38e280ef3c0d000cc5f2aa386",
"85799edf12e20876f37286784460ad1b",
"c71b88c4230aa3aaad52a644fb709737",
"f40cffededd1929726d9cd265cc42c67",
"bda1f646c49607183c2450441f2ea6e8",
"21b48729bf9be7884999442b73887eed",
"0ec327259733a8276c207cc6e1b001ad",
},
),
),
)
def test_download_resources(test_url: str, expected_hashes: set[str]):
mock_download = Mock()
mock_download.url = test_url
downloader = Catbox(mock_download)
results = downloader.find_resources()
assert all(isinstance(res, Resource) for res in results)
[res.download() for res in results]
hashes = {res.hash.hexdigest() for res in results}
assert hashes == set(expected_hashes)

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import Mock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import Mock
@ -14,10 +13,7 @@ from bdfr.site_downloaders.direct import Direct
("test_url", "expected_hash"),
(
("https://i.redd.it/q6ebualjxzea1.jpg", "6ec154859c777cb401132bb991cb3635"),
(
"https://file-examples.com/wp-content/uploads/2017/11/file_example_MP3_700KB.mp3",
"3caa342e241ddb7d76fd24a834094101",
),
("https://filesamples.com/samples/audio/mp3/sample3.mp3", "d30a2308f188cbb11d74cf20c357891c"),
),
)
def test_download_resource(test_url: str, expected_hash: str):

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import praw
import pytest

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from unittest.mock import MagicMock
@ -13,17 +12,23 @@ from bdfr.site_downloaders.erome import Erome
@pytest.mark.parametrize(
("test_url", "expected_urls"),
(
("https://www.erome.com/a/vqtPuLXh", (r"https://[a-z]\d+.erome.com/\d{3}/vqtPuLXh/KH2qBT99_480p.mp4",)),
(
"https://www.erome.com/a/ORhX0FZz",
"https://www.erome.com/a/vqtPuLXh", # Video
(r"https://[a-z]\d+.erome.com/\d{3}/vqtPuLXh/KH2qBT99_480p.mp4",),
),
(
"https://www.erome.com/a/9E50Xkb6", # Image album
(
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9IYQocM9_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/9eEDc8xm_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/EvApC7Rp_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/LruobtMs_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/TJNmSUU5_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/X11Skh6Z_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{3}/ORhX0FZz/bjlTkpn7_480p.mp4",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/hUpc1d21.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/3zZF7uv4.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/h6C03hNq.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/AHQuZh9j.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/Ram0NmDU.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/dY82guy1.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/3x8bp9lF.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/lxyFSUMQ.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/vPIb29UR.jpeg",
r"https://[a-z]\d+.erome.com/\d{4}/9E50Xkb6/w1BJtyh5.jpeg",
),
),
),
@ -38,8 +43,9 @@ def test_get_link(test_url: str, expected_urls: tuple[str]):
@pytest.mark.parametrize(
("test_url", "expected_hashes_len"),
(
("https://www.erome.com/a/vqtPuLXh", 1),
("https://www.erome.com/a/4tP3KI6F", 1),
("https://www.erome.com/a/vqtPuLXh", 1), # Video
("https://www.erome.com/a/4tP3KI6F", 1), # Video
("https://www.erome.com/a/9E50Xkb6", 10), # Image album
),
)
def test_download_resource(test_url: str, expected_hashes_len: int):

View file

@ -0,0 +1,92 @@
from unittest.mock import Mock
import pytest
from bdfr.resource import Resource
from bdfr.site_downloaders.flickr import Flickr
@pytest.mark.online
def test_key_cache():
key1 = Flickr._get_api_key()
key2 = Flickr._get_api_key()
assert key1 == key2
@pytest.mark.parametrize(
("test_url", "expected_user", "expected_id"),
(
("https://www.flickr.com/photos/137434519@N08/33635695603", "137434519@N08", "33635695603"), # Single photo
(
"https://www.flickr.com/photos/63215229@N04/albums/72157644975251416", # Album
"63215229@N04",
"72157644975251416",
),
),
)
def test_get_ids(test_url: str, expected_user: str, expected_id: str):
user, f_id = Flickr._get_ids(test_url)
assert user == expected_user
assert f_id == expected_id
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected_url"),
(
(
"https://www.flickr.com/gp/137434519@N08/83Q029", # /gp/ link
"https://www.flickr.com/photos/137434519@N08/33635695603/",
),
("https://flic.kr/p/2k5E4mv", "https://www.flickr.com/photos/129756120@N03/50592162657/"), # flic.kr link
),
)
def test_expand_url(test_url: str, expected_url: str):
link = Flickr._expand_link(test_url)
assert link == expected_url
@pytest.mark.online
@pytest.mark.parametrize(
("test_id", "expected_user"),
(("buta_suneo", "63215229@N04"),), # username to user ID
)
def test_get_user_id(test_id: str, expected_user: str):
api_key = Flickr._get_api_key()
api_string = f"https://www.flickr.com/services/rest/?api_key={api_key}&format=json&nojsoncallback=1&"
user = Flickr._get_user_id(test_id, api_string)
assert user == expected_user
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
("https://www.flickr.com/gp/137434519@N08/83Q029", {"b3f4e6fca1cc0ffca55368e4f94f9b5f"}), # Single photo
("https://flic.kr/p/2k5E4mv", {"75ae4f5e70b9b7525041b1dcc852d144"}), # Single photo
(
"http://www.flickr.com/photos/thekog/6886709962/", # Single photo
{"a4a64e606368f7b5a1995c84e15463e9"},
),
(
"https://www.flickr.com/photos/ochre_jelly/albums/72157708743730852", # Album
{
"3c442ffdadff7b02cb7a133865339a26",
"8023fc0e76f891d585871ddd64edac23",
"9bbedad97b59ec51cb967da507351912",
"a86fcd3458620eec4cb3606882d11e9a",
"addb62d788c542383d1ad47914bbefb3",
},
),
("https://www.flickr.com/photos/eerokiuru/52902303276", {"adfd8175f398f87744285da2591c8215"}), # Single video
),
)
def test_download_resource(test_url: str, expected_hashes: set[str]):
mock_submission = Mock()
mock_submission.url = test_url
test_site = Flickr(mock_submission)
results = test_site.find_resources()
assert all(isinstance(res, Resource) for res in results)
[res.download() for res in results]
hashes = {res.hash.hexdigest() for res in results}
assert hashes == set(expected_hashes)

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import praw
import pytest

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import Mock
@ -9,18 +8,28 @@ from bdfr.resource import Resource
from bdfr.site_downloaders.gfycat import Gfycat
@pytest.mark.online
def test_auth_cache():
auth1 = Gfycat._get_auth_token()
auth2 = Gfycat._get_auth_token()
assert auth1 == auth2
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected_url"),
(
("https://gfycat.com/definitivecaninecrayfish", "https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4"),
("https://gfycat.com/dazzlingsilkyiguana", "https://giant.gfycat.com/DazzlingSilkyIguana.mp4"),
("https://gfycat.com/WearyComposedHairstreak", "https://thumbs4.redgifs.com/WearyComposedHairstreak.mp4"),
("https://gfycat.com/ComposedWholeBullfrog", "https://thumbs44.redgifs.com/ComposedWholeBullfrog.mp4"),
(
"https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif",
"https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4",
"https://thumbs44.redgifs.com/ComposedWholeBullfrog.mp4",
),
(
"https://giant.gfycat.com/ComposedWholeBullfrog.mp4",
"https://thumbs44.redgifs.com/ComposedWholeBullfrog.mp4",
),
("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4"),
),
)
def test_get_link(test_url: str, expected_url: str):
@ -34,7 +43,7 @@ def test_get_link(test_url: str, expected_url: str):
(
("https://gfycat.com/definitivecaninecrayfish", "48f9bd4dbec1556d7838885612b13b39"),
("https://gfycat.com/dazzlingsilkyiguana", "808941b48fc1e28713d36dd7ed9dc648"),
("https://gfycat.com/WearyComposedHairstreak", "5f82ba1ba23cc927c9fbb0c0421953a5"),
("https://gfycat.com/ComposedWholeBullfrog", "5292343665a13b5369d889d911ae284d"),
("https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif", "5292343665a13b5369d889d911ae284d"),
("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "5292343665a13b5369d889d911ae284d"),
),

View file

@ -0,0 +1,83 @@
from unittest.mock import Mock
import pytest
from bdfr.resource import Resource
from bdfr.site_downloaders.imgchest import Imgchest
@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected"),
(
(
"https://www.imgchest.com/p/ro24aogylj5", # Basic image album
{
"https://cdn.imgchest.com/files/jd7ogcgl5y9.jpg",
"https://cdn.imgchest.com/files/rj7kzcdv27m.jpg",
"https://cdn.imgchest.com/files/vmy2pc2pr7j.jpg",
"https://cdn.imgchest.com/files/xl7lxce967o.jpg",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj", # Image and video album
{
"https://cdn.imgchest.com/files/k46ac86kq7z.jpeg",
"https://cdn.imgchest.com/files/pyvdczlvayk.jpeg",
"https://cdn.imgchest.com/files/6yxkcvlrn7w.jpeg",
"https://cdn.imgchest.com/files/b49zce5wkyw.jpeg",
"https://cdn.imgchest.com/files/l4necb3kw4m.jpeg",
"https://cdn.imgchest.com/files/p7bwc3rx37n.mp4",
"https://cdn.imgchest.com/files/w7pjcbe587p.mp4",
"https://cdn.imgchest.com/files/d7ogcr95jy9.mp4",
"https://cdn.imgchest.com/files/j7kzc9r557m.mp4",
"https://cdn.imgchest.com/files/my2pc3wzl7j.mp4",
},
),
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Imgchest._get_links(test_url)
assert results == expected
@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
(
"https://www.imgchest.com/p/ro24aogylj5", # Basic image album
{
"91f1a5919b32af6cbf5c24528e83871c",
"c4969ac347fdcefbb6b2ec01c0be02ae",
"a9db23217974d8b78c84b463224f130a",
"6a0d0e28f02c2cdccff80f9973efbad3",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj", # Image and video album
{
"a4ea3f676c8a1cbca8e2faf70a031e1e",
"59db5f35f5969d638c4036a3a249b1e1",
"73ee75fe341022cd643431a4fb78be3d",
"6fe6f1239dd39f948b3abb583c310c7d",
"8e9b652c62b906ba54607c7fd8ce6d63",
"108b167b04830ce0a59c27415bb5ef86",
"05a063fe87fb010ca782c268d0bf90c5",
"5ef705919760684d54e082430f32551a",
"7ff437036cac57e04aaabcfd604ad2c8",
"d2e3eb303f3a605b2a8587f914b78c34",
},
),
),
)
def test_download_resources(test_url: str, expected_hashes: set[str]):
mock_download = Mock()
mock_download.url = test_url
downloader = Imgchest(mock_download)
results = downloader.find_resources()
assert all(isinstance(res, Resource) for res in results)
[res.download() for res in results]
hashes = {res.hash.hexdigest() for res in results}
assert hashes == set(expected_hashes)

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import Mock
@ -9,15 +8,41 @@ from bdfr.resource import Resource
from bdfr.site_downloaders.imgur import Imgur
@pytest.mark.parametrize(
("test_url", "expected"),
(
("https://imgur.com/a/xWZsDDP", "xWZsDDP"), # Gallery, /a/
("https://imgur.com/gallery/IjJJdlC", "IjJJdlC"), # Gallery, /gallery/
("https://imgur.com/gallery/IjJJdlC/", "IjJJdlC"), # Gallery, trailing /
("https://o.imgur.com/jZw9gq2.jpg", "jZw9gq2"), # Direct link, jpg, incorrect subdomain
("https://i.imgur.com/lFJai6i.gifv", "lFJai6i"), # Direct link, gifv
("https://i.imgur.com/ywSyILa.gifv?", "ywSyILa"), # Direct link, gifv, trailing ?
("https://imgur.com/ubYwpbk.GIFV", "ubYwpbk"), # No subdomain, uppercase gifv
("https://i.imgur.com/OGeVuAe.giff", "OGeVuAe"), # Direct link, incorrect extension
("https://i.imgur.com/OGeVuAe.gift", "OGeVuAe"), # Direct link, incorrect extension
("https://i.imgur.com/3SKrQfK.jpg?1", "3SKrQfK"), # Direct link, trainling ?1
("https://i.imgur.com/cbivYRW.jpg?3", "cbivYRW"), # Direct link, trailing ?3
("http://i.imgur.com/s9uXxlq.jpg?5.jpg", "s9uXxlq"), # Direct link, trailing ?5.jpg, http
("http://i.imgur.com/s9uXxlqb.jpg", "s9uXxlqb"), # Direct link, jpg, http
("https://i.imgur.com/2TtN68l_d.webp", "2TtN68l"), # Direct link, webp, _d thumbnail
("https://imgur.com/a/1qzfWtY/gifv", "1qzfWtY"), # Gallery, trailing filetype
("https://imgur.com/a/1qzfWtY/spqr", "1qzfWtY"), # Gallery, trailing non filetype
),
)
def test_get_id(test_url: str, expected: str):
result = Imgur._get_id(test_url)
assert result == expected
@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)),
("https://imgur.com/gallery/IjJJdlC", ("740b006cf9ec9d6f734b6e8f5130bdab",)),
("https://imgur.com/gallery/IjJJdlC/", ("740b006cf9ec9d6f734b6e8f5130bdab",)),
("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)), # Single image gallery
("https://imgur.com/gallery/IjJJdlC", ("740b006cf9ec9d6f734b6e8f5130bdab",)), # Single video gallery
(
"https://imgur.com/a/dcc84Gt",
"https://imgur.com/a/dcc84Gt", # Multiple image gallery
(
"cf1158e1de5c3c8993461383b96610cf",
"28d6b791a2daef8aa363bf5a3198535d",
@ -25,31 +50,26 @@ from bdfr.site_downloaders.imgur import Imgur
"029c475ce01b58fdf1269d8771d33913",
),
),
("https://i.imgur.com/j1CNCZY.gifv", ("ed63d7062bc32edaeea8b53f876a307c",)), # Direct video link
("https://i.imgur.com/uTvtQsw.gifv", ("46c86533aa60fc0e09f2a758513e3ac2",)), # Direct video link
(
"https://imgur.com/a/eemHCCK",
(
"9cb757fd8f055e7ef7aa88addc9d9fa5",
"b6cb6c918e2544e96fb7c07d828774b5",
"fb6c913d721c0bbb96aa65d7f560d385",
),
"https://i.imgur.com/OGeVuAe.giff", # Direct video link, incorrect extension
("77389679084d381336f168538793f218",),
),
("https://i.imgur.com/cbivYRW.jpg?3", ("7ec6ceef5380cb163a1d498c359c51fd",)), # Direct image link, trailing ?3
(
"http://i.imgur.com/s9uXxlq.jpg?5.jpg", # Direct image link, trailing ?5.jpg
("338de3c23ee21af056b3a7c154e2478f",),
),
("http://i.imgur.com/s9uXxlqb.jpg", ("338de3c23ee21af056b3a7c154e2478f",)), # Direct image link
(
"https://imgur.com/a/1qzfWtY/mp4", # Single video gallery, web filetype request
("65fbc7ba5c3ed0e3af47c4feef4d3735",),
),
(
"https://imgur.com/a/1qzfWtY/spqr", # Single video gallery, web filetype invalid
("65fbc7ba5c3ed0e3af47c4feef4d3735",),
),
("https://o.imgur.com/jZw9gq2.jpg", ("6d6ea9aa1d98827a05425338afe675bc",)),
("https://i.imgur.com/lFJai6i.gifv", ("01a6e79a30bec0e644e5da12365d5071",)),
("https://i.imgur.com/ywSyILa.gifv?", ("56d4afc32d2966017c38d98568709b45",)),
("https://imgur.com/ubYwpbk.GIFV", ("d4a774aac1667783f9ed3a1bd02fac0c",)),
("https://i.imgur.com/j1CNCZY.gifv", ("ed63d7062bc32edaeea8b53f876a307c",)),
("https://i.imgur.com/uTvtQsw.gifv", ("46c86533aa60fc0e09f2a758513e3ac2",)),
("https://i.imgur.com/OGeVuAe.giff", ("77389679084d381336f168538793f218",)),
("https://i.imgur.com/OGeVuAe.gift", ("77389679084d381336f168538793f218",)),
("https://i.imgur.com/3SKrQfK.jpg?1", ("aa299e181b268578979cad176d1bd1d0",)),
("https://i.imgur.com/cbivYRW.jpg?3", ("7ec6ceef5380cb163a1d498c359c51fd",)),
("http://i.imgur.com/s9uXxlq.jpg?5.jpg", ("338de3c23ee21af056b3a7c154e2478f",)),
("http://i.imgur.com/s9uXxlqb.jpg", ("338de3c23ee21af056b3a7c154e2478f",)),
("https://i.imgur.com/2TtN68l_d.webp", ("6569ab9ad9fa68d93f6b408f112dd741",)),
("https://imgur.com/a/1qzfWtY/gifv", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://imgur.com/a/1qzfWtY/mp4", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://imgur.com/a/1qzfWtY/spqr", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
("https://i.imgur.com/expO7Rc.gifv", ("e309f98158fc98072eb2ae68f947f421",)),
),
)
def test_find_resources(test_url: str, expected_hashes: list[str]):

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from unittest.mock import Mock
@ -10,6 +9,13 @@ from bdfr.resource import Resource
from bdfr.site_downloaders.redgifs import Redgifs
@pytest.mark.online
def test_auth_cache():
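# The token should be fetched once and reused: both calls are expected to return the same value.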
auth1 = Redgifs._get_auth_token()
auth2 = Redgifs._get_auth_token()
assert auth1 == auth2
@pytest.mark.parametrize(
("test_url", "expected"),
(
@ -19,6 +25,7 @@ from bdfr.site_downloaders.redgifs import Redgifs
("https://thumbs4.redgifs.com/DismalIgnorantDrongo.mp4", "dismalignorantdrongo"),
("https://thumbs4.redgifs.com/DismalIgnorantDrongo-mobile.mp4", "dismalignorantdrongo"),
("https://v3.redgifs.com/watch/newilliteratemeerkat#rel=user%3Atastynova", "newilliteratemeerkat"),
("https://thumbs46.redgifs.com/BabyishCharmingAidi-medium.jpg", "babyishcharmingaidi"),
),
)
def test_get_id(test_url: str, expected: str):
@ -75,6 +82,7 @@ def test_get_link(test_url: str, expected: set[str]):
"44fb28f72ec9a5cca63fa4369ab4f672",
},
),
("https://thumbs46.redgifs.com/BabyishCharmingAidi-medium.jpg", {"bf14b9f3d5b630cb5fd271661226f1af"}),
),
)
def test_download_resource(test_url: str, expected_hashes: set[str]):
@ -97,11 +105,6 @@ def test_download_resource(test_url: str, expected_hashes: set[str]):
{"FlippantMemorableBaiji-mobile.mp4"},
{"41a5fb4865367ede9f65fc78736f497a"},
),
(
"https://redgifs.com/watch/thirstyunfortunatewaterdragons",
{"thirstyunfortunatewaterdragons-mobile.mp4"},
{"1a51dad8fedb594bdd84f027b3cbe8af"},
),
(
"https://redgifs.com/watch/conventionalplainxenopterygii",
{"conventionalplainxenopterygii-mobile.mp4"},

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import praw
import pytest

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import Mock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock
@ -16,7 +15,7 @@ from bdfr.site_downloaders.youtube import Youtube
("test_url", "expected_hash"),
(
("https://www.youtube.com/watch?v=uSm2VDgRIUs", "2d60b54582df5b95ec72bb00b580d2ff"),
("https://www.youtube.com/watch?v=GcI7nxQj7HA", "5db0fc92a0a7fb9ac91e63505eea9cf0"),
("https://www.youtube.com/watch?v=NcA_j23HuDU", "26e6ca4849267e600ff474f4260c3b5b"),
),
)
def test_find_resources_good(test_url: str, expected_hash: str):

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pathlib import Path
from unittest.mock import MagicMock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
from pathlib import Path

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections.abc import Iterator
from datetime import datetime, timedelta
from pathlib import Path
from typing import Union
from unittest.mock import MagicMock
import praw
@ -38,21 +38,23 @@ def downloader_mock(args: Configuration):
return downloader_mock
def assert_all_results_are_submissions(result_limit: int, results: list[Iterator]) -> list:
def assert_all_results_are_submissions(result_limit: Union[int, None], results: list[Iterator]) -> list:
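# Flatten the result generators, check every item is a real praw Submission (not a mock), and, when a limit is given, that between one and result_limit items came back.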
results = [sub for res in results for sub in res]
assert all([isinstance(res, praw.models.Submission) for res in results])
assert not any([isinstance(m, MagicMock) for m in results])
if result_limit is not None:
assert len(results) == result_limit
assert len(results) > 0
assert len(results) <= result_limit
return results
def assert_all_results_are_submissions_or_comments(result_limit: int, results: list[Iterator]) -> list:
def assert_all_results_are_submissions_or_comments(result_limit: Union[int, None], results: list[Iterator]) -> list:
results = [sub for res in results for sub in res]
assert all([isinstance(res, (praw.models.Submission, praw.models.Comment)) for res in results])
assert not any([isinstance(m, MagicMock) for m in results])
if result_limit is not None:
assert len(results) == result_limit
assert len(results) > 0
assert len(results) <= result_limit
return results
@ -363,6 +365,7 @@ def test_get_user_submissions(test_user: str, limit: int, downloader_mock: Magic
@pytest.mark.parametrize(
"test_flag",
(
"downvoted",
"upvoted",
"saved",
),

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import re
from pathlib import Path

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import platform
import sys
@ -428,8 +427,8 @@ def test_multilevel_folder_scheme(
("test", "test"),
("😍", "😍"),
("test😍", "test😍"),
("test😍 ", "test😍 "),
("test😍 \\u2019", "test😍 "),
("test😍 ", "test😍 "), # noqa: RUF001
("test😍 \\u2019", "test😍 "), # noqa: RUF001
("Using that real good [1\\4]", "Using that real good [1\\4]"),
),
)
@ -443,8 +442,8 @@ def test_preserve_emojis(test_name_string: str, expected: str, submission: Magic
@pytest.mark.parametrize(
("test_string", "expected"),
(
("test \\u2019", "test "),
("My cat\\u2019s paws are so cute", "My cats paws are so cute"),
("test \\u2019", "test "), # noqa: RUF001
("My cat\\u2019s paws are so cute", "My cats paws are so cute"), # noqa: RUF001
),
)
def test_convert_unicode_escapes(test_string: str, expected: str):
@ -519,3 +518,19 @@ def test_name_submission(
results = test_formatter.format_resource_paths(test_resources, Path())
results = set([r[0].name for r in results])
assert results == expected_names
@pytest.mark.parametrize(
("test_filename", "test_ending", "expected_end"),
(
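# Each stem is 300 "A" characters ending in a dot, long enough to force truncation; every case should still end in ".mp4".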
("A" * 300 + ".", "_1.mp4", "A_1.mp4"),
("A" * 300 + ".", ".mp4", "A.mp4"),
("A" * 300 + ".", "mp4", "A.mp4"),
),
)
def test_shortened_file_name_ending(
test_filename: str, test_ending: str, expected_end: str, test_formatter: FileNameFormatter
):
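# The shortened name must keep the requested ending and still fit within the platform's maximum path length.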
result = test_formatter.limit_file_name_length(test_filename, test_ending, Path("."))
assert result.name.endswith(expected_end)
assert len(str(result)) <= FileNameFormatter.find_max_path_length()

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import configparser
from pathlib import Path
@ -34,7 +33,7 @@ def example_config() -> configparser.ConfigParser:
),
)
def test_check_scopes(test_scopes: set[str]):
OAuth2Authenticator._check_scopes(test_scopes)
OAuth2Authenticator._check_scopes(test_scopes, "fetch-scopes test")
@pytest.mark.parametrize(
@ -68,7 +67,7 @@ def test_split_scopes(test_scopes: str, expected: set[str]):
)
def test_check_scopes_bad(test_scopes: set[str]):
with pytest.raises(BulkDownloaderException):
OAuth2Authenticator._check_scopes(test_scopes)
OAuth2Authenticator._check_scopes(test_scopes, "fetch-scopes test")
def test_token_manager_read(example_config: configparser.ConfigParser):

View file

@ -1,5 +1,4 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from unittest.mock import MagicMock