Merge pull request #741 from aliparlakci/development
This commit is contained in:
commit
8c293a4684
13
.github/workflows/formatting_check.yml
vendored
Normal file
13
.github/workflows/formatting_check.yml
vendored
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
name: formatting_check
|
||||||
|
run-name: Check code formatting
|
||||||
|
on: [push, pull_request]
|
||||||
|
jobs:
|
||||||
|
formatting_check:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Install dependencies
|
||||||
|
run: sudo gem install mdl
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- uses: paolorechia/pox@v1.0.1
|
||||||
|
with:
|
||||||
|
tox_env: "format_check"
|
14
.github/workflows/publish.yml
vendored
14
.github/workflows/publish.yml
vendored
|
@ -11,25 +11,25 @@ jobs:
|
||||||
deploy:
|
deploy:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: '3.9'
|
python-version: '3.9'
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install setuptools wheel twine
|
pip install build setuptools wheel twine
|
||||||
- name: Build and publish
|
- name: Build and publish
|
||||||
env:
|
env:
|
||||||
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
|
||||||
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
|
||||||
run: |
|
run: |
|
||||||
python setup.py sdist bdist_wheel
|
python -m build
|
||||||
twine upload dist/*
|
twine upload dist/*
|
||||||
|
|
||||||
- name: Upload coverage report
|
- name: Upload dist folder
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: dist
|
name: dist
|
||||||
path: dist/
|
path: dist/
|
||||||
|
|
22
.github/workflows/test.yml
vendored
22
.github/workflows/test.yml
vendored
|
@ -3,8 +3,16 @@ name: Python Test
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [ master, development ]
|
branches: [ master, development ]
|
||||||
|
paths-ignore:
|
||||||
|
- "**.md"
|
||||||
|
- ".markdown_style.rb"
|
||||||
|
- ".mdlrc"
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [ master, development ]
|
branches: [ master, development ]
|
||||||
|
paths-ignore:
|
||||||
|
- "**.md"
|
||||||
|
- ".markdown_style.rb"
|
||||||
|
- ".mdlrc"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
|
@ -19,16 +27,16 @@ jobs:
|
||||||
python-version: 3.9
|
python-version: 3.9
|
||||||
ext: .ps1
|
ext: .ps1
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v3
|
||||||
- name: Setup Python
|
- name: Setup Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v4
|
||||||
with:
|
with:
|
||||||
python-version: ${{ matrix.python-version }}
|
python-version: ${{ matrix.python-version }}
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip flake8 pytest pytest-cov
|
python -m pip install --upgrade pip Flake8-pyproject pytest pytest-cov
|
||||||
pip install -r requirements.txt
|
pip install .
|
||||||
|
|
||||||
- name: Make configuration for tests
|
- name: Make configuration for tests
|
||||||
env:
|
env:
|
||||||
|
@ -38,14 +46,14 @@ jobs:
|
||||||
|
|
||||||
- name: Lint with flake8
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
flake8 . --select=E9,F63,F7,F82
|
||||||
|
|
||||||
- name: Test with pytest
|
- name: Test with pytest
|
||||||
run: |
|
run: |
|
||||||
pytest -m 'not slow' --verbose --cov=./bdfr/ --cov-report term:skip-covered --cov-report html
|
pytest -m 'not slow' --verbose --cov=./bdfr/ --cov-report term:skip-covered --cov-report html
|
||||||
|
|
||||||
- name: Upload coverage report
|
- name: Upload coverage report
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: coverage_report
|
name: coverage_report
|
||||||
path: htmlcov/
|
path: htmlcov/
|
||||||
|
|
4
.markdown_style.rb
Normal file
4
.markdown_style.rb
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
all
|
||||||
|
exclude_tag :line_length
|
||||||
|
rule 'MD007', :indent => 4
|
||||||
|
rule 'MD029', :style => 'ordered'
|
25
.pre-commit-config.yaml
Normal file
25
.pre-commit-config.yaml
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
# See https://pre-commit.com for more information
|
||||||
|
# See https://pre-commit.com/hooks.html for more hooks
|
||||||
|
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/psf/black
|
||||||
|
rev: 22.12.0
|
||||||
|
hooks:
|
||||||
|
- id: black
|
||||||
|
|
||||||
|
- repo: https://github.com/pycqa/isort
|
||||||
|
rev: 5.11.4
|
||||||
|
hooks:
|
||||||
|
- id: isort
|
||||||
|
name: isort (python)
|
||||||
|
|
||||||
|
- repo: https://github.com/pycqa/flake8
|
||||||
|
rev: 6.0.0
|
||||||
|
hooks:
|
||||||
|
- id: flake8
|
||||||
|
additional_dependencies: [Flake8-pyproject]
|
||||||
|
|
||||||
|
- repo: https://github.com/markdownlint/markdownlint
|
||||||
|
rev: v0.12.0
|
||||||
|
hooks:
|
||||||
|
- id: markdownlint
|
259
README.md
259
README.md
|
@ -1,8 +1,12 @@
|
||||||
# Bulk Downloader for Reddit
|
# Bulk Downloader for Reddit
|
||||||
|
|
||||||
[![PyPI version](https://img.shields.io/pypi/v/bdfr.svg)](https://pypi.python.org/pypi/bdfr)
|
[![PyPI Status](https://img.shields.io/pypi/status/bdfr?logo=PyPI)](https://pypi.python.org/pypi/bdfr)
|
||||||
[![PyPI downloads](https://img.shields.io/pypi/dm/bdfr)](https://pypi.python.org/pypi/bdfr)
|
[![PyPI version](https://img.shields.io/pypi/v/bdfr.svg?logo=PyPI)](https://pypi.python.org/pypi/bdfr)
|
||||||
|
[![PyPI downloads](https://img.shields.io/pypi/dm/bdfr?logo=PyPI)](https://pypi.python.org/pypi/bdfr)
|
||||||
|
[![AUR version](https://img.shields.io/aur/version/python-bdfr?logo=Arch%20Linux)](https://aur.archlinux.org/packages/python-bdfr)
|
||||||
[![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml)
|
[![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml)
|
||||||
|
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg?logo=Python)](https://github.com/psf/black)
|
||||||
|
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
|
||||||
|
|
||||||
This is a tool to download submissions or submission data from Reddit. It can be used to archive data or even crawl Reddit to gather research data. The BDFR is flexible and can be used in scripts if needed through an extensive command-line interface. [List of currently supported sources](#list-of-currently-supported-sources)
|
This is a tool to download submissions or submission data from Reddit. It can be used to archive data or even crawl Reddit to gather research data. The BDFR is flexible and can be used in scripts if needed through an extensive command-line interface. [List of currently supported sources](#list-of-currently-supported-sources)
|
||||||
|
|
||||||
|
@ -12,7 +16,9 @@ Included in this README are a few example Bash tricks to get certain behaviour.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it via pip with:
|
*Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement.
|
||||||
|
|
||||||
|
Then, you can install it via pip with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 -m pip install bdfr --upgrade
|
python3 -m pip install bdfr --upgrade
|
||||||
|
@ -21,10 +27,14 @@ python3 -m pip install bdfr --upgrade
|
||||||
or via [pipx](https://pypa.github.io/pipx) with:
|
or via [pipx](https://pypa.github.io/pipx) with:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 -m pipx install bdfr --upgrade
|
python3 -m pipx install bdfr
|
||||||
```
|
```
|
||||||
|
|
||||||
**To update BDFR**, run the above command again after the installation.
|
**To update BDFR**, run the above command again for pip or `pipx upgrade bdfr` for pipx installations.
|
||||||
|
|
||||||
|
**To check your version of BDFR**, run `bdfr --version`
|
||||||
|
|
||||||
|
**To install shell completions**, run `bdfr completions`
|
||||||
|
|
||||||
### AUR Package
|
### AUR Package
|
||||||
|
|
||||||
|
@ -109,167 +119,175 @@ would be equilavent to (take note that in YAML there is `file_scheme` instead of
|
||||||
bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn
|
bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn
|
||||||
```
|
```
|
||||||
|
|
||||||
In case when the same option is specified both in the YAML file and in as a command line argument, the command line argument takes prs
|
Any option that can be specified multiple times should be formatted like subreddit is above.
|
||||||
|
|
||||||
|
In case when the same option is specified both in the YAML file and in as a command line argument, the command line argument takes priority
|
||||||
|
|
||||||
## Options
|
## Options
|
||||||
|
|
||||||
The following options are common between both the `archive` and `download` commands of the BDFR.
|
The following options are common between both the `archive` and `download` commands of the BDFR.
|
||||||
|
|
||||||
- `directory`
|
- `directory`
|
||||||
- This is the directory to which the BDFR will download and place all files
|
- This is the directory to which the BDFR will download and place all files
|
||||||
- `--authenticate`
|
- `--authenticate`
|
||||||
- This flag will make the BDFR attempt to use an authenticated Reddit session
|
- This flag will make the BDFR attempt to use an authenticated Reddit session
|
||||||
- See [Authentication](#authentication-and-security) for more details
|
- See [Authentication](#authentication-and-security) for more details
|
||||||
- `--config`
|
- `--config`
|
||||||
- If the path to a configuration file is supplied with this option, the BDFR will use the specified config
|
- If the path to a configuration file is supplied with this option, the BDFR will use the specified config
|
||||||
- See [Configuration Files](#configuration) for more details
|
- See [Configuration Files](#configuration) for more details
|
||||||
- `--opts`
|
- `--opts`
|
||||||
- Load options from a YAML file.
|
- Load options from a YAML file.
|
||||||
- Has higher prority than the global config file but lower than command-line arguments.
|
- Has higher prority than the global config file but lower than command-line arguments.
|
||||||
- See [opts_example.yaml](./opts_example.yaml) for an example file.
|
- See [opts_example.yaml](./opts_example.yaml) for an example file.
|
||||||
- `--disable-module`
|
- `--disable-module`
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- Disables certain modules from being used
|
- Disables certain modules from being used
|
||||||
- See [Disabling Modules](#disabling-modules) for more information and a list of module names
|
- See [Disabling Modules](#disabling-modules) for more information and a list of module names
|
||||||
|
- `--filename-restriction-scheme`
|
||||||
|
- Can be: `windows`, `linux`
|
||||||
|
- Turns off the OS detection and specifies which system to use when making filenames
|
||||||
|
- See [Filesystem Restrictions](#filesystem-restrictions)
|
||||||
- `--ignore-user`
|
- `--ignore-user`
|
||||||
- This will add a user to ignore
|
- This will add a user to ignore
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- `--include-id-file`
|
- `--include-id-file`
|
||||||
- This will add any submission with the IDs in the files provided
|
- This will add any submission with the IDs in the files provided
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- Format is one ID per line
|
- Format is one ID per line
|
||||||
- `--log`
|
- `--log`
|
||||||
- This allows one to specify the location of the logfile
|
- This allows one to specify the location of the logfile
|
||||||
- This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below
|
- This must be done when running multiple instances of the BDFR, see [Multiple Instances](#multiple-instances) below
|
||||||
- `--saved`
|
- `--saved`
|
||||||
- This option will make the BDFR use the supplied user's saved posts list as a download source
|
- This option will make the BDFR use the supplied user's saved posts list as a download source
|
||||||
- This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me`
|
- This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me`
|
||||||
- `--search`
|
- `--search`
|
||||||
- This will apply the input search term to specific lists when scraping submissions
|
- This will apply the input search term to specific lists when scraping submissions
|
||||||
- A search term can only be applied when using the `--subreddit` and `--multireddit` flags
|
- A search term can only be applied when using the `--subreddit` and `--multireddit` flags
|
||||||
- `--submitted`
|
- `--submitted`
|
||||||
- This will use a user's submissions as a source
|
- This will use a user's submissions as a source
|
||||||
- A user must be specified with `--user`
|
- A user must be specified with `--user`
|
||||||
- `--upvoted`
|
- `--upvoted`
|
||||||
- This will use a user's upvoted posts as a source of posts to scrape
|
- This will use a user's upvoted posts as a source of posts to scrape
|
||||||
- This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me`
|
- This requires an authenticated Reddit instance, using the `--authenticate` flag, as well as `--user` set to `me`
|
||||||
- `-L, --limit`
|
- `-L, --limit`
|
||||||
- This is the limit on the number of submissions retrieve
|
- This is the limit on the number of submissions retrieve
|
||||||
- Default is max possible
|
- Default is max possible
|
||||||
- Note that this limit applies to **each source individually** e.g. if a `--limit` of 10 and three subreddits are provided, then 30 total submissions will be scraped
|
- Note that this limit applies to **each source individually** e.g. if a `--limit` of 10 and three subreddits are provided, then 30 total submissions will be scraped
|
||||||
- If it is not supplied, then the BDFR will default to the maximum allowed by Reddit, roughly 1000 posts. **We cannot bypass this.**
|
- If it is not supplied, then the BDFR will default to the maximum allowed by Reddit, roughly 1000 posts. **We cannot bypass this.**
|
||||||
- `-S, --sort`
|
- `-S, --sort`
|
||||||
- This is the sort type for each applicable submission source supplied to the BDFR
|
- This is the sort type for each applicable submission source supplied to the BDFR
|
||||||
- This option does not apply to upvoted or saved posts when scraping from these sources
|
- This option does not apply to upvoted or saved posts when scraping from these sources
|
||||||
- The following options are available:
|
- The following options are available:
|
||||||
- `controversial`
|
- `controversial`
|
||||||
- `hot` (default)
|
- `hot` (default)
|
||||||
- `new`
|
- `new`
|
||||||
- `relevance` (only available when using `--search`)
|
- `relevance` (only available when using `--search`)
|
||||||
- `rising`
|
- `rising`
|
||||||
- `top`
|
- `top`
|
||||||
- `-l, --link`
|
- `-l, --link`
|
||||||
- This is a direct link to a submission to download, either as a URL or an ID
|
- This is a direct link to a submission to download, either as a URL or an ID
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- `-m, --multireddit`
|
- `-m, --multireddit`
|
||||||
- This is the name of a multireddit to add as a source
|
- This is the name of a multireddit to add as a source
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- This can be done by using `-m` multiple times
|
- This can be done by using `-m` multiple times
|
||||||
- Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
|
- Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'`
|
||||||
- The specified multireddits must all belong to the user specified with the `--user` option
|
- The specified multireddits must all belong to the user specified with the `--user` option
|
||||||
- `-s, --subreddit`
|
- `-s, --subreddit`
|
||||||
- This adds a subreddit as a source
|
- This adds a subreddit as a source
|
||||||
- Can be used mutliple times
|
- Can be used mutliple times
|
||||||
- This can be done by using `-s` multiple times
|
- This can be done by using `-s` multiple times
|
||||||
- Subreddits can also be used to provide CSV subreddits e.g. `-m 'all, python, mindustry'`
|
- Subreddits can also be used to provide CSV subreddits e.g. `-m 'all, python, mindustry'`
|
||||||
- `-t, --time`
|
- `-t, --time`
|
||||||
- This is the time filter that will be applied to all applicable sources
|
- This is the time filter that will be applied to all applicable sources
|
||||||
- This option does not apply to upvoted or saved posts when scraping from these sources
|
- This option does not apply to upvoted or saved posts when scraping from these sources
|
||||||
- The following options are available:
|
- This option only applies if sorting by top or controversial. See --sort for more detail.
|
||||||
- `all` (default)
|
- The following options are available:
|
||||||
- `hour`
|
- `all` (default)
|
||||||
- `day`
|
- `hour`
|
||||||
- `week`
|
- `day`
|
||||||
- `month`
|
- `week`
|
||||||
- `year`
|
- `month`
|
||||||
- `--time-format`
|
- `year`
|
||||||
- This specifies the format of the datetime string that replaces `{DATE}` in file and folder naming schemes
|
- `--time-format`
|
||||||
- See [Time Formatting Customisation](#time-formatting-customisation) for more details, and the formatting scheme
|
- This specifies the format of the datetime string that replaces `{DATE}` in file and folder naming schemes
|
||||||
|
- See [Time Formatting Customisation](#time-formatting-customisation) for more details, and the formatting scheme
|
||||||
- `-u, --user`
|
- `-u, --user`
|
||||||
- This specifies the user to scrape in concert with other options
|
- This specifies the user to scrape in concert with other options
|
||||||
- When using `--authenticate`, `--user me` can be used to refer to the authenticated user
|
- When using `--authenticate`, `--user me` can be used to refer to the authenticated user
|
||||||
- Can be specified multiple times for multiple users
|
- Can be specified multiple times for multiple users
|
||||||
- If downloading a multireddit, only one user can be specified
|
- If downloading a multireddit, only one user can be specified
|
||||||
- `-v, --verbose`
|
- `-v, --verbose`
|
||||||
- Increases the verbosity of the program
|
- Increases the verbosity of the program
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
|
|
||||||
### Downloader Options
|
### Downloader Options
|
||||||
|
|
||||||
The following options apply only to the `download` command. This command downloads the files and resources linked to in the submission, or a text submission itself, to the disk in the specified directory.
|
The following options apply only to the `download` command. This command downloads the files and resources linked to in the submission, or a text submission itself, to the disk in the specified directory.
|
||||||
|
|
||||||
- `--make-hard-links`
|
- `--make-hard-links`
|
||||||
- This flag will create hard links to an existing file when a duplicate is downloaded
|
- This flag will create hard links to an existing file when a duplicate is downloaded in the current run
|
||||||
- This will make the file appear in multiple directories while only taking the space of a single instance
|
- This will make the file appear in multiple directories while only taking the space of a single instance
|
||||||
- `--max-wait-time`
|
- `--max-wait-time`
|
||||||
- This option specifies the maximum wait time for downloading a resource
|
- This option specifies the maximum wait time for downloading a resource
|
||||||
- The default is 120 seconds
|
- The default is 120 seconds
|
||||||
- See [Rate Limiting](#rate-limiting) for details
|
- See [Rate Limiting](#rate-limiting) for details
|
||||||
- `--no-dupes`
|
- `--no-dupes`
|
||||||
- This flag will not redownload files if they already exist somewhere in the root folder tree
|
- This flag will not redownload files if they were already downloaded in the current run
|
||||||
- This is calculated by MD5 hash
|
- This is calculated by MD5 hash
|
||||||
- `--search-existing`
|
- `--search-existing`
|
||||||
- This will make the BDFR compile the hashes for every file in `directory` and store them to remove duplicates if `--no-dupes` is also supplied
|
- This will make the BDFR compile the hashes for every file in `directory`
|
||||||
|
- The hashes are used to remove duplicates if `--no-dupes` is supplied or make hard links if `--make-hard-links` is supplied
|
||||||
- `--file-scheme`
|
- `--file-scheme`
|
||||||
- Sets the scheme for files
|
- Sets the scheme for files
|
||||||
- Default is `{REDDITOR}_{TITLE}_{POSTID}`
|
- Default is `{REDDITOR}_{TITLE}_{POSTID}`
|
||||||
- See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details
|
- See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details
|
||||||
- `--folder-scheme`
|
- `--folder-scheme`
|
||||||
- Sets the scheme for folders
|
- Sets the scheme for folders
|
||||||
- Default is `{SUBREDDIT}`
|
- Default is `{SUBREDDIT}`
|
||||||
- See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details
|
- See [Folder and File Name Schemes](#folder-and-file-name-schemes) for more details
|
||||||
- `--exclude-id`
|
- `--exclude-id`
|
||||||
- This will skip the download of any submission with the ID provided
|
- This will skip the download of any submission with the ID provided
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- `--exclude-id-file`
|
- `--exclude-id-file`
|
||||||
- This will skip the download of any submission with any of the IDs in the files provided
|
- This will skip the download of any submission with any of the IDs in the files provided
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- Format is one ID per line
|
- Format is one ID per line
|
||||||
- `--skip-domain`
|
- `--skip-domain`
|
||||||
- This adds domains to the download filter i.e. submissions coming from these domains will not be downloaded
|
- This adds domains to the download filter i.e. submissions coming from these domains will not be downloaded
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- Domains must be supplied in the form `example.com` or `img.example.com`
|
- Domains must be supplied in the form `example.com` or `img.example.com`
|
||||||
- `--skip`
|
- `--skip`
|
||||||
- This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded
|
- This adds file types to the download filter i.e. submissions with one of the supplied file extensions will not be downloaded
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- `--skip-subreddit`
|
- `--skip-subreddit`
|
||||||
- This skips all submissions from the specified subreddit
|
- This skips all submissions from the specified subreddit
|
||||||
- Can be specified multiple times
|
- Can be specified multiple times
|
||||||
- Also accepts CSV subreddit names
|
- Also accepts CSV subreddit names
|
||||||
- `--min-score`
|
- `--min-score`
|
||||||
- This skips all submissions which have fewer than specified upvotes
|
- This skips all submissions which have fewer than specified upvotes
|
||||||
- `--max-score`
|
- `--max-score`
|
||||||
- This skips all submissions which have more than specified upvotes
|
- This skips all submissions which have more than specified upvotes
|
||||||
- `--min-score-ratio`
|
- `--min-score-ratio`
|
||||||
- This skips all submissions which have lower than specified upvote ratio
|
- This skips all submissions which have lower than specified upvote ratio
|
||||||
- `--max-score-ratio`
|
- `--max-score-ratio`
|
||||||
- This skips all submissions which have higher than specified upvote ratio
|
- This skips all submissions which have higher than specified upvote ratio
|
||||||
|
|
||||||
### Archiver Options
|
### Archiver Options
|
||||||
|
|
||||||
The following options are for the `archive` command specifically.
|
The following options are for the `archive` command specifically.
|
||||||
|
|
||||||
- `--all-comments`
|
- `--all-comments`
|
||||||
- When combined with the `--user` option, this will download all the user's comments
|
- When combined with the `--user` option, this will download all the user's comments
|
||||||
- `-f, --format`
|
- `-f, --format`
|
||||||
- This specifies the format of the data file saved to disk
|
- This specifies the format of the data file saved to disk
|
||||||
- The following formats are available:
|
- The following formats are available:
|
||||||
- `json` (default)
|
- `json` (default)
|
||||||
- `xml`
|
- `xml`
|
||||||
- `yaml`
|
- `yaml`
|
||||||
- `--comment-context`
|
- `--comment-context`
|
||||||
- This option will, instead of downloading an individual comment, download the submission that comment is a part of
|
- This option will, instead of downloading an individual comment, download the submission that comment is a part of
|
||||||
- May result in a longer run time as it retrieves much more data
|
- May result in a longer run time as it retrieves much more data
|
||||||
|
|
||||||
### Cloner Options
|
### Cloner Options
|
||||||
|
|
||||||
|
@ -359,6 +377,7 @@ The following keys are optional, and defaults will be used if they cannot be fou
|
||||||
- `max_wait_time`
|
- `max_wait_time`
|
||||||
- `time_format`
|
- `time_format`
|
||||||
- `disabled_modules`
|
- `disabled_modules`
|
||||||
|
- `filename-restriction-scheme`
|
||||||
|
|
||||||
All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
|
All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default.
|
||||||
|
|
||||||
|
@ -408,10 +427,22 @@ Running scenarios concurrently (at the same time) however, is more complicated.
|
||||||
|
|
||||||
The way to fix this is to use the `--log` option to manually specify where the logfile is to be stored. If the given location is unique to each instance of the BDFR, then it will run fine.
|
The way to fix this is to use the `--log` option to manually specify where the logfile is to be stored. If the given location is unique to each instance of the BDFR, then it will run fine.
|
||||||
|
|
||||||
|
## Filesystem Restrictions
|
||||||
|
|
||||||
|
Different filesystems have different restrictions for what files and directories can be named. Thesse are separated into two broad categories: Linux-based filesystems, which have very few restrictions; and Windows-based filesystems, which are much more restrictive in terms if forbidden characters and length of paths.
|
||||||
|
|
||||||
|
During the normal course of operation, the BDFR detects what filesystem it is running on and formats any filenames and directories to conform to the rules that are expected of it. However, there are cases where this will fail. When running on a Linux-based machine, or another system where the home filesystem is permissive, and accessing a share or drive with a less permissive system, the BDFR will assume that the *home* filesystem's rules apply. For example, when downloading to a SAMBA share from Ubuntu, there will be errors as SAMBA is more restrictive than Ubuntu.
|
||||||
|
|
||||||
|
The best option would be to always download to a filesystem that is as permission as possible, such as an NFS share or ext4 drive. However, when this is not possible, the BDFR allows for the restriction scheme to be manually specified at either the command-line or in the configuration file. At the command-line, this is done with `--filename-restriction-scheme windows`, or else an option by the same name in the configuration file.
|
||||||
|
|
||||||
## Manipulating Logfiles
|
## Manipulating Logfiles
|
||||||
|
|
||||||
The logfiles that the BDFR outputs are consistent and quite detailed and in a format that is amenable to regex. To this end, a number of bash scripts have been [included here](./scripts). They show examples for how to extract successfully downloaded IDs, failed IDs, and more besides.
|
The logfiles that the BDFR outputs are consistent and quite detailed and in a format that is amenable to regex. To this end, a number of bash scripts have been [included here](./scripts). They show examples for how to extract successfully downloaded IDs, failed IDs, and more besides.
|
||||||
|
|
||||||
|
## Unsaving posts
|
||||||
|
|
||||||
|
Back in v1 there was an option to unsave posts from your account when downloading, but it was removed from the core BDFR on v2 as it is considered a read-only tool. However, for those missing this functionality, a script was created that uses the log files to achieve this. There is info on how to use this on the README.md file on the scripts subdirectory.
|
||||||
|
|
||||||
## List of currently supported sources
|
## List of currently supported sources
|
||||||
|
|
||||||
- Direct links (links leading to a file)
|
- Direct links (links leading to a file)
|
||||||
|
@ -426,7 +457,7 @@ The logfiles that the BDFR outputs are consistent and quite detailed and in a fo
|
||||||
- Redgifs
|
- Redgifs
|
||||||
- Vidble
|
- Vidble
|
||||||
- YouTube
|
- YouTube
|
||||||
- Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatable
|
- Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatable
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__version__ = "2.6.2"
|
|
@ -1,12 +1,16 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from bdfr import __version__
|
||||||
from bdfr.archiver import Archiver
|
from bdfr.archiver import Archiver
|
||||||
from bdfr.cloner import RedditCloner
|
from bdfr.cloner import RedditCloner
|
||||||
|
from bdfr.completion import Completion
|
||||||
from bdfr.configuration import Configuration
|
from bdfr.configuration import Configuration
|
||||||
from bdfr.downloader import RedditDownloader
|
from bdfr.downloader import RedditDownloader
|
||||||
|
|
||||||
|
@ -16,15 +20,16 @@ _common_options = [
|
||||||
click.argument("directory", type=str),
|
click.argument("directory", type=str),
|
||||||
click.option("--authenticate", is_flag=True, default=None),
|
click.option("--authenticate", is_flag=True, default=None),
|
||||||
click.option("--config", type=str, default=None),
|
click.option("--config", type=str, default=None),
|
||||||
click.option("--opts", type=str, default=None),
|
|
||||||
click.option("--disable-module", multiple=True, default=None, type=str),
|
click.option("--disable-module", multiple=True, default=None, type=str),
|
||||||
click.option("--exclude-id", default=None, multiple=True),
|
click.option("--exclude-id", default=None, multiple=True),
|
||||||
click.option("--exclude-id-file", default=None, multiple=True),
|
click.option("--exclude-id-file", default=None, multiple=True),
|
||||||
click.option("--file-scheme", default=None, type=str),
|
click.option("--file-scheme", default=None, type=str),
|
||||||
|
click.option("--filename-restriction-scheme", type=click.Choice(("linux", "windows")), default=None),
|
||||||
click.option("--folder-scheme", default=None, type=str),
|
click.option("--folder-scheme", default=None, type=str),
|
||||||
click.option("--ignore-user", type=str, multiple=True, default=None),
|
click.option("--ignore-user", type=str, multiple=True, default=None),
|
||||||
click.option("--include-id-file", multiple=True, default=None),
|
click.option("--include-id-file", multiple=True, default=None),
|
||||||
click.option("--log", type=str, default=None),
|
click.option("--log", type=str, default=None),
|
||||||
|
click.option("--opts", type=str, default=None),
|
||||||
click.option("--saved", is_flag=True, default=None),
|
click.option("--saved", is_flag=True, default=None),
|
||||||
click.option("--search", default=None, type=str),
|
click.option("--search", default=None, type=str),
|
||||||
click.option("--submitted", is_flag=True, default=None),
|
click.option("--submitted", is_flag=True, default=None),
|
||||||
|
@ -73,21 +78,43 @@ def _add_options(opts: list):
|
||||||
return wrap
|
return wrap
|
||||||
|
|
||||||
|
|
||||||
|
def _check_version(context, param, value):
|
||||||
|
if not value or context.resilient_parsing:
|
||||||
|
return
|
||||||
|
current = __version__
|
||||||
|
latest = requests.get("https://pypi.org/pypi/bdfr/json").json()["info"]["version"]
|
||||||
|
print(f"You are currently using v{current} the latest is v{latest}")
|
||||||
|
context.exit()
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
|
@click.help_option("-h", "--help")
|
||||||
|
@click.option(
|
||||||
|
"--version",
|
||||||
|
is_flag=True,
|
||||||
|
is_eager=True,
|
||||||
|
expose_value=False,
|
||||||
|
callback=_check_version,
|
||||||
|
help="Check version and exit.",
|
||||||
|
)
|
||||||
def cli():
|
def cli():
|
||||||
|
"""BDFR is used to download and archive content from Reddit."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@cli.command("download")
|
@cli.command("download")
|
||||||
@_add_options(_common_options)
|
@_add_options(_common_options)
|
||||||
@_add_options(_downloader_options)
|
@_add_options(_downloader_options)
|
||||||
|
@click.help_option("-h", "--help")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def cli_download(context: click.Context, **_):
|
def cli_download(context: click.Context, **_):
|
||||||
|
"""Used to download content posted to Reddit."""
|
||||||
config = Configuration()
|
config = Configuration()
|
||||||
config.process_click_arguments(context)
|
config.process_click_arguments(context)
|
||||||
setup_logging(config.verbose)
|
silence_module_loggers()
|
||||||
|
stream = make_console_logging_handler(config.verbose)
|
||||||
try:
|
try:
|
||||||
reddit_downloader = RedditDownloader(config)
|
reddit_downloader = RedditDownloader(config, [stream])
|
||||||
reddit_downloader.download()
|
reddit_downloader.download()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Downloader exited unexpectedly")
|
logger.exception("Downloader exited unexpectedly")
|
||||||
|
@ -99,13 +126,16 @@ def cli_download(context: click.Context, **_):
|
||||||
@cli.command("archive")
|
@cli.command("archive")
|
||||||
@_add_options(_common_options)
|
@_add_options(_common_options)
|
||||||
@_add_options(_archiver_options)
|
@_add_options(_archiver_options)
|
||||||
|
@click.help_option("-h", "--help")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def cli_archive(context: click.Context, **_):
|
def cli_archive(context: click.Context, **_):
|
||||||
|
"""Used to archive post data from Reddit."""
|
||||||
config = Configuration()
|
config = Configuration()
|
||||||
config.process_click_arguments(context)
|
config.process_click_arguments(context)
|
||||||
setup_logging(config.verbose)
|
silence_module_loggers()
|
||||||
|
stream = make_console_logging_handler(config.verbose)
|
||||||
try:
|
try:
|
||||||
reddit_archiver = Archiver(config)
|
reddit_archiver = Archiver(config, [stream])
|
||||||
reddit_archiver.download()
|
reddit_archiver.download()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Archiver exited unexpectedly")
|
logger.exception("Archiver exited unexpectedly")
|
||||||
|
@ -118,13 +148,16 @@ def cli_archive(context: click.Context, **_):
|
||||||
@_add_options(_common_options)
|
@_add_options(_common_options)
|
||||||
@_add_options(_archiver_options)
|
@_add_options(_archiver_options)
|
||||||
@_add_options(_downloader_options)
|
@_add_options(_downloader_options)
|
||||||
|
@click.help_option("-h", "--help")
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def cli_clone(context: click.Context, **_):
|
def cli_clone(context: click.Context, **_):
|
||||||
|
"""Combines archive and download commands."""
|
||||||
config = Configuration()
|
config = Configuration()
|
||||||
config.process_click_arguments(context)
|
config.process_click_arguments(context)
|
||||||
setup_logging(config.verbose)
|
silence_module_loggers()
|
||||||
|
stream = make_console_logging_handler(config.verbose)
|
||||||
try:
|
try:
|
||||||
reddit_scraper = RedditCloner(config)
|
reddit_scraper = RedditCloner(config, [stream])
|
||||||
reddit_scraper.download()
|
reddit_scraper.download()
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Scraper exited unexpectedly")
|
logger.exception("Scraper exited unexpectedly")
|
||||||
|
@ -133,7 +166,31 @@ def cli_clone(context: click.Context, **_):
|
||||||
logger.info("Program complete")
|
logger.info("Program complete")
|
||||||
|
|
||||||
|
|
||||||
def setup_logging(verbosity: int):
|
@cli.command("completion")
|
||||||
|
@click.argument("shell", type=click.Choice(("all", "bash", "fish", "zsh"), case_sensitive=False), default="all")
|
||||||
|
@click.help_option("-h", "--help")
|
||||||
|
@click.option("-u", "--uninstall", is_flag=True, default=False, help="Uninstall completion")
|
||||||
|
def cli_completion(shell: str, uninstall: bool):
|
||||||
|
"""\b
|
||||||
|
Installs shell completions for BDFR.
|
||||||
|
Options: all, bash, fish, zsh
|
||||||
|
Default: all"""
|
||||||
|
shell = shell.lower()
|
||||||
|
if sys.platform == "win32":
|
||||||
|
print("Completions are not currently supported on Windows.")
|
||||||
|
return
|
||||||
|
if uninstall and click.confirm(f"Would you like to uninstall {shell} completions for BDFR"):
|
||||||
|
Completion(shell).uninstall()
|
||||||
|
return
|
||||||
|
if shell not in ("all", "bash", "fish", "zsh"):
|
||||||
|
print(f"{shell} is not a valid option.")
|
||||||
|
print("Options: all, bash, fish, zsh")
|
||||||
|
return
|
||||||
|
if click.confirm(f"Would you like to install {shell} completions for BDFR"):
|
||||||
|
Completion(shell).install()
|
||||||
|
|
||||||
|
|
||||||
|
def make_console_logging_handler(verbosity: int) -> logging.StreamHandler:
|
||||||
class StreamExceptionFilter(logging.Filter):
|
class StreamExceptionFilter(logging.Filter):
|
||||||
def filter(self, record: logging.LogRecord) -> bool:
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
result = not (record.levelno == logging.ERROR and record.exc_info)
|
result = not (record.levelno == logging.ERROR and record.exc_info)
|
||||||
|
@ -146,13 +203,16 @@ def setup_logging(verbosity: int):
|
||||||
formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s")
|
formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s")
|
||||||
stream.setFormatter(formatter)
|
stream.setFormatter(formatter)
|
||||||
|
|
||||||
logger.addHandler(stream)
|
|
||||||
if verbosity <= 0:
|
if verbosity <= 0:
|
||||||
stream.setLevel(logging.INFO)
|
stream.setLevel(logging.INFO)
|
||||||
elif verbosity == 1:
|
elif verbosity == 1:
|
||||||
stream.setLevel(logging.DEBUG)
|
stream.setLevel(logging.DEBUG)
|
||||||
else:
|
else:
|
||||||
stream.setLevel(9)
|
stream.setLevel(9)
|
||||||
|
return stream
|
||||||
|
|
||||||
|
|
||||||
|
def silence_module_loggers():
|
||||||
logging.getLogger("praw").setLevel(logging.CRITICAL)
|
logging.getLogger("praw").setLevel(logging.CRITICAL)
|
||||||
logging.getLogger("prawcore").setLevel(logging.CRITICAL)
|
logging.getLogger("prawcore").setLevel(logging.CRITICAL)
|
||||||
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
|
logging.getLogger("urllib3").setLevel(logging.CRITICAL)
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,13 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Iterator, Union
|
from collections.abc import Iterable, Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
from time import sleep
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
import dict2xml
|
import dict2xml
|
||||||
import praw.models
|
import praw.models
|
||||||
|
@ -23,28 +26,33 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Archiver(RedditConnector):
|
class Archiver(RedditConnector):
|
||||||
def __init__(self, args: Configuration):
|
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
|
||||||
super(Archiver, self).__init__(args)
|
super(Archiver, self).__init__(args, logging_handlers)
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
if (submission.author and submission.author.name in self.args.ignore_user) or (
|
try:
|
||||||
submission.author is None and "DELETED" in self.args.ignore_user
|
if (submission.author and submission.author.name in self.args.ignore_user) or (
|
||||||
):
|
submission.author is None and "DELETED" in self.args.ignore_user
|
||||||
logger.debug(
|
):
|
||||||
f"Submission {submission.id} in {submission.subreddit.display_name} skipped"
|
logger.debug(
|
||||||
f' due to {submission.author.name if submission.author else "DELETED"} being an ignored user'
|
f"Submission {submission.id} in {submission.subreddit.display_name} skipped due to"
|
||||||
)
|
f" {submission.author.name if submission.author else 'DELETED'} being an ignored user"
|
||||||
continue
|
)
|
||||||
if submission.id in self.excluded_submission_ids:
|
continue
|
||||||
logger.debug(f"Object {submission.id} in exclusion list, skipping")
|
if submission.id in self.excluded_submission_ids:
|
||||||
continue
|
logger.debug(f"Object {submission.id} in exclusion list, skipping")
|
||||||
logger.debug(f"Attempting to archive submission {submission.id}")
|
continue
|
||||||
self.write_entry(submission)
|
logger.debug(f"Attempting to archive submission {submission.id}")
|
||||||
except prawcore.PrawcoreException as e:
|
self.write_entry(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to be archived due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
||||||
supplied_submissions = []
|
supplied_submissions = []
|
||||||
|
@ -102,13 +110,13 @@ class Archiver(RedditConnector):
|
||||||
|
|
||||||
def _write_entry_yaml(self, entry: BaseArchiveEntry):
|
def _write_entry_yaml(self, entry: BaseArchiveEntry):
|
||||||
resource = Resource(entry.source, "", lambda: None, ".yaml")
|
resource = Resource(entry.source, "", lambda: None, ".yaml")
|
||||||
content = yaml.dump(entry.compile())
|
content = yaml.safe_dump(entry.compile())
|
||||||
self._write_content_to_disk(resource, content)
|
self._write_content_to_disk(resource, content)
|
||||||
|
|
||||||
def _write_content_to_disk(self, resource: Resource, content: str):
|
def _write_content_to_disk(self, resource: Resource, content: str):
|
||||||
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
|
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
|
||||||
file_path.parent.mkdir(exist_ok=True, parents=True)
|
file_path.parent.mkdir(exist_ok=True, parents=True)
|
||||||
with open(file_path, "w", encoding="utf-8") as file:
|
with Path(file_path).open(mode="w", encoding="utf-8") as file:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}"
|
f"Writing entry {resource.source_submission.id} to file in {resource.extension[1:].upper()}"
|
||||||
f" format at {file_path}"
|
f" format at {file_path}"
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
from collections.abc import Iterable
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import prawcore
|
import prawcore
|
||||||
|
|
||||||
|
@ -13,14 +15,19 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class RedditCloner(RedditDownloader, Archiver):
|
class RedditCloner(RedditDownloader, Archiver):
|
||||||
def __init__(self, args: Configuration):
|
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
|
||||||
super(RedditCloner, self).__init__(args)
|
super(RedditCloner, self).__init__(args, logging_handlers)
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
self._download_submission(submission)
|
try:
|
||||||
self.write_entry(submission)
|
self._download_submission(submission)
|
||||||
except prawcore.PrawcoreException as e:
|
self.write_entry(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to be cloned due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
68
bdfr/completion.py
Normal file
68
bdfr/completion.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from os import environ
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import appdirs
|
||||||
|
|
||||||
|
|
||||||
|
class Completion:
|
||||||
|
def __init__(self, shell: str):
|
||||||
|
self.shell = shell
|
||||||
|
self.env = environ.copy()
|
||||||
|
self.share_dir = appdirs.user_data_dir()
|
||||||
|
self.entry_points = ["bdfr", "bdfr-archive", "bdfr-clone", "bdfr-download"]
|
||||||
|
|
||||||
|
def install(self):
|
||||||
|
if self.shell in ("all", "bash"):
|
||||||
|
comp_dir = self.share_dir + "/bash-completion/completions/"
|
||||||
|
if not Path(comp_dir).exists():
|
||||||
|
print("Creating Bash completion directory.")
|
||||||
|
Path(comp_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
for point in self.entry_points:
|
||||||
|
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "bash_source"
|
||||||
|
with Path(comp_dir + point).open(mode="w") as file:
|
||||||
|
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
|
||||||
|
print(f"Bash completion for {point} written to {comp_dir}{point}")
|
||||||
|
if self.shell in ("all", "fish"):
|
||||||
|
comp_dir = self.share_dir + "/fish/vendor_completions.d/"
|
||||||
|
if not Path(comp_dir).exists():
|
||||||
|
print("Creating Fish completion directory.")
|
||||||
|
Path(comp_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
for point in self.entry_points:
|
||||||
|
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "fish_source"
|
||||||
|
with Path(comp_dir + point + ".fish").open(mode="w") as file:
|
||||||
|
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
|
||||||
|
print(f"Fish completion for {point} written to {comp_dir}{point}.fish")
|
||||||
|
if self.shell in ("all", "zsh"):
|
||||||
|
comp_dir = self.share_dir + "/zsh/site-functions/"
|
||||||
|
if not Path(comp_dir).exists():
|
||||||
|
print("Creating Zsh completion directory.")
|
||||||
|
Path(comp_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
for point in self.entry_points:
|
||||||
|
self.env[f"_{point.upper().replace('-', '_')}_COMPLETE"] = "zsh_source"
|
||||||
|
with Path(comp_dir + "_" + point).open(mode="w") as file:
|
||||||
|
file.write(subprocess.run([point], env=self.env, capture_output=True, text=True).stdout)
|
||||||
|
print(f"Zsh completion for {point} written to {comp_dir}_{point}")
|
||||||
|
|
||||||
|
def uninstall(self):
|
||||||
|
if self.shell in ("all", "bash"):
|
||||||
|
comp_dir = self.share_dir + "/bash-completion/completions/"
|
||||||
|
for point in self.entry_points:
|
||||||
|
if Path(comp_dir + point).exists():
|
||||||
|
Path(comp_dir + point).unlink()
|
||||||
|
print(f"Bash completion for {point} removed from {comp_dir}{point}")
|
||||||
|
if self.shell in ("all", "fish"):
|
||||||
|
comp_dir = self.share_dir + "/fish/vendor_completions.d/"
|
||||||
|
for point in self.entry_points:
|
||||||
|
if Path(comp_dir + point + ".fish").exists():
|
||||||
|
Path(comp_dir + point + ".fish").unlink()
|
||||||
|
print(f"Fish completion for {point} removed from {comp_dir}{point}.fish")
|
||||||
|
if self.shell in ("all", "zsh"):
|
||||||
|
comp_dir = self.share_dir + "/zsh/site-functions/"
|
||||||
|
for point in self.entry_points:
|
||||||
|
if Path(comp_dir + "_" + point).exists():
|
||||||
|
Path(comp_dir + "_" + point).unlink()
|
||||||
|
print(f"Zsh completion for {point} removed from {comp_dir}_{point}")
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from argparse import Namespace
|
from argparse import Namespace
|
||||||
|
@ -23,6 +23,7 @@ class Configuration(Namespace):
|
||||||
self.exclude_id = []
|
self.exclude_id = []
|
||||||
self.exclude_id_file = []
|
self.exclude_id_file = []
|
||||||
self.file_scheme: str = "{REDDITOR}_{TITLE}_{POSTID}"
|
self.file_scheme: str = "{REDDITOR}_{TITLE}_{POSTID}"
|
||||||
|
self.filename_restriction_scheme = None
|
||||||
self.folder_scheme: str = "{SUBREDDIT}"
|
self.folder_scheme: str = "{SUBREDDIT}"
|
||||||
self.ignore_user = []
|
self.ignore_user = []
|
||||||
self.include_id_file = []
|
self.include_id_file = []
|
||||||
|
@ -78,7 +79,7 @@ class Configuration(Namespace):
|
||||||
return
|
return
|
||||||
with yaml_file_loc.open() as file:
|
with yaml_file_loc.open() as file:
|
||||||
try:
|
try:
|
||||||
opts = yaml.load(file, Loader=yaml.FullLoader)
|
opts = yaml.safe_load(file)
|
||||||
except yaml.YAMLError as e:
|
except yaml.YAMLError as e:
|
||||||
logger.error(f"Could not parse YAML options file: {e}")
|
logger.error(f"Could not parse YAML options file: {e}")
|
||||||
return
|
return
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
import importlib.resources
|
import importlib.resources
|
||||||
|
@ -10,10 +10,11 @@ import re
|
||||||
import shutil
|
import shutil
|
||||||
import socket
|
import socket
|
||||||
from abc import ABCMeta, abstractmethod
|
from abc import ABCMeta, abstractmethod
|
||||||
|
from collections.abc import Callable, Iterable, Iterator
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Iterator
|
from time import sleep
|
||||||
|
|
||||||
import appdirs
|
import appdirs
|
||||||
import praw
|
import praw
|
||||||
|
@ -50,20 +51,20 @@ class RedditTypes:
|
||||||
|
|
||||||
|
|
||||||
class RedditConnector(metaclass=ABCMeta):
|
class RedditConnector(metaclass=ABCMeta):
|
||||||
def __init__(self, args: Configuration):
|
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
|
||||||
self.args = args
|
self.args = args
|
||||||
self.config_directories = appdirs.AppDirs("bdfr", "BDFR")
|
self.config_directories = appdirs.AppDirs("bdfr", "BDFR")
|
||||||
|
self.determine_directories()
|
||||||
|
self.load_config()
|
||||||
|
self.read_config()
|
||||||
|
file_log = self.create_file_logger()
|
||||||
|
self._apply_logging_handlers(itertools.chain(logging_handlers, [file_log]))
|
||||||
self.run_time = datetime.now().isoformat()
|
self.run_time = datetime.now().isoformat()
|
||||||
self._setup_internal_objects()
|
self._setup_internal_objects()
|
||||||
|
|
||||||
self.reddit_lists = self.retrieve_reddit_lists()
|
self.reddit_lists = self.retrieve_reddit_lists()
|
||||||
|
|
||||||
def _setup_internal_objects(self):
|
def _setup_internal_objects(self):
|
||||||
self.determine_directories()
|
|
||||||
self.load_config()
|
|
||||||
self.create_file_logger()
|
|
||||||
|
|
||||||
self.read_config()
|
|
||||||
|
|
||||||
self.parse_disabled_modules()
|
self.parse_disabled_modules()
|
||||||
|
|
||||||
|
@ -93,6 +94,12 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
self.args.skip_subreddit = self.split_args_input(self.args.skip_subreddit)
|
self.args.skip_subreddit = self.split_args_input(self.args.skip_subreddit)
|
||||||
self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit}
|
self.args.skip_subreddit = {sub.lower() for sub in self.args.skip_subreddit}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _apply_logging_handlers(handlers: Iterable[logging.Handler]):
|
||||||
|
main_logger = logging.getLogger()
|
||||||
|
for handler in handlers:
|
||||||
|
main_logger.addHandler(handler)
|
||||||
|
|
||||||
def read_config(self):
|
def read_config(self):
|
||||||
"""Read any cfg values that need to be processed"""
|
"""Read any cfg values that need to be processed"""
|
||||||
if self.args.max_wait_time is None:
|
if self.args.max_wait_time is None:
|
||||||
|
@ -106,8 +113,13 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
self.args.time_format = option
|
self.args.time_format = option
|
||||||
if not self.args.disable_module:
|
if not self.args.disable_module:
|
||||||
self.args.disable_module = [self.cfg_parser.get("DEFAULT", "disabled_modules", fallback="")]
|
self.args.disable_module = [self.cfg_parser.get("DEFAULT", "disabled_modules", fallback="")]
|
||||||
|
if not self.args.filename_restriction_scheme:
|
||||||
|
self.args.filename_restriction_scheme = self.cfg_parser.get(
|
||||||
|
"DEFAULT", "filename_restriction_scheme", fallback=None
|
||||||
|
)
|
||||||
|
logger.debug(f"Setting filename restriction scheme to '{self.args.filename_restriction_scheme}'")
|
||||||
# Update config on disk
|
# Update config on disk
|
||||||
with open(self.config_location, "w") as file:
|
with Path(self.config_location).open(mode="w") as file:
|
||||||
self.cfg_parser.write(file)
|
self.cfg_parser.write(file)
|
||||||
|
|
||||||
def parse_disabled_modules(self):
|
def parse_disabled_modules(self):
|
||||||
|
@ -131,7 +143,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
)
|
)
|
||||||
token = oauth2_authenticator.retrieve_new_token()
|
token = oauth2_authenticator.retrieve_new_token()
|
||||||
self.cfg_parser["DEFAULT"]["user_token"] = token
|
self.cfg_parser["DEFAULT"]["user_token"] = token
|
||||||
with open(self.config_location, "w") as file:
|
with Path(self.config_location).open(mode="w") as file:
|
||||||
self.cfg_parser.write(file, True)
|
self.cfg_parser.write(file, True)
|
||||||
token_manager = OAuth2TokenManager(self.cfg_parser, self.config_location)
|
token_manager = OAuth2TokenManager(self.cfg_parser, self.config_location)
|
||||||
|
|
||||||
|
@ -197,14 +209,13 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
raise errors.BulkDownloaderException("Could not find a configuration file to load")
|
raise errors.BulkDownloaderException("Could not find a configuration file to load")
|
||||||
self.cfg_parser.read(self.config_location)
|
self.cfg_parser.read(self.config_location)
|
||||||
|
|
||||||
def create_file_logger(self):
|
def create_file_logger(self) -> logging.handlers.RotatingFileHandler:
|
||||||
main_logger = logging.getLogger()
|
|
||||||
if self.args.log is None:
|
if self.args.log is None:
|
||||||
log_path = Path(self.config_directory, "log_output.txt")
|
log_path = Path(self.config_directory, "log_output.txt")
|
||||||
else:
|
else:
|
||||||
log_path = Path(self.args.log).resolve().expanduser()
|
log_path = Path(self.args.log).resolve().expanduser()
|
||||||
if not log_path.parent.exists():
|
if not log_path.parent.exists():
|
||||||
raise errors.BulkDownloaderException(f"Designated location for logfile does not exist")
|
raise errors.BulkDownloaderException("Designated location for logfile does not exist")
|
||||||
backup_count = self.cfg_parser.getint("DEFAULT", "backup_log_count", fallback=3)
|
backup_count = self.cfg_parser.getint("DEFAULT", "backup_log_count", fallback=3)
|
||||||
file_handler = logging.handlers.RotatingFileHandler(
|
file_handler = logging.handlers.RotatingFileHandler(
|
||||||
log_path,
|
log_path,
|
||||||
|
@ -223,8 +234,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s")
|
formatter = logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s")
|
||||||
file_handler.setFormatter(formatter)
|
file_handler.setFormatter(formatter)
|
||||||
file_handler.setLevel(0)
|
file_handler.setLevel(0)
|
||||||
|
return file_handler
|
||||||
main_logger.addHandler(file_handler)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanitise_subreddit_name(subreddit: str) -> str:
|
def sanitise_subreddit_name(subreddit: str) -> str:
|
||||||
|
@ -300,7 +310,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
def get_submissions_from_link(self) -> list[list[praw.models.Submission]]:
|
||||||
supplied_submissions = []
|
supplied_submissions = []
|
||||||
for sub_id in self.args.link:
|
for sub_id in self.args.link:
|
||||||
if len(sub_id) == 6:
|
if len(sub_id) in (6, 7):
|
||||||
supplied_submissions.append(self.reddit_instance.submission(id=sub_id))
|
supplied_submissions.append(self.reddit_instance.submission(id=sub_id))
|
||||||
else:
|
else:
|
||||||
supplied_submissions.append(self.reddit_instance.submission(url=sub_id))
|
supplied_submissions.append(self.reddit_instance.submission(url=sub_id))
|
||||||
|
@ -322,7 +332,7 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
def get_multireddits(self) -> list[Iterator]:
|
def get_multireddits(self) -> list[Iterator]:
|
||||||
if self.args.multireddit:
|
if self.args.multireddit:
|
||||||
if len(self.args.user) != 1:
|
if len(self.args.user) != 1:
|
||||||
logger.error(f"Only 1 user can be supplied when retrieving from multireddits")
|
logger.error("Only 1 user can be supplied when retrieving from multireddits")
|
||||||
return []
|
return []
|
||||||
out = []
|
out = []
|
||||||
for multi in self.split_args_input(self.args.multireddit):
|
for multi in self.split_args_input(self.args.multireddit):
|
||||||
|
@ -353,26 +363,31 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
generators = []
|
generators = []
|
||||||
for user in self.args.user:
|
for user in self.args.user:
|
||||||
try:
|
try:
|
||||||
self.check_user_existence(user)
|
try:
|
||||||
except errors.BulkDownloaderException as e:
|
self.check_user_existence(user)
|
||||||
logger.error(e)
|
except errors.BulkDownloaderException as e:
|
||||||
continue
|
logger.error(e)
|
||||||
if self.args.submitted:
|
continue
|
||||||
logger.debug(f"Retrieving submitted posts of user {self.args.user}")
|
if self.args.submitted:
|
||||||
generators.append(
|
logger.debug(f"Retrieving submitted posts of user {user}")
|
||||||
self.create_filtered_listing_generator(
|
generators.append(
|
||||||
self.reddit_instance.redditor(user).submissions,
|
self.create_filtered_listing_generator(
|
||||||
|
self.reddit_instance.redditor(user).submissions,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
if not self.authenticated and any((self.args.upvoted, self.args.saved)):
|
||||||
if not self.authenticated and any((self.args.upvoted, self.args.saved)):
|
logger.warning("Accessing user lists requires authentication")
|
||||||
logger.warning("Accessing user lists requires authentication")
|
else:
|
||||||
else:
|
if self.args.upvoted:
|
||||||
if self.args.upvoted:
|
logger.debug(f"Retrieving upvoted posts of user {user}")
|
||||||
logger.debug(f"Retrieving upvoted posts of user {self.args.user}")
|
generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
|
||||||
generators.append(self.reddit_instance.redditor(user).upvoted(limit=self.args.limit))
|
if self.args.saved:
|
||||||
if self.args.saved:
|
logger.debug(f"Retrieving saved posts of user {user}")
|
||||||
logger.debug(f"Retrieving saved posts of user {self.args.user}")
|
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
|
||||||
generators.append(self.reddit_instance.redditor(user).saved(limit=self.args.limit))
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"User {user} failed to be retrieved due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
return generators
|
return generators
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
@ -389,7 +404,9 @@ class RedditConnector(metaclass=ABCMeta):
|
||||||
raise errors.BulkDownloaderException(f"User {name} is banned")
|
raise errors.BulkDownloaderException(f"User {name} is banned")
|
||||||
|
|
||||||
def create_file_name_formatter(self) -> FileNameFormatter:
|
def create_file_name_formatter(self) -> FileNameFormatter:
|
||||||
return FileNameFormatter(self.args.file_scheme, self.args.folder_scheme, self.args.time_format)
|
return FileNameFormatter(
|
||||||
|
self.args.file_scheme, self.args.folder_scheme, self.args.time_format, self.args.filename_restriction_scheme
|
||||||
|
)
|
||||||
|
|
||||||
def create_time_filter(self) -> RedditTypes.TimeType:
|
def create_time_filter(self) -> RedditTypes.TimeType:
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
from collections.abc import Iterable
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from multiprocessing import Pool
|
from multiprocessing import Pool
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import praw.exceptions
|
import praw.exceptions
|
||||||
|
@ -35,18 +37,23 @@ def _calc_hash(existing_file: Path):
|
||||||
|
|
||||||
|
|
||||||
class RedditDownloader(RedditConnector):
|
class RedditDownloader(RedditConnector):
|
||||||
def __init__(self, args: Configuration):
|
def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handler] = ()):
|
||||||
super(RedditDownloader, self).__init__(args)
|
super(RedditDownloader, self).__init__(args, logging_handlers)
|
||||||
if self.args.search_existing:
|
if self.args.search_existing:
|
||||||
self.master_hash_list = self.scan_existing_files(self.download_directory)
|
self.master_hash_list = self.scan_existing_files(self.download_directory)
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
for generator in self.reddit_lists:
|
for generator in self.reddit_lists:
|
||||||
for submission in generator:
|
try:
|
||||||
try:
|
for submission in generator:
|
||||||
self._download_submission(submission)
|
try:
|
||||||
except prawcore.PrawcoreException as e:
|
self._download_submission(submission)
|
||||||
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"Submission {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
except prawcore.PrawcoreException as e:
|
||||||
|
logger.error(f"The submission after {submission.id} failed to download due to a PRAW exception: {e}")
|
||||||
|
logger.debug("Waiting 60 seconds to continue")
|
||||||
|
sleep(60)
|
||||||
|
|
||||||
def _download_submission(self, submission: praw.models.Submission):
|
def _download_submission(self, submission: praw.models.Submission):
|
||||||
if submission.id in self.excluded_submission_ids:
|
if submission.id in self.excluded_submission_ids:
|
||||||
|
@ -149,7 +156,7 @@ class RedditDownloader(RedditConnector):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def scan_existing_files(directory: Path) -> dict[str, Path]:
|
def scan_existing_files(directory: Path) -> dict[str, Path]:
|
||||||
files = []
|
files = []
|
||||||
for (dirpath, dirnames, filenames) in os.walk(directory):
|
for (dirpath, _dirnames, filenames) in os.walk(directory):
|
||||||
files.extend([Path(dirpath, file) for file in filenames])
|
files.extend([Path(dirpath, file) for file in filenames])
|
||||||
logger.info(f"Calculating hashes for {len(files)} files")
|
logger.info(f"Calculating hashes for {len(files)} files")
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
class BulkDownloaderException(Exception):
|
class BulkDownloaderException(Exception):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import platform
|
import platform
|
||||||
|
@ -26,13 +27,26 @@ class FileNameFormatter:
|
||||||
"title",
|
"title",
|
||||||
"upvotes",
|
"upvotes",
|
||||||
)
|
)
|
||||||
|
WINDOWS_MAX_PATH_LENGTH = 260
|
||||||
|
LINUX_MAX_PATH_LENGTH = 4096
|
||||||
|
|
||||||
def __init__(self, file_format_string: str, directory_format_string: str, time_format_string: str):
|
def __init__(
|
||||||
|
self,
|
||||||
|
file_format_string: str,
|
||||||
|
directory_format_string: str,
|
||||||
|
time_format_string: str,
|
||||||
|
restriction_scheme: Optional[str] = None,
|
||||||
|
):
|
||||||
if not self.validate_string(file_format_string):
|
if not self.validate_string(file_format_string):
|
||||||
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
|
raise BulkDownloaderException(f'"{file_format_string}" is not a valid format string')
|
||||||
self.file_format_string = file_format_string
|
self.file_format_string = file_format_string
|
||||||
self.directory_format_string: list[str] = directory_format_string.split("/")
|
self.directory_format_string: list[str] = directory_format_string.split("/")
|
||||||
self.time_format_string = time_format_string
|
self.time_format_string = time_format_string
|
||||||
|
self.restiction_scheme = restriction_scheme.lower().strip() if restriction_scheme else None
|
||||||
|
if self.restiction_scheme == "windows":
|
||||||
|
self.max_path = self.WINDOWS_MAX_PATH_LENGTH
|
||||||
|
else:
|
||||||
|
self.max_path = self.find_max_path_length()
|
||||||
|
|
||||||
def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str:
|
def _format_name(self, submission: Union[Comment, Submission], format_string: str) -> str:
|
||||||
if isinstance(submission, Submission):
|
if isinstance(submission, Submission):
|
||||||
|
@ -51,9 +65,12 @@ class FileNameFormatter:
|
||||||
|
|
||||||
result = result.replace("/", "")
|
result = result.replace("/", "")
|
||||||
|
|
||||||
if platform.system() == "Windows":
|
if self.restiction_scheme is None:
|
||||||
|
if platform.system() == "Windows":
|
||||||
|
result = FileNameFormatter._format_for_windows(result)
|
||||||
|
elif self.restiction_scheme == "windows":
|
||||||
|
logger.debug("Forcing Windows-compatible filenames")
|
||||||
result = FileNameFormatter._format_for_windows(result)
|
result = FileNameFormatter._format_for_windows(result)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -125,14 +142,13 @@ class FileNameFormatter:
|
||||||
raise BulkDownloaderException(f"Could not determine path name: {subfolder}, {index}, {resource.extension}")
|
raise BulkDownloaderException(f"Could not determine path name: {subfolder}, {index}, {resource.extension}")
|
||||||
return file_path
|
return file_path
|
||||||
|
|
||||||
@staticmethod
|
def limit_file_name_length(self, filename: str, ending: str, root: Path) -> Path:
|
||||||
def limit_file_name_length(filename: str, ending: str, root: Path) -> Path:
|
|
||||||
root = root.resolve().expanduser()
|
root = root.resolve().expanduser()
|
||||||
possible_id = re.search(r"((?:_\w{6})?$)", filename)
|
possible_id = re.search(r"((?:_\w{6})?$)", filename)
|
||||||
if possible_id:
|
if possible_id:
|
||||||
ending = possible_id.group(1) + ending
|
ending = possible_id.group(1) + ending
|
||||||
filename = filename[: possible_id.start()]
|
filename = filename[: possible_id.start()]
|
||||||
max_path = FileNameFormatter.find_max_path_length()
|
max_path = self.max_path
|
||||||
max_file_part_length_chars = 255 - len(ending)
|
max_file_part_length_chars = 255 - len(ending)
|
||||||
max_file_part_length_bytes = 255 - len(ending.encode("utf-8"))
|
max_file_part_length_bytes = 255 - len(ending.encode("utf-8"))
|
||||||
max_path_length = max_path - len(ending) - len(str(root)) - 1
|
max_path_length = max_path - len(ending) - len(str(root)) - 1
|
||||||
|
@ -156,9 +172,9 @@ class FileNameFormatter:
|
||||||
return int(subprocess.check_output(["getconf", "PATH_MAX", "/"]))
|
return int(subprocess.check_output(["getconf", "PATH_MAX", "/"]))
|
||||||
except (ValueError, subprocess.CalledProcessError, OSError):
|
except (ValueError, subprocess.CalledProcessError, OSError):
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
return 260
|
return FileNameFormatter.WINDOWS_MAX_PATH_LENGTH
|
||||||
else:
|
else:
|
||||||
return 4096
|
return FileNameFormatter.LINUX_MAX_PATH_LENGTH
|
||||||
|
|
||||||
def format_resource_paths(
|
def format_resource_paths(
|
||||||
self,
|
self,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
import logging
|
import logging
|
||||||
|
@ -103,6 +103,6 @@ class OAuth2TokenManager(praw.reddit.BaseTokenManager):
|
||||||
|
|
||||||
def post_refresh_callback(self, authorizer: praw.reddit.Authorizer):
|
def post_refresh_callback(self, authorizer: praw.reddit.Authorizer):
|
||||||
self.config.set("DEFAULT", "user_token", authorizer.refresh_token)
|
self.config.set("DEFAULT", "user_token", authorizer.refresh_token)
|
||||||
with open(self.config_location, "w") as file:
|
with Path(self.config_location).open(mode="w") as file:
|
||||||
self.config.write(file, True)
|
self.config.write(file, True)
|
||||||
logger.log(9, f"Written OAuth2 token from authoriser to {self.config_location}")
|
logger.log(9, f"Written OAuth2 token from authoriser to {self.config_location}")
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Callable, Optional
|
from collections.abc import Callable
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import _hashlib
|
import _hashlib
|
||||||
import requests
|
import requests
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Type
|
|
||||||
|
|
||||||
from bdfr.exceptions import NotADownloadableLinkError
|
from bdfr.exceptions import NotADownloadableLinkError
|
||||||
from bdfr.site_downloaders.base_downloader import BaseDownloader
|
from bdfr.site_downloaders.base_downloader import BaseDownloader
|
||||||
|
@ -24,13 +23,15 @@ from bdfr.site_downloaders.youtube import Youtube
|
||||||
|
|
||||||
class DownloadFactory:
|
class DownloadFactory:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def pull_lever(url: str) -> Type[BaseDownloader]:
|
def pull_lever(url: str) -> type[BaseDownloader]:
|
||||||
sanitised_url = DownloadFactory.sanitise_url(url)
|
sanitised_url = DownloadFactory.sanitise_url(url).lower()
|
||||||
if re.match(r"(i\.|m\.)?imgur", sanitised_url):
|
if re.match(r"(i\.|m\.|o\.)?imgur", sanitised_url):
|
||||||
return Imgur
|
return Imgur
|
||||||
elif re.match(r"(i\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
|
elif re.match(r"(i\.|thumbs\d\.|v\d\.)?(redgifs|gifdeliverynetwork)", sanitised_url):
|
||||||
return Redgifs
|
return Redgifs
|
||||||
elif re.match(r".*/.*\.\w{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource(
|
elif re.match(r"(thumbs\.|giant\.)?gfycat\.", sanitised_url):
|
||||||
|
return Gfycat
|
||||||
|
elif re.match(r".*/.*\.[a-zA-Z34]{3,4}(\?[\w;&=]*)?$", sanitised_url) and not DownloadFactory.is_web_resource(
|
||||||
sanitised_url
|
sanitised_url
|
||||||
):
|
):
|
||||||
return Direct
|
return Direct
|
||||||
|
@ -42,8 +43,6 @@ class DownloadFactory:
|
||||||
return Gallery
|
return Gallery
|
||||||
elif re.match(r"patreon\.com.*", sanitised_url):
|
elif re.match(r"patreon\.com.*", sanitised_url):
|
||||||
return Gallery
|
return Gallery
|
||||||
elif re.match(r"gfycat\.", sanitised_url):
|
|
||||||
return Gfycat
|
|
||||||
elif re.match(r"reddit\.com/r/", sanitised_url):
|
elif re.match(r"reddit\.com/r/", sanitised_url):
|
||||||
return SelfPost
|
return SelfPost
|
||||||
elif re.match(r"(m\.)?youtu\.?be", sanitised_url):
|
elif re.match(r"(m\.)?youtu\.?be", sanitised_url):
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Callable, Optional
|
from collections.abc import Callable
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
from praw.models import Submission
|
from praw.models import Submission
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
@ -9,9 +9,7 @@ from praw.models import Submission
|
||||||
from bdfr.exceptions import NotADownloadableLinkError
|
from bdfr.exceptions import NotADownloadableLinkError
|
||||||
from bdfr.resource import Resource
|
from bdfr.resource import Resource
|
||||||
from bdfr.site_authenticator import SiteAuthenticator
|
from bdfr.site_authenticator import SiteAuthenticator
|
||||||
from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import (
|
from bdfr.site_downloaders.fallback_downloaders.fallback_downloader import BaseFallbackDownloader
|
||||||
BaseFallbackDownloader,
|
|
||||||
)
|
|
||||||
from bdfr.site_downloaders.youtube import Youtube
|
from bdfr.site_downloaders.youtube import Youtube
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
@ -22,7 +23,7 @@ class Gfycat(Redgifs):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_link(url: str) -> set[str]:
|
def _get_link(url: str) -> set[str]:
|
||||||
gfycat_id = re.match(r".*/(.*?)/?$", url).group(1)
|
gfycat_id = re.match(r".*/(.*?)(?:/?|-.*|\..{3-4})$", url).group(1)
|
||||||
url = "https://gfycat.com/" + gfycat_id
|
url = "https://gfycat.com/" + gfycat_id
|
||||||
|
|
||||||
response = Gfycat.retrieve_url(url)
|
response = Gfycat.retrieve_url(url)
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import bs4
|
|
||||||
from praw.models import Submission
|
from praw.models import Submission
|
||||||
|
|
||||||
from bdfr.exceptions import SiteDownloaderError
|
from bdfr.exceptions import SiteDownloaderError
|
||||||
|
@ -22,65 +22,44 @@ class Imgur(BaseDownloader):
|
||||||
self.raw_data = self._get_data(self.post.url)
|
self.raw_data = self._get_data(self.post.url)
|
||||||
|
|
||||||
out = []
|
out = []
|
||||||
if "album_images" in self.raw_data:
|
if "is_album" in self.raw_data:
|
||||||
images = self.raw_data["album_images"]
|
for image in self.raw_data["images"]:
|
||||||
for image in images["images"]:
|
if "mp4" in image:
|
||||||
out.append(self._compute_image_url(image))
|
out.append(Resource(self.post, image["mp4"], Resource.retry_download(image["mp4"])))
|
||||||
|
else:
|
||||||
|
out.append(Resource(self.post, image["link"], Resource.retry_download(image["link"])))
|
||||||
else:
|
else:
|
||||||
out.append(self._compute_image_url(self.raw_data))
|
if "mp4" in self.raw_data:
|
||||||
|
out.append(Resource(self.post, self.raw_data["mp4"], Resource.retry_download(self.raw_data["mp4"])))
|
||||||
|
else:
|
||||||
|
out.append(Resource(self.post, self.raw_data["link"], Resource.retry_download(self.raw_data["link"])))
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def _compute_image_url(self, image: dict) -> Resource:
|
|
||||||
ext = self._validate_extension(image["ext"])
|
|
||||||
if image.get("prefer_video", False):
|
|
||||||
ext = ".mp4"
|
|
||||||
|
|
||||||
image_url = "https://i.imgur.com/" + image["hash"] + ext
|
|
||||||
return Resource(self.post, image_url, Resource.retry_download(image_url))
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_data(link: str) -> dict:
|
def _get_data(link: str) -> dict:
|
||||||
try:
|
try:
|
||||||
imgur_id = re.match(r".*/(.*?)(\..{0,})?$", link).group(1)
|
if link.endswith("/"):
|
||||||
gallery = "a/" if re.search(r".*/(.*?)(gallery/|a/)", link) else ""
|
link = link.removesuffix("/")
|
||||||
link = f"https://imgur.com/{gallery}{imgur_id}"
|
if re.search(r".*/(.*?)(gallery/|a/)", link):
|
||||||
|
imgur_id = re.match(r".*/(?:gallery/|a/)(.*?)(?:/.*)?$", link).group(1)
|
||||||
|
link = f"https://api.imgur.com/3/album/{imgur_id}"
|
||||||
|
else:
|
||||||
|
imgur_id = re.match(r".*/(.*?)(?:_d)?(?:\..{0,})?$", link).group(1)
|
||||||
|
link = f"https://api.imgur.com/3/image/{imgur_id}"
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise SiteDownloaderError(f"Could not extract Imgur ID from {link}")
|
raise SiteDownloaderError(f"Could not extract Imgur ID from {link}")
|
||||||
|
|
||||||
res = Imgur.retrieve_url(link, cookies={"over18": "1", "postpagebeta": "0"})
|
headers = {
|
||||||
|
"referer": "https://imgur.com/",
|
||||||
soup = bs4.BeautifulSoup(res.text, "html.parser")
|
"origin": "https://imgur.com",
|
||||||
scripts = soup.find_all("script", attrs={"type": "text/javascript"})
|
"content-type": "application/json",
|
||||||
scripts = [script.string.replace("\n", "") for script in scripts if script.string]
|
"Authorization": "Client-ID 546c25a59c58ad7",
|
||||||
|
}
|
||||||
script_regex = re.compile(r"\s*\(function\(widgetFactory\)\s*{\s*widgetFactory\.mergeConfig\(\'gallery\'")
|
res = Imgur.retrieve_url(link, headers=headers)
|
||||||
chosen_script = list(filter(lambda s: re.search(script_regex, s), scripts))
|
|
||||||
if len(chosen_script) != 1:
|
|
||||||
raise SiteDownloaderError(f"Could not read page source from {link}")
|
|
||||||
|
|
||||||
chosen_script = chosen_script[0]
|
|
||||||
|
|
||||||
outer_regex = re.compile(r"widgetFactory\.mergeConfig\(\'gallery\', ({.*})\);")
|
|
||||||
inner_regex = re.compile(r"image\s*:(.*),\s*group")
|
|
||||||
try:
|
|
||||||
image_dict = re.search(outer_regex, chosen_script).group(1)
|
|
||||||
image_dict = re.search(inner_regex, image_dict).group(1)
|
|
||||||
except AttributeError:
|
|
||||||
raise SiteDownloaderError(f"Could not find image dictionary in page source")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
image_dict = json.loads(image_dict)
|
image_dict = json.loads(res.text)
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
raise SiteDownloaderError(f"Could not parse received dict as JSON: {e}")
|
raise SiteDownloaderError(f"Could not parse received response as JSON: {e}")
|
||||||
|
|
||||||
return image_dict
|
return image_dict["data"]
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _validate_extension(extension_suffix: str) -> str:
|
|
||||||
extension_suffix = re.sub(r"\?.*", "", extension_suffix)
|
|
||||||
possible_extensions = (".jpg", ".png", ".mp4", ".gif")
|
|
||||||
selection = [ext for ext in possible_extensions if ext == extension_suffix]
|
|
||||||
if len(selection) == 1:
|
|
||||||
return selection[0]
|
|
||||||
else:
|
|
||||||
raise SiteDownloaderError(f'"{extension_suffix}" is not recognized as a valid extension for Imgur')
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
@ -22,11 +23,20 @@ class Redgifs(BaseDownloader):
|
||||||
return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]
|
return [Resource(self.post, m, Resource.retry_download(m), None) for m in media_urls]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_link(url: str) -> set[str]:
|
def _get_id(url: str) -> str:
|
||||||
try:
|
try:
|
||||||
redgif_id = re.match(r".*/(.*?)(\..{0,})?$", url).group(1)
|
if url.endswith("/"):
|
||||||
|
url = url.removesuffix("/")
|
||||||
|
redgif_id = re.match(r".*/(.*?)(?:#.*|\?.*|\..{0,})?$", url).group(1).lower()
|
||||||
|
if redgif_id.endswith("-mobile"):
|
||||||
|
redgif_id = redgif_id.removesuffix("-mobile")
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}")
|
raise SiteDownloaderError(f"Could not extract Redgifs ID from {url}")
|
||||||
|
return redgif_id
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_link(url: str) -> set[str]:
|
||||||
|
redgif_id = Redgifs._get_id(url)
|
||||||
|
|
||||||
auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
|
auth_token = json.loads(Redgifs.retrieve_url("https://api.redgifs.com/v2/auth/temporary").text)["token"]
|
||||||
if not auth_token:
|
if not auth_token:
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
|
@ -1,14 +1,12 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
from typing import Optional
|
||||||
from pathlib import Path
|
|
||||||
from typing import Callable, Optional
|
|
||||||
|
|
||||||
import yt_dlp
|
|
||||||
from praw.models import Submission
|
from praw.models import Submission
|
||||||
|
|
||||||
from bdfr.exceptions import NotADownloadableLinkError, SiteDownloaderError
|
from bdfr.exceptions import NotADownloadableLinkError
|
||||||
from bdfr.resource import Resource
|
from bdfr.resource import Resource
|
||||||
from bdfr.site_authenticator import SiteAuthenticator
|
from bdfr.site_authenticator import SiteAuthenticator
|
||||||
from bdfr.site_downloaders.youtube import Youtube
|
from bdfr.site_downloaders.youtube import Youtube
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
|
from collections.abc import Callable
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Optional
|
from typing import Optional
|
||||||
|
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
from praw.models import Submission
|
from praw.models import Submission
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
black
|
|
||||||
isort
|
|
||||||
pytest
|
|
||||||
tox
|
|
|
@ -6,11 +6,11 @@ When the project was rewritten for v2, the goal was to make the codebase easily
|
||||||
|
|
||||||
The BDFR is designed to be a stateless downloader. This means that the state of the program is forgotten between each run of the program. There are no central lists, databases, or indices, that the BDFR uses, only the actual files on disk. There are several advantages to this approach:
|
The BDFR is designed to be a stateless downloader. This means that the state of the program is forgotten between each run of the program. There are no central lists, databases, or indices, that the BDFR uses, only the actual files on disk. There are several advantages to this approach:
|
||||||
|
|
||||||
1. There is no chance of the database being corrupted or changed by something other than the BDFR, rendering the BDFR's "idea" of the archive wrong or incomplete.
|
1. There is no chance of the database being corrupted or changed by something other than the BDFR, rendering the BDFR's "idea" of the archive wrong or incomplete.
|
||||||
2. Any information about the archive is contained by the archive itself i.e. for a list of all submission IDs in the archive, this can be extracted from the names of the files in said archive, assuming an appropriate naming scheme was used.
|
2. Any information about the archive is contained by the archive itself i.e. for a list of all submission IDs in the archive, this can be extracted from the names of the files in said archive, assuming an appropriate naming scheme was used.
|
||||||
3. Archives can be merged, split, or editing without worrying about having to update a central database
|
3. Archives can be merged, split, or editing without worrying about having to update a central database
|
||||||
4. There are no versioning issues between updates of the BDFR, where old version are stuck with a worse form of the database
|
4. There are no versioning issues between updates of the BDFR, where old version are stuck with a worse form of the database
|
||||||
5. An archive can be put on a USB, moved to another computer with possibly a very different BDFR version, and work completely fine
|
5. An archive can be put on a USB, moved to another computer with possibly a very different BDFR version, and work completely fine
|
||||||
|
|
||||||
Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It Well. It's a major part of Unix philosophy and states that each tool should have a well-defined, limited purpose. To this end, the BDFR is, as the name implies, a *downloader*. That is the scope of the tool. Managing the files downloaded can be for better-suited programs, since the BDFR is not a file manager. Nor the BDFR concern itself with how any of the data downloaded is displayed, changed, parsed, or analysed. This makes the BDFR suitable for data science-related tasks, archiving, personal downloads, or analysis of various Reddit sources as the BDFR is completely agnostic on how the data is used.
|
Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It Well. It's a major part of Unix philosophy and states that each tool should have a well-defined, limited purpose. To this end, the BDFR is, as the name implies, a *downloader*. That is the scope of the tool. Managing the files downloaded can be for better-suited programs, since the BDFR is not a file manager. Nor the BDFR concern itself with how any of the data downloaded is displayed, changed, parsed, or analysed. This makes the BDFR suitable for data science-related tasks, archiving, personal downloads, or analysis of various Reddit sources as the BDFR is completely agnostic on how the data is used.
|
||||||
|
|
||||||
|
@ -18,23 +18,15 @@ Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It
|
||||||
|
|
||||||
The BDFR is organised around a central object, the RedditDownloader class. The Archiver object extends and inherits from this class.
|
The BDFR is organised around a central object, the RedditDownloader class. The Archiver object extends and inherits from this class.
|
||||||
|
|
||||||
1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc.
|
1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc.
|
||||||
|
2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list.
|
||||||
2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list.
|
3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct.
|
||||||
|
4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource.
|
||||||
3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct.
|
5. This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource.
|
||||||
|
6. The Resource is passed through the DownloadFilter instantiated in step 1.
|
||||||
4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource.
|
7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded.
|
||||||
|
8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates.
|
||||||
5. This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource.
|
9. Only then is the Resource written to the disk.
|
||||||
|
|
||||||
6. The Resource is passed through the DownloadFilter instantiated in step 1.
|
|
||||||
|
|
||||||
7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded.
|
|
||||||
|
|
||||||
8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates.
|
|
||||||
|
|
||||||
9. Only then is the Resource written to the disk.
|
|
||||||
|
|
||||||
This is the step-by-step process that the BDFR goes through to download a Reddit post.
|
This is the step-by-step process that the BDFR goes through to download a Reddit post.
|
||||||
|
|
||||||
|
|
|
@ -26,13 +26,13 @@ Before creating a pull request (PR), check out [ARCHITECTURE](ARCHITECTURE.md) f
|
||||||
|
|
||||||
Once you have done both of these, the below list shows the path that should be followed when writing a PR.
|
Once you have done both of these, the below list shows the path that should be followed when writing a PR.
|
||||||
|
|
||||||
1. If an issue does not already exist, open one that will relate to the PR.
|
1. If an issue does not already exist, open one that will relate to the PR.
|
||||||
2. Ensure that any changes fit into the architecture specified above.
|
2. Ensure that any changes fit into the architecture specified above.
|
||||||
3. Ensure that you have written tests that cover the new code.
|
3. Ensure that you have written tests that cover the new code.
|
||||||
4. Ensure that no existing tests fail, unless there is a good reason for them to do so.
|
4. Ensure that no existing tests fail, unless there is a good reason for them to do so.
|
||||||
5. If needed, update any documentation with changes.
|
5. If needed, update any documentation with changes.
|
||||||
6. Open a pull request that references the relevant issue.
|
6. Open a pull request that references the relevant issue.
|
||||||
7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here.
|
7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here.
|
||||||
|
|
||||||
Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR.
|
Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR.
|
||||||
|
|
||||||
|
@ -58,23 +58,37 @@ Then, you can run the program from anywhere in your disk as such:
|
||||||
bdfr
|
bdfr
|
||||||
```
|
```
|
||||||
|
|
||||||
## Style Guide
|
There are additional Python packages that are required to develop the BDFR. These can be installed with the following command:
|
||||||
|
|
||||||
The BDFR must conform to PEP8 standard wherever there is Python code, with one exception. Line lengths may extend to 120 characters, but all other PEP8 standards must be followed.
|
```bash
|
||||||
|
python3 -m pip install -e .[dev]
|
||||||
It's easy to format your code without any manual work via a variety of tools. Autopep8 is a good one, and can be used with `autopep8 --max-line-length 120` which will format the code according to the style in use with the BDFR.
|
|
||||||
|
|
||||||
Hanging brackets are preferred when there are many items, items that otherwise go over the 120 character line limit, or when doing so would increase readability. It is also preferred when there might be many commits altering the list, such as with the parameter lists for tests. A hanging comma is also required in such cases. An example of this is below:
|
|
||||||
|
|
||||||
```python
|
|
||||||
test = [
|
|
||||||
'test 1',
|
|
||||||
'test 2',
|
|
||||||
'test 3',
|
|
||||||
]
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that the last bracket is on its own line, and that the first bracket has a new line before the first term. Also note that there is a comma after the last term.
|
### Tools
|
||||||
|
|
||||||
|
The BDFR project uses several tools to manage the code of the project. These include:
|
||||||
|
|
||||||
|
- [black](https://github.com/psf/black)
|
||||||
|
- [flake8](https://github.com/john-hen/Flake8-pyproject)
|
||||||
|
- [isort](https://github.com/PyCQA/isort)
|
||||||
|
- [markdownlint (mdl)](https://github.com/markdownlint/markdownlint)
|
||||||
|
- [tox](https://tox.wiki/en/latest/)
|
||||||
|
- [pre-commit](https://github.com/pre-commit/pre-commit)
|
||||||
|
|
||||||
|
The first four tools are formatters. These change the code to the standards expected for the BDFR project. The configuration details for these tools are contained in the [pyproject.toml](../pyproject.toml) file for the project.
|
||||||
|
|
||||||
|
The tool `tox` is used to run tests and tools on demand and has the following environments:
|
||||||
|
|
||||||
|
- `format`
|
||||||
|
- `format_check`
|
||||||
|
|
||||||
|
The tool `pre-commit` is optional, and runs the three formatting tools automatically when a commit is made. This is **highly recommended** to ensure that all code submitted for this project is formatted acceptably. Note that any PR that does not follow the formatting guide will not be accepted. For information on how to use pre-commit to avoid this, see [the pre-commit documentation](https://pre-commit.com/).
|
||||||
|
|
||||||
|
## Style Guide
|
||||||
|
|
||||||
|
The BDFR uses the Black formatting standard and enforces this with the tool by the same name. Additionally, the tool isort is used as well to format imports.
|
||||||
|
|
||||||
|
See [Preparing the Environment for Development](#preparing-the-environment-for-development) for how to setup these tools to run automatically.
|
||||||
|
|
||||||
## Tests
|
## Tests
|
||||||
|
|
||||||
|
@ -87,14 +101,14 @@ When submitting a PR, it is required that you run **all** possible tests to ensu
|
||||||
This is accomplished with marks, a system that pytest uses to categorise tests. The following marks are currently in use in the BDFR test suite.
|
This is accomplished with marks, a system that pytest uses to categorise tests. The following marks are currently in use in the BDFR test suite.
|
||||||
|
|
||||||
- `slow`
|
- `slow`
|
||||||
- This marks a test that may take a long time to complete
|
- This marks a test that may take a long time to complete
|
||||||
- Usually marks a test that downloads many submissions or downloads a particularly large resource
|
- Usually marks a test that downloads many submissions or downloads a particularly large resource
|
||||||
- `online`
|
- `online`
|
||||||
- This marks a test that requires an internet connection and uses online resources
|
- This marks a test that requires an internet connection and uses online resources
|
||||||
- `reddit`
|
- `reddit`
|
||||||
- This marks a test that accesses online Reddit specifically
|
- This marks a test that accesses online Reddit specifically
|
||||||
- `authenticated`
|
- `authenticated`
|
||||||
- This marks a test that requires a test configuration file with a valid OAuth2 token
|
- This marks a test that requires a test configuration file with a valid OAuth2 token
|
||||||
|
|
||||||
These tests can be run either all at once, or excluding certain marks. The tests that require online resources, such as those marked `reddit` or `online`, will naturally require more time to run than tests that are entirely offline. To run tests, you must be in the root directory of the project and can use the following command.
|
These tests can be run either all at once, or excluding certain marks. The tests that require online resources, such as those marked `reddit` or `online`, will naturally require more time to run than tests that are entirely offline. To run tests, you must be in the root directory of the project and can use the following command.
|
||||||
|
|
||||||
|
|
88
pyproject.toml
Normal file
88
pyproject.toml
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["setuptools>=65.6.0", "wheel"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "bdfr"
|
||||||
|
description = "Downloads and archives content from reddit"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.9"
|
||||||
|
license = {file = "LICENSE"}
|
||||||
|
keywords = ["reddit", "download", "archive",]
|
||||||
|
authors = [{name = "Ali Parlakci", email = "parlakciali@gmail.com"}]
|
||||||
|
maintainers = [{name = "Serene Arc", email = "serenical@gmail.com"}]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 5 - Production/Stable",
|
||||||
|
"Environment :: Console",
|
||||||
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
||||||
|
"Natural Language :: English",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
"appdirs>=1.4.4",
|
||||||
|
"beautifulsoup4>=4.10.0",
|
||||||
|
"click>=8.0.0",
|
||||||
|
"dict2xml>=1.7.0",
|
||||||
|
"praw>=7.2.0",
|
||||||
|
"pyyaml>=5.4.1",
|
||||||
|
"requests>=2.25.1",
|
||||||
|
"yt-dlp>=2022.11.11",
|
||||||
|
]
|
||||||
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
[tool.setuptools]
|
||||||
|
dynamic = {"version" = {attr = 'bdfr.__version__'}}
|
||||||
|
packages = ["bdfr", "bdfr.archive_entry", "bdfr.site_downloaders", "bdfr.site_downloaders.fallback_downloaders",]
|
||||||
|
data-files = {"config" = ["bdfr/default_config.cfg",]}
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"black>=22.12.0",
|
||||||
|
"Flake8-pyproject>=1.2.2",
|
||||||
|
"isort>=5.11.4",
|
||||||
|
"pre-commit>=2.20.0",
|
||||||
|
"pytest>=7.1.0",
|
||||||
|
"tox>=3.27.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
"Homepage" = "https://aliparlakci.github.io/bulk-downloader-for-reddit"
|
||||||
|
"Source" = "https://github.com/aliparlakci/bulk-downloader-for-reddit"
|
||||||
|
"Bug Reports" = "https://github.com/aliparlakci/bulk-downloader-for-reddit/issues"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
bdfr = "bdfr.__main__:cli"
|
||||||
|
bdfr-archive = "bdfr.__main__:cli_archive"
|
||||||
|
bdfr-clone = "bdfr.__main__:cli_clone"
|
||||||
|
bdfr-download = "bdfr.__main__:cli_download"
|
||||||
|
|
||||||
|
[tool.black]
|
||||||
|
line-length = 120
|
||||||
|
|
||||||
|
[tool.flake8]
|
||||||
|
exclude = ["scripts"]
|
||||||
|
max-line-length = 120
|
||||||
|
show-source = true
|
||||||
|
statistics = true
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
py_version = 39
|
||||||
|
multi_line_output = 3
|
||||||
|
line_length = 120
|
||||||
|
indent = 4
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
minversion = "7.1"
|
||||||
|
addopts = "--strict-markers"
|
||||||
|
testpaths = "tests"
|
||||||
|
markers = [
|
||||||
|
"online: tests require a connection to the internet",
|
||||||
|
"reddit: tests require a connection to Reddit",
|
||||||
|
"slow: test is slow to run",
|
||||||
|
"authenticated: test requires an authenticated Reddit instance",
|
||||||
|
]
|
|
@ -1,7 +0,0 @@
|
||||||
[pytest]
|
|
||||||
addopts = --strict-markers
|
|
||||||
markers =
|
|
||||||
online: tests require a connection to the internet
|
|
||||||
reddit: tests require a connection to Reddit
|
|
||||||
slow: test is slow to run
|
|
||||||
authenticated: test requires an authenticated Reddit instance
|
|
|
@ -1,9 +0,0 @@
|
||||||
appdirs>=1.4.4
|
|
||||||
bs4>=0.0.1
|
|
||||||
click>=7.1.2
|
|
||||||
dict2xml>=1.7.0
|
|
||||||
ffmpeg-python>=0.2.0
|
|
||||||
praw>=7.2.0
|
|
||||||
pyyaml>=5.4.1
|
|
||||||
requests>=2.25.1
|
|
||||||
yt-dlp>=2022.11.11
|
|
|
@ -2,10 +2,11 @@
|
||||||
|
|
||||||
Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems.
|
Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems.
|
||||||
|
|
||||||
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
|
- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids)
|
||||||
- [Script to extract all failed download IDs](#extract-all-failed-ids)
|
- [Script to extract all failed download IDs](#extract-all-failed-ids)
|
||||||
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
|
- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps)
|
||||||
- [Printing summary statistics for a run](#printing-summary-statistics)
|
- [Printing summary statistics for a run](#printing-summary-statistics)
|
||||||
|
- [Unsaving posts from your account after downloading](#unsave-posts-after-downloading)
|
||||||
|
|
||||||
## Extract all Successfully Downloaded IDs
|
## Extract all Successfully Downloaded IDs
|
||||||
|
|
||||||
|
@ -67,3 +68,23 @@ Excluded submissions: 1146
|
||||||
Files with existing hash skipped: 0
|
Files with existing hash skipped: 0
|
||||||
Submissions from excluded subreddits: 0
|
Submissions from excluded subreddits: 0
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Unsave Posts After Downloading
|
||||||
|
|
||||||
|
[This script](unsaveposts.py) takes a list of submission IDs from a file named `successfulids` created with the `extract_successful_ids.sh` script and unsaves them from your account. To make it work you will need to make a user script in your reddit profile like this:
|
||||||
|
- Fill in the username and password fields in the script. Make sure you keep the quotes around the fields.
|
||||||
|
- Go to https://old.reddit.com/prefs/apps/
|
||||||
|
- Click on `Develop an app` at the bottom.
|
||||||
|
- Make sure you select a `script` not a `web app`.
|
||||||
|
- Name it `Unsave Posts`.
|
||||||
|
- Fill in the `Redirect URI` field with `127.0.0.0`.
|
||||||
|
- Save it.
|
||||||
|
- Fill in the `client_id` and `client_secret` fields on the script. The client ID is the 14 character string under the name you gave your script. It'll look like a bunch of random characters like this: pspYLwDoci9z_A. The client secret is the longer string next to "secret". Again keep the quotes around the fields.
|
||||||
|
|
||||||
|
Now the script is ready to run. Just execute it like this:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3.9 -m bdfr download DOWNLOAD_DIR --authenticate --user me --saved --log LOGFILE_LOCATION
|
||||||
|
./extract_successful_ids.sh LOGFILE_LOCATION > successfulids
|
||||||
|
./unsaveposts.py
|
||||||
|
```
|
||||||
|
|
40
scripts/unsaveposts.py
Normal file
40
scripts/unsaveposts.py
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env python3.9
"""Unsave previously-downloaded posts from a Reddit account.

This script takes a list of submission IDs from a file named "successfulids",
created with the "extract_successful_ids.sh" script, and unsaves each one from
your account. To make it work you must fill in the username and password
fields below. Make sure you keep the quotes around the fields.

You'll need to make a "user script" in your Reddit profile to run this:
  - Go to https://old.reddit.com/prefs/apps/
  - Click on "Develop an app" at the bottom.
  - Make sure you select a "script", not a "web app".
  - Give it a random name. Doesn't matter.
  - Fill in the "Redirect URI" field with something, e.g. 127.0.0.0.
  - Save it.

The client ID is the 14 character string under the name you gave your script.
It'll look like a bunch of random characters like this: pspYLwDoci9z_A
The client secret is the longer string next to "secret".
Replace those two fields below. Again, keep the quotes around the fields.
"""

import praw

try:
    # Credentials for a Reddit "script"-type app; fill these in before running.
    reddit = praw.Reddit(
        client_id="CLIENTID",
        client_secret="CLIENTSECRET",
        password="USERPASSWORD",
        user_agent="Unsave Posts",
        username="USERNAME",
    )

    # One submission ID per line, as produced by extract_successful_ids.sh.
    with open("successfulids", "r") as id_file:
        for line in id_file:
            submission_id = line.strip()
            # Skip blank lines (e.g. a trailing newline) instead of sending
            # an empty ID to the Reddit API.
            if submission_id:
                reddit.submission(id=submission_id).unsave()

except Exception as error:
    # Narrowed from a bare `except:` so Ctrl-C (KeyboardInterrupt) still
    # aborts the script, and the actual error is reported instead of being
    # silently swallowed.
    print("Something went wrong. Did you install PRAW? Did you change the user login fields?")
    print(f"Error: {error}")

else:
    print("Done! Thanks for playing!")
|
||||||
|
|
26
setup.cfg
26
setup.cfg
|
@ -1,26 +0,0 @@
|
||||||
[metadata]
|
|
||||||
name = bdfr
|
|
||||||
description_file = README.md
|
|
||||||
description_content_type = text/markdown
|
|
||||||
home_page = https://github.com/aliparlakci/bulk-downloader-for-reddit
|
|
||||||
keywords = reddit, download, archive
|
|
||||||
version = 2.6.2
|
|
||||||
author = Ali Parlakci
|
|
||||||
author_email = parlakciali@gmail.com
|
|
||||||
maintainer = Serene Arc
|
|
||||||
maintainer_email = serenical@gmail.com
|
|
||||||
license = GPLv3
|
|
||||||
classifiers =
|
|
||||||
Programming Language :: Python :: 3
|
|
||||||
License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
||||||
Natural Language :: English
|
|
||||||
Environment :: Console
|
|
||||||
Operating System :: OS Independent
|
|
||||||
platforms = any
|
|
||||||
|
|
||||||
[files]
|
|
||||||
packages = bdfr
|
|
||||||
|
|
||||||
[entry_points]
|
|
||||||
console_scripts =
|
|
||||||
bdfr = bdfr.__main__:cli
|
|
6
setup.py
6
setup.py
|
@ -1,6 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# encoding=utf-8
|
|
||||||
|
|
||||||
from setuptools import setup
|
|
||||||
|
|
||||||
setup(setup_requires=['pbr', 'appdirs'], pbr=True, data_files=[('config', ['bdfr/default_config.cfg'])], python_requires='>=3.9.0')
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
|
@ -1,2 +1,2 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import pytest
|
import pytest
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import pytest
|
import pytest
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
import socket
|
import socket
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
|
@ -1,10 +1,12 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -176,3 +178,24 @@ def test_cli_archive_soft_fail(test_args: list[str], tmp_path: Path):
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "failed to be archived due to a PRAW exception" in result.output
|
assert "failed to be archived due to a PRAW exception" in result.output
|
||||||
assert "Attempting to archive" not in result.output
|
assert "Attempting to archive" not in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(["--user", "nasa", "--submitted"], 502),
|
||||||
|
(["--user", "nasa", "--submitted"], 504),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_archive_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -68,3 +70,24 @@ def test_cli_scrape_soft_fail(test_args: list[str], tmp_path: Path):
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "Downloaded submission" not in result.output
|
assert "Downloaded submission" not in result.output
|
||||||
assert "Record for entry item" not in result.output
|
assert "Record for entry item" not in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(["--user", "nasa", "--submitted"], 502),
|
||||||
|
(["--user", "nasa", "--submitted"], 504),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_user_serv_fail(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_cloner_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import prawcore
|
||||||
import pytest
|
import pytest
|
||||||
from click.testing import CliRunner
|
from click.testing import CliRunner
|
||||||
|
|
||||||
|
@ -13,7 +15,7 @@ does_test_config_exist = Path("./tests/test_config.cfg").exists()
|
||||||
|
|
||||||
|
|
||||||
def copy_test_config(run_path: Path):
|
def copy_test_config(run_path: Path):
|
||||||
shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "./test_config.cfg"))
|
shutil.copy(Path("./tests/test_config.cfg"), Path(run_path, "test_config.cfg"))
|
||||||
|
|
||||||
|
|
||||||
def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
|
def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
|
||||||
|
@ -23,7 +25,7 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
|
||||||
str(run_path),
|
str(run_path),
|
||||||
"-v",
|
"-v",
|
||||||
"--config",
|
"--config",
|
||||||
str(Path(run_path, "./test_config.cfg")),
|
str(Path(run_path, "test_config.cfg")),
|
||||||
"--log",
|
"--log",
|
||||||
str(Path(run_path, "test_log.txt")),
|
str(Path(run_path, "test_log.txt")),
|
||||||
] + test_args
|
] + test_args
|
||||||
|
@ -50,9 +52,9 @@ def create_basic_args_for_download_runner(test_args: list[str], run_path: Path):
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--sort", "new"],
|
["-s", "trollxchromosomes", "-L", 3, "--sort", "new"],
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--sort", "new"],
|
["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--sort", "new"],
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--search", "women"],
|
["-s", "trollxchromosomes", "-L", 3, "--search", "women"],
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--search", "women"],
|
["-s", "trollxchromosomes", "-L", 3, "--time", "week", "--search", "women"],
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--sort", "new", "--search", "women"],
|
["-s", "trollxchromosomes", "-L", 3, "--sort", "new", "--search", "women"],
|
||||||
["-s", "trollxchromosomes", "-L", 3, "--time", "day", "--sort", "new", "--search", "women"],
|
["-s", "trollxchromosomes", "-L", 3, "--time", "week", "--sort", "new", "--search", "women"],
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_cli_download_subreddits(test_args: list[str], tmp_path: Path):
|
def test_cli_download_subreddits(test_args: list[str], tmp_path: Path):
|
||||||
|
@ -277,7 +279,7 @@ def test_cli_download_hard_fail(test_args: list[str], tmp_path: Path):
|
||||||
|
|
||||||
def test_cli_download_use_default_config(tmp_path: Path):
|
def test_cli_download_use_default_config(tmp_path: Path):
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
test_args = ["download", "-vv", str(tmp_path)]
|
test_args = ["download", "-vv", str(tmp_path), "--log", str(Path(tmp_path, "test_log.txt"))]
|
||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
||||||
|
@ -396,3 +398,45 @@ def test_cli_download_score_filter(test_args: list[str], was_filtered: bool, tmp
|
||||||
result = runner.invoke(cli, test_args)
|
result = runner.invoke(cli, test_args)
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert ("filtered due to score" in result.output) == was_filtered
|
assert ("filtered due to score" in result.output) == was_filtered
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.online
|
||||||
|
@pytest.mark.reddit
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_args", "response"),
|
||||||
|
(
|
||||||
|
(["--user", "nasa", "--submitted"], 502),
|
||||||
|
(["--user", "nasa", "--submitted"], 504),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_cli_download_user_reddit_server_error(test_args: list[str], response: int, tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||||
|
with patch("bdfr.connector.sleep", return_value=None):
|
||||||
|
with patch(
|
||||||
|
"bdfr.connector.RedditConnector.check_user_existence",
|
||||||
|
side_effect=prawcore.exceptions.ResponseException(MagicMock(status_code=response)),
|
||||||
|
):
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert f"received {response} HTTP response" in result.output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.online
|
||||||
|
@pytest.mark.reddit
|
||||||
|
@pytest.mark.skipif(not does_test_config_exist, reason="A test config file is required for integration tests")
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"test_args",
|
||||||
|
(
|
||||||
|
["-l", "102vd5i", "--filename-restriction-scheme", "windows"],
|
||||||
|
["-l", "m3hxzd", "--filename-restriction-scheme", "windows"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_cli_download_explicit_filename_restriction_scheme(test_args: list[str], tmp_path: Path):
|
||||||
|
runner = CliRunner()
|
||||||
|
test_args = create_basic_args_for_download_runner(test_args, tmp_path)
|
||||||
|
result = runner.invoke(cli, test_args)
|
||||||
|
assert result.exit_code == 0
|
||||||
|
assert "Downloaded submission" in result.output
|
||||||
|
assert "Forcing Windows-compatible filenames" in result.output
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
|
@ -0,0 +1,2 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
@ -13,8 +13,11 @@ from bdfr.site_downloaders.direct import Direct
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("test_url", "expected_hash"),
|
("test_url", "expected_hash"),
|
||||||
(
|
(
|
||||||
("https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4", "48f9bd4dbec1556d7838885612b13b39"),
|
("https://i.redd.it/q6ebualjxzea1.jpg", "6ec154859c777cb401132bb991cb3635"),
|
||||||
("https://giant.gfycat.com/DazzlingSilkyIguana.mp4", "808941b48fc1e28713d36dd7ed9dc648"),
|
(
|
||||||
|
"https://file-examples.com/wp-content/uploads/2017/11/file_example_MP3_700KB.mp3",
|
||||||
|
"3caa342e241ddb7d76fd24a834094101",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_download_resource(test_url: str, expected_hash: str):
|
def test_download_resource(test_url: str, expected_hash: str):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -31,6 +31,7 @@ from bdfr.site_downloaders.youtube import Youtube
|
||||||
),
|
),
|
||||||
("https://i.redd.it/affyv0axd5k61.png", Direct),
|
("https://i.redd.it/affyv0axd5k61.png", Direct),
|
||||||
("https://i.imgur.com/bZx1SJQ.jpg", Imgur),
|
("https://i.imgur.com/bZx1SJQ.jpg", Imgur),
|
||||||
|
("https://i.Imgur.com/bZx1SJQ.jpg", Imgur),
|
||||||
("https://imgur.com/BuzvZwb.gifv", Imgur),
|
("https://imgur.com/BuzvZwb.gifv", Imgur),
|
||||||
("https://imgur.com/a/MkxAzeg", Imgur),
|
("https://imgur.com/a/MkxAzeg", Imgur),
|
||||||
("https://m.imgur.com/a/py3RW0j", Imgur),
|
("https://m.imgur.com/a/py3RW0j", Imgur),
|
||||||
|
@ -40,6 +41,8 @@ from bdfr.site_downloaders.youtube import Youtube
|
||||||
("https://youtube.com/watch?v=Gv8Wz74FjVA", Youtube),
|
("https://youtube.com/watch?v=Gv8Wz74FjVA", Youtube),
|
||||||
("https://redgifs.com/watch/courageousimpeccablecanvasback", Redgifs),
|
("https://redgifs.com/watch/courageousimpeccablecanvasback", Redgifs),
|
||||||
("https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse", Redgifs),
|
("https://www.gifdeliverynetwork.com/repulsivefinishedandalusianhorse", Redgifs),
|
||||||
|
("https://thumbs4.redgifs.com/DismalIgnorantDrongo-mobile.mp4", Redgifs),
|
||||||
|
("https://v3.redgifs.com/watch/kaleidoscopicdaringvenomoussnake", Redgifs),
|
||||||
("https://youtu.be/DevfjHOhuFc", Youtube),
|
("https://youtu.be/DevfjHOhuFc", Youtube),
|
||||||
("https://m.youtube.com/watch?v=kr-FeojxzUM", Youtube),
|
("https://m.youtube.com/watch?v=kr-FeojxzUM", Youtube),
|
||||||
("https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781", Direct),
|
("https://dynasty-scans.com/system/images_images/000/017/819/original/80215103_p0.png?1612232781", Direct),
|
||||||
|
@ -65,6 +68,7 @@ def test_factory_lever_good(test_submission_url: str, expected_class: BaseDownlo
|
||||||
"https://www.google.com",
|
"https://www.google.com",
|
||||||
"https://www.google.com/test",
|
"https://www.google.com/test",
|
||||||
"https://www.google.com/test/",
|
"https://www.google.com/test/",
|
||||||
|
"https://www.tiktok.com/@keriberry.420",
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_factory_lever_bad(test_url: str):
|
def test_factory_lever_bad(test_url: str):
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import pytest
|
import pytest
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
|
@ -15,11 +15,17 @@ from bdfr.site_downloaders.gfycat import Gfycat
|
||||||
(
|
(
|
||||||
("https://gfycat.com/definitivecaninecrayfish", "https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4"),
|
("https://gfycat.com/definitivecaninecrayfish", "https://giant.gfycat.com/DefinitiveCanineCrayfish.mp4"),
|
||||||
("https://gfycat.com/dazzlingsilkyiguana", "https://giant.gfycat.com/DazzlingSilkyIguana.mp4"),
|
("https://gfycat.com/dazzlingsilkyiguana", "https://giant.gfycat.com/DazzlingSilkyIguana.mp4"),
|
||||||
|
("https://gfycat.com/WearyComposedHairstreak", "https://thumbs4.redgifs.com/WearyComposedHairstreak.mp4"),
|
||||||
|
(
|
||||||
|
"https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif",
|
||||||
|
"https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4",
|
||||||
|
),
|
||||||
|
("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "https://thumbs4.redgifs.com/ComposedWholeBullfrog.mp4"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_get_link(test_url: str, expected_url: str):
|
def test_get_link(test_url: str, expected_url: str):
|
||||||
result = Gfycat._get_link(test_url)
|
result = Gfycat._get_link(test_url)
|
||||||
assert result.pop() == expected_url
|
assert expected_url in result.pop()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
|
@ -28,6 +34,9 @@ def test_get_link(test_url: str, expected_url: str):
|
||||||
(
|
(
|
||||||
("https://gfycat.com/definitivecaninecrayfish", "48f9bd4dbec1556d7838885612b13b39"),
|
("https://gfycat.com/definitivecaninecrayfish", "48f9bd4dbec1556d7838885612b13b39"),
|
||||||
("https://gfycat.com/dazzlingsilkyiguana", "808941b48fc1e28713d36dd7ed9dc648"),
|
("https://gfycat.com/dazzlingsilkyiguana", "808941b48fc1e28713d36dd7ed9dc648"),
|
||||||
|
("https://gfycat.com/WearyComposedHairstreak", "5f82ba1ba23cc927c9fbb0c0421953a5"),
|
||||||
|
("https://thumbs.gfycat.com/ComposedWholeBullfrog-size_restricted.gif", "5292343665a13b5369d889d911ae284d"),
|
||||||
|
("https://giant.gfycat.com/ComposedWholeBullfrog.mp4", "5292343665a13b5369d889d911ae284d"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_download_resource(test_url: str, expected_hash: str):
|
def test_download_resource(test_url: str, expected_hash: str):
|
||||||
|
|
|
@ -1,118 +1,21 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bdfr.exceptions import SiteDownloaderError
|
|
||||||
from bdfr.resource import Resource
|
from bdfr.resource import Resource
|
||||||
from bdfr.site_downloaders.imgur import Imgur
|
from bdfr.site_downloaders.imgur import Imgur
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
("test_url", "expected_gen_dict", "expected_image_dict"),
|
|
||||||
(
|
|
||||||
(
|
|
||||||
"https://imgur.com/a/xWZsDDP",
|
|
||||||
{"num_images": "1", "id": "xWZsDDP", "hash": "xWZsDDP"},
|
|
||||||
[{"hash": "ypa8YfS", "title": "", "ext": ".png", "animated": False}],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://imgur.com/gallery/IjJJdlC",
|
|
||||||
{"num_images": 1, "id": 384898055, "hash": "IjJJdlC"},
|
|
||||||
[
|
|
||||||
{
|
|
||||||
"hash": "CbbScDt",
|
|
||||||
"description": "watch when he gets it",
|
|
||||||
"ext": ".gif",
|
|
||||||
"animated": True,
|
|
||||||
"has_sound": False,
|
|
||||||
}
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://imgur.com/a/dcc84Gt",
|
|
||||||
{"num_images": "4", "id": "dcc84Gt", "hash": "dcc84Gt"},
|
|
||||||
[
|
|
||||||
{"hash": "ylx0Kle", "ext": ".jpg", "title": ""},
|
|
||||||
{"hash": "TdYfKbK", "ext": ".jpg", "title": ""},
|
|
||||||
{"hash": "pCxGbe8", "ext": ".jpg", "title": ""},
|
|
||||||
{"hash": "TSAkikk", "ext": ".jpg", "title": ""},
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://m.imgur.com/a/py3RW0j",
|
|
||||||
{
|
|
||||||
"num_images": "1",
|
|
||||||
"id": "py3RW0j",
|
|
||||||
"hash": "py3RW0j",
|
|
||||||
},
|
|
||||||
[{"hash": "K24eQmK", "has_sound": False, "ext": ".jpg"}],
|
|
||||||
),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
def test_get_data_album(test_url: str, expected_gen_dict: dict, expected_image_dict: list[dict]):
|
|
||||||
result = Imgur._get_data(test_url)
|
|
||||||
assert all([result.get(key) == expected_gen_dict[key] for key in expected_gen_dict.keys()])
|
|
||||||
|
|
||||||
# Check if all the keys from the test dict are correct in at least one of the album entries
|
|
||||||
assert any(
|
|
||||||
[
|
|
||||||
all([image.get(key) == image_dict[key] for key in image_dict.keys()])
|
|
||||||
for image_dict in expected_image_dict
|
|
||||||
for image in result["album_images"]["images"]
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
("test_url", "expected_image_dict"),
|
|
||||||
(
|
|
||||||
("https://i.imgur.com/dLk3FGY.gifv", {"hash": "dLk3FGY", "title": "", "ext": ".mp4", "animated": True}),
|
|
||||||
(
|
|
||||||
"https://imgur.com/65FqTpT.gifv",
|
|
||||||
{"hash": "65FqTpT", "title": "", "description": "", "animated": True, "mimetype": "video/mp4"},
|
|
||||||
),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
def test_get_data_gif(test_url: str, expected_image_dict: dict):
|
|
||||||
result = Imgur._get_data(test_url)
|
|
||||||
assert all([result.get(key) == expected_image_dict[key] for key in expected_image_dict.keys()])
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("test_extension", (".gif", ".png", ".jpg", ".mp4"))
|
|
||||||
def test_imgur_extension_validation_good(test_extension: str):
|
|
||||||
result = Imgur._validate_extension(test_extension)
|
|
||||||
assert result == test_extension
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"test_extension",
|
|
||||||
(
|
|
||||||
".jpeg",
|
|
||||||
"bad",
|
|
||||||
".avi",
|
|
||||||
".test",
|
|
||||||
".flac",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
def test_imgur_extension_validation_bad(test_extension: str):
|
|
||||||
with pytest.raises(SiteDownloaderError):
|
|
||||||
Imgur._validate_extension(test_extension)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("test_url", "expected_hashes"),
|
("test_url", "expected_hashes"),
|
||||||
(
|
(
|
||||||
("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)),
|
("https://imgur.com/a/xWZsDDP", ("f551d6e6b0fef2ce909767338612e31b",)),
|
||||||
(
|
("https://imgur.com/gallery/IjJJdlC", ("740b006cf9ec9d6f734b6e8f5130bdab",)),
|
||||||
"https://imgur.com/gallery/IjJJdlC",
|
("https://imgur.com/gallery/IjJJdlC/", ("740b006cf9ec9d6f734b6e8f5130bdab",)),
|
||||||
("740b006cf9ec9d6f734b6e8f5130bdab",),
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
"https://imgur.com/a/dcc84Gt",
|
"https://imgur.com/a/dcc84Gt",
|
||||||
(
|
(
|
||||||
|
@ -130,46 +33,23 @@ def test_imgur_extension_validation_bad(test_extension: str):
|
||||||
"fb6c913d721c0bbb96aa65d7f560d385",
|
"fb6c913d721c0bbb96aa65d7f560d385",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
(
|
("https://o.imgur.com/jZw9gq2.jpg", ("6d6ea9aa1d98827a05425338afe675bc",)),
|
||||||
"https://i.imgur.com/lFJai6i.gifv",
|
("https://i.imgur.com/lFJai6i.gifv", ("01a6e79a30bec0e644e5da12365d5071",)),
|
||||||
("01a6e79a30bec0e644e5da12365d5071",),
|
("https://i.imgur.com/ywSyILa.gifv?", ("56d4afc32d2966017c38d98568709b45",)),
|
||||||
),
|
("https://imgur.com/ubYwpbk.GIFV", ("d4a774aac1667783f9ed3a1bd02fac0c",)),
|
||||||
(
|
("https://i.imgur.com/j1CNCZY.gifv", ("ed63d7062bc32edaeea8b53f876a307c",)),
|
||||||
"https://i.imgur.com/ywSyILa.gifv?",
|
("https://i.imgur.com/uTvtQsw.gifv", ("46c86533aa60fc0e09f2a758513e3ac2",)),
|
||||||
("56d4afc32d2966017c38d98568709b45",),
|
("https://i.imgur.com/OGeVuAe.giff", ("77389679084d381336f168538793f218",)),
|
||||||
),
|
("https://i.imgur.com/OGeVuAe.gift", ("77389679084d381336f168538793f218",)),
|
||||||
(
|
("https://i.imgur.com/3SKrQfK.jpg?1", ("aa299e181b268578979cad176d1bd1d0",)),
|
||||||
"https://imgur.com/ubYwpbk.GIFV",
|
("https://i.imgur.com/cbivYRW.jpg?3", ("7ec6ceef5380cb163a1d498c359c51fd",)),
|
||||||
("d4a774aac1667783f9ed3a1bd02fac0c",),
|
("http://i.imgur.com/s9uXxlq.jpg?5.jpg", ("338de3c23ee21af056b3a7c154e2478f",)),
|
||||||
),
|
("http://i.imgur.com/s9uXxlqb.jpg", ("338de3c23ee21af056b3a7c154e2478f",)),
|
||||||
(
|
("https://i.imgur.com/2TtN68l_d.webp", ("6569ab9ad9fa68d93f6b408f112dd741",)),
|
||||||
"https://i.imgur.com/j1CNCZY.gifv",
|
("https://imgur.com/a/1qzfWtY/gifv", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
|
||||||
("58e7e6d972058c18b7ecde910ca147e3",),
|
("https://imgur.com/a/1qzfWtY/mp4", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
|
||||||
),
|
("https://imgur.com/a/1qzfWtY/spqr", ("65fbc7ba5c3ed0e3af47c4feef4d3735",)),
|
||||||
(
|
("https://i.imgur.com/expO7Rc.gifv", ("e309f98158fc98072eb2ae68f947f421",)),
|
||||||
"https://i.imgur.com/uTvtQsw.gifv",
|
|
||||||
("46c86533aa60fc0e09f2a758513e3ac2",),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://i.imgur.com/OGeVuAe.giff",
|
|
||||||
("77389679084d381336f168538793f218",),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://i.imgur.com/OGeVuAe.gift",
|
|
||||||
("77389679084d381336f168538793f218",),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://i.imgur.com/3SKrQfK.jpg?1",
|
|
||||||
("aa299e181b268578979cad176d1bd1d0",),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"https://i.imgur.com/cbivYRW.jpg?3",
|
|
||||||
("7ec6ceef5380cb163a1d498c359c51fd",),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"http://i.imgur.com/s9uXxlq.jpg?5.jpg",
|
|
||||||
("338de3c23ee21af056b3a7c154e2478f",),
|
|
||||||
),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_find_resources(test_url: str, expected_hashes: list[str]):
|
def test_find_resources(test_url: str, expected_hashes: list[str]):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@ from bdfr.site_downloaders.pornhub import PornHub
|
||||||
@pytest.mark.slow
|
@pytest.mark.slow
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("test_url", "expected_hash"),
|
("test_url", "expected_hash"),
|
||||||
(("https://www.pornhub.com/view_video.php?viewkey=ph6074c59798497", "ad52a0f4fce8f99df0abed17de1d04c7"),),
|
(("https://www.pornhub.com/view_video.php?viewkey=ph5eafee2d174ff", "d15090cbbaa8ee90500a257c7899ff84"),),
|
||||||
)
|
)
|
||||||
def test_hash_resources_good(test_url: str, expected_hash: str):
|
def test_hash_resources_good(test_url: str, expected_hash: str):
|
||||||
test_submission = MagicMock()
|
test_submission = MagicMock()
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
@ -10,6 +10,22 @@ from bdfr.resource import Resource
|
||||||
from bdfr.site_downloaders.redgifs import Redgifs
|
from bdfr.site_downloaders.redgifs import Redgifs
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
("test_url", "expected"),
|
||||||
|
(
|
||||||
|
("https://redgifs.com/watch/frighteningvictorioussalamander", "frighteningvictorioussalamander"),
|
||||||
|
("https://www.redgifs.com/watch/genuineprivateguillemot/", "genuineprivateguillemot"),
|
||||||
|
("https://www.redgifs.com/watch/marriedcrushingcob?rel=u%3Akokiri.girl%3Bo%3Arecent", "marriedcrushingcob"),
|
||||||
|
("https://thumbs4.redgifs.com/DismalIgnorantDrongo.mp4", "dismalignorantdrongo"),
|
||||||
|
("https://thumbs4.redgifs.com/DismalIgnorantDrongo-mobile.mp4", "dismalignorantdrongo"),
|
||||||
|
("https://v3.redgifs.com/watch/newilliteratemeerkat#rel=user%3Atastynova", "newilliteratemeerkat"),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
def test_get_id(test_url: str, expected: str):
|
||||||
|
result = Redgifs._get_id(test_url)
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("test_url", "expected"),
|
("test_url", "expected"),
|
||||||
|
@ -29,6 +45,7 @@ from bdfr.site_downloaders.redgifs import Redgifs
|
||||||
"UnripeUnkemptWoodpecker-large.jpg",
|
"UnripeUnkemptWoodpecker-large.jpg",
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
("https://www.redgifs.com/watch/genuineprivateguillemot/", {"GenuinePrivateGuillemot.mp4"}),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_get_link(test_url: str, expected: set[str]):
|
def test_get_link(test_url: str, expected: set[str]):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
import pytest
|
import pytest
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
54
tests/test_completion.py
Normal file
54
tests/test_completion.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from bdfr.completion import Completion
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.")
|
||||||
|
def test_cli_completion_all(tmp_path: Path):
|
||||||
|
tmp_path = str(tmp_path)
|
||||||
|
with patch("appdirs.user_data_dir", return_value=tmp_path):
|
||||||
|
Completion("all").install()
|
||||||
|
assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 1
|
||||||
|
assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 1
|
||||||
|
assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 1
|
||||||
|
Completion("all").uninstall()
|
||||||
|
assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 0
|
||||||
|
assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 0
|
||||||
|
assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.")
|
||||||
|
def test_cli_completion_bash(tmp_path: Path):
|
||||||
|
tmp_path = str(tmp_path)
|
||||||
|
with patch("appdirs.user_data_dir", return_value=tmp_path):
|
||||||
|
Completion("bash").install()
|
||||||
|
assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 1
|
||||||
|
Completion("bash").uninstall()
|
||||||
|
assert Path(tmp_path + "/bash-completion/completions/bdfr").exists() == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.")
|
||||||
|
def test_cli_completion_fish(tmp_path: Path):
|
||||||
|
tmp_path = str(tmp_path)
|
||||||
|
with patch("appdirs.user_data_dir", return_value=tmp_path):
|
||||||
|
Completion("fish").install()
|
||||||
|
assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 1
|
||||||
|
Completion("fish").uninstall()
|
||||||
|
assert Path(tmp_path + "/fish/vendor_completions.d/bdfr.fish").exists() == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.platform == "win32", reason="Completions are not currently supported on Windows.")
|
||||||
|
def test_cli_completion_zsh(tmp_path: Path):
|
||||||
|
tmp_path = str(tmp_path)
|
||||||
|
with patch("appdirs.user_data_dir", return_value=tmp_path):
|
||||||
|
Completion("zsh").install()
|
||||||
|
assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 1
|
||||||
|
Completion("zsh").uninstall()
|
||||||
|
assert Path(tmp_path + "/zsh/site-functions/_bdfr").exists() == 0
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from collections.abc import Iterator
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import praw
|
import praw
|
||||||
|
@ -166,6 +167,7 @@ def test_create_authenticator(downloader_mock: MagicMock):
|
||||||
("lvpf4l",),
|
("lvpf4l",),
|
||||||
("lvpf4l", "lvqnsn"),
|
("lvpf4l", "lvqnsn"),
|
||||||
("lvpf4l", "lvqnsn", "lvl9kd"),
|
("lvpf4l", "lvqnsn", "lvl9kd"),
|
||||||
|
("1000000",),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_get_submissions_from_link(
|
def test_get_submissions_from_link(
|
||||||
|
@ -252,7 +254,7 @@ def test_get_subreddit_time_verification(
|
||||||
for r in results:
|
for r in results:
|
||||||
result_time = datetime.fromtimestamp(r.created_utc)
|
result_time = datetime.fromtimestamp(r.created_utc)
|
||||||
time_diff = nowtime - result_time
|
time_diff = nowtime - result_time
|
||||||
assert time_diff < test_delta
|
assert time_diff < (test_delta + timedelta(minutes=1))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
|
@ -334,7 +336,7 @@ def test_get_multireddits_public(
|
||||||
(
|
(
|
||||||
("danigirl3694", 10),
|
("danigirl3694", 10),
|
||||||
("danigirl3694", 50),
|
("danigirl3694", 50),
|
||||||
("CapitanHam", None),
|
("nasa", None),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_get_user_submissions(test_user: str, limit: int, downloader_mock: MagicMock, reddit_instance: praw.Reddit):
|
def test_get_user_submissions(test_user: str, limit: int, downloader_mock: MagicMock, reddit_instance: praw.Reddit):
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
@ -76,4 +76,4 @@ def test_filter_empty_filter(test_url: str):
|
||||||
download_filter = DownloadFilter()
|
download_filter = DownloadFilter()
|
||||||
test_resource = Resource(MagicMock(), test_url, lambda: None)
|
test_resource = Resource(MagicMock(), test_url, lambda: None)
|
||||||
result = download_filter.check_resource(test_resource)
|
result = download_filter.check_resource(test_resource)
|
||||||
assert result is True
|
assert result
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
import logging
|
||||||
import os
|
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
@ -9,12 +8,16 @@ from unittest.mock import MagicMock, patch
|
||||||
import praw.models
|
import praw.models
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bdfr.__main__ import setup_logging
|
from bdfr.__main__ import make_console_logging_handler
|
||||||
from bdfr.configuration import Configuration
|
from bdfr.configuration import Configuration
|
||||||
from bdfr.connector import RedditConnector
|
from bdfr.connector import RedditConnector
|
||||||
from bdfr.downloader import RedditDownloader
|
from bdfr.downloader import RedditDownloader
|
||||||
|
|
||||||
|
|
||||||
|
def add_console_handler():
|
||||||
|
logging.getLogger().addHandler(make_console_logging_handler(3))
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def args() -> Configuration:
|
def args() -> Configuration:
|
||||||
args = Configuration()
|
args = Configuration()
|
||||||
|
@ -114,12 +117,12 @@ def test_file_creation_date(
|
||||||
RedditDownloader._download_submission(downloader_mock, submission)
|
RedditDownloader._download_submission(downloader_mock, submission)
|
||||||
|
|
||||||
for file_path in Path(tmp_path).iterdir():
|
for file_path in Path(tmp_path).iterdir():
|
||||||
file_stats = os.stat(file_path)
|
file_stats = Path(file_path).stat()
|
||||||
assert file_stats.st_mtime == test_creation_date
|
assert file_stats.st_mtime == test_creation_date
|
||||||
|
|
||||||
|
|
||||||
def test_search_existing_files():
|
def test_search_existing_files():
|
||||||
results = RedditDownloader.scan_existing_files(Path("."))
|
results = RedditDownloader.scan_existing_files(Path())
|
||||||
assert len(results.keys()) != 0
|
assert len(results.keys()) != 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -134,7 +137,7 @@ def test_download_submission_hash_exists(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
capsys: pytest.CaptureFixture,
|
capsys: pytest.CaptureFixture,
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
@ -155,7 +158,7 @@ def test_download_submission_hash_exists(
|
||||||
def test_download_submission_file_exists(
|
def test_download_submission_file_exists(
|
||||||
downloader_mock: MagicMock, reddit_instance: praw.Reddit, tmp_path: Path, capsys: pytest.CaptureFixture
|
downloader_mock: MagicMock, reddit_instance: praw.Reddit, tmp_path: Path, capsys: pytest.CaptureFixture
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
@ -167,9 +170,7 @@ def test_download_submission_file_exists(
|
||||||
folder_contents = list(tmp_path.iterdir())
|
folder_contents = list(tmp_path.iterdir())
|
||||||
output = capsys.readouterr()
|
output = capsys.readouterr()
|
||||||
assert len(folder_contents) == 1
|
assert len(folder_contents) == 1
|
||||||
assert (
|
assert "Arneeman_Metagaming isn't always a bad thing_m1hqw6.png from submission m1hqw6 already exists" in output.out
|
||||||
"Arneeman_Metagaming isn't always a bad thing_m1hqw6.png" " from submission m1hqw6 already exists" in output.out
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
|
@ -204,7 +205,7 @@ def test_download_submission_min_score_above(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
capsys: pytest.CaptureFixture,
|
capsys: pytest.CaptureFixture,
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
@ -228,7 +229,7 @@ def test_download_submission_min_score_below(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
capsys: pytest.CaptureFixture,
|
capsys: pytest.CaptureFixture,
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
@ -252,7 +253,7 @@ def test_download_submission_max_score_below(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
capsys: pytest.CaptureFixture,
|
capsys: pytest.CaptureFixture,
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
@ -276,7 +277,7 @@ def test_download_submission_max_score_above(
|
||||||
tmp_path: Path,
|
tmp_path: Path,
|
||||||
capsys: pytest.CaptureFixture,
|
capsys: pytest.CaptureFixture,
|
||||||
):
|
):
|
||||||
setup_logging(3)
|
add_console_handler()
|
||||||
downloader_mock.reddit_instance = reddit_instance
|
downloader_mock.reddit_instance = reddit_instance
|
||||||
downloader_mock.download_filter.check_url.return_value = True
|
downloader_mock.download_filter.check_url.return_value = True
|
||||||
downloader_mock.args.folder_scheme = ""
|
downloader_mock.args.folder_scheme = ""
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import platform
|
import platform
|
||||||
import sys
|
import sys
|
||||||
import unittest.mock
|
import unittest.mock
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Type, Union
|
from typing import Optional, Union
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
import praw.models
|
import praw.models
|
||||||
|
@ -33,6 +33,16 @@ def submission() -> MagicMock:
|
||||||
return test
|
return test
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def test_formatter() -> FileNameFormatter:
|
||||||
|
out = FileNameFormatter("{TITLE}", "", "ISO")
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def check_valid_windows_path(test_string: str):
|
||||||
|
return test_string == FileNameFormatter._format_for_windows(test_string)
|
||||||
|
|
||||||
|
|
||||||
def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
|
def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
expected = FileNameFormatter._format_for_windows(expected)
|
expected = FileNameFormatter._format_for_windows(expected)
|
||||||
|
@ -46,7 +56,7 @@ def do_test_path_equality(result: Path, expected: str) -> bool:
|
||||||
expected = Path(*expected)
|
expected = Path(*expected)
|
||||||
else:
|
else:
|
||||||
expected = Path(expected)
|
expected = Path(expected)
|
||||||
return str(result).endswith(str(expected))
|
return str(result).endswith(str(expected)) # noqa: FURB123
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
|
@ -91,6 +101,15 @@ def test_check_format_string_validity(test_string: str, expected: bool):
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
@pytest.mark.reddit
|
@pytest.mark.reddit
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"restriction_scheme",
|
||||||
|
(
|
||||||
|
"windows",
|
||||||
|
"linux",
|
||||||
|
"bla",
|
||||||
|
None,
|
||||||
|
),
|
||||||
|
)
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("test_format_string", "expected"),
|
("test_format_string", "expected"),
|
||||||
(
|
(
|
||||||
|
@ -102,10 +121,17 @@ def test_check_format_string_validity(test_string: str, expected: bool):
|
||||||
("{REDDITOR}_{TITLE}_{POSTID}", "Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l"),
|
("{REDDITOR}_{TITLE}_{POSTID}", "Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
|
def test_format_name_real(
|
||||||
test_formatter = FileNameFormatter(test_format_string, "", "")
|
test_format_string: str,
|
||||||
|
expected: str,
|
||||||
|
reddit_submission: praw.models.Submission,
|
||||||
|
restriction_scheme: Optional[str],
|
||||||
|
):
|
||||||
|
test_formatter = FileNameFormatter(test_format_string, "", "", restriction_scheme)
|
||||||
result = test_formatter._format_name(reddit_submission, test_format_string)
|
result = test_formatter._format_name(reddit_submission, test_format_string)
|
||||||
assert do_test_string_equality(result, expected)
|
assert do_test_string_equality(result, expected)
|
||||||
|
if restriction_scheme == "windows":
|
||||||
|
assert check_valid_windows_path(result)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.online
|
@pytest.mark.online
|
||||||
|
@ -188,7 +214,7 @@ def test_format_full_with_index_suffix(
|
||||||
|
|
||||||
def test_format_multiple_resources():
|
def test_format_multiple_resources():
|
||||||
mocks = []
|
mocks = []
|
||||||
for i in range(1, 5):
|
for _i in range(1, 5):
|
||||||
new_mock = MagicMock()
|
new_mock = MagicMock()
|
||||||
new_mock.url = "https://example.com/test.png"
|
new_mock.url = "https://example.com/test.png"
|
||||||
new_mock.extension = ".png"
|
new_mock.extension = ".png"
|
||||||
|
@ -196,7 +222,7 @@ def test_format_multiple_resources():
|
||||||
new_mock.source_submission.__class__ = praw.models.Submission
|
new_mock.source_submission.__class__ = praw.models.Submission
|
||||||
mocks.append(new_mock)
|
mocks.append(new_mock)
|
||||||
test_formatter = FileNameFormatter("{TITLE}", "", "ISO")
|
test_formatter = FileNameFormatter("{TITLE}", "", "ISO")
|
||||||
results = test_formatter.format_resource_paths(mocks, Path("."))
|
results = test_formatter.format_resource_paths(mocks, Path())
|
||||||
results = set([str(res[0].name) for res in results])
|
results = set([str(res[0].name) for res in results])
|
||||||
expected = {"test_1.png", "test_2.png", "test_3.png", "test_4.png"}
|
expected = {"test_1.png", "test_2.png", "test_3.png", "test_4.png"}
|
||||||
assert results == expected
|
assert results == expected
|
||||||
|
@ -211,8 +237,8 @@ def test_format_multiple_resources():
|
||||||
("😍💕✨" * 100, "_1.png"),
|
("😍💕✨" * 100, "_1.png"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_limit_filename_length(test_filename: str, test_ending: str):
|
def test_limit_filename_length(test_filename: str, test_ending: str, test_formatter: FileNameFormatter):
|
||||||
result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path("."))
|
result = test_formatter.limit_file_name_length(test_filename, test_ending, Path())
|
||||||
assert len(result.name) <= 255
|
assert len(result.name) <= 255
|
||||||
assert len(result.name.encode("utf-8")) <= 255
|
assert len(result.name.encode("utf-8")) <= 255
|
||||||
assert len(str(result)) <= FileNameFormatter.find_max_path_length()
|
assert len(str(result)) <= FileNameFormatter.find_max_path_length()
|
||||||
|
@ -233,8 +259,10 @@ def test_limit_filename_length(test_filename: str, test_ending: str):
|
||||||
("😍💕✨" * 100 + "_aaa1aa", "_1.png", "_aaa1aa_1.png"),
|
("😍💕✨" * 100 + "_aaa1aa", "_1.png", "_aaa1aa_1.png"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
|
def test_preserve_id_append_when_shortening(
|
||||||
result = FileNameFormatter.limit_file_name_length(test_filename, test_ending, Path("."))
|
test_filename: str, test_ending: str, expected_end: str, test_formatter: FileNameFormatter
|
||||||
|
):
|
||||||
|
result = test_formatter.limit_file_name_length(test_filename, test_ending, Path())
|
||||||
assert len(result.name) <= 255
|
assert len(result.name) <= 255
|
||||||
assert len(result.name.encode("utf-8")) <= 255
|
assert len(result.name.encode("utf-8")) <= 255
|
||||||
assert result.name.endswith(expected_end)
|
assert result.name.endswith(expected_end)
|
||||||
|
@ -264,8 +292,8 @@ def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
|
||||||
("a" * 500, "_bbbbbb.jpg"),
|
("a" * 500, "_bbbbbb.jpg"),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path):
|
def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path, test_formatter: FileNameFormatter):
|
||||||
result = FileNameFormatter.limit_file_name_length(test_name, test_ending, tmp_path)
|
result = test_formatter.limit_file_name_length(test_name, test_ending, tmp_path)
|
||||||
assert len(str(result.name)) <= 255
|
assert len(str(result.name)) <= 255
|
||||||
assert len(str(result.name).encode("UTF-8")) <= 255
|
assert len(str(result.name).encode("UTF-8")) <= 255
|
||||||
assert len(str(result.name).encode("cp1252")) <= 255
|
assert len(str(result.name).encode("cp1252")) <= 255
|
||||||
|
@ -462,7 +490,9 @@ def test_get_max_path_length():
|
||||||
def test_windows_max_path(tmp_path: Path):
|
def test_windows_max_path(tmp_path: Path):
|
||||||
with unittest.mock.patch("platform.system", return_value="Windows"):
|
with unittest.mock.patch("platform.system", return_value="Windows"):
|
||||||
with unittest.mock.patch("bdfr.file_name_formatter.FileNameFormatter.find_max_path_length", return_value=260):
|
with unittest.mock.patch("bdfr.file_name_formatter.FileNameFormatter.find_max_path_length", return_value=260):
|
||||||
result = FileNameFormatter.limit_file_name_length("test" * 100, "_1.png", tmp_path)
|
mock = MagicMock()
|
||||||
|
mock.max_path = 260
|
||||||
|
result = FileNameFormatter.limit_file_name_length(mock, "test" * 100, "_1.png", tmp_path)
|
||||||
assert len(str(result)) <= 260
|
assert len(str(result)) <= 260
|
||||||
assert len(result.name) <= (260 - len(str(tmp_path)))
|
assert len(result.name) <= (260 - len(str(tmp_path)))
|
||||||
|
|
||||||
|
@ -479,13 +509,13 @@ def test_windows_max_path(tmp_path: Path):
|
||||||
)
|
)
|
||||||
def test_name_submission(
|
def test_name_submission(
|
||||||
test_reddit_id: str,
|
test_reddit_id: str,
|
||||||
test_downloader: Type[BaseDownloader],
|
test_downloader: type[BaseDownloader],
|
||||||
expected_names: set[str],
|
expected_names: set[str],
|
||||||
reddit_instance: praw.reddit.Reddit,
|
reddit_instance: praw.reddit.Reddit,
|
||||||
):
|
):
|
||||||
test_submission = reddit_instance.submission(id=test_reddit_id)
|
test_submission = reddit_instance.submission(id=test_reddit_id)
|
||||||
test_resources = test_downloader(test_submission).find_resources()
|
test_resources = test_downloader(test_submission).find_resources()
|
||||||
test_formatter = FileNameFormatter("{TITLE}", "", "")
|
test_formatter = FileNameFormatter("{TITLE}", "", "")
|
||||||
results = test_formatter.format_resource_paths(test_resources, Path("."))
|
results = test_formatter.format_resource_paths(test_resources, Path())
|
||||||
results = set([r[0].name for r in results])
|
results = set([r[0].name for r in results])
|
||||||
assert results == expected_names
|
assert results == expected_names
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# coding=utf-8
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
|
18
tox.ini
18
tox.ini
|
@ -1,6 +1,9 @@
|
||||||
[tox]
|
[tox]
|
||||||
|
requires =
|
||||||
|
tox>=3.27.1
|
||||||
envlist =
|
envlist =
|
||||||
format
|
format
|
||||||
|
format_check
|
||||||
|
|
||||||
[testenv:format]
|
[testenv:format]
|
||||||
deps =
|
deps =
|
||||||
|
@ -9,8 +12,15 @@ deps =
|
||||||
skip_install = True
|
skip_install = True
|
||||||
commands =
|
commands =
|
||||||
isort bdfr tests
|
isort bdfr tests
|
||||||
black bdfr tests --line-length 120
|
black bdfr tests
|
||||||
|
|
||||||
[isort]
|
[testenv:format_check]
|
||||||
profile = black
|
deps =
|
||||||
multi_line_output = 3
|
isort
|
||||||
|
black
|
||||||
|
skip_install = True
|
||||||
|
allowlist_externals = mdl
|
||||||
|
commands =
|
||||||
|
isort bdfr tests --check
|
||||||
|
black bdfr tests --check
|
||||||
|
mdl README.md docs/
|
||||||
|
|
Loading…
Reference in a new issue