From fecb65c53a2e47c966d24c104f7aa6e891cb93b2 Mon Sep 17 00:00:00 2001 From: OMEGARAZER <869111+OMEGARAZER@users.noreply.github.com> Date: Tue, 29 Nov 2022 11:48:24 -0500 Subject: [PATCH] Lint run Linting run through various things. Mostly markdownlint. --- .github/ISSUE_TEMPLATE/bug_report.md | 16 ++-- .github/ISSUE_TEMPLATE/feature_request.md | 3 +- .../ISSUE_TEMPLATE/site-support-request.md | 4 +- README.md | 91 +++++++++++-------- bdfr/default_config.cfg | 2 +- devscripts/configure.ps1 | 6 +- devscripts/configure.sh | 4 +- docs/ARCHITECTURE.md | 10 +- docs/CODE_OF_CONDUCT.md | 4 +- docs/CONTRIBUTING.md | 32 ++++--- scripts/README.md | 10 +- scripts/extract_failed_ids.ps1 | 18 ++-- scripts/extract_successful_ids.ps1 | 14 +-- scripts/print_summary.ps1 | 14 +-- .../failed_resource_error.txt | 1 - .../example_logfiles/succeed_score_filter.txt | 2 +- setup.cfg | 2 +- 17 files changed, 130 insertions(+), 103 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index efc9757..e05bb36 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -10,20 +10,24 @@ assignees: '' - [ ] I am reporting a bug. - [ ] I am running the latest version of BDfR - [ ] I have read the [Opening an issue](https://github.com/aliparlakci/bulk-downloader-for-reddit/blob/master/docs/CONTRIBUTING.md#opening-an-issue) - + ## Description + A clear and concise description of what the bug is. ## Command -``` + +```text Paste here the command(s) that causes the bug ``` -## Environment (please complete the following information): - - OS: [e.g. Windows 10] - - Python version: [e.g. 3.9.4] +## Environment (please complete the following information) + +- OS: [e.g. Windows 10] +- Python version: [e.g. 3.9.4] ## Logs -``` + +```text Paste the log output here. 
``` diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index ce9f0b3..c286de6 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -10,6 +10,7 @@ assignees: '' - [ ] I am requesting a feature. - [ ] I am running the latest version of BDfR - [ ] I have read the [Opening an issue](../../README.md#configuration) - + ## Description + Clearly state the current situation and issues you experience. Then, explain how this feature would solve these issues and make life easier. Also, explain the feature with as many detail as possible. diff --git a/.github/ISSUE_TEMPLATE/site-support-request.md b/.github/ISSUE_TEMPLATE/site-support-request.md index fd400aa..2eea710 100644 --- a/.github/ISSUE_TEMPLATE/site-support-request.md +++ b/.github/ISSUE_TEMPLATE/site-support-request.md @@ -10,9 +10,11 @@ assignees: '' - [ ] I am requesting a site support. - [ ] I am running the latest version of BDfR - [ ] I have read the [Opening an issue](../../README.md#configuration) - + ## Site + Provide a URL to domain of the site. ## Example posts + Provide example reddit posts with the domain. diff --git a/README.md b/README.md index 2c245c6..7914308 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Bulk Downloader for Reddit + [![PyPI version](https://img.shields.io/pypi/v/bdfr.svg)](https://pypi.python.org/pypi/bdfr) [![PyPI downloads](https://img.shields.io/pypi/dm/bdfr)](https://pypi.python.org/pypi/bdfr) [![Python Test](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml/badge.svg?branch=master)](https://github.com/aliparlakci/bulk-downloader-for-reddit/actions/workflows/test.yml) @@ -10,19 +11,24 @@ If you wish to open an issue, please read [the guide on opening issues](docs/CON Included in this README are a few example Bash tricks to get certain behaviour. For that, see [Common Command Tricks](#common-command-tricks). 
## Installation + *Bulk Downloader for Reddit* needs Python version 3.9 or above. Please update Python before installation to meet the requirement. Then, you can install it as such: + ```bash python3 -m pip install bdfr --upgrade ``` + **To update BDFR**, run the above command again after the installation. ### AUR Package + If on Arch Linux or derivative operating systems such as Manjaro, the BDFR can be installed through the AUR. -- Latest Release: https://aur.archlinux.org/packages/python-bdfr/ -- Latest Development Build: https://aur.archlinux.org/packages/python-bdfr-git/ +- Latest Release: <https://aur.archlinux.org/packages/python-bdfr/> +- Latest Development Build: <https://aur.archlinux.org/packages/python-bdfr-git/> ### Source code + If you want to use the source code or make contributions, refer to [CONTRIBUTING](docs/CONTRIBUTING.md#preparing-the-environment-for-development) ## Usage @@ -52,18 +58,23 @@ However, these commands are not enough. You should chain parameters in [Options] ```bash python3 -m bdfr download ./path/to/output --subreddit Python -L 10 ``` + ```bash python3 -m bdfr download ./path/to/output --user reddituser --submitted -L 100 ``` + ```bash python3 -m bdfr download ./path/to/output --user reddituser --submitted --all-comments --comment-context ``` + ```bash python3 -m bdfr download ./path/to/output --user me --saved --authenticate -L 25 --file-scheme '{POSTID}' ``` + ```bash python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' -L 10 --make-hard-links ``` + ```bash python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme '' ``` @@ -87,6 +98,7 @@ subreddit: ``` would be equilavent to (take note that in YAML there is `file_scheme` instead of `file-scheme`): + ```bash python3 -m bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn ``` @@ -156,8 +168,8 @@ The following options are common between both the `archive` and `download` comma - `-m, --multireddit` - This is the 
name of a multireddit to add as a source - Can be specified multiple times - - This can be done by using `-m` multiple times - - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'` + - This can be done by using `-m` multiple times + - Multireddits can also be used to provide CSV multireddits e.g. `-m 'chess, favourites'` - The specified multireddits must all belong to the user specified with the `--user` option - `-s, --subreddit` - This adds a subreddit as a source @@ -237,7 +249,6 @@ The following options apply only to the `download` command. This command downloa - `--max-score-ratio` - This skips all submissions which have higher than specified upvote ratio - ### Archiver Options The following options are for the `archive` command specifically. @@ -291,18 +302,18 @@ For more details on the configuration file and the values therein, see [Configur The naming and folder schemes for the BDFR are both completely customisable. A number of different fields can be given which will be replaced with properties from a submission when downloading it. The scheme format takes the form of `{KEY}`, where `KEY` is a string from the below list. - - `DATE` - - `FLAIR` - - `POSTID` - - `REDDITOR` - - `SUBREDDIT` - - `TITLE` - - `UPVOTES` +- `DATE` +- `FLAIR` +- `POSTID` +- `REDDITOR` +- `SUBREDDIT` +- `TITLE` +- `UPVOTES` Each of these can be enclosed in curly bracket, `{}`, and included in the name. For example, to just title every downloaded post with the unique submission ID, you can use `{POSTID}`. Static strings can also be included, such as `download_{POSTID}` which will not change from submission to submission. For example, the previous string will result in the following submission file names: - - `download_aaaaaa.png` - - `download_bbbbbb.png` +- `download_aaaaaa.png` +- `download_bbbbbb.png` At least one key *must* be included in the file scheme, otherwise an error will be thrown. 
The folder scheme however, can be null or a simple static string. In the former case, all files will be placed in the folder specified with the `directory` argument. If the folder scheme is a static string, then all submissions will be placed in a folder of that name. In both cases, there will be no separation between all submissions. @@ -312,19 +323,19 @@ It is highly recommended that the file name scheme contain the parameter `{POSTI The configuration files are, by default, stored in the configuration directory for the user. This differs depending on the OS that the BDFR is being run on. For Windows, this will be: - - `C:\Users\<User>\AppData\Local\BDFR\bdfr` +- `C:\Users\<User>\AppData\Local\BDFR\bdfr` If Python has been installed through the Windows Store, the folder will appear in a different place. Note that the hash included in the file path may change from installation to installation. - - `C:\Users\<User>\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr` +- `C:\Users\<User>\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\Local\BDFR\bdfr` On Mac OSX, this will be: - - `~/Library/Application Support/bdfr`. - +- `~/Library/Application Support/bdfr`. + Lastly, on a Linux system, this will be: - - `~/.config/bdfr/` +- `~/.config/bdfr/` The logging output for each run of the BDFR will be saved to this directory in the file `log_output.txt`. If you need to submit a bug, it is this file that you will need to submit with the report. @@ -332,16 +343,16 @@ The logging output for each run of the BDFR will be saved to this directory in t The `config.cfg` is the file that supplies the BDFR with the configuration to use. At the moment, the following keys **must** be included in the configuration file supplied. - - `client_id` - - `client_secret` - - `scopes` +- `client_id` +- `client_secret` +- `scopes` The following keys are optional, and defaults will be used if they cannot be found. 
- `backup_log_count` - `max_wait_time` - `time_format` - `disabled_modules` +- `backup_log_count` +- `max_wait_time` +- `time_format` +- `disabled_modules` All of these should not be modified unless you know what you're doing, as the default values will enable the BDFR to function just fine. A configuration is included in the BDFR when it is installed, and this will be placed in the configuration directory as the default. @@ -360,12 +371,16 @@ The individual modules of the BDFR, used to download submissions from websites, Modules can be disabled through the command line interface for the BDFR or more permanently in the configuration file via the `disabled_modules` option. The list of downloaders that can be disabled are the following. Note that they are case-insensitive. - `Direct` +- `DelayForReddit` - `Erome` - `Gallery` (Reddit Image Galleries) - `Gfycat` - `Imgur` +- `PornHub` - `Redgifs` - `SelfPost` (Reddit Text Post) +- `Vidble` +- `VReddit` (Reddit Video Post) - `Youtube` - `YoutubeDlFallback` @@ -393,17 +408,19 @@ The logfiles that the BDFR outputs are consistent and quite detailed and in a fo ## List of currently supported sources - - Direct links (links leading to a file) - - Erome - - Gfycat - - Gif Delivery Network - - Imgur - - Reddit Galleries - - Reddit Text Posts - - Reddit Videos - - Redgifs - - YouTube - - Streamable +- Direct links (links leading to a file) +- Delay for Reddit +- Erome +- Gfycat +- Gif Delivery Network +- Imgur +- Reddit Galleries +- Reddit Text Posts +- Reddit Videos +- Redgifs +- Vidble +- YouTube + - Any source supported by [YT-DLP](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md) should be compatible ## Contributing diff --git a/bdfr/default_config.cfg b/bdfr/default_config.cfg index c601152..2b2976f 100644 --- a/bdfr/default_config.cfg +++ b/bdfr/default_config.cfg @@ -4,4 +4,4 @@ client_secret = 7CZHY6AmKweZME5s50SfDGylaPg scopes = identity, history, read, save, mysubreddits backup_log_count = 3 
max_wait_time = 120 -time_format = ISO \ No newline at end of file +time_format = ISO diff --git a/devscripts/configure.ps1 b/devscripts/configure.ps1 index b096266..f5a2152 100644 --- a/devscripts/configure.ps1 +++ b/devscripts/configure.ps1 @@ -1,5 +1,5 @@ if (-not ([string]::IsNullOrEmpty($env:REDDIT_TOKEN))) { - copy .\\bdfr\\default_config.cfg .\\test_config.cfg - echo "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg -} \ No newline at end of file + Copy-Item .\\bdfr\\default_config.cfg .\\test_config.cfg + Write-Output "`nuser_token = $env:REDDIT_TOKEN" >> ./test_config.cfg +} diff --git a/devscripts/configure.sh b/devscripts/configure.sh index d9c96df..f4528b1 100755 --- a/devscripts/configure.sh +++ b/devscripts/configure.sh @@ -1,4 +1,6 @@ -if [ ! -z "$REDDIT_TOKEN" ] +#!/bin/bash + +if [ -n "$REDDIT_TOKEN" ] then cp ./bdfr/default_config.cfg ./test_config.cfg echo -e "\nuser_token = $REDDIT_TOKEN" >> ./test_config.cfg diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 7b69f99..33d4297 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -18,18 +18,18 @@ Another major part of the ethos of the design is DOTADIW, Do One Thing And Do It The BDFR is organised around a central object, the RedditDownloader class. The Archiver object extends and inherits from this class. - 1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc. - + 1. The RedditDownloader parses all the arguments and configuration options, held in the Configuration object, and creates a variety of internal objects for use, such as the file name formatter, download filter, etc. + 2. The RedditDownloader scrapes raw submissions from Reddit via several methods relating to different sources. A source is defined as a single stream of submissions from a subreddit, multireddit, or user list. - 3. 
These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct. + 3. These raw submissions are passed to the DownloaderFactory class to select the specialised downloader class to use. Each of these are for a specific website or link type, with some catch-all classes like Direct. - 4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource. + 4. The BaseDownloader child, spawned by DownloaderFactory, takes the link and does any necessary processing to find the direct link to the actual resource. 5. This is returned to the RedditDownloader in the form of a Resource object. This holds the URL and some other information for the final resource. 6. The Resource is passed through the DownloadFilter instantiated in step 1. - + 7. The destination file name for the Resource is calculated. If it already exists, then the Resource will be discarded. 8. Here the actual data is downloaded to the Resource and a hash calculated which is used to find duplicates. diff --git a/docs/CODE_OF_CONDUCT.md b/docs/CODE_OF_CONDUCT.md index 26edfa9..fe0374d 100644 --- a/docs/CODE_OF_CONDUCT.md +++ b/docs/CODE_OF_CONDUCT.md @@ -69,8 +69,6 @@ members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html +available at <https://www.contributor-covenant.org/version/1/4/code-of-conduct.html> [homepage]: https://www.contributor-covenant.org - - diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index e08f1c2..5aafda2 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -11,19 +11,21 @@ All communication on GitHub, Discord, email, or any other medium must conform to **Before opening a new issue**, be sure that no issues regarding your problem already exist. 
If a similar issue exists, try to contribute to the issue. ### Bugs -When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug. -If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug. +When opening an issue about a bug, **please provide the full log file for the run in which the bug occurred**. This log file is named `log_output.txt` in the configuration folder. Check the [README](../README.md) for information on where this is. This log file will contain all the information required for the developers to recreate the bug. + +If you do not have or cannot find the log file, then at minimum please provide the **Reddit ID for the submission** or comment which caused the issue. Also copy in the command that you used to run the BDFR from the command line, as that will also provide helpful information when trying to find and fix the bug. If needed, more information will be asked in the thread of the bug. ### Feature requests -In the case of requesting a feature or an enhancement, there are fewer requirements. However, please be clear in what you would like the BDFR to do and also how the feature/enhancement would be used or would be useful to more people. It is crucial that the feature is justified. Any feature request without a concrete reason for it to be implemented has a very small chance to get accepted. 
Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers. + +In the case of requesting a feature or an enhancement, there are fewer requirements. However, please be clear in what you would like the BDFR to do and also how the feature/enhancement would be used or would be useful to more people. It is crucial that the feature is justified. Any feature request without a concrete reason for it to be implemented has a very small chance to get accepted. Be aware that proposed enhancements may be rejected for multiple reasons, or no reason, at the discretion of the developers. ## Pull Requests Before creating a pull request (PR), check out [ARCHITECTURE](ARCHITECTURE.md) for a short introduction to the way that the BDFR is coded and how the code is organised. Also read the [Style Guide](#style-guide) section below before actually writing any code. Once you have done both of these, the below list shows the path that should be followed when writing a PR. - + 1. If an issue does not already exist, open one that will relate to the PR. 2. Ensure that any changes fit into the architecture specified above. 3. Ensure that you have written tests that cover the new code. @@ -32,24 +34,26 @@ Once you have done both of these, the below list shows the path that should be f 6. Open a pull request that references the relevant issue. 7. Expect changes or suggestions and heed the Code of Conduct. We're all volunteers here. -Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR. +Someone will review your pull request as soon as possible, but remember that all maintainers are volunteers and this won't happen immediately. Once it is approved, congratulations! Your code is now part of the BDFR. 
## Preparing the environment for development -Bulk Downloader for Reddit requires Python 3.9 at minimum. First, ensure that your Python installation satisfies this. +Bulk Downloader for Reddit requires Python 3.9 at minimum. First, ensure that your Python installation satisfies this. BDfR is built in a way that it can be packaged and installed via `pip`. This places BDfR next to other Python packages and enables you to run the program from any directory. Since it is managed by pip, you can also uninstall it. To install the program, clone the repository and run pip inside the project's root directory: + ```bash -$ git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git -$ cd ./bulk-downloader-for-reddit -$ python3 -m pip install -e . +git clone https://github.com/aliparlakci/bulk-downloader-for-reddit.git +cd ./bulk-downloader-for-reddit +python3 -m pip install -e . ``` -**`-e`** parameter creates a link to that folder. That is, any change inside the folder affects the package immidiately. So, when developing, you can be sure that the package is not stale and Python is always running your latest changes. (Due to this linking, moving/removing/renaming the folder might break it) +**`-e`** parameter creates a link to that folder. That is, any change inside the folder affects the package immediately. So, when developing, you can be sure that the package is not stale and Python is always running your latest changes. (Due to this linking, moving/removing/renaming the folder might break it) Then, you can run the program from anywhere in your disk as such: + ```bash $ python3 -m bdfr ``` @@ -104,20 +108,20 @@ To exclude one or more marks, the following command can be used, substituting th pytest -m "not online" pytest -m "not reddit and not authenticated" ``` - + ### Configuration for authenticated tests There should be configuration file `test_config.cfg` in the project's root directory to be able to run the integration tests with reddit authentication. 
See how to create such files [here](../README.md#configuration). The easiest way of creating this file is copying your existing `default_config.cfg` file from the path stated in the previous link and renaming it to `test_config.cfg` Be sure that user_token key exists in test_config.cfg. - + --- - + For more details, review the pytest documentation that is freely available online. Many IDEs also provide integrated functionality to run and display the results from tests, and almost all of them support pytest in some capacity. This would be the recommended method due to the additional debugging and general capabilities. ### Writing Tests -When writing tests, ensure that they follow the style guide. The BDFR uses pytest to run tests. Wherever possible, parameterise tests, even if you only have one test case. This makes it easier to expand in the future, as the ultimate goal is to have multiple test cases for every test, instead of just one. +When writing tests, ensure that they follow the style guide. The BDFR uses pytest to run tests. Wherever possible, parameterise tests, even if you only have one test case. This makes it easier to expand in the future, as the ultimate goal is to have multiple test cases for every test, instead of just one. If required, use of mocks is expected to simplify tests and reduce the resources or complexity required. Tests should be as small as possible and test as small a part of the code as possible. Comprehensive or integration tests are run with the `click` framework and are located in their own file. diff --git a/scripts/README.md b/scripts/README.md index 4bb098b..2f77eb5 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -2,10 +2,10 @@ Due to the verboseness of the logs, a great deal of information can be gathered quite easily from the BDFR's logfiles. In this folder, there is a selection of scripts that parse these logs, scraping useful bits of information. 
Since the logfiles are recurring patterns of strings, it is a fairly simple matter to write scripts that utilise tools included on most Linux systems. - - [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids) - - [Script to extract all failed download IDs](#extract-all-failed-ids) - - [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps) - - [Printing summary statistics for a run](#printing-summary-statistics) +- [Script to extract all successfully downloaded IDs](#extract-all-successfully-downloaded-ids) +- [Script to extract all failed download IDs](#extract-all-failed-ids) +- [Timestamp conversion](#converting-bdfrv1-timestamps-to-bdfrv2-timestamps) +- [Printing summary statistics for a run](#printing-summary-statistics) ## Extract all Successfully Downloaded IDs @@ -58,7 +58,7 @@ A simple script has been included to print sumamry statistics for a run of the B This will create an output like the following: -``` +```text Downloaded submissions: 250 Failed downloads: 103 Files already downloaded: 20073 diff --git a/scripts/extract_failed_ids.ps1 b/scripts/extract_failed_ids.ps1 index be2d2cb..4820d04 100644 --- a/scripts/extract_failed_ids.ps1 +++ b/scripts/extract_failed_ids.ps1 @@ -1,21 +1,21 @@ if (Test-Path -Path $args[0] -PathType Leaf) { - $file=$args[0] + $file=$args[0] } else { - Write-Host "CANNOT FIND LOG FILE" - Exit 1 + Write-Host "CANNOT FIND LOG FILE" + Exit 1 } -if ($args[1] -ne $null) { - $output=$args[1] - Write-Host "Outputting IDs to $output" +if ($null -ne $args[1]) { + $output=$args[1] + Write-Host "Outputting IDs to $output" } else { - $output="./failed.txt" + $output="./failed.txt" } -Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "Could not download submission" | ForEach-Object { -split 
$_.Line | Select-Object -Skip 11 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output Select-String -Path $file -Pattern "Failed to download resource" | ForEach-Object { -split $_.Line | Select-Object -Skip 14 | Select-Object -First 1 } >> $output -Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | foreach { $_.substring(0,$_.Length-1) } >> $output +Select-String -Path $file -Pattern "failed to download submission" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } | ForEach-Object { $_.substring(0,$_.Length-1) } >> $output Select-String -Path $file -Pattern "Failed to write file" | ForEach-Object { -split $_.Line | Select-Object -Skip 13 | Select-Object -First 1 } >> $output Select-String -Path $file -Pattern "skipped due to disabled module" | ForEach-Object { -split $_.Line | Select-Object -Skip 8 | Select-Object -First 1 } >> $output diff --git a/scripts/extract_successful_ids.ps1 b/scripts/extract_successful_ids.ps1 index 00722f1..70c463b 100644 --- a/scripts/extract_successful_ids.ps1 +++ b/scripts/extract_successful_ids.ps1 @@ -1,17 +1,17 @@ if (Test-Path -Path $args[0] -PathType Leaf) { - $file=$args[0] + $file=$args[0] } else { - Write-Host "CANNOT FIND LOG FILE" - Exit 1 + Write-Host "CANNOT FIND LOG FILE" + Exit 1 } -if ($args[1] -ne $null) { - $output=$args[1] - Write-Host "Outputting IDs to $output" +if ($null -ne $args[1]) { + $output=$args[1] + Write-Host "Outputting IDs to $output" } else { - $output="./successful.txt" + $output="./successful.txt" } Select-String -Path $file -Pattern "Downloaded submission" | ForEach-Object { -split $_.Line | Select-Object -Last 3 | Select-Object -SkipLast 2 } >> $output diff --git a/scripts/print_summary.ps1 b/scripts/print_summary.ps1 index 5d85b09..1428a86 100644 --- a/scripts/print_summary.ps1 +++ b/scripts/print_summary.ps1 @@ -1,17 +1,17 @@ 
if (Test-Path -Path $args[0] -PathType Leaf) { - $file=$args[0] + $file=$args[0] } else { - Write-Host "CANNOT FIND LOG FILE" - Exit 1 + Write-Host "CANNOT FIND LOG FILE" + Exit 1 } -if ($args[1] -ne $null) { - $output=$args[1] - Write-Host "Outputting IDs to $output" +if ($null -ne $args[1]) { + $output=$args[1] + Write-Host "Outputting IDs to $output" } else { - $output="./successful.txt" + $output="./successful.txt" } Write-Host -NoNewline "Downloaded submissions: " diff --git a/scripts/tests/example_logfiles/failed_resource_error.txt b/scripts/tests/example_logfiles/failed_resource_error.txt index c2ba24c..ef477eb 100644 --- a/scripts/tests/example_logfiles/failed_resource_error.txt +++ b/scripts/tests/example_logfiles/failed_resource_error.txt @@ -1,2 +1 @@ [2021-06-12 11:18:25,794 - bdfr.downloader - ERROR] - Failed to download resource https://i.redd.it/61fniokpjq471.jpg in submission nxv3dt with downloader Direct: Unrecoverable error requesting resource: HTTP Code 404 - diff --git a/scripts/tests/example_logfiles/succeed_score_filter.txt b/scripts/tests/example_logfiles/succeed_score_filter.txt index 8f31ef7..6430a34 100644 --- a/scripts/tests/example_logfiles/succeed_score_filter.txt +++ b/scripts/tests/example_logfiles/succeed_score_filter.txt @@ -1,2 +1,2 @@ [2022-07-23 14:04:14,095 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 15 < [50] -[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1] \ No newline at end of file +[2022-07-23 14:04:14,104 - bdfr.downloader - DEBUG] - Submission ljyy27 filtered due to score 16 > [1] diff --git a/setup.cfg b/setup.cfg index 67a1deb..725f372 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ author_email = parlakciali@gmail.com maintainer = Serene Arc maintainer_email = serenical@gmail.com license = GPLv3 -classifiers = +classifiers = Programming Language :: Python :: 3 License :: OSI Approved :: GNU General Public License v3 (GPLv3) Natural 
Language :: English