From 798ed728f540421973f7d8cb1f572e4d8d57cf9a Mon Sep 17 00:00:00 2001 From: Piotr Migdal Date: Sun, 27 Mar 2022 20:29:05 +0200 Subject: [PATCH 1/9] yaml for options --- bdfr/__main__.py | 1 + bdfr/configuration.py | 14 ++++++++++++-- opts_example.yaml | 9 +++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 opts_example.yaml diff --git a/bdfr/__main__.py b/bdfr/__main__.py index 45450ed..3b2472a 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -16,6 +16,7 @@ _common_options = [ click.argument('directory', type=str), click.option('--authenticate', is_flag=True, default=None), click.option('--config', type=str, default=None), + click.option('--opts', type=str, default=None), click.option('--disable-module', multiple=True, default=None, type=str), click.option('--exclude-id', default=None, multiple=True), click.option('--exclude-id-file', default=None, multiple=True), diff --git a/bdfr/configuration.py b/bdfr/configuration.py index c8fb323..8b04722 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -5,6 +5,7 @@ from argparse import Namespace from typing import Optional import click +import yaml class Configuration(Namespace): @@ -12,6 +13,7 @@ class Configuration(Namespace): super(Configuration, self).__init__() self.authenticate = False self.config = None + self.opts: Optional[str] = None self.directory: str = '.' 
self.disable_module: list[str] = [] self.exclude_id = [] @@ -49,6 +51,14 @@ class Configuration(Namespace): self.comment_context: bool = False def process_click_arguments(self, context: click.Context): + if context.params['opts'] is not None: + with open(context.params['opts']) as f: + opts = yaml.load(f, Loader=yaml.FullLoader) + for arg_key, v in opts.items(): + vars(self)[arg_key] = v for arg_key in context.params.keys(): - if arg_key in vars(self) and context.params[arg_key] is not None: - vars(self)[arg_key] = context.params[arg_key] + if arg_key not in vars(self): + continue + if context.params[arg_key] is None or context.params[arg_key] == (): + continue + vars(self)[arg_key] = context.params[arg_key] diff --git a/opts_example.yaml b/opts_example.yaml new file mode 100644 index 0000000..91952e4 --- /dev/null +++ b/opts_example.yaml @@ -0,0 +1,9 @@ +skip: [mp4, avi, mov] +file-scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" +limit: 10 +sort: top +time: all +no-dupes: true +subreddit: + - EarthPorn + - CityPorn From ef82387f84456751ea5f49e2c9f8039921ce1190 Mon Sep 17 00:00:00 2001 From: Piotr Migdal Date: Sun, 27 Mar 2022 20:49:28 +0200 Subject: [PATCH 2/9] underscores in YAML --- opts_example.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opts_example.yaml b/opts_example.yaml index 91952e4..22fca7d 100644 --- a/opts_example.yaml +++ b/opts_example.yaml @@ -1,9 +1,9 @@ skip: [mp4, avi, mov] -file-scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" +file_scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" limit: 10 sort: top time: all -no-dupes: true +no_dupes: true subreddit: - EarthPorn - CityPorn From 395bf9180aabb22152a299acb8df07d5bb1252f2 Mon Sep 17 00:00:00 2001 From: Piotr Migdal Date: Sun, 27 Mar 2022 20:50:34 +0200 Subject: [PATCH 3/9] explicit warnings for non-exisitng args --- bdfr/configuration.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py 
index 8b04722..856c90b 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -3,10 +3,12 @@ from argparse import Namespace from typing import Optional +import logging import click import yaml +logger = logging.getLogger(__name__) class Configuration(Namespace): def __init__(self): @@ -54,11 +56,17 @@ class Configuration(Namespace): if context.params['opts'] is not None: with open(context.params['opts']) as f: opts = yaml.load(f, Loader=yaml.FullLoader) - for arg_key, v in opts.items(): - vars(self)[arg_key] = v + for arg_key, val in opts.items(): + if not hasattr(self, arg_key): + logger.error(f'Ignoring an unknown YAML argument: {arg_key}') + continue + setattr(self, arg_key, val) for arg_key in context.params.keys(): - if arg_key not in vars(self): + if not hasattr(self, arg_key): + logger.warning(f'Ignoring an unknown CLI argument: {arg_key}') continue - if context.params[arg_key] is None or context.params[arg_key] == (): + val = context.params[arg_key] + if val is None or val == (): + # don't overwrite with an empty value continue - vars(self)[arg_key] = context.params[arg_key] + setattr(self, arg_key, val) From 0731de788d6334f5b206084f63b907e7e19dab8d Mon Sep 17 00:00:00 2001 From: Piotr Migdal Date: Sun, 27 Mar 2022 21:09:02 +0200 Subject: [PATCH 4/9] instructions for YAML options --- README.md | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 28c0b8a..82d8812 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,31 @@ python3 -m bdfr download ./path/to/output --subreddit 'Python, all, mindustry' - python3 -m bdfr archive ./path/to/output --subreddit all --format yaml -L 500 --folder-scheme '' ``` +Alternatively, you can pass options through a YAML file. 
+ +```bash +python3 -m bdfr download ./path/to/output --opts my_opts.yaml +``` + +For example, running it with the following file + +```yaml +skip: [mp4, avi] +file_scheme: "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" +limit: 10 +sort: top +subreddit: + - EarthPorn + - CityPorn +``` + +would be equivalent to (take note that in YAML there is `file_scheme` instead of `file-scheme`): +```bash +python3 -m bdfr download ./path/to/output --skip mp4 --skip avi --file-scheme "{UPVOTES}_{REDDITOR}_{POSTID}_{DATE}" -L 10 -S top --subreddit EarthPorn --subreddit CityPorn +``` + +When the same option is specified both in the YAML file and as a command-line argument, the command-line argument takes precedence. + ## Options The following options are common between both the `archive` and `download` commands of the BDFR. @@ -80,6 +105,10 @@ The following options are common between both the `archive` and `download` comma - `--config` - If the path to a configuration file is supplied with this option, the BDFR will use the specified config - See [Configuration Files](#configuration) for more details +- `--opts` + - Load options from a YAML file. + - Has higher priority than the global config file but lower than command-line arguments. + - See [opts_example.yaml](./opts_example.yaml) for an example file. - `--disable-module` - Can be specified multiple times - Disables certain modules from being used @@ -221,7 +250,10 @@ The `clone` command can take all the options listed above for both the `archive` ## Common Command Tricks -A common use case is for subreddits/users to be loaded from a file. The BDFR doesn't support this directly but it is simple enough to do through the command-line. Consider a list of usernames to download; they can be passed through to the BDFR with the following command, assuming that the usernames are in a text file: +A common use case is for subreddits/users to be loaded from a file. The BDFR supports this via YAML file options (`--opts my_opts.yaml`). 
+ +Alternatively, you can use the command-line [xargs](https://en.wikipedia.org/wiki/Xargs) function. +For a list of users `users.txt` (one user per line), type: ```bash cat users.txt | xargs -L 1 echo --user | xargs -L 50 python3 -m bdfr download From 5f443fddff952b0f21fe920b5ffbb4721023252a Mon Sep 17 00:00:00 2001 From: Piotr Migdal Date: Sun, 27 Mar 2022 21:13:33 +0200 Subject: [PATCH 5/9] a better check for opts --- bdfr/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 856c90b..2468ba9 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -53,7 +53,7 @@ class Configuration(Namespace): self.comment_context: bool = False def process_click_arguments(self, context: click.Context): - if context.params['opts'] is not None: + if context.params.get('opts') is not None: with open(context.params['opts']) as f: opts = yaml.load(f, Loader=yaml.FullLoader) for arg_key, val in opts.items(): From cb3415c62ffc33fda76f612924439b0d41ec1a12 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 22 Jul 2022 15:44:19 +1000 Subject: [PATCH 6/9] Extract YAML function --- bdfr/configuration.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 2468ba9..79a208b 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -2,6 +2,7 @@ # coding=utf-8 from argparse import Namespace +from pathlib import Path from typing import Optional import logging @@ -54,13 +55,7 @@ class Configuration(Namespace): def process_click_arguments(self, context: click.Context): if context.params.get('opts') is not None: - with open(context.params['opts']) as f: - opts = yaml.load(f, Loader=yaml.FullLoader) - for arg_key, val in opts.items(): - if not hasattr(self, arg_key): - logger.error(f'Ignoring an unknown YAML argument: {arg_key}') - continue - setattr(self, arg_key, val) + 
self.parse_yaml_options(context.params['opts']) for arg_key in context.params.keys(): if not hasattr(self, arg_key): logger.warning(f'Ignoring an unknown CLI argument: {arg_key}') @@ -70,3 +65,20 @@ class Configuration(Namespace): # don't overwrite with an empty value continue setattr(self, arg_key, val) + + def parse_yaml_options(self, file_path: str): + yaml_file_loc = Path(file_path) + if not yaml_file_loc.exists(): + logger.error(f'No YAML file found at {yaml_file_loc}') + return + with open(yaml_file_loc) as f: + try: + opts = yaml.load(f, Loader=yaml.FullLoader) + except yaml.YAMLError as e: + logger.error(f'Could not parse YAML options file: {e}') + return + for arg_key, val in opts.items(): + if not hasattr(self, arg_key): + logger.error(f'Ignoring an unknown YAML argument: {arg_key}') + continue + setattr(self, arg_key, val) From 23e20e6ddc606b5af987a9294364425a25ec67a9 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 22 Jul 2022 15:45:09 +1000 Subject: [PATCH 7/9] Rename variable --- bdfr/configuration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 79a208b..e3f1758 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -71,9 +71,9 @@ class Configuration(Namespace): if not yaml_file_loc.exists(): logger.error(f'No YAML file found at {yaml_file_loc}') return - with open(yaml_file_loc) as f: + with open(yaml_file_loc) as file: try: - opts = yaml.load(f, Loader=yaml.FullLoader) + opts = yaml.load(file, Loader=yaml.FullLoader) except yaml.YAMLError as e: logger.error(f'Could not parse YAML options file: {e}') return From af3f98f59ceae0e0e262bfa274081339b3acfc1c Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 22 Jul 2022 15:45:38 +1000 Subject: [PATCH 8/9] Change logger message level --- bdfr/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bdfr/configuration.py b/bdfr/configuration.py index e3f1758..ddc1401 100644 --- 
a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -79,6 +79,6 @@ class Configuration(Namespace): return for arg_key, val in opts.items(): if not hasattr(self, arg_key): - logger.error(f'Ignoring an unknown YAML argument: {arg_key}') + logger.warning(f'Ignoring an unknown YAML argument: {arg_key}') continue setattr(self, arg_key, val) From 27ca92ef157e8b174aa68b7548fa80d1893ac2c4 Mon Sep 17 00:00:00 2001 From: Serene-Arc Date: Fri, 22 Jul 2022 17:31:08 +1000 Subject: [PATCH 9/9] Add simple test --- tests/test_configuration.py | 9 +++++++++ tests/yaml_test_configuration.yaml | 6 ++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/yaml_test_configuration.yaml diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 8ad1663..6b6cd86 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -22,3 +22,12 @@ def test_process_click_context(arg_dict: dict): test_config.process_click_arguments(test_context) test_config = vars(test_config) assert all([test_config[arg] == arg_dict[arg] for arg in arg_dict.keys()]) + + +def test_yaml_file_read(): + file = './yaml_test_configuration.yaml' + test_config = Configuration() + test_config.parse_yaml_options(file) + assert test_config.subreddit == ['EarthPorn', 'TwoXChromosomes', 'Mindustry'] + assert test_config.sort == 'new' + assert test_config.limit == 10 diff --git a/tests/yaml_test_configuration.yaml b/tests/yaml_test_configuration.yaml new file mode 100644 index 0000000..5621721 --- /dev/null +++ b/tests/yaml_test_configuration.yaml @@ -0,0 +1,6 @@ +limit: 10 +sort: new +subreddit: + - EarthPorn + - TwoXChromosomes + - Mindustry \ No newline at end of file