From 4b683303934bd656d913c42bc5eb59b7a178925b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= Date: Fri, 11 Sep 2020 22:32:17 +0200 Subject: [PATCH] Added support for excluded items with wildcard * --- czkawka_cli/src/main.rs | 71 ++++++++++++++++--------------- czkawka_core/Cargo.toml | 5 +-- czkawka_core/src/common.rs | 78 ++++++++++++++++++++++++++++++++++- czkawka_core/src/duplicate.rs | 65 ++++++++++++++++++++++++----- 4 files changed, 172 insertions(+), 47 deletions(-) diff --git a/czkawka_cli/src/main.rs b/czkawka_cli/src/main.rs index 0c65dba..da1939f 100644 --- a/czkawka_cli/src/main.rs +++ b/czkawka_cli/src/main.rs @@ -1,3 +1,4 @@ +use czkawka_core::duplicate::Info; use czkawka_core::{duplicate, empty_folder}; use std::{env, process}; @@ -132,37 +133,7 @@ fn main() { df.find_duplicates(&check_method, &delete_method); - let info = df.get_infos(); - - if !info.messages.is_empty() { - println!("-------------------------------MESSAGES--------------------------------"); - } - for i in &info.messages { - println!("{}", i); - } - if !info.messages.is_empty() { - println!("---------------------------END OF MESSAGES-----------------------------"); - } - - if !info.warnings.is_empty() { - println!("-------------------------------WARNINGS--------------------------------"); - } - for i in &info.warnings { - println!("{}", i); - } - if !info.warnings.is_empty() { - println!("---------------------------END OF WARNINGS-----------------------------"); - } - - if !info.errors.is_empty() { - println!("--------------------------------ERRORS---------------------------------"); - } - for i in &info.errors { - println!("{}", i); - } - if !info.errors.is_empty() { - println!("----------------------------END OF ERRORS------------------------------"); - } + print_infos(df.get_infos()); } "--h" | "--help" => { print_help(); @@ -205,16 +176,18 @@ Usage of Czkawka: czkawka --help czkawka - --d <-i directory_to_search> [-e exclude_directories = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files + --d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files -i directory_to_search - list of directories which should will be searched like /home/rafal -e exclude_directories - list of directories which will be excluded from search. + -k excluded_items - list of excluded items which contains * wildcard(may be slow) -s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching. - -x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png.There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf") + -x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf") -l type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes). -delete - delete found files, by default remove all except the most oldest one, it can take arguments: aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest) Usage example: czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -l "size" -delete czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash" + czkawka --d -i "/var/" -k "/var/l*b/,/var/lo*,*tmp" czkawka --d -i "/etc/" -delete "aeo" --e <-i directory_to_search> [-e exclude_directories = ""] [-delete] - option to find and delete empty folders @@ -225,6 +198,38 @@ Usage of Czkawka: "### ); } +/// Printing infos about warnings, messages and errors +fn print_infos(infos: &Info) { + if !infos.messages.is_empty() { + println!("-------------------------------MESSAGES--------------------------------"); + } + for i in &infos.messages { + println!("{}", i); + } + if !infos.messages.is_empty() { + println!("---------------------------END OF MESSAGES-----------------------------"); + } + + if !infos.warnings.is_empty() { + println!("-------------------------------WARNINGS--------------------------------"); + } + for i in &infos.warnings { + println!("{}", i); + } + if !infos.warnings.is_empty() { + println!("---------------------------END OF WARNINGS-----------------------------"); + } + + if !infos.errors.is_empty() { + println!("--------------------------------ERRORS---------------------------------"); + } + for i in &infos.errors { + println!("{}", i); + } + if !infos.errors.is_empty() { + println!("----------------------------END OF ERRORS------------------------------"); + } +} struct ArgumentsPair { command: String, diff --git a/czkawka_core/Cargo.toml b/czkawka_core/Cargo.toml index 9507239..1754225 100644 --- a/czkawka_core/Cargo.toml +++ b/czkawka_core/Cargo.toml @@ -5,7 +5,6 @@ authors = ["RafaƂ Mikrut "] edition = "2018" [dependencies] -humansize = "1.1.0" +humansize = "1" blake3 = "0.3.6" -#rayon = "1.4.0" -#regex = "1.3.9" \ No newline at end of file +#rayon = "1" \ No newline at end of file diff --git a/czkawka_core/src/common.rs b/czkawka_core/src/common.rs index cd48e2a..d888179 100644 --- a/czkawka_core/src/common.rs +++ b/czkawka_core/src/common.rs @@ -5,9 +5,85 @@ use std::time::SystemTime; pub struct Common(); impl Common { pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) { - if true { + if false { return; } println!("Execution of function \"{}\" took {:?}", function_name, end_time.duration_since(start_time).expect("Time cannot go reverse.")); } + + /// Function to check if directory match expression + pub fn regex_check(expression: &str, directory: &str) -> bool { + if !expression.contains('*') { + println!("Expression should have *"); + return false; + } + + let temp_splits: Vec<&str> = expression.split('*').collect(); + let mut splits: Vec<&str> = Vec::new(); + for i in temp_splits { + if i != "" { + splits.push(i); + } + } + if splits.is_empty() { + return false; + } + + // Early checking if directory contains all parts needed by expression + for split in &splits { + if !directory.contains(split) { + return false; + } + } + + let mut position_of_splits: Vec = Vec::new(); + + // `git*` shouldn't be true for `/gitsfafasfs` + if !expression.starts_with('*') && directory.find(&splits[0]).unwrap() > 0 { + return false; + } + // `*home` shouldn't be true for `/homeowner` + if !expression.ends_with('*') && !directory.ends_with(splits.last().unwrap()) { + // && !directory.ends_with(&(splits.last().unwrap().to_string() + "/")){ + return false; + } + + // At the end we check if parts between * are correctly positioned + position_of_splits.push(directory.find(&splits[0]).unwrap()); + let mut current_index: usize; + let mut found_index: usize; + for i in splits[1..].iter().enumerate() { + current_index = *position_of_splits.get(i.0).unwrap() + i.1.len(); + found_index = match directory[current_index..].find(i.1) { + Some(t) => t, + None => return false, + }; + position_of_splits.push(found_index + current_index); + } + true + } +} +#[cfg(test)] +mod test { + use crate::common::Common; + + #[test] + fn test_regex() { + assert!(Common::regex_check("*home*", "/home/rafal")); + assert!(Common::regex_check("*home", "/home")); + assert!(Common::regex_check("*home/", "/home/")); + assert!(Common::regex_check("*home/*", "/home/")); + assert!(Common::regex_check("*.git*", "/home/.git")); + assert!(Common::regex_check("*/home/rafal*rafal*rafal*rafal*", "/home/rafal/rafalrafalrafal")); + assert!(!Common::regex_check("*home", "/home/")); + assert!(!Common::regex_check("*home", "/homefasfasfasfasf/")); + assert!(!Common::regex_check("*home", "/homefasfasfasfasf")); + assert!(!Common::regex_check("rafal*afal*fal", "rafal")); + assert!(!Common::regex_check("AAAAAAAA****", "/AAAAAAAAAAAAAAAAA")); + assert!(!Common::regex_check("*.git/*", "/home/.git")); + assert!(!Common::regex_check("*home/*koc", "/koc/home/")); + assert!(!Common::regex_check("*home/", "/home")); + assert!(!Common::regex_check("*TTT", "/GGG")); + assert!(!Common::regex_check("AAA", "AAA")); + } } diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index 1f99a3d..b3ecc8c 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -1,4 +1,3 @@ -// TODO when using GUI all or most println!() should be used as variables passed by argument use humansize::{file_size_opts as options, FileSize}; use std::collections::{BTreeMap, HashMap}; use std::fs; @@ -38,7 +37,7 @@ pub struct DuplicateFinder { files_with_identical_size: HashMap>, files_with_identical_hashes: BTreeMap>>, allowed_extensions: Vec, // jpg, jpeg, mp4 - // excluded_items: Vec, // TODO, support for e.g. */.git/* + excluded_items: Vec, // TODO, support for e.g. */.git/* excluded_directories: Vec, included_directories: Vec, min_file_size: u64, @@ -90,7 +89,7 @@ impl DuplicateFinder { infos: Info::new(), files_with_identical_size: Default::default(), files_with_identical_hashes: Default::default(), - // excluded_items: vec![], + excluded_items: vec![], excluded_directories: vec![], included_directories: vec![], min_file_size: 1024, @@ -117,9 +116,32 @@ impl DuplicateFinder { self.min_file_size = min_size; } - pub fn set_excluded_items(&mut self, _excluded_items: String) { - // TODO Still don't know how to exactly parse this - // Things like /.git/ should be by default hidden with help of this *.git* + pub fn set_excluded_items(&mut self, mut excluded_items: String) { + // let start_time: SystemTime = SystemTime::now(); + + if excluded_items.is_empty() { + return; + } + + excluded_items = excluded_items.replace("\"", ""); + let expressions: Vec = excluded_items.split(',').map(String::from).collect(); + let mut checked_expressions: Vec = Vec::new(); + + for expression in expressions { + let expression: String = expression.trim().to_string(); + + if expression == "" { + continue; + } + if !expression.contains('*') { + self.infos.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + &*expression); + continue; + } + + checked_expressions.push(expression); + } + + self.excluded_items = checked_expressions; } pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) { if allowed_extensions.is_empty() { @@ -320,6 +342,7 @@ impl DuplicateFinder { let mut is_excluded_dir = false; next_folder = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap() + "/"; + for ed in &self.excluded_directories { if next_folder == *ed { is_excluded_dir = true; @@ -327,6 +350,16 @@ impl DuplicateFinder { } } if !is_excluded_dir { + let mut found_expression: bool = false; + for expression in &self.excluded_items { + if Common::regex_check(expression, &next_folder) { + found_expression = true; + break; + } + } + if found_expression { + break; + } folders_to_check.push(next_folder); } self.infos.number_of_checked_folders += 1; @@ -334,6 +367,7 @@ impl DuplicateFinder { let mut have_valid_extension: bool; let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase(); + // Checking allowed extensions if !self.allowed_extensions.is_empty() { have_valid_extension = false; for i in &self.allowed_extensions { @@ -346,9 +380,23 @@ impl DuplicateFinder { have_valid_extension = true; } + // Checking files if metadata.len() >= self.min_file_size && have_valid_extension { let current_file_name = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap(); + // Checking expressions + let mut found_expression: bool = false; + for expression in &self.excluded_items { + if Common::regex_check(expression, ¤t_file_name) { + found_expression = true; + break; + } + } + if found_expression { + break; + } + + // Creating new file entry let fe: FileEntry = FileEntry { path: current_file_name.clone(), size: metadata.len(), @@ -384,7 +432,7 @@ impl DuplicateFinder { Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string()); //println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a")); } - // pub fn save_results_to_file(&self) {} + // pub fn save_results_to_file(&self) {} // TODO Saving results to files /// Remove files which have unique size fn remove_files_with_unique_size(&mut self) { @@ -541,9 +589,6 @@ impl DuplicateFinder { Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string()); } /// Remove unused entries when included or excluded overlaps with each other or are duplicated - /// ``` - // let df : DuplicateFinder = saf - /// ``` fn optimize_directories(&mut self) -> bool { let start_time: SystemTime = SystemTime::now();