diff --git a/Cargo.lock b/Cargo.lock index 0d81469..dd369bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,14 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -[[package]] -name = "aho-corasick" -version = "0.7.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86" -dependencies = [ - "memchr", -] - [[package]] name = "arrayref" version = "0.3.6" @@ -21,12 +12,6 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" -[[package]] -name = "autocfg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" - [[package]] name = "blake3" version = "0.3.6" @@ -60,53 +45,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" -[[package]] -name = "crossbeam-channel" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ee0cc8804d5393478d743b035099520087a5186f3b93fa58cec08fa62407b6" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", - "maybe-uninit", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" -dependencies = [ - "autocfg", - "cfg-if", - "crossbeam-utils", - "lazy_static", - "maybe-uninit", - "memoffset", - "scopeguard", -] - -[[package]] -name 
= "crossbeam-utils" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" -dependencies = [ - "autocfg", - "cfg-if", - "lazy_static", -] - [[package]] name = "crypto-mac" version = "0.8.0" @@ -122,9 +60,7 @@ name = "czkawka" version = "0.1.0" dependencies = [ "blake3", - "multimap", - "rayon", - "regex", + "humansize", ] [[package]] @@ -136,12 +72,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "either" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f" - [[package]] name = "generic-array" version = "0.14.4" @@ -153,120 +83,10 @@ dependencies = [ ] [[package]] -name = "hermit-abi" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9" -dependencies = [ - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.76" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755456fae044e6fa1ebbbd1b3e902ae19e73097ed4ed87bb79934a867c007bc3" - -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - -[[package]] -name = "memchr" -version = "2.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400" - -[[package]] -name = "memoffset" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f" -dependencies = [ - "autocfg", -] - -[[package]] -name = "multimap" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1255076139a83bb467426e7f8d0134968a8118844faa755985e077cf31850333" -dependencies = [ - "serde", -] - -[[package]] -name = "num_cpus" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "rayon" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfd016f0c045ad38b5251be2c9c0ab806917f82da4d36b2a327e5166adad9270" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91739a34c4355b5434ce54c9086c5895604a9c278586d1f1aa95e04f66b525a0" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - -[[package]] -name = "regex" -version = "1.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", - "thread_local", -] - -[[package]] -name = "regex-syntax" -version = "0.6.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8" - -[[package]] -name = "scopeguard" +name = "humansize" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "serde" -version = "1.0.115" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5" +checksum = "b6cab2627acfc432780848602f3f558f7e9dd427352224b0d9324025796d2a5e" [[package]] name = "subtle" @@ -274,15 +94,6 @@ version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "502d53007c02d7605a05df1c1a73ee436952781653da5d0bf57ad608f66932c1" -[[package]] -name = "thread_local" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" -dependencies = [ - "lazy_static", -] - [[package]] name = "typenum" version = "1.12.0" diff --git a/Cargo.toml b/Cargo.toml index 480bf9a..0bdec99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ authors = ["Rafał Mikrut "] edition = "2018" [dependencies] -rayon = "1.4.0" +humansize = "1.1.0" +#rayon = "1.4.0" blake3 = "0.3.6" -multimap = "0.8.2" -regex = "1.3.9" \ No newline at end of file +#regex = "1.3.9" \ No newline at end of file diff --git a/README.md b/README.md index 54e6c1c..8be9ea8 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,18 @@ It is in very early development, so most of the functions aren't added and doesn ## Done -- Basic menu +- Basic menu(needs refactoring) +- Duplicated file finding + - Including and excluding directories(absolute paths) + - Option to remove file + - Fast(by size) or accurate(by hash) file checking + - ## TODO -- Graphical UI(GTK) - Duplicated file finding -- Setting include and exclude directories + - saving to file + - support for * when excluding files and folders +- Graphical UI(GTK) - Removing empty folders - Files with debug symbols - Support for showing only duplicates with specific extension, name(Regex support needed) diff --git a/src/duplicate.rs b/src/duplicate.rs index db2cec2..02ea9b4 100644 --- a/src/duplicate.rs +++ b/src/duplicate.rs @@ -1,4 +1,5 @@ // Todo, należy upewnić się, że ma wystarczające uprawnienia do odczytu i usuwania +use 
humansize::{file_size_opts as options, FileSize}; use std::collections::{BTreeMap, HashMap}; use std::fs::{File, Metadata}; use std::io::prelude::*; @@ -6,15 +7,14 @@ use std::path::Path; use std::time::SystemTime; use std::{fs, process}; -const MIN_FILE_SIZE: u64 = 1000; #[derive(PartialEq)] -#[allow(dead_code)] // For now I only use Hash method pub enum CheckingMethod { SIZE, HASH, } + pub struct DuplicateFinder { number_of_checked_files: usize, number_of_ignored_files: usize, @@ -24,6 +24,7 @@ pub struct DuplicateFinder { files_with_identical_size: HashMap>, files_with_identical_hashes: BTreeMap>>, allowed_extensions: Vec, // jpg, jpeg, mp4 + lost_space: u64, // excluded_items: Vec, excluded_directories: Vec, included_directories: Vec, @@ -43,8 +44,9 @@ impl DuplicateFinder { // excluded_items: vec![], excluded_directories: vec![], included_directories: vec![], - min_file_size: 0, + min_file_size: 1024, allowed_extensions: vec![], + lost_space: 0, } } @@ -56,6 +58,7 @@ impl DuplicateFinder { if check_method == CheckingMethod::HASH { self.check_files_hash(); } + self.calculate_lost_space(&check_method); self.print_duplicated_entries(&check_method); if delete_files { self.delete_files(&check_method); @@ -77,6 +80,7 @@ impl DuplicateFinder { allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg"); allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp"); allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm"); + allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf"); let extensions: Vec = allowed_extensions.split(',').map(String::from).collect(); for mut extension in extensions { @@ -178,8 +182,8 @@ impl DuplicateFinder { process::exit(1); } if !Path::new(&directory).exists() { - println!("Exclude Directory ERROR: Path {} doesn't exists.", directory); - process::exit(1); + println!("Exclude Directory 
WARNING: Path {} doesn't exists.", directory); + //process::exit(1); // Better just print warning without closing } if !Path::new(&directory).exists() { println!("Exclude Directory ERROR: {} isn't folder.", directory); @@ -198,6 +202,25 @@ impl DuplicateFinder { //DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string()); } + fn calculate_lost_space(&mut self, check_method: &CheckingMethod) { + let mut bytes: u64 = 0; + + match check_method { + CheckingMethod::SIZE => { + for i in &self.files_with_identical_size { + bytes += i.0 * (i.1.len() as u64 - 1); + } + } + CheckingMethod::HASH => { + for i in &self.files_with_identical_hashes { + for j in i.1 { + bytes += i.0 * (j.len() as u64 - 1); + } + } + } + } + self.lost_space = bytes; + } // TODO - Still isn't used but it will be probably required with GUI // pub fn clear(&mut self) { @@ -257,7 +280,7 @@ impl DuplicateFinder { if !self.allowed_extensions.is_empty() { have_valid_extension = false; for i in &self.allowed_extensions { - if file_name_lowercase.ends_with(&i.to_lowercase()) { + if file_name_lowercase.ends_with(&(".".to_string() + &*i.to_lowercase().to_string())) { have_valid_extension = true; break; } @@ -266,7 +289,7 @@ impl DuplicateFinder { have_valid_extension = true; } - if metadata.len() >= MIN_FILE_SIZE && have_valid_extension { + if metadata.len() >= self.min_file_size && have_valid_extension { let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap(); // println!("File\t\t - {:?}", current_file_name); // DEBUG //file_to_check @@ -383,27 +406,27 @@ impl DuplicateFinder { /// Setting include directories, panics when there is not directories available fn debug_print(&self) { - println!("---------------DEBUG PRINT---------------"); - println!("Number of all checked files - {}", self.number_of_checked_files); - println!("Number of all ignored files - {}", self.number_of_ignored_files); - println!("Number of all checked 
folders - {}", self.number_of_checked_folders); - println!("Number of all ignored things - {}", self.number_of_ignored_things); - println!("Number of duplicated files - {}", self.number_of_duplicated_files); - let mut file_size: u64 = 0; - for i in &self.files_with_identical_size { - file_size += i.1.len() as u64; - } - println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size); - let mut hashed_file_size: u64 = 0; - for i in &self.files_with_identical_hashes { - for j in i.1 { - hashed_file_size += j.len() as u64; - } - } - println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size); - println!("Excluded directories - {:?}", self.excluded_directories); - println!("Included directories - {:?}", self.included_directories); - println!("-----------------------------------------"); + // println!("---------------DEBUG PRINT---------------"); + // println!("Number of all checked files - {}", self.number_of_checked_files); + // println!("Number of all ignored files - {}", self.number_of_ignored_files); + // println!("Number of all checked folders - {}", self.number_of_checked_folders); + // println!("Number of all ignored things - {}", self.number_of_ignored_things); + // println!("Number of duplicated files - {}", self.number_of_duplicated_files); + // let mut file_size: u64 = 0; + // for i in &self.files_with_identical_size { + // file_size += i.1.len() as u64; + // } + // println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size); + // let mut hashed_file_size: u64 = 0; + // for i in &self.files_with_identical_hashes { + // for j in i.1 { + // hashed_file_size += j.len() as u64; + // } + // } + // println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size); + // println!("Excluded directories - {:?}", self.excluded_directories); + // println!("Included directories - {:?}", self.included_directories); + // 
println!("-----------------------------------------"); } #[allow(dead_code)] @@ -420,7 +443,12 @@ impl DuplicateFinder { number_of_groups += 1; } } - println!("Found {} files in {} groups with same content:", number_of_files, number_of_groups); + println!( + "Found {} files in {} groups with same content which took {}:", + number_of_files, + number_of_groups, + self.lost_space.file_size(options::BINARY).unwrap() + ); for i in &self.files_with_identical_hashes { println!("Size - {}", i.0); for j in i.1 { @@ -437,21 +465,26 @@ impl DuplicateFinder { number_of_files += i.1.len() as u64; number_of_groups += 1; } - println!("Found {} files in {} groups with same size(may have different content):", number_of_files, number_of_groups); - for i in &self.files_with_identical_size { - println!("Size - {}", i.0); - for j in i.1 { - println!("{}", j.path); - } - println!(); - } + println!( + "Found {} files in {} groups with same size(may have different content) which took {}:", + number_of_files, + number_of_groups, + self.lost_space.file_size(options::BINARY).unwrap() + ); + // for i in &self.files_with_identical_size { + // println!("Size - {}", i.0); + // for j in i.1 { + // println!("{}", j.path); + // } + // println!(); + // } } } DuplicateFinder::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string()); } /// Remove unused entries when included or excluded overlaps with each other or are duplicated /// ``` - /// let df : DuplicateFinder = saf + // let df : DuplicateFinder = saf /// ``` fn optimize_directories(&mut self) { let start_time: SystemTime = SystemTime::now(); @@ -616,13 +649,3 @@ struct FileEntry { pub created_date: SystemTime, pub modified_date: SystemTime, } -impl FileEntry { - // pub fn return_copy(&self) -> FileEntry { - // let new_copy : FileEntry = FileEntry{ - // path: self.path.to_string(), - // size: self.size, - // created_date: self.created_date, - // modified_date: self.modified_date - // }; - // } -} diff --git 
a/src/main.rs b/src/main.rs index a4fe3ba..6d889f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,72 +4,110 @@ mod duplicate; fn main() { // Parse argument - // let mut all_arguments: Vec = env::args().collect(); - let number_of_arguments: usize = all_arguments.len() - 1; - let mut arguments: Vec = Vec::new(); let mut commands_arguments: Vec = Vec::new(); all_arguments.remove(0); // Removing program name from arguments - for argument in all_arguments { - if argument.starts_with('-') { - commands_arguments.push(argument); - } else { - arguments.push(argument); - } - } - - println!("Number of arguments - {}", arguments.len()); - for (index, argument) in arguments.iter().enumerate() { - println!("Argument number {} - {}", index, argument); - } - if number_of_arguments == 0 { + // No arguments, so we print help to allow user to learn more about program + if all_arguments.is_empty() { print_help(); process::exit(0); } + + // Assigning commands with arguments + let mut arguments: Vec = Vec::new(); + + let mut can_pass_argument: bool = false; + for argument in 0..all_arguments.len() { + if all_arguments[argument].starts_with("--") { + commands_arguments.push(all_arguments[argument].clone()); + } else if all_arguments[argument].starts_with('-') { + if argument + 1 < all_arguments.len() { + if all_arguments[argument + 1].starts_with("--") || all_arguments[argument + 1].starts_with('-') { + println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]); + process::exit(1); + } else { + let a: ArgumentsPair = ArgumentsPair { + command: all_arguments[argument].clone(), + argument: all_arguments[argument + 1].clone(), + }; + arguments.push(a); + can_pass_argument = true; + } + } else { + println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]); + process::exit(1); + } + } else { + if !can_pass_argument { + println!("FATAL ERROR: Argument \"{}\" is not linked to any command", all_arguments[argument]); + process::exit(1); + } else { + 
can_pass_argument = false; + } + } + } + + for a in &arguments { + println!("Argument number {} - {}", a.command, a.argument); + } + if commands_arguments.is_empty() { - println! {"Missing command, please read help for more info."}; + println! {"FATAL ERROR: Missing type of app which you want to run, please read help for more info."}; process::exit(0); } match commands_arguments[0].as_ref() { - "-d" | "-duplicate_finder" => { - let delete_files: bool = commands_arguments.contains(&"-delete".to_owned()); + "--d" => { + let mut df = duplicate::DuplicateFinder::new(); - if arguments.len() < 2 { - println!("FATAL ERROR: Duplicate Finder must be executed with at least 1 argument"); + if ArgumentsPair::has_command(&arguments, "-i") { + df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i")); + } else { + println!("FATAL ERROR: Parameter -i with set of included files is required."); process::exit(1); } - - let mut df = duplicate::DuplicateFinder::new(); - df.set_include_directory(arguments[0].clone()); - - if arguments.len() > 1 { - df.set_exclude_directory(arguments[1].clone()); + if ArgumentsPair::has_command(&arguments, "-e") { + df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e")); } - if arguments.len() > 2 { - let min_size = match arguments[2].parse::() { + + if ArgumentsPair::has_command(&arguments, "-s") { + let min_size = match ArgumentsPair::get_argument(&arguments, "-s").parse::() { Ok(t) => t, Err(_) => { - println!("FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)", arguments[2]); + println!("FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)", ArgumentsPair::get_argument(&arguments, "-s")); process::exit(1); } }; df.set_min_file_size(min_size); } - if arguments.len() > 3 { - df.set_allowed_extensions(arguments[3].clone()); - } - if arguments.len() > 4 { - df.set_excluded_items(arguments[4].clone()); - } - df.find_duplicates(duplicate::CheckingMethod::SIZE, delete_files); + if 
ArgumentsPair::has_command(&arguments, "-x") { + df.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x")); + } + if ArgumentsPair::has_command(&arguments, "-k") { + df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k")); + } + if ArgumentsPair::has_command(&arguments, "-l") { + let check_method: duplicate::CheckingMethod; + if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "size" { + check_method = duplicate::CheckingMethod::SIZE; + } else if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "hash" { + check_method = duplicate::CheckingMethod::HASH; + } else { + println!("-l can only have values hash or size"); + process::exit(1); + } + df.find_duplicates(check_method, ArgumentsPair::has_command(&arguments, "--delete")); + } } - "-h" | "-help" => { + "--h" | "--help" => { print_help(); } - argum => println!("\"{}\" argument is not supported, check help for more info.", argum), + argum => { + println!("FATAL ERROR: \"{}\" argument is not supported, check help for more info.", argum); + process::exit(1); + } }; } @@ -78,11 +116,45 @@ fn print_help() { println!("Usage of Czkawka:"); println!("czkawka