diff --git a/Changelog b/Changelog
index 7e76d34..7249af0 100644
--- a/Changelog
+++ b/Changelog
@@ -1,4 +1,14 @@
-## Version 0.10
+## Version 0.1.1
+- Added images to README
+- Better GTK buttons
+- Basic search in GTK
+- Removed println! calls from core
+- Core functions no longer use process::exit (everything is handled with messages/errors/warnings)
+- Added support for non-recursive search
+- Improved counting of the number and size of duplicated files
+- Saving results to file
+
+## Version 0.1.0
 - Initial Version
 - Duplicate file finder
 - Empty folder finder
diff --git a/czkawka_cli/src/main.rs b/czkawka_cli/src/main.rs
index 4090af9..ab32d45 100644
--- a/czkawka_cli/src/main.rs
+++ b/czkawka_cli/src/main.rs
@@ -61,9 +61,7 @@ fn main() {
             let mut check_method: duplicate::CheckingMethod = duplicate::CheckingMethod::HASH;

             if ArgumentsPair::has_command(&arguments, "-i") {
-                if !df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false)) {
-                    process::exit(1);
-                }
+                df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
             } else {
                 println!("FATAL ERROR: Parameter -i with set of included files is required.");
                 process::exit(1);
@@ -95,7 +93,10 @@ fn main() {
             }
             if ArgumentsPair::has_command(&arguments, "-k") {
                 df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
+            } else {
+                df.set_excluded_items("DEFAULT".to_string());
             }
+
             if ArgumentsPair::has_command(&arguments, "-o") {
                 df.set_recursive_search(false);
             }
@@ -136,6 +137,10 @@ fn main() {

             df.find_duplicates(&check_method, &delete_method);

+            if ArgumentsPair::has_command(&arguments, "-f") {
+                df.save_results_to_file(&ArgumentsPair::get_argument(&arguments, "-f", false));
+            }
+
             print_infos(df.get_infos());
         }
         "--h" | "--help" => {
@@ -179,11 +184,12 @@ Usage of Czkawka:

    czkawka --help
    czkawka
-            --d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-delete = "aeo"] - search for duplicates files
+            --d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = "DEFAULT"] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicate files
       -i directory_to_search - list of directories which should will be searched like /home/rafal
       -e exclude_directories - list of directories which will be excluded from search.
       -k excluded_items - list of excluded items which contains * wildcard(may be slow)
-       -o non_recursive - this options prevents from recursive check of folders
+       -o - this option prevents recursive checking of folders
+       -f file_to_save - saves results to a file
       -s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
       -x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
       -l type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).
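Note (not part of the patch): a minimal sketch of how the czkawka_core API touched above ends up being driven by the CLI after this change. DuplicateFinder::new() and the exact setter signatures are assumptions taken from the surrounding hunks, and the paths are placeholders; an equivalent command-line invocation would look something like czkawka --d -i /home/rafal -s 1024 -o -f results.txt.

    use czkawka_core::duplicate::{CheckingMethod, DeleteMethod, DuplicateFinder};

    fn run_search() {
        let mut df = DuplicateFinder::new();
        // Required; without -i the CLI prints a fatal error and exits.
        df.set_include_directory("/home/rafal".to_string());
        // Without -k the CLI now falls back to the built-in "DEFAULT" list (currently */.git/*).
        df.set_excluded_items("DEFAULT".to_string());
        df.set_min_file_size(1024);
        // -o disables recursive search.
        df.set_recursive_search(false);
        // Search by content hash and do not delete anything.
        df.find_duplicates(&CheckingMethod::HASH, &DeleteMethod::None);
        // New in this patch: -f saves the results ("" falls back to results.txt).
        df.save_results_to_file("results.txt");
        let _infos = df.get_infos();
    }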
diff --git a/czkawka_core/src/common.rs b/czkawka_core/src/common.rs
index 00aa007..495a194 100644
--- a/czkawka_core/src/common.rs
+++ b/czkawka_core/src/common.rs
@@ -36,17 +36,16 @@ impl Common {
     pub fn delete_one_entry(entry: &str) -> String {
         let path: &Path = Path::new(entry);
         let mut warning: String = String::from("");
-        if path.is_dir() {
-            match fs::remove_dir_all(&entry) {
-                Ok(_) => (),
-                Err(_) => warning = "Failed to remove folder ".to_owned() + entry,
-            }
-        } else {
-            match fs::remove_file(&entry) {
-                Ok(_) => (),
-                Err(_) => warning = "Failed to remove file ".to_owned() + entry,
-            }
-
+        if path.is_dir() {
+            match fs::remove_dir_all(&entry) {
+                Ok(_) => (),
+                Err(_) => warning = "Failed to remove folder ".to_owned() + entry,
+            }
+        } else {
+            match fs::remove_file(&entry) {
+                Ok(_) => (),
+                Err(_) => warning = "Failed to remove file ".to_owned() + entry,
+            }
         }
         warning
     }
diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs
index 0736b94..2452723 100644
--- a/czkawka_core/src/duplicate.rs
+++ b/czkawka_core/src/duplicate.rs
@@ -34,13 +34,13 @@ struct FileEntry {
 /// Struct with required information's to work
 pub struct DuplicateFinder {
     infos: Info,
-    files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
+    files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,
     files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
     allowed_extensions: Vec<String>,
     excluded_items: Vec<String>,
     excluded_directories: Vec<String>,
     included_directories: Vec<String>,
-    recursive_search : bool,
+    recursive_search: bool,
     min_file_size: u64,
 }

@@ -53,8 +53,12 @@ pub struct Info {
     pub number_of_checked_folders: usize,
     pub number_of_ignored_files: usize,
     pub number_of_ignored_things: usize,
-    pub number_of_duplicated_files: usize,
-    pub lost_space: u64,
+    pub number_of_groups_by_size: usize,
+    pub number_of_duplicated_files_by_size: usize,
+    pub number_of_groups_by_hash: usize,
+    pub number_of_duplicated_files_by_hash: usize,
+    pub lost_space_by_size: u64,
+    pub lost_space_by_hash: u64,
     pub number_of_removed_files: usize,
     pub number_of_failed_to_remove_files: usize,
     pub gained_space: u64,
@@ -69,8 +73,12 @@
             number_of_ignored_files: 0,
             number_of_checked_folders: 0,
             number_of_ignored_things: 0,
-            number_of_duplicated_files: 0,
-            lost_space: 0,
+            number_of_groups_by_size: 0,
+            number_of_duplicated_files_by_size: 0,
+            number_of_groups_by_hash: 0,
+            number_of_duplicated_files_by_hash: 0,
+            lost_space_by_size: 0,
+            lost_space_by_hash: 0,
             number_of_removed_files: 0,
             number_of_failed_to_remove_files: 0,
             gained_space: 0,
@@ -109,7 +117,6 @@ impl DuplicateFinder {
         if *check_method == CheckingMethod::HASH {
             self.check_files_hash();
         }
-        self.calculate_lost_space(check_method);
         self.delete_files(check_method, delete_method);
         self.debug_print();
     }
@@ -118,8 +125,8 @@ impl DuplicateFinder {
         self.min_file_size = min_size;
     }

-    pub fn set_recursive_search(&mut self, reqursive_search : bool){
-        self.recursive_search = reqursive_search;
+    pub fn set_recursive_search(&mut self, recursive_search: bool) {
+        self.recursive_search = recursive_search;
     }
     pub fn set_excluded_items(&mut self, mut excluded_items: String) {
         // let start_time: SystemTime = SystemTime::now();
@@ -138,6 +145,11 @@ impl DuplicateFinder {
             if expression == "" {
                 continue;
             }
+            if expression == "DEFAULT" {
+                // TODO add more files by default
+                checked_expressions.push("*/.git/*".to_string());
+                continue;
+            }
             if !expression.contains('*') {
                 self.infos.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + &*expression);
                 continue;
@@ -282,25 +294,6 @@ impl DuplicateFinder {

         //Common::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
     }
-    fn calculate_lost_space(&mut self, check_method: &CheckingMethod) {
-        let mut bytes: u64 = 0;
-
-        match check_method {
-            CheckingMethod::SIZE => {
-                for i in &self.files_with_identical_size {
-                    bytes += i.0 * (i.1.len() as u64 - 1);
-                }
-            }
-            CheckingMethod::HASH => {
-                for i in &self.files_with_identical_hashes {
-                    for j in i.1 {
-                        bytes += i.0 * (j.len() as u64 - 1);
-                    }
-                }
-            }
-        }
-        self.infos.lost_space = bytes;
-    }

     fn check_files_size(&mut self) {
         // TODO maybe add multithreading checking for file hash
@@ -311,6 +304,7 @@ impl DuplicateFinder {
         for id in &self.included_directories {
             folders_to_check.push(id.to_string());
         }
+        self.infos.number_of_checked_folders += folders_to_check.len();

         let mut current_folder: String;
         let mut next_folder: String;
@@ -340,12 +334,13 @@ impl DuplicateFinder {
                    } //Permissions denied
                };
                if metadata.is_dir() {
+                    self.infos.number_of_checked_folders += 1;
                    // if entry_data.file_name().into_string().is_err() { // Probably this can be removed, if crash still will be happens, then uncomment this line
                    //     self.infos.warnings.push("Cannot read folder name in dir ".to_string() + &*current_folder);
                    //     continue; // Permissions denied
                    // }

-                    if !self.recursive_search{
+                    if !self.recursive_search {
                        continue;
                    }

@@ -371,7 +366,6 @@
                        }
                        folders_to_check.push(next_folder);
                    }
-                    self.infos.number_of_checked_folders += 1;
                } else if metadata.is_file() {
                    let mut have_valid_extension: bool;
                    let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
@@ -441,23 +435,77 @@
         Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
         //println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
     }
-    // pub fn save_results_to_file(&self) {} // TODO Saving results to files
+    pub fn save_results_to_file(&mut self, file_name: &str) {
+        let file_name: String = match file_name {
+            "" => "results.txt".to_string(),
+            k => k.to_string(),
+        };
+
+        let mut file = match File::create(&file_name) {
+            Ok(t) => t,
+            Err(_) => {
+                self.infos.errors.push("Failed to create file ".to_string() + file_name.as_str());
+                return;
+            }
+        };
+
+        match file.write_all(b"Results of searching\n\n") {
+            Ok(_) => (),
+            Err(_) => {
+                self.infos.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
+                return;
+            }
+        }
+
+        if !self.files_with_identical_size.is_empty() {
+            file.write_all(b"-------------------------------------------------Files with same size-------------------------------------------------\n").unwrap();
+            file.write_all(("Found ".to_string() + self.infos.number_of_duplicated_files_by_size.to_string().as_str() + " duplicated files which are in " + self.files_with_identical_size.len().to_string().as_str() + " groups.\n").as_bytes())
+                .unwrap();
+            for (size, files) in self.files_with_identical_size.iter().rev() {
+                file.write_all(b"\n---- Size ").unwrap();
+                file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
+                file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
+                file.write_all(b"\n").unwrap();
+                for file_entry in files {
+                    file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
+                }
+            }
+        }
+
+        if !self.files_with_identical_hashes.is_empty() {
+            file.write_all(b"-------------------------------------------------Files with same hashes-------------------------------------------------\n").unwrap();
+            file.write_all(("Found ".to_string() + self.infos.number_of_duplicated_files_by_hash.to_string().as_str() + " duplicated files which are in " + self.files_with_identical_hashes.len().to_string().as_str() + " groups.\n").as_bytes())
+                .unwrap();
+            for (size, files) in self.files_with_identical_hashes.iter().rev() {
+                for vector in files {
+                    file.write_all(b"\n---- Size ").unwrap();
+                    file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
+                    file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
+                    file.write_all(b"\n").unwrap();
+                    for file_entry in vector {
+                        file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
+                    }
+                }
+            }
+        }
+    }

     /// Remove files which have unique size
     fn remove_files_with_unique_size(&mut self) {
         let start_time: SystemTime = SystemTime::now();
-        let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();
+        let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();

-        self.infos.number_of_duplicated_files = 0;
+        self.infos.number_of_duplicated_files_by_size = 0;

-        for entry in &self.files_with_identical_size {
-            if entry.1.len() > 1 {
-                self.infos.number_of_duplicated_files += entry.1.len() - 1;
-                new_hashmap.insert(*entry.0, entry.1.clone());
+        for (size, vector) in &self.files_with_identical_size {
+            if vector.len() > 1 {
+                self.infos.number_of_duplicated_files_by_size += vector.len() - 1;
+                self.infos.number_of_groups_by_size += 1;
+                self.infos.lost_space_by_size += (vector.len() as u64 - 1) * size;
+                new_map.insert(*size, vector.clone());
             }
         }
-
-        self.files_with_identical_size = new_hashmap;
+        self.files_with_identical_size = new_map;

         Common::print_time(start_time, SystemTime::now(), "remove_files_with_unique_size".to_string());
     }
@@ -492,7 +540,7 @@ impl DuplicateFinder {
                        error_reading_file = true;
                        break;
                    }
-                }; //.unwrap();
+                };
                if n == 0 {
                    break;
                }
@@ -511,6 +559,15 @@
                }
            }
        }
+
+        for (size, vector) in &self.files_with_identical_hashes {
+            for vec_file_entry in vector {
+                self.infos.number_of_duplicated_files_by_hash += vec_file_entry.len() - 1;
+                self.infos.number_of_groups_by_hash += 1;
+                self.infos.lost_space_by_hash += (vec_file_entry.len() as u64 - 1) * size;
+            }
+        }
+
         Common::print_time(start_time, SystemTime::now(), "check_files_hash".to_string());
     }

@@ -518,30 +575,32 @@ impl DuplicateFinder {
     /// Setting include directories, panics when there is not directories available
     fn debug_print(&self) {
         println!("---------------DEBUG PRINT---------------");
-        println!("Recursive search - {}", self.recursive_search.to_string());
+        println!("### Infos");
+
+        println!("Errors size - {}", self.infos.errors.len());
+        println!("Warnings size - {}", self.infos.warnings.len());
+        println!("Messages size - {}", self.infos.messages.len());
         println!("Number of checked files - {}", self.infos.number_of_checked_files);
         println!("Number of checked folders - {}", self.infos.number_of_checked_folders);
         println!("Number of ignored files - {}", self.infos.number_of_ignored_files);
         println!("Number of ignored things(like symbolic links) - {}", self.infos.number_of_ignored_things);
-        println!("Number of duplicated files - {}", self.infos.number_of_duplicated_files);
-        let mut file_size: u64 = 0;
-        for i in &self.files_with_identical_size {
-            file_size += i.1.len() as u64;
-        }
-        println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size);
-        let mut hashed_file_size: u64 = 0;
-        for i in &self.files_with_identical_hashes {
-            for j in i.1 {
-                hashed_file_size += j.len() as u64;
-            }
-        }
-        println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size);
+        println!("Number of duplicated files by size(in groups) - {} ({})", self.infos.number_of_duplicated_files_by_size, self.infos.number_of_groups_by_size);
+        println!("Number of duplicated files by hash(in groups) - {} ({})", self.infos.number_of_duplicated_files_by_hash, self.infos.number_of_groups_by_hash);
+        println!("Lost space by size - {} ({} bytes)", self.infos.lost_space_by_size.file_size(options::BINARY).unwrap(), self.infos.lost_space_by_size);
+        println!("Lost space by hash - {} ({} bytes)", self.infos.lost_space_by_hash.file_size(options::BINARY).unwrap(), self.infos.lost_space_by_hash);
+        println!("Gained space by removing duplicated entries - {} ({} bytes)", self.infos.gained_space.file_size(options::BINARY).unwrap(), self.infos.gained_space);
         println!("Number of removed files - {}", self.infos.number_of_removed_files);
         println!("Number of failed to remove files - {}", self.infos.number_of_failed_to_remove_files);
-        println!("Lost space - {} ({} bytes)", self.infos.lost_space.file_size(options::BINARY).unwrap(), self.infos.lost_space);
-        println!("Gained space by removing duplicated entries - {} ({} bytes)", self.infos.gained_space.file_size(options::BINARY).unwrap(), self.infos.gained_space);
+
+        println!("### Other");
+
+        println!("Files list size - {}", self.files_with_identical_size.len());
+        println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
+        println!("Allowed extensions - {:?}", self.allowed_extensions);
+        println!("Excluded items - {:?}", self.excluded_items);
         println!("Excluded directories - {:?}", self.excluded_directories);
-        println!("Included directories - {:?}", self.included_directories);
+        println!("Recursive search - {}", self.recursive_search.to_string());
+        println!("Minimum file size - {:?}", self.min_file_size);
         println!("-----------------------------------------");
     }

@@ -563,7 +622,7 @@ impl DuplicateFinder {
             "Found {} duplicated files in {} groups with same content which took {}:",
             number_of_files,
             number_of_groups,
-            self.infos.lost_space.file_size(options::BINARY).unwrap()
+            self.infos.lost_space_by_size.file_size(options::BINARY).unwrap()
         );
         for (key, vector) in self.files_with_identical_hashes.iter().rev() {
             println!("Size - {}", key.file_size(options::BINARY).unwrap());
@@ -585,7 +644,7 @@ impl DuplicateFinder {
             "Found {} files in {} groups with same size(may have different content) which took {}:",
             number_of_files,
             number_of_groups,
-            self.infos.lost_space.file_size(options::BINARY).unwrap()
+            self.infos.lost_space_by_size.file_size(options::BINARY).unwrap()
         );
         for i in &self.files_with_identical_size {
             println!("Size - {}", i.0);
@@ -614,7 +673,8 @@ impl DuplicateFinder {
         self.included_directories.dedup();

         // Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
-        if self.recursive_search { // This is only point which can't be done when recursive search is disabled.
+        if self.recursive_search {
+            // This is only point which can't be done when recursive search is disabled.
            let mut is_inside: bool;
            for ed_checked in &self.excluded_directories {
                is_inside = false;
diff --git a/czkawka_gui/src/main.rs b/czkawka_gui/src/main.rs
index 8d64b40..9e36b40 100644
--- a/czkawka_gui/src/main.rs
+++ b/czkawka_gui/src/main.rs
@@ -21,15 +21,11 @@ fn main() {

     // Buttons State

-    let mut hashmap_buttons : HashMap<&str,bool> = Default::default();
-    for i in ["duplicate","empty_folder"].iter() {
-        hashmap_buttons.insert(i,false);
+    let mut hashmap_buttons: HashMap<&str, bool> = Default::default();
+    for i in ["duplicate", "empty_folder"].iter() {
+        hashmap_buttons.insert(i, false);
     }
-
-
-
-

     // let mut hashmap_buttons : HashMap<&str,bool> = Default::default();
     // let mut buttons_state : HashMap<&str,HashMap<&str,bool>> = Default::default();
     // for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
@@ -39,9 +35,7 @@ fn main() {
     // for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
     //     buttons_state.insert(i,hashmap_buttons.clone());
     // }
-    // buttons_state.insert(hashmap_buttons.clone());
-
-
+    // buttons_state.insert(hashmap_buttons.clone());


     // GUI Notepad Buttons
@@ -113,7 +107,7 @@ fn main() {
     {
         // Connect Buttons

-        // let buttons_search = buttons_search.clone();
+        let buttons_search_clone = buttons_search.clone();
         // let info_entry = info_entry.clone();

         buttons_search.connect_clicked(move |_| {
@@ -128,7 +122,7 @@ fn main() {
            df.set_allowed_extensions("".to_owned());
            df.set_min_file_size(1000); // TODO Change to proper value
            df.find_duplicates(&CheckingMethod::HASH, &DeleteMethod::None);
-            let infos = df.get_infos();
+            let _infos = df.get_infos();

            info_entry.set_text("Found TODO duplicates files in TODO groups which took TODO GB/MB/KB/B");

@@ -138,7 +132,7 @@ fn main() {
            buttons_delete.show();

            //
-            buttons_search.show();
+            buttons_search_clone.show();
            buttons_stop.hide();
            buttons_resume.hide();
            buttons_pause.hide();
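Note (not part of the patch): the buttons_search_clone change above is the usual gtk-rs clone-before-move pattern. A minimal, hypothetical sketch, assuming gtk::init() has already run and using a standalone Button purely for illustration:

    use gtk::prelude::*;

    fn wire_search_button() {
        // GTK widgets are reference-counted handles, so clone() is cheap and both
        // handles refer to the same underlying button.
        let buttons_search = gtk::Button::with_label("Search");
        let buttons_search_clone = buttons_search.clone();

        // The `move` closure takes ownership of what it captures, so only the clone
        // is moved in; the original handle stays usable after this call.
        buttons_search.connect_clicked(move |_| {
            buttons_search_clone.hide();
        });
    }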