diff --git a/czkawka_cli/src/commands.rs b/czkawka_cli/src/commands.rs index 64d5712..544aecb 100644 --- a/czkawka_cli/src/commands.rs +++ b/czkawka_cli/src/commands.rs @@ -10,7 +10,7 @@ use czkawka_core::similar_images::SimilarityPreset; #[derive(Debug, StructOpt)] #[structopt(name = "czkawka", help_message = HELP_MESSAGE, template = HELP_TEMPLATE)] pub enum Commands { - #[structopt(name = "dup", about = "Finds duplicate files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo")] + #[structopt(name = "dup", about = "Finds duplicate files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hash -f results.txt -D aeo")] Duplicates { #[structopt(flatten)] directories: Directories, @@ -305,7 +305,6 @@ fn parse_checking_method(src: &str) -> Result { "name" => Ok(CheckingMethod::Name), "size" => Ok(CheckingMethod::Size), "hash" => Ok(CheckingMethod::Hash), - "hashmb" => Ok(CheckingMethod::HashMb), _ => Err("Couldn't parse the search method (allowed: NAME, SIZE, HASH, HASHMB)"), } } @@ -440,7 +439,7 @@ SUBCOMMANDS: try "{usage} -h" to get more info about a specific tool EXAMPLES: - {bin} dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo + {bin} dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hash -f results.txt -D aeo {bin} empty-folders -d /home/rafal/rr /home/gateway -f results.txt {bin} big -d /home/rafal/ /home/piszczal -e /home/rafal/Roman -n 25 -x VIDEO -f results.txt {bin} empty-files -d /home/rafal /home/szczekacz -e /home/rafal/Pulpit -R -f results.txt diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index d7266c2..e1db1de 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -27,8 +27,6 @@ use crate::common_items::ExcludedItems; use crate::common_messages::Messages; use crate::common_traits::*; -const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB - #[derive(Debug)] pub struct ProgressData { pub checking_method: CheckingMethod, @@ -44,7 +42,6 @@ pub enum CheckingMethod { Name, Size, Hash, - HashMb, } impl MyHasher for blake3::Hasher { @@ -198,7 +195,7 @@ impl DuplicateFinder { return; } } - CheckingMethod::HashMb | CheckingMethod::Hash => { + CheckingMethod::Hash => { if !self.check_files_size(stop_receiver, progress_sender) { self.stopped_search = true; return; @@ -499,7 +496,7 @@ impl DuplicateFinder { let checking_method = self.check_method.clone(); let max_stage = match self.check_method { CheckingMethod::Size => 0, - CheckingMethod::HashMb | CheckingMethod::Hash => 2, + CheckingMethod::Hash => 2, _ => 255, }; progress_thread_handle = thread::spawn(move || loop { @@ -796,35 +793,6 @@ impl DuplicateFinder { let mut full_hash_results: Vec<(u64, BTreeMap>, Vec, u64)>; match self.check_method { - CheckingMethod::HashMb => { - full_hash_results = pre_checked_map - .par_iter() - .map(|(size, vec_file_entry)| { - let mut hashmap_with_hash: BTreeMap> = Default::default(); - let mut errors: Vec = Vec::new(); - let mut bytes_read: u64 = 0; - let mut buffer = [0u8; 1024 * 128]; - atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); - for file_entry in vec_file_entry { - if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { - check_was_breaked.store(true, Ordering::Relaxed); - return None; - } - - match hash_calculation(&mut buffer, file_entry, &check_type, HASH_MB_LIMIT_BYTES) { - Ok((hash_string, bytes)) => { - bytes_read += bytes; - hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new); - hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned()); - } - Err(s) => errors.push(s), - } - } - Some((*size, hashmap_with_hash, errors, bytes_read)) - }) - .while_some() - .collect(); - } CheckingMethod::Hash => { let loaded_hash_map; @@ -832,7 +800,7 @@ impl DuplicateFinder { let mut non_cached_files_to_check: BTreeMap> = Default::default(); if self.use_cache { - loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type) { + loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type, false) { Some(t) => t, None => Default::default(), }; @@ -934,7 +902,7 @@ impl DuplicateFinder { } } } - save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, self.minimal_cache_file_size); + save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, false, self.minimal_cache_file_size); } } _ => panic!("What"), @@ -995,7 +963,7 @@ impl DuplicateFinder { self.information.number_of_failed_to_remove_files += tuple.2; } } - CheckingMethod::Hash | CheckingMethod::HashMb => { + CheckingMethod::Hash => { for vector_vectors in self.files_with_identical_hashes.values() { for vector in vector_vectors.iter() { let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun); @@ -1154,7 +1122,7 @@ impl SaveResults for DuplicateFinder { write!(writer, "Not found any duplicates.").unwrap(); } } - CheckingMethod::Hash | CheckingMethod::HashMb => { + CheckingMethod::Hash => { if !self.files_with_identical_hashes.is_empty() { writeln!(writer, "-------------------------------------------------Files with same hashes-------------------------------------------------").unwrap(); writeln!( @@ -1209,7 +1177,7 @@ impl PrintResults for DuplicateFinder { println!(); } } - CheckingMethod::Hash | CheckingMethod::HashMb => { + CheckingMethod::Hash => { for (_size, vector) in self.files_with_identical_hashes.iter() { for j in vector { number_of_files += j.len() as u64; @@ -1354,7 +1322,7 @@ pub fn make_hard_link(src: &Path, dst: &Path) -> io::Result<()> { result } -pub fn save_hashes_to_file(hashmap: &BTreeMap, text_messages: &mut Messages, type_of_hash: &HashType, minimal_cache_file_size: u64) { +pub fn save_hashes_to_file(hashmap: &BTreeMap, text_messages: &mut Messages, type_of_hash: &HashType, is_prehash: bool, minimal_cache_file_size: u64) { if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") { let cache_dir = PathBuf::from(proj_dirs.cache_dir()); if cache_dir.exists() { @@ -1366,7 +1334,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap, text_messages: text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e)); return; } - let cache_file = cache_dir.join(get_file_hash_name(type_of_hash).as_str()); + let cache_file = cache_dir.join(get_file_hash_name(type_of_hash, is_prehash).as_str()); let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) { Ok(t) => t, Err(e) => { @@ -1419,14 +1387,15 @@ fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashT Ok((hasher.finalize(), current_file_read_bytes)) } -fn get_file_hash_name(type_of_hash: &HashType) -> String { - format!("cache_duplicates_{:?}.txt", type_of_hash) +fn get_file_hash_name(type_of_hash: &HashType, is_prehash: bool) -> String { + let prehash_str = if is_prehash { "_prehash" } else { "" }; + format!("cache_duplicates_{:?}{}.txt", type_of_hash, prehash_str) } -pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, type_of_hash: &HashType) -> Option>> { +pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, type_of_hash: &HashType, is_prehash: bool) -> Option>> { if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") { let cache_dir = PathBuf::from(proj_dirs.cache_dir()); - let cache_file = cache_dir.join(get_file_hash_name(type_of_hash).as_str()); + let cache_file = cache_dir.join(get_file_hash_name(type_of_hash, is_prehash).as_str()); let file_handler = match OpenOptions::new().read(true).open(&cache_file) { Ok(t) => t, Err(_inspected) => { diff --git a/czkawka_gui/src/compute_results.rs b/czkawka_gui/src/compute_results.rs index 2b96683..87d718c 100644 --- a/czkawka_gui/src/compute_results.rs +++ b/czkawka_gui/src/compute_results.rs @@ -93,7 +93,7 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver< duplicates_group = information.number_of_groups_by_name; entry_info.set_text(format!("Found {} files in {} groups which have same names.", duplicates_number, duplicates_group).as_str()); } - CheckingMethod::Hash | CheckingMethod::HashMb => { + CheckingMethod::Hash => { duplicates_number = information.number_of_duplicated_files_by_hash; duplicates_size = information.lost_space_by_hash; duplicates_group = information.number_of_groups_by_hash; @@ -164,7 +164,7 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver< } } } - CheckingMethod::Hash | CheckingMethod::HashMb => { + CheckingMethod::Hash => { let btreemap = df.get_files_sorted_by_hash(); for (size, vectors_vector) in btreemap.iter().rev() { diff --git a/czkawka_gui/src/connect_button_search.rs b/czkawka_gui/src/connect_button_search.rs index e4e2627..530600f 100644 --- a/czkawka_gui/src/connect_button_search.rs +++ b/czkawka_gui/src/connect_button_search.rs @@ -52,7 +52,6 @@ pub fn connect_button_search( let buttons_names = gui_data.bottom_buttons.buttons_names.clone(); let radio_button_duplicates_name = gui_data.main_notebook.radio_button_duplicates_name.clone(); let radio_button_duplicates_size = gui_data.main_notebook.radio_button_duplicates_size.clone(); - let radio_button_duplicates_hashmb = gui_data.main_notebook.radio_button_duplicates_hashmb.clone(); let radio_button_duplicates_hash = gui_data.main_notebook.radio_button_duplicates_hash.clone(); let scale_similarity_similar_images = gui_data.main_notebook.scale_similarity_similar_images.clone(); let scale_similarity_similar_videos = gui_data.main_notebook.scale_similarity_similar_videos.clone(); @@ -152,8 +151,6 @@ pub fn connect_button_search( check_method = duplicate::CheckingMethod::Name; } else if radio_button_duplicates_size.is_active() { check_method = duplicate::CheckingMethod::Size; - } else if radio_button_duplicates_hashmb.is_active() { - check_method = duplicate::CheckingMethod::HashMb; } else if radio_button_duplicates_hash.is_active() { check_method = duplicate::CheckingMethod::Hash; } else { diff --git a/czkawka_gui/src/connect_duplicate_buttons.rs b/czkawka_gui/src/connect_duplicate_buttons.rs new file mode 100644 index 0000000..e110be3 --- /dev/null +++ b/czkawka_gui/src/connect_duplicate_buttons.rs @@ -0,0 +1,21 @@ +use gtk::prelude::*; + +use crate::gui_data::GuiData; + +pub fn connect_duplicate_buttons(gui_data: &GuiData) { + let radio_button_duplicates_hash = gui_data.main_notebook.radio_button_duplicates_hash.clone(); + let radio_button_hash_type_blake3 = gui_data.main_notebook.radio_button_hash_type_blake3.clone(); + let radio_button_hash_type_xxh3 = gui_data.main_notebook.radio_button_hash_type_xxh3.clone(); + let radio_button_hash_type_crc32 = gui_data.main_notebook.radio_button_hash_type_crc32.clone(); + radio_button_duplicates_hash.connect_toggled(move |radio_button_duplicates_hash| { + if radio_button_duplicates_hash.is_active() { + radio_button_hash_type_blake3.set_sensitive(true); + radio_button_hash_type_xxh3.set_sensitive(true); + radio_button_hash_type_crc32.set_sensitive(true); + } else { + radio_button_hash_type_blake3.set_sensitive(false); + radio_button_hash_type_xxh3.set_sensitive(false); + radio_button_hash_type_crc32.set_sensitive(false); + } + }); +} diff --git a/czkawka_gui/src/connect_progress_window.rs b/czkawka_gui/src/connect_progress_window.rs index 92cd3b8..5a2ae50 100644 --- a/czkawka_gui/src/connect_progress_window.rs +++ b/czkawka_gui/src/connect_progress_window.rs @@ -33,7 +33,7 @@ pub fn connect_progress_window( let future = async move { while let Some(item) = futures_receiver_duplicate_files.next().await { match item.checking_method { - duplicate::CheckingMethod::Hash | duplicate::CheckingMethod::HashMb => { + duplicate::CheckingMethod::Hash => { label_stage.show(); match item.current_stage { // Checking Size diff --git a/czkawka_gui/src/connect_settings.rs b/czkawka_gui/src/connect_settings.rs index 3100fa2..9eca16d 100644 --- a/czkawka_gui/src/connect_settings.rs +++ b/czkawka_gui/src/connect_settings.rs @@ -102,20 +102,28 @@ pub fn connect_settings(gui_data: &GuiData) { dialog.connect_response(move |dialog, response_type| { if response_type == ResponseType::Ok { let mut messages: Messages = Messages::new(); - for type_of_hash in [HashType::Xxh3, HashType::Blake3, HashType::Crc32].iter() { - if let Some(cache_entries) = czkawka_core::duplicate::load_hashes_from_file(&mut messages, true, type_of_hash) { - let mut hashmap_to_save: BTreeMap = Default::default(); - for (_, vec_file_entry) in cache_entries { - for file_entry in vec_file_entry { - hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry); + for use_prehash in [true, false] { + for type_of_hash in [HashType::Xxh3, HashType::Blake3, HashType::Crc32].iter() { + if let Some(cache_entries) = czkawka_core::duplicate::load_hashes_from_file(&mut messages, true, type_of_hash, use_prehash) { + let mut hashmap_to_save: BTreeMap = Default::default(); + for (_, vec_file_entry) in cache_entries { + for file_entry in vec_file_entry { + hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry); + } } + czkawka_core::duplicate::save_hashes_to_file( + &hashmap_to_save, + &mut messages, + type_of_hash, + use_prehash, + entry_settings_cache_file_minimal_size.text().as_str().parse::().unwrap_or(2 * 1024 * 1024), + ) } - czkawka_core::duplicate::save_hashes_to_file(&hashmap_to_save, &mut messages, type_of_hash, entry_settings_cache_file_minimal_size.text().as_str().parse::().unwrap_or(2 * 1024 * 1024)) } - } - messages.messages.push("Properly cleared cache".to_string()); - text_view_errors.buffer().unwrap().set_text(messages.create_messages_text().as_str()); + messages.messages.push("Properly cleared cache".to_string()); + text_view_errors.buffer().unwrap().set_text(messages.create_messages_text().as_str()); + } } dialog.close(); }); diff --git a/czkawka_gui/src/gui_main_notebook.rs b/czkawka_gui/src/gui_main_notebook.rs index 1ee8155..4334c60 100644 --- a/czkawka_gui/src/gui_main_notebook.rs +++ b/czkawka_gui/src/gui_main_notebook.rs @@ -62,7 +62,6 @@ pub struct GuiMainNotebook { // Duplicates pub radio_button_duplicates_name: gtk::RadioButton, pub radio_button_duplicates_size: gtk::RadioButton, - pub radio_button_duplicates_hashmb: gtk::RadioButton, pub radio_button_duplicates_hash: gtk::RadioButton, pub scale_similarity_similar_images: gtk::Scale, @@ -173,9 +172,14 @@ impl GuiMainNotebook { //// Radio Buttons let radio_button_duplicates_name: gtk::RadioButton = builder.object("radio_button_duplicates_name").unwrap(); let radio_button_duplicates_size: gtk::RadioButton = builder.object("radio_button_duplicates_size").unwrap(); - let radio_button_duplicates_hashmb: gtk::RadioButton = builder.object("radio_button_duplicates_hashmb").unwrap(); let radio_button_duplicates_hash: gtk::RadioButton = builder.object("radio_button_duplicates_hash").unwrap(); + radio_button_duplicates_name.set_tooltip_text(Some("Finds files which have same name.\n\nThis mode not checking what file contain inside, so be carefully when using it.")); + radio_button_duplicates_size.set_tooltip_text(Some("Finds files which have same size.\n\nThis mode not checking what file contain inside, so be carefully when using it.")); + radio_button_duplicates_hash.set_tooltip_text(Some( + "Finds files which have the same content.\n\nThis mode hashes file and later compare this hashes to find duplicates.\n\nTool heavily uses cache, so second and further scans of same data should be a lot of faster that first.", + )); + let scale_similarity_similar_images: gtk::Scale = builder.object("scale_similarity_similar_images").unwrap(); let scale_similarity_similar_videos: gtk::Scale = builder.object("scale_similarity_similar_videos").unwrap(); @@ -248,7 +252,6 @@ impl GuiMainNotebook { check_button_music_year, radio_button_duplicates_name, radio_button_duplicates_size, - radio_button_duplicates_hashmb, radio_button_duplicates_hash, scale_similarity_similar_images, scale_similarity_similar_videos, diff --git a/czkawka_gui/src/main.rs b/czkawka_gui/src/main.rs index 2ac5155..027b221 100644 --- a/czkawka_gui/src/main.rs +++ b/czkawka_gui/src/main.rs @@ -16,6 +16,7 @@ use crate::connect_button_save::*; use crate::connect_button_search::*; use crate::connect_button_select::*; use crate::connect_button_stop::*; +use crate::connect_duplicate_buttons::*; use crate::connect_header_buttons::*; use crate::connect_hide_text_view_errors::*; use crate::connect_notebook_tabs::*; @@ -38,6 +39,7 @@ mod connect_button_save; mod connect_button_search; mod connect_button_select; mod connect_button_stop; +mod connect_duplicate_buttons; mod connect_header_buttons; mod connect_hide_text_view_errors; mod connect_notebook_tabs; @@ -119,6 +121,7 @@ fn main() { connect_button_stop(&gui_data); connect_button_hardlink_symlink(&gui_data); connect_button_move(&gui_data); + connect_duplicate_buttons(&gui_data); connect_notebook_tabs(&gui_data); connect_selection_of_directories(&gui_data); connect_popovers(&gui_data); diff --git a/czkawka_gui/ui/main_window.glade b/czkawka_gui/ui/main_window.glade index 0ce34c8..6c417b8 100644 --- a/czkawka_gui/ui/main_window.glade +++ b/czkawka_gui/ui/main_window.glade @@ -684,21 +684,6 @@ Author: RafaƂ Mikrut 1 - - - HashMb - True - True - False - True - radio_button_duplicates_hash - - - False - True - 2 - - Size