diff --git a/Cargo.lock b/Cargo.lock index d6add7f..7ea9d35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -570,6 +570,7 @@ dependencies = [ "bitflags", "bk-tree", "blake3", + "crc32fast", "crossbeam-channel", "directories-next", "futures", @@ -579,6 +580,7 @@ dependencies = [ "img_hash", "rayon", "rodio", + "xxhash-rust", "zip", ] @@ -2746,6 +2748,12 @@ dependencies = [ "libc", ] +[[package]] +name = "xxhash-rust" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94b92e16d90ed01ad0736f1123137630b5bcef5f5bafee62468283e027c1c85d" + [[package]] name = "zip" version = "0.5.9" diff --git a/czkawka_core/Cargo.toml b/czkawka_core/Cargo.toml index ba94439..e56e060 100644 --- a/czkawka_core/Cargo.toml +++ b/czkawka_core/Cargo.toml @@ -11,7 +11,6 @@ repository = "https://github.com/qarmin/czkawka" [dependencies] humansize = "1" -blake3 = "0.3" rayon = "1" crossbeam-channel = "0.5.0" @@ -33,4 +32,9 @@ futures = "0.3.9" # Needed by broken files zip = "0.5.9" -rodio = "0.13.0" \ No newline at end of file +rodio = "0.13.0" + +# Hashes +blake3 = "0.3" +crc32fast = "1.2.1" +xxhash-rust = { version = "0.8.1", features = ["xxh3"] } diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index 56b4199..8cfca00 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -15,6 +15,7 @@ use crate::common_messages::Messages; use crate::common_traits::*; use directories_next::ProjectDirs; use rayon::prelude::*; +use std::hash::Hasher; use std::io::{BufReader, BufWriter}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; @@ -45,6 +46,8 @@ pub enum CheckingMethod { #[derive(PartialEq, Eq, Clone, Debug, Copy)] pub enum HashType { Blake3, + CRC32, + XXH3, } #[derive(Eq, PartialEq, Clone, Debug)] @@ -187,6 +190,10 @@ impl DuplicateFinder { &self.information } + pub fn set_hash_type(&mut self, hash_type: HashType) { + self.hash_type = hash_type; + } + pub fn set_check_method(&mut self, check_method: CheckingMethod) { self.check_method = check_method; } @@ -558,10 +565,6 @@ impl DuplicateFinder { /// The slowest checking type, which must be applied after checking for size fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::Sender>) -> bool { - if self.hash_type != HashType::Blake3 { - panic!(); // TODO Add more hash types - } - let check_type = Arc::new(self.hash_type); let start_time: SystemTime = SystemTime::now(); @@ -1326,6 +1329,36 @@ fn pre_hash_calculation(errors: &mut Vec, file_handler: &mut File, bytes Some(hasher.finalize().to_hex().to_string()) } + HashType::CRC32 => { + let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new(); + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + + Some(hasher.finalize().to_string()) + } + HashType::XXH3 => { + let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new(); + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + + Some(hasher.finish().to_string()) + } } } @@ -1358,6 +1391,60 @@ fn hashmb_calculation(errors: &mut Vec, file_handler: &mut File, bytes_r Some(hasher.finalize().to_hex().to_string()) } + HashType::CRC32 => { + let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new(); + let mut current_file_read_bytes: u64 = 0; + + loop { + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + if n == 0 { + break; + } + + current_file_read_bytes += n as u64; + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + + if current_file_read_bytes >= HASH_MB_LIMIT_BYTES { + break; + } + } + + Some(hasher.finalize().to_string()) + } + HashType::XXH3 => { + let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new(); + let mut current_file_read_bytes: u64 = 0; + + loop { + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + if n == 0 { + break; + } + + current_file_read_bytes += n as u64; + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + + if current_file_read_bytes >= HASH_MB_LIMIT_BYTES { + break; + } + } + + Some(hasher.finish().to_string()) + } } } @@ -1384,6 +1471,48 @@ fn hash_calculation(errors: &mut Vec, file_handler: &mut File, bytes_rea Some(hasher.finalize().to_hex().to_string()) } + HashType::CRC32 => { + let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new(); + + loop { + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + if n == 0 { + break; + } + + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + } + + Some(hasher.finalize().to_string()) + } + HashType::XXH3 => { + let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new(); + + loop { + let n = match file_handler.read(buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + return None; + } + }; + if n == 0 { + break; + } + + *bytes_read += n as u64; + hasher.update(&buffer[..n]); + } + + Some(hasher.finish().to_string()) + } } } diff --git a/czkawka_gui/czkawka.glade b/czkawka_gui/czkawka.glade index 4187edd..544617b 100644 --- a/czkawka_gui/czkawka.glade +++ b/czkawka_gui/czkawka.glade @@ -43,7 +43,7 @@ Author: Rafał Mikrut This program is free to use and will always be. Rafał Mikrut(qarmin) - data/icons/com.github.qarmin.czkawka.svg + system-search mit-x11 @@ -1341,6 +1341,38 @@ This program is free to use and will always be. 1 + + + CRC32 + True + True + False + True + True + radio_button_hash_type_blake3 + + + False + True + 2 + + + + + XXH3 + True + True + False + True + True + radio_button_hash_type_blake3 + + + False + True + 3 + + False diff --git a/czkawka_gui/src/connect_button_search.rs b/czkawka_gui/src/connect_button_search.rs index c143e23..b8a72dd 100644 --- a/czkawka_gui/src/connect_button_search.rs +++ b/czkawka_gui/src/connect_button_search.rs @@ -6,7 +6,7 @@ use crate::help_functions::*; use crate::notebook_enums::*; use czkawka_core::big_file::BigFile; use czkawka_core::broken_files::BrokenFiles; -use czkawka_core::duplicate::DuplicateFinder; +use czkawka_core::duplicate::{DuplicateFinder, HashType}; use czkawka_core::empty_files::EmptyFiles; use czkawka_core::empty_folder::EmptyFolder; use czkawka_core::invalid_symlinks::InvalidSymlinks; @@ -84,6 +84,9 @@ pub fn connect_button_search( let progress_bar_current_stage = gui_data.progress_window.progress_bar_current_stage.clone(); let progress_bar_all_stages = gui_data.progress_window.progress_bar_all_stages.clone(); let image_preview_similar_images = gui_data.main_notebook.image_preview_similar_images.clone(); + let radio_button_hash_type_blake3 = gui_data.main_notebook.radio_button_hash_type_blake3.clone(); + let radio_button_hash_type_crc32 = gui_data.main_notebook.radio_button_hash_type_crc32.clone(); + let radio_button_hash_type_xxh3 = gui_data.main_notebook.radio_button_hash_type_xxh3.clone(); buttons_search_clone.connect_clicked(move |_| { let included_directories = get_path_buf_from_vector_of_strings(get_string_from_list_store(&tree_view_included_directories)); @@ -135,6 +138,17 @@ pub fn connect_button_search( } let minimal_file_size = entry_duplicate_minimal_size.get_text().as_str().parse::().unwrap_or(1024); + let hash_type: HashType; + if radio_button_hash_type_blake3.get_active() { + hash_type = duplicate::HashType::Blake3; + } else if radio_button_hash_type_crc32.get_active() { + hash_type = duplicate::HashType::CRC32; + } else if radio_button_hash_type_xxh3.get_active() { + hash_type = duplicate::HashType::XXH3; + } else { + panic!("No radio button is pressed"); + } + let futures_sender_duplicate_files = futures_sender_duplicate_files.clone(); // Find duplicates thread::spawn(move || { @@ -146,6 +160,7 @@ pub fn connect_button_search( df.set_allowed_extensions(allowed_extensions); df.set_minimal_file_size(minimal_file_size); df.set_check_method(check_method); + df.set_hash_type(hash_type); df.find_duplicates(Some(&stop_receiver), Some(&futures_sender_duplicate_files)); let _ = glib_stop_sender.send(Message::Duplicates(df)); }); diff --git a/czkawka_gui/src/gui_main_notebook.rs b/czkawka_gui/src/gui_main_notebook.rs index 64b6357..571486f 100644 --- a/czkawka_gui/src/gui_main_notebook.rs +++ b/czkawka_gui/src/gui_main_notebook.rs @@ -53,6 +53,10 @@ pub struct GUIMainNotebook { pub radio_button_similar_images_high: gtk::RadioButton, pub radio_button_similar_images_very_high: gtk::RadioButton, + pub radio_button_hash_type_blake3: gtk::RadioButton, + pub radio_button_hash_type_crc32: gtk::RadioButton, + pub radio_button_hash_type_xxh3: gtk::RadioButton, + pub image_preview_similar_images: gtk::Image, } @@ -107,6 +111,10 @@ impl GUIMainNotebook { let radio_button_similar_images_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_high").unwrap(); let radio_button_similar_images_very_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_very_high").unwrap(); + let radio_button_hash_type_blake3: gtk::RadioButton = builder.get_object("radio_button_hash_type_blake3").unwrap(); + let radio_button_hash_type_crc32: gtk::RadioButton = builder.get_object("radio_button_hash_type_crc32").unwrap(); + let radio_button_hash_type_xxh3: gtk::RadioButton = builder.get_object("radio_button_hash_type_xxh3").unwrap(); + let image_preview_similar_images: gtk::Image = builder.get_object("image_preview_similar_images").unwrap(); Self { @@ -150,6 +158,9 @@ impl GUIMainNotebook { radio_button_similar_images_medium, radio_button_similar_images_high, radio_button_similar_images_very_high, + radio_button_hash_type_blake3, + radio_button_hash_type_crc32, + radio_button_hash_type_xxh3, image_preview_similar_images, } }