Add support for CRC32 and XXH3 hash (#243)

This commit is contained in:
Rafał Mikrut 2021-02-03 19:59:06 +01:00 committed by GitHub
parent 7d8334bb0c
commit 9f3da0e70f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 207 additions and 8 deletions

8
Cargo.lock generated
View File

@ -570,6 +570,7 @@ dependencies = [
"bitflags",
"bk-tree",
"blake3",
"crc32fast",
"crossbeam-channel",
"directories-next",
"futures",
@ -579,6 +580,7 @@ dependencies = [
"img_hash",
"rayon",
"rodio",
"xxhash-rust",
"zip",
]
@ -2746,6 +2748,12 @@ dependencies = [
"libc",
]
[[package]]
name = "xxhash-rust"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94b92e16d90ed01ad0736f1123137630b5bcef5f5bafee62468283e027c1c85d"
[[package]]
name = "zip"
version = "0.5.9"

View File

@ -11,7 +11,6 @@ repository = "https://github.com/qarmin/czkawka"
[dependencies]
humansize = "1"
blake3 = "0.3"
rayon = "1"
crossbeam-channel = "0.5.0"
@ -33,4 +32,9 @@ futures = "0.3.9"
# Needed by broken files
zip = "0.5.9"
rodio = "0.13.0"
rodio = "0.13.0"
# Hashes
blake3 = "0.3"
crc32fast = "1.2.1"
xxhash-rust = { version = "0.8.1", features = ["xxh3"] }

View File

@ -15,6 +15,7 @@ use crate::common_messages::Messages;
use crate::common_traits::*;
use directories_next::ProjectDirs;
use rayon::prelude::*;
use std::hash::Hasher;
use std::io::{BufReader, BufWriter};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
@ -45,6 +46,8 @@ pub enum CheckingMethod {
/// Hash algorithm that can be selected for hashing file contents.
///
/// The value is a plain, field-less tag: it is cheap to copy, can be compared
/// for equality and printed with `{:?}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashType {
    /// BLAKE3 hash.
    Blake3,
    /// CRC32 checksum.
    CRC32,
    /// XXH3 hash.
    XXH3,
}
#[derive(Eq, PartialEq, Clone, Debug)]
@ -187,6 +190,10 @@ impl DuplicateFinder {
&self.information
}
/// Selects the hash algorithm by storing `hash_type` in `self.hash_type`.
///
/// The previously stored value is overwritten; nothing else is touched.
pub fn set_hash_type(&mut self, hash_type: HashType) {
self.hash_type = hash_type;
}
/// Selects the checking method by storing `check_method` in `self.check_method`.
///
/// The previously stored value is overwritten; nothing else is touched.
pub fn set_check_method(&mut self, check_method: CheckingMethod) {
self.check_method = check_method;
}
@ -558,10 +565,6 @@ impl DuplicateFinder {
/// The slowest checking type, which must be applied after checking for size
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::Sender<ProgressData>>) -> bool {
if self.hash_type != HashType::Blake3 {
panic!(); // TODO Add more hash types
}
let check_type = Arc::new(self.hash_type);
let start_time: SystemTime = SystemTime::now();
@ -1326,6 +1329,36 @@ fn pre_hash_calculation(errors: &mut Vec<String>, file_handler: &mut File, bytes
Some(hasher.finalize().to_hex().to_string())
}
HashType::CRC32 => {
let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new();
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
Some(hasher.finalize().to_string())
}
HashType::XXH3 => {
let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new();
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
Some(hasher.finish().to_string())
}
}
}
@ -1358,6 +1391,60 @@ fn hashmb_calculation(errors: &mut Vec<String>, file_handler: &mut File, bytes_r
Some(hasher.finalize().to_hex().to_string())
}
HashType::CRC32 => {
let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new();
let mut current_file_read_bytes: u64 = 0;
loop {
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
if n == 0 {
break;
}
current_file_read_bytes += n as u64;
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
if current_file_read_bytes >= HASH_MB_LIMIT_BYTES {
break;
}
}
Some(hasher.finalize().to_string())
}
HashType::XXH3 => {
let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new();
let mut current_file_read_bytes: u64 = 0;
loop {
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
if n == 0 {
break;
}
current_file_read_bytes += n as u64;
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
if current_file_read_bytes >= HASH_MB_LIMIT_BYTES {
break;
}
}
Some(hasher.finish().to_string())
}
}
}
@ -1384,6 +1471,48 @@ fn hash_calculation(errors: &mut Vec<String>, file_handler: &mut File, bytes_rea
Some(hasher.finalize().to_hex().to_string())
}
HashType::CRC32 => {
let mut hasher: crc32fast::Hasher = crc32fast::Hasher::new();
loop {
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
if n == 0 {
break;
}
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
}
Some(hasher.finalize().to_string())
}
HashType::XXH3 => {
let mut hasher: xxhash_rust::xxh3::Xxh3 = xxhash_rust::xxh3::Xxh3::new();
loop {
let n = match file_handler.read(buffer) {
Ok(t) => t,
Err(_) => {
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
return None;
}
};
if n == 0 {
break;
}
*bytes_read += n as u64;
hasher.update(&buffer[..n]);
}
Some(hasher.finish().to_string())
}
}
}

View File

@ -43,7 +43,7 @@ Author: Rafał Mikrut
This program is free to use and will always be.
</property>
<property name="authors">Rafał Mikrut(qarmin)</property>
<property name="logo">data/icons/com.github.qarmin.czkawka.svg</property>
<property name="logo_icon_name">system-search</property>
<property name="license_type">mit-x11</property>
<child internal-child="vbox">
<object class="GtkBox">
@ -1341,6 +1341,38 @@ This program is free to use and will always be.
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hash_type_crc32">
<property name="label" translatable="yes">CRC32</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash_type_blake3</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">2</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hash_type_xxh3">
<property name="label" translatable="yes">XXH3</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash_type_blake3</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">3</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>

View File

@ -6,7 +6,7 @@ use crate::help_functions::*;
use crate::notebook_enums::*;
use czkawka_core::big_file::BigFile;
use czkawka_core::broken_files::BrokenFiles;
use czkawka_core::duplicate::DuplicateFinder;
use czkawka_core::duplicate::{DuplicateFinder, HashType};
use czkawka_core::empty_files::EmptyFiles;
use czkawka_core::empty_folder::EmptyFolder;
use czkawka_core::invalid_symlinks::InvalidSymlinks;
@ -84,6 +84,9 @@ pub fn connect_button_search(
let progress_bar_current_stage = gui_data.progress_window.progress_bar_current_stage.clone();
let progress_bar_all_stages = gui_data.progress_window.progress_bar_all_stages.clone();
let image_preview_similar_images = gui_data.main_notebook.image_preview_similar_images.clone();
let radio_button_hash_type_blake3 = gui_data.main_notebook.radio_button_hash_type_blake3.clone();
let radio_button_hash_type_crc32 = gui_data.main_notebook.radio_button_hash_type_crc32.clone();
let radio_button_hash_type_xxh3 = gui_data.main_notebook.radio_button_hash_type_xxh3.clone();
buttons_search_clone.connect_clicked(move |_| {
let included_directories = get_path_buf_from_vector_of_strings(get_string_from_list_store(&tree_view_included_directories));
@ -135,6 +138,17 @@ pub fn connect_button_search(
}
let minimal_file_size = entry_duplicate_minimal_size.get_text().as_str().parse::<u64>().unwrap_or(1024);
let hash_type: HashType;
if radio_button_hash_type_blake3.get_active() {
hash_type = duplicate::HashType::Blake3;
} else if radio_button_hash_type_crc32.get_active() {
hash_type = duplicate::HashType::CRC32;
} else if radio_button_hash_type_xxh3.get_active() {
hash_type = duplicate::HashType::XXH3;
} else {
panic!("No radio button is pressed");
}
let futures_sender_duplicate_files = futures_sender_duplicate_files.clone();
// Find duplicates
thread::spawn(move || {
@ -146,6 +160,7 @@ pub fn connect_button_search(
df.set_allowed_extensions(allowed_extensions);
df.set_minimal_file_size(minimal_file_size);
df.set_check_method(check_method);
df.set_hash_type(hash_type);
df.find_duplicates(Some(&stop_receiver), Some(&futures_sender_duplicate_files));
let _ = glib_stop_sender.send(Message::Duplicates(df));
});

View File

@ -53,6 +53,10 @@ pub struct GUIMainNotebook {
pub radio_button_similar_images_high: gtk::RadioButton,
pub radio_button_similar_images_very_high: gtk::RadioButton,
pub radio_button_hash_type_blake3: gtk::RadioButton,
pub radio_button_hash_type_crc32: gtk::RadioButton,
pub radio_button_hash_type_xxh3: gtk::RadioButton,
pub image_preview_similar_images: gtk::Image,
}
@ -107,6 +111,10 @@ impl GUIMainNotebook {
let radio_button_similar_images_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_high").unwrap();
let radio_button_similar_images_very_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_very_high").unwrap();
let radio_button_hash_type_blake3: gtk::RadioButton = builder.get_object("radio_button_hash_type_blake3").unwrap();
let radio_button_hash_type_crc32: gtk::RadioButton = builder.get_object("radio_button_hash_type_crc32").unwrap();
let radio_button_hash_type_xxh3: gtk::RadioButton = builder.get_object("radio_button_hash_type_xxh3").unwrap();
let image_preview_similar_images: gtk::Image = builder.get_object("image_preview_similar_images").unwrap();
Self {
@ -150,6 +158,9 @@ impl GUIMainNotebook {
radio_button_similar_images_medium,
radio_button_similar_images_high,
radio_button_similar_images_very_high,
radio_button_hash_type_blake3,
radio_button_hash_type_crc32,
radio_button_hash_type_xxh3,
image_preview_similar_images,
}
}