Remove HashMB mode (#476)

* Remove HashMB mode

* Add an explanation and remove the HashMB option from the GUI

* No need to handle everything
This commit is contained in:
Rafał Mikrut 2021-12-01 12:37:17 +01:00 committed by GitHub
parent 8c4c67e26f
commit 51271dcdf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 67 additions and 82 deletions

View File

@ -10,7 +10,7 @@ use czkawka_core::similar_images::SimilarityPreset;
#[derive(Debug, StructOpt)]
#[structopt(name = "czkawka", help_message = HELP_MESSAGE, template = HELP_TEMPLATE)]
pub enum Commands {
#[structopt(name = "dup", about = "Finds duplicate files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo")]
#[structopt(name = "dup", about = "Finds duplicate files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hash -f results.txt -D aeo")]
Duplicates {
#[structopt(flatten)]
directories: Directories,
@ -305,7 +305,6 @@ fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
"name" => Ok(CheckingMethod::Name),
"size" => Ok(CheckingMethod::Size),
"hash" => Ok(CheckingMethod::Hash),
"hashmb" => Ok(CheckingMethod::HashMb),
_ => Err("Couldn't parse the search method (allowed: NAME, SIZE, HASH, HASHMB)"),
}
}
@ -440,7 +439,7 @@ SUBCOMMANDS:
try "{usage} -h" to get more info about a specific tool
EXAMPLES:
{bin} dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hashmb -f results.txt -D aeo
{bin} dup -d /home/rafal -e /home/rafal/Obrazy -m 25 -x 7z rar IMAGE -s hash -f results.txt -D aeo
{bin} empty-folders -d /home/rafal/rr /home/gateway -f results.txt
{bin} big -d /home/rafal/ /home/piszczal -e /home/rafal/Roman -n 25 -x VIDEO -f results.txt
{bin} empty-files -d /home/rafal /home/szczekacz -e /home/rafal/Pulpit -R -f results.txt

View File

@ -27,8 +27,6 @@ use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::*;
const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB
#[derive(Debug)]
pub struct ProgressData {
pub checking_method: CheckingMethod,
@ -44,7 +42,6 @@ pub enum CheckingMethod {
Name,
Size,
Hash,
HashMb,
}
impl MyHasher for blake3::Hasher {
@ -198,7 +195,7 @@ impl DuplicateFinder {
return;
}
}
CheckingMethod::HashMb | CheckingMethod::Hash => {
CheckingMethod::Hash => {
if !self.check_files_size(stop_receiver, progress_sender) {
self.stopped_search = true;
return;
@ -499,7 +496,7 @@ impl DuplicateFinder {
let checking_method = self.check_method.clone();
let max_stage = match self.check_method {
CheckingMethod::Size => 0,
CheckingMethod::HashMb | CheckingMethod::Hash => 2,
CheckingMethod::Hash => 2,
_ => 255,
};
progress_thread_handle = thread::spawn(move || loop {
@ -796,35 +793,6 @@ impl DuplicateFinder {
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>, u64)>;
match self.check_method {
CheckingMethod::HashMb => {
full_hash_results = pre_checked_map
.par_iter()
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut bytes_read: u64 = 0;
let mut buffer = [0u8; 1024 * 128];
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for file_entry in vec_file_entry {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_breaked.store(true, Ordering::Relaxed);
return None;
}
match hash_calculation(&mut buffer, file_entry, &check_type, HASH_MB_LIMIT_BYTES) {
Ok((hash_string, bytes)) => {
bytes_read += bytes;
hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned());
}
Err(s) => errors.push(s),
}
}
Some((*size, hashmap_with_hash, errors, bytes_read))
})
.while_some()
.collect();
}
CheckingMethod::Hash => {
let loaded_hash_map;
@ -832,7 +800,7 @@ impl DuplicateFinder {
let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
if self.use_cache {
loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type) {
loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type, false) {
Some(t) => t,
None => Default::default(),
};
@ -934,7 +902,7 @@ impl DuplicateFinder {
}
}
}
save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, self.minimal_cache_file_size);
save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, false, self.minimal_cache_file_size);
}
}
_ => panic!("What"),
@ -995,7 +963,7 @@ impl DuplicateFinder {
self.information.number_of_failed_to_remove_files += tuple.2;
}
}
CheckingMethod::Hash | CheckingMethod::HashMb => {
CheckingMethod::Hash => {
for vector_vectors in self.files_with_identical_hashes.values() {
for vector in vector_vectors.iter() {
let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
@ -1154,7 +1122,7 @@ impl SaveResults for DuplicateFinder {
write!(writer, "Not found any duplicates.").unwrap();
}
}
CheckingMethod::Hash | CheckingMethod::HashMb => {
CheckingMethod::Hash => {
if !self.files_with_identical_hashes.is_empty() {
writeln!(writer, "-------------------------------------------------Files with same hashes-------------------------------------------------").unwrap();
writeln!(
@ -1209,7 +1177,7 @@ impl PrintResults for DuplicateFinder {
println!();
}
}
CheckingMethod::Hash | CheckingMethod::HashMb => {
CheckingMethod::Hash => {
for (_size, vector) in self.files_with_identical_hashes.iter() {
for j in vector {
number_of_files += j.len() as u64;
@ -1354,7 +1322,7 @@ pub fn make_hard_link(src: &Path, dst: &Path) -> io::Result<()> {
result
}
pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType, minimal_cache_file_size: u64) {
pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType, is_prehash: bool, minimal_cache_file_size: u64) {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
if cache_dir.exists() {
@ -1366,7 +1334,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
return;
}
let cache_file = cache_dir.join(get_file_hash_name(type_of_hash).as_str());
let cache_file = cache_dir.join(get_file_hash_name(type_of_hash, is_prehash).as_str());
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
Ok(t) => t,
Err(e) => {
@ -1419,14 +1387,15 @@ fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashT
Ok((hasher.finalize(), current_file_read_bytes))
}
fn get_file_hash_name(type_of_hash: &HashType) -> String {
format!("cache_duplicates_{:?}.txt", type_of_hash)
/// Builds the file name of the duplicate-hash cache for the given hash type.
///
/// The name has the form `cache_duplicates_<HashType Debug name>.txt`; when
/// `is_prehash` is `true`, `_prehash` is inserted before the extension.
fn get_file_hash_name(type_of_hash: &HashType, is_prehash: bool) -> String {
    let mut file_name = format!("cache_duplicates_{:?}", type_of_hash);
    if is_prehash {
        file_name.push_str("_prehash");
    }
    file_name.push_str(".txt");
    file_name
}
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, type_of_hash: &HashType) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, type_of_hash: &HashType, is_prehash: bool) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
let cache_file = cache_dir.join(get_file_hash_name(type_of_hash).as_str());
let cache_file = cache_dir.join(get_file_hash_name(type_of_hash, is_prehash).as_str());
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
Ok(t) => t,
Err(_inspected) => {

View File

@ -93,7 +93,7 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
duplicates_group = information.number_of_groups_by_name;
entry_info.set_text(format!("Found {} files in {} groups which have same names.", duplicates_number, duplicates_group).as_str());
}
CheckingMethod::Hash | CheckingMethod::HashMb => {
CheckingMethod::Hash => {
duplicates_number = information.number_of_duplicated_files_by_hash;
duplicates_size = information.lost_space_by_hash;
duplicates_group = information.number_of_groups_by_hash;
@ -164,7 +164,7 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
}
}
}
CheckingMethod::Hash | CheckingMethod::HashMb => {
CheckingMethod::Hash => {
let btreemap = df.get_files_sorted_by_hash();
for (size, vectors_vector) in btreemap.iter().rev() {

View File

@ -52,7 +52,6 @@ pub fn connect_button_search(
let buttons_names = gui_data.bottom_buttons.buttons_names.clone();
let radio_button_duplicates_name = gui_data.main_notebook.radio_button_duplicates_name.clone();
let radio_button_duplicates_size = gui_data.main_notebook.radio_button_duplicates_size.clone();
let radio_button_duplicates_hashmb = gui_data.main_notebook.radio_button_duplicates_hashmb.clone();
let radio_button_duplicates_hash = gui_data.main_notebook.radio_button_duplicates_hash.clone();
let scale_similarity_similar_images = gui_data.main_notebook.scale_similarity_similar_images.clone();
let scale_similarity_similar_videos = gui_data.main_notebook.scale_similarity_similar_videos.clone();
@ -152,8 +151,6 @@ pub fn connect_button_search(
check_method = duplicate::CheckingMethod::Name;
} else if radio_button_duplicates_size.is_active() {
check_method = duplicate::CheckingMethod::Size;
} else if radio_button_duplicates_hashmb.is_active() {
check_method = duplicate::CheckingMethod::HashMb;
} else if radio_button_duplicates_hash.is_active() {
check_method = duplicate::CheckingMethod::Hash;
} else {

View File

@ -0,0 +1,21 @@
use gtk::prelude::*;
use crate::gui_data::GuiData;
/// Keeps the hash-type radio buttons in sync with the "hash" duplicate-search mode.
///
/// Registers a `toggled` handler on `radio_button_duplicates_hash`. Each time that
/// button is toggled, the Blake3, XXH3 and CRC32 hash-type radio buttons are made
/// sensitive when the hash mode is active and insensitive otherwise.
pub fn connect_duplicate_buttons(gui_data: &GuiData) {
    let main_notebook = &gui_data.main_notebook;

    // Clone the widget handles so the closure below can own them (`move`).
    let hash_mode_button = main_notebook.radio_button_duplicates_hash.clone();
    let blake3_button = main_notebook.radio_button_hash_type_blake3.clone();
    let xxh3_button = main_notebook.radio_button_hash_type_xxh3.clone();
    let crc32_button = main_notebook.radio_button_hash_type_crc32.clone();

    hash_mode_button.connect_toggled(move |toggled_button| {
        // The hash-type choice only applies while the hash mode is selected.
        let hash_mode_selected = toggled_button.is_active();
        blake3_button.set_sensitive(hash_mode_selected);
        xxh3_button.set_sensitive(hash_mode_selected);
        crc32_button.set_sensitive(hash_mode_selected);
    });
}

View File

@ -33,7 +33,7 @@ pub fn connect_progress_window(
let future = async move {
while let Some(item) = futures_receiver_duplicate_files.next().await {
match item.checking_method {
duplicate::CheckingMethod::Hash | duplicate::CheckingMethod::HashMb => {
duplicate::CheckingMethod::Hash => {
label_stage.show();
match item.current_stage {
// Checking Size

View File

@ -102,20 +102,28 @@ pub fn connect_settings(gui_data: &GuiData) {
dialog.connect_response(move |dialog, response_type| {
if response_type == ResponseType::Ok {
let mut messages: Messages = Messages::new();
for type_of_hash in [HashType::Xxh3, HashType::Blake3, HashType::Crc32].iter() {
if let Some(cache_entries) = czkawka_core::duplicate::load_hashes_from_file(&mut messages, true, type_of_hash) {
let mut hashmap_to_save: BTreeMap<String, czkawka_core::duplicate::FileEntry> = Default::default();
for (_, vec_file_entry) in cache_entries {
for file_entry in vec_file_entry {
hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
for use_prehash in [true, false] {
for type_of_hash in [HashType::Xxh3, HashType::Blake3, HashType::Crc32].iter() {
if let Some(cache_entries) = czkawka_core::duplicate::load_hashes_from_file(&mut messages, true, type_of_hash, use_prehash) {
let mut hashmap_to_save: BTreeMap<String, czkawka_core::duplicate::FileEntry> = Default::default();
for (_, vec_file_entry) in cache_entries {
for file_entry in vec_file_entry {
hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
}
czkawka_core::duplicate::save_hashes_to_file(
&hashmap_to_save,
&mut messages,
type_of_hash,
use_prehash,
entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024),
)
}
czkawka_core::duplicate::save_hashes_to_file(&hashmap_to_save, &mut messages, type_of_hash, entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024))
}
}
messages.messages.push("Properly cleared cache".to_string());
text_view_errors.buffer().unwrap().set_text(messages.create_messages_text().as_str());
messages.messages.push("Properly cleared cache".to_string());
text_view_errors.buffer().unwrap().set_text(messages.create_messages_text().as_str());
}
}
dialog.close();
});

View File

@ -62,7 +62,6 @@ pub struct GuiMainNotebook {
// Duplicates
pub radio_button_duplicates_name: gtk::RadioButton,
pub radio_button_duplicates_size: gtk::RadioButton,
pub radio_button_duplicates_hashmb: gtk::RadioButton,
pub radio_button_duplicates_hash: gtk::RadioButton,
pub scale_similarity_similar_images: gtk::Scale,
@ -173,9 +172,14 @@ impl GuiMainNotebook {
//// Radio Buttons
let radio_button_duplicates_name: gtk::RadioButton = builder.object("radio_button_duplicates_name").unwrap();
let radio_button_duplicates_size: gtk::RadioButton = builder.object("radio_button_duplicates_size").unwrap();
let radio_button_duplicates_hashmb: gtk::RadioButton = builder.object("radio_button_duplicates_hashmb").unwrap();
let radio_button_duplicates_hash: gtk::RadioButton = builder.object("radio_button_duplicates_hash").unwrap();
radio_button_duplicates_name.set_tooltip_text(Some("Finds files which have same name.\n\nThis mode not checking what file contain inside, so be carefully when using it."));
radio_button_duplicates_size.set_tooltip_text(Some("Finds files which have same size.\n\nThis mode not checking what file contain inside, so be carefully when using it."));
radio_button_duplicates_hash.set_tooltip_text(Some(
"Finds files which have the same content.\n\nThis mode hashes file and later compare this hashes to find duplicates.\n\nTool heavily uses cache, so second and further scans of same data should be a lot of faster that first.",
));
let scale_similarity_similar_images: gtk::Scale = builder.object("scale_similarity_similar_images").unwrap();
let scale_similarity_similar_videos: gtk::Scale = builder.object("scale_similarity_similar_videos").unwrap();
@ -248,7 +252,6 @@ impl GuiMainNotebook {
check_button_music_year,
radio_button_duplicates_name,
radio_button_duplicates_size,
radio_button_duplicates_hashmb,
radio_button_duplicates_hash,
scale_similarity_similar_images,
scale_similarity_similar_videos,

View File

@ -16,6 +16,7 @@ use crate::connect_button_save::*;
use crate::connect_button_search::*;
use crate::connect_button_select::*;
use crate::connect_button_stop::*;
use crate::connect_duplicate_buttons::*;
use crate::connect_header_buttons::*;
use crate::connect_hide_text_view_errors::*;
use crate::connect_notebook_tabs::*;
@ -38,6 +39,7 @@ mod connect_button_save;
mod connect_button_search;
mod connect_button_select;
mod connect_button_stop;
mod connect_duplicate_buttons;
mod connect_header_buttons;
mod connect_hide_text_view_errors;
mod connect_notebook_tabs;
@ -119,6 +121,7 @@ fn main() {
connect_button_stop(&gui_data);
connect_button_hardlink_symlink(&gui_data);
connect_button_move(&gui_data);
connect_duplicate_buttons(&gui_data);
connect_notebook_tabs(&gui_data);
connect_selection_of_directories(&gui_data);
connect_popovers(&gui_data);

View File

@ -684,21 +684,6 @@ Author: Rafał Mikrut
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_duplicates_hashmb">
<property name="label" translatable="yes">HashMb</property>
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="receives-default">False</property>
<property name="draw-indicator">True</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">2</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_duplicates_size">
<property name="label" translatable="yes">Size</property>