From 56fc29fa4f3f212776d728b0cee1a5e49bcb33df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= <41945903+qarmin@users.noreply.github.com> Date: Sun, 5 Jun 2022 08:01:17 +0200 Subject: [PATCH] Speedup little entry access in maps and fix missing extension workarounds (#747) --- czkawka_core/src/bad_extensions.rs | 22 ++++++----- czkawka_core/src/big_file.rs | 3 +- czkawka_core/src/duplicate.rs | 39 +++++++------------ czkawka_core/src/same_music.rs | 18 +++------ czkawka_core/src/similar_images.rs | 3 +- .../connect_things/connect_button_delete.rs | 3 +- czkawka_gui/src/saving_loading.rs | 3 +- 7 files changed, 36 insertions(+), 55 deletions(-) diff --git a/czkawka_core/src/bad_extensions.rs b/czkawka_core/src/bad_extensions.rs index fea0251..a5cc195 100644 --- a/czkawka_core/src/bad_extensions.rs +++ b/czkawka_core/src/bad_extensions.rs @@ -33,7 +33,6 @@ static WORKAROUNDS: &[(&str, &str)] = &[ ("exe", "bck"), ("exe", "com"), ("exe", "cpl"), - ("exe", "cpl"), ("exe", "dll"), ("exe", "dll16"), ("exe", "drv"), @@ -48,7 +47,6 @@ static WORKAROUNDS: &[(&str, &str)] = &[ ("exe", "orig"), ("exe", "signed"), ("exe", "sys"), - ("exe", "sys"), ("exe", "tlb"), ("exe", "vxd"), ("exe", "winmd"), @@ -120,7 +118,7 @@ static WORKAROUNDS: &[(&str, &str)] = &[ ("html", "svg"), ("xml", "html"), // Probably bug in external library - ("exe", "doc"), // Not sure whe doc is not recognized + ("msi", "doc"), // Not sure whe doc is not recognized ("exe", "xls"), // Not sure whe xls is not recognized ]; @@ -318,9 +316,13 @@ impl BadExtensions { mem::swap(&mut files_to_check, &mut self.files_to_check); //// PROGRESS THREAD END - let mut hashmap_workarounds: HashMap<&str, &str> = Default::default(); + let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default(); for (proper, found) in WORKAROUNDS { - hashmap_workarounds.insert(found, proper); + // This should be enabled when items will have only 1 possible workaround items + // if hashmap_workarounds.contains_key(found) { + // panic!("Already have {} key", found); + // } + hashmap_workarounds.entry(found).or_insert_with(Vec::new).push(proper); } self.bad_extensions_files = files_to_check @@ -378,9 +380,12 @@ impl BadExtensions { } // Workarounds - if let Some(pre) = hashmap_workarounds.get(current_extension.as_str()) { - if all_available_extensions.contains(pre) { - all_available_extensions.insert(current_extension.as_str()); + if let Some(vec_pre) = hashmap_workarounds.get(current_extension.as_str()) { + for pre in vec_pre { + if all_available_extensions.contains(pre) { + all_available_extensions.insert(current_extension.as_str()); + break; + } } } @@ -400,7 +405,6 @@ impl BadExtensions { return Some(None); } else if current_extension.is_empty() { if !include_files_without_extension { - println!("Empty extension which is disabled by settings"); return Some(None); } } else if all_available_extensions.take(¤t_extension.as_str()).is_some() { diff --git a/czkawka_core/src/big_file.rs b/czkawka_core/src/big_file.rs index ca0deaa..6824fa8 100644 --- a/czkawka_core/src/big_file.rs +++ b/czkawka_core/src/big_file.rs @@ -313,8 +313,7 @@ impl BigFile { folders_to_check.extend(segment); self.text_messages.warnings.extend(warnings); for (size, fe) in fe_result { - old_map.entry(size).or_insert_with(Vec::new); - old_map.get_mut(&size).unwrap().push(fe); + old_map.entry(size).or_insert_with(Vec::new).push(fe); } } } diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index 33edac9..cdadcf2 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -568,16 +568,13 @@ impl DuplicateFinder { let name = file_entry.path.to_string_lossy().to_string(); if !loaded_hash_map2.contains_key(&name) { // If loaded data doesn't contains current image info - non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new); - non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone()); + non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone()); } else if file_entry.size != loaded_hash_map2.get(&name).unwrap().size || file_entry.modified_date != loaded_hash_map2.get(&name).unwrap().modified_date { // When size or modification date of image changed, then it is clear that is different image - non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new); - non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone()); + non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone()); } else { // Checking may be omitted when already there is entry with same size and modification date - records_already_cached.entry(file_entry.size).or_insert_with(Vec::new); - records_already_cached.get_mut(&file_entry.size).unwrap().push(file_entry.clone()); + records_already_cached.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone()); } } } @@ -602,8 +599,7 @@ impl DuplicateFinder { } match hash_calculation(&mut buffer, file_entry, &check_type, 0) { Ok(hash_string) => { - hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new); - hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone()); + hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new).push(file_entry.clone()); } Err(s) => errors.push(s), } @@ -624,8 +620,7 @@ impl DuplicateFinder { // Add data from cache for (size, vec_file_entry) in &records_already_cached { - pre_checked_map.entry(*size).or_insert_with(Vec::new); - pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone()); + pre_checked_map.entry(*size).or_insert_with(Vec::new).append(&mut vec_file_entry.clone()); } // Check results @@ -633,8 +628,7 @@ impl DuplicateFinder { self.text_messages.warnings.append(&mut errors.clone()); for vec_file_entry in hash_map.values() { if vec_file_entry.len() > 1 { - pre_checked_map.entry(*size).or_insert_with(Vec::new); - pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone()); + pre_checked_map.entry(*size).or_insert_with(Vec::new).append(&mut vec_file_entry.clone()); } } } @@ -732,16 +726,14 @@ impl DuplicateFinder { let mut found: bool = false; for loaded_file_entry in loaded_vec_file_entry { if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date { - records_already_cached.entry(file_entry.size).or_insert_with(Vec::new); - records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone()); + records_already_cached.entry(file_entry.size).or_insert_with(Vec::new).push(loaded_file_entry.clone()); found = true; break; } } if !found { - non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new); - non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry); + non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry); } } } @@ -768,8 +760,7 @@ impl DuplicateFinder { match hash_calculation(&mut buffer, &file_entry, &check_type, u64::MAX) { Ok(hash_string) => { file_entry.hash = hash_string.clone(); - hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new); - hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry); + hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new).push(file_entry); } Err(s) => errors.push(s), } @@ -785,8 +776,7 @@ impl DuplicateFinder { for (full_size, full_hashmap, _errors) in &mut full_hash_results { if size == *full_size { for file_entry in vec_file_entry { - full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new); - full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry); + full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new).push(file_entry); } continue 'main; } @@ -794,8 +784,7 @@ impl DuplicateFinder { // Size doesn't exists add results to files let mut temp_hashmap: BTreeMap> = Default::default(); for file_entry in vec_file_entry { - temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new); - temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry); + temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new).push(file_entry); } full_hash_results.push((size, temp_hashmap, Vec::new())); } @@ -830,8 +819,7 @@ impl DuplicateFinder { self.text_messages.warnings.append(&mut errors); for (_hash, vec_file_entry) in hash_map { if vec_file_entry.len() > 1 { - self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new); - self.files_with_identical_hashes.get_mut(&size).unwrap().push(vec_file_entry); + self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new).push(vec_file_entry); } } } @@ -1374,8 +1362,7 @@ pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache hash: uuu[3].to_string(), symlink_info: None, }; - hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new); - hashmap_loaded_entries.get_mut(&file_entry.size).unwrap().push(file_entry); + hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry); } } diff --git a/czkawka_core/src/same_music.rs b/czkawka_core/src/same_music.rs index 1e581f8..18c3858 100644 --- a/czkawka_core/src/same_music.rs +++ b/czkawka_core/src/same_music.rs @@ -552,8 +552,7 @@ impl SameMusic { get_approximate_conversion(&mut thing); } if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } for (_title, vec_file_entry) in hash_map { @@ -581,8 +580,7 @@ impl SameMusic { get_approximate_conversion(&mut thing); } if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } for (_title, vec_file_entry) in hash_map { @@ -607,8 +605,7 @@ impl SameMusic { for file_entry in vec_file_entry { let thing = file_entry.year.to_lowercase().trim().to_string(); if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } for (_title, vec_file_entry) in hash_map { @@ -633,8 +630,7 @@ impl SameMusic { for file_entry in vec_file_entry { let thing = file_entry.length.to_lowercase().trim().to_string(); if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } for (_title, vec_file_entry) in hash_map { @@ -659,8 +655,7 @@ impl SameMusic { for file_entry in vec_file_entry { let thing = file_entry.genre.to_lowercase().trim().to_string(); if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } for (_title, vec_file_entry) in hash_map { @@ -686,8 +681,7 @@ impl SameMusic { if file_entry.bitrate != 0 { let thing = file_entry.bitrate.to_string(); if !thing.is_empty() { - hash_map.entry(thing.clone()).or_insert_with(Vec::new); - hash_map.get_mut(thing.as_str()).unwrap().push(file_entry); + hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry); } } } diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs index 7fb0551..2c81090 100644 --- a/czkawka_core/src/similar_images.rs +++ b/czkawka_core/src/similar_images.rs @@ -631,8 +631,7 @@ impl SimilarImages { for (file_entry, buf) in &vec_file_entry { // Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. contains a log of alpha channel) if !(buf.is_empty() || buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) { - self.image_hashes.entry(buf.clone()).or_insert_with(Vec::::new); - self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone()); + self.image_hashes.entry(buf.clone()).or_insert_with(Vec::::new).push(file_entry.clone()); } } diff --git a/czkawka_gui/src/connect_things/connect_button_delete.rs b/czkawka_gui/src/connect_things/connect_button_delete.rs index 5f48578..cf79709 100644 --- a/czkawka_gui/src/connect_things/connect_button_delete.rs +++ b/czkawka_gui/src/connect_things/connect_button_delete.rs @@ -503,8 +503,7 @@ pub fn tree_remove( model.remove(&iter); - map_with_path_to_delete.entry(path.clone()).or_insert_with(Vec::new); - map_with_path_to_delete.get_mut(path.as_str()).unwrap().push(file_name); + map_with_path_to_delete.entry(path.clone()).or_insert_with(Vec::new).push(file_name); } // Delete duplicated entries, and remove real files diff --git a/czkawka_gui/src/saving_loading.rs b/czkawka_gui/src/saving_loading.rs index c7f2bd0..8de7b40 100644 --- a/czkawka_gui/src/saving_loading.rs +++ b/czkawka_gui/src/saving_loading.rs @@ -311,8 +311,7 @@ impl LoadSaveStruct { if line.starts_with("--") { header = line.to_string(); } else if !header.is_empty() { - self.loaded_items.entry(header.clone()).or_insert_with(Vec::new); - self.loaded_items.get_mut(&header).unwrap().push(line.to_string()); + self.loaded_items.entry(header.clone()).or_insert_with(Vec::new).push(line.to_string()); } else { add_text_to_text_view( text_view_errors,