Slightly speed up entry access in maps and fix missing extension workarounds (#747)

This commit is contained in:
Rafał Mikrut 2022-06-05 08:01:17 +02:00 committed by GitHub
parent ec13f86aee
commit 56fc29fa4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 36 additions and 55 deletions

View File

@ -33,7 +33,6 @@ static WORKAROUNDS: &[(&str, &str)] = &[
("exe", "bck"),
("exe", "com"),
("exe", "cpl"),
("exe", "cpl"),
("exe", "dll"),
("exe", "dll16"),
("exe", "drv"),
@ -48,7 +47,6 @@ static WORKAROUNDS: &[(&str, &str)] = &[
("exe", "orig"),
("exe", "signed"),
("exe", "sys"),
("exe", "sys"),
("exe", "tlb"),
("exe", "vxd"),
("exe", "winmd"),
@ -120,7 +118,7 @@ static WORKAROUNDS: &[(&str, &str)] = &[
("html", "svg"),
("xml", "html"),
// Probably bug in external library
("exe", "doc"), // Not sure why doc is not recognized
("msi", "doc"), // Not sure why doc is not recognized
("exe", "xls"), // Not sure why xls is not recognized
];
@ -318,9 +316,13 @@ impl BadExtensions {
mem::swap(&mut files_to_check, &mut self.files_to_check);
//// PROGRESS THREAD END
let mut hashmap_workarounds: HashMap<&str, &str> = Default::default();
let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default();
for (proper, found) in WORKAROUNDS {
hashmap_workarounds.insert(found, proper);
// This should be enabled when each item has only 1 possible workaround
// if hashmap_workarounds.contains_key(found) {
// panic!("Already have {} key", found);
// }
hashmap_workarounds.entry(found).or_insert_with(Vec::new).push(proper);
}
self.bad_extensions_files = files_to_check
@ -378,9 +380,12 @@ impl BadExtensions {
}
// Workarounds
if let Some(pre) = hashmap_workarounds.get(current_extension.as_str()) {
if all_available_extensions.contains(pre) {
all_available_extensions.insert(current_extension.as_str());
if let Some(vec_pre) = hashmap_workarounds.get(current_extension.as_str()) {
for pre in vec_pre {
if all_available_extensions.contains(pre) {
all_available_extensions.insert(current_extension.as_str());
break;
}
}
}
@ -400,7 +405,6 @@ impl BadExtensions {
return Some(None);
} else if current_extension.is_empty() {
if !include_files_without_extension {
println!("Empty extension which is disabled by settings");
return Some(None);
}
} else if all_available_extensions.take(&current_extension.as_str()).is_some() {

View File

@ -313,8 +313,7 @@ impl BigFile {
folders_to_check.extend(segment);
self.text_messages.warnings.extend(warnings);
for (size, fe) in fe_result {
old_map.entry(size).or_insert_with(Vec::new);
old_map.get_mut(&size).unwrap().push(fe);
old_map.entry(size).or_insert_with(Vec::new).push(fe);
}
}
}

View File

@ -568,16 +568,13 @@ impl DuplicateFinder {
let name = file_entry.path.to_string_lossy().to_string();
if !loaded_hash_map2.contains_key(&name) {
// If loaded data doesn't contain current image info
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone());
} else if file_entry.size != loaded_hash_map2.get(&name).unwrap().size || file_entry.modified_date != loaded_hash_map2.get(&name).unwrap().modified_date {
// When size or modification date of image changed, then it is clear that it is a different image
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone());
} else {
// Checking may be omitted when there is already an entry with the same size and modification date
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
records_already_cached.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry.clone());
}
}
}
@ -602,8 +599,7 @@ impl DuplicateFinder {
}
match hash_calculation(&mut buffer, file_entry, &check_type, 0) {
Ok(hash_string) => {
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone());
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new).push(file_entry.clone());
}
Err(s) => errors.push(s),
}
@ -624,8 +620,7 @@ impl DuplicateFinder {
// Add data from cache
for (size, vec_file_entry) in &records_already_cached {
pre_checked_map.entry(*size).or_insert_with(Vec::new);
pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone());
pre_checked_map.entry(*size).or_insert_with(Vec::new).append(&mut vec_file_entry.clone());
}
// Check results
@ -633,8 +628,7 @@ impl DuplicateFinder {
self.text_messages.warnings.append(&mut errors.clone());
for vec_file_entry in hash_map.values() {
if vec_file_entry.len() > 1 {
pre_checked_map.entry(*size).or_insert_with(Vec::new);
pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone());
pre_checked_map.entry(*size).or_insert_with(Vec::new).append(&mut vec_file_entry.clone());
}
}
}
@ -732,16 +726,14 @@ impl DuplicateFinder {
let mut found: bool = false;
for loaded_file_entry in loaded_vec_file_entry {
if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone());
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new).push(loaded_file_entry.clone());
found = true;
break;
}
}
if !found {
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry);
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry);
}
}
}
@ -768,8 +760,7 @@ impl DuplicateFinder {
match hash_calculation(&mut buffer, &file_entry, &check_type, u64::MAX) {
Ok(hash_string) => {
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry);
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new).push(file_entry);
}
Err(s) => errors.push(s),
}
@ -785,8 +776,7 @@ impl DuplicateFinder {
for (full_size, full_hashmap, _errors) in &mut full_hash_results {
if size == *full_size {
for file_entry in vec_file_entry {
full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new).push(file_entry);
}
continue 'main;
}
@ -794,8 +784,7 @@ impl DuplicateFinder {
// Size doesn't exist, add results to files
let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
for file_entry in vec_file_entry {
temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new).push(file_entry);
}
full_hash_results.push((size, temp_hashmap, Vec::new()));
}
@ -830,8 +819,7 @@ impl DuplicateFinder {
self.text_messages.warnings.append(&mut errors);
for (_hash, vec_file_entry) in hash_map {
if vec_file_entry.len() > 1 {
self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new);
self.files_with_identical_hashes.get_mut(&size).unwrap().push(vec_file_entry);
self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new).push(vec_file_entry);
}
}
}
@ -1374,8 +1362,7 @@ pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache
hash: uuu[3].to_string(),
symlink_info: None,
};
hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new);
hashmap_loaded_entries.get_mut(&file_entry.size).unwrap().push(file_entry);
hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new).push(file_entry);
}
}

View File

@ -552,8 +552,7 @@ impl SameMusic {
get_approximate_conversion(&mut thing);
}
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
for (_title, vec_file_entry) in hash_map {
@ -581,8 +580,7 @@ impl SameMusic {
get_approximate_conversion(&mut thing);
}
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
for (_title, vec_file_entry) in hash_map {
@ -607,8 +605,7 @@ impl SameMusic {
for file_entry in vec_file_entry {
let thing = file_entry.year.to_lowercase().trim().to_string();
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
for (_title, vec_file_entry) in hash_map {
@ -633,8 +630,7 @@ impl SameMusic {
for file_entry in vec_file_entry {
let thing = file_entry.length.to_lowercase().trim().to_string();
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
for (_title, vec_file_entry) in hash_map {
@ -659,8 +655,7 @@ impl SameMusic {
for file_entry in vec_file_entry {
let thing = file_entry.genre.to_lowercase().trim().to_string();
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
for (_title, vec_file_entry) in hash_map {
@ -686,8 +681,7 @@ impl SameMusic {
if file_entry.bitrate != 0 {
let thing = file_entry.bitrate.to_string();
if !thing.is_empty() {
hash_map.entry(thing.clone()).or_insert_with(Vec::new);
hash_map.get_mut(thing.as_str()).unwrap().push(file_entry);
hash_map.entry(thing.clone()).or_insert_with(Vec::new).push(file_entry);
}
}
}

View File

@ -631,8 +631,7 @@ impl SimilarImages {
for (file_entry, buf) in &vec_file_entry {
// Only use non-broken hashes for comparing (hashes of all 0s or all 255s mean the algorithm failed to decode them, e.g. because the image contains a lot of alpha channel)
if !(buf.is_empty() || buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new).push(file_entry.clone());
}
}

View File

@ -503,8 +503,7 @@ pub fn tree_remove(
model.remove(&iter);
map_with_path_to_delete.entry(path.clone()).or_insert_with(Vec::new);
map_with_path_to_delete.get_mut(path.as_str()).unwrap().push(file_name);
map_with_path_to_delete.entry(path.clone()).or_insert_with(Vec::new).push(file_name);
}
// Delete duplicated entries, and remove real files

View File

@ -311,8 +311,7 @@ impl LoadSaveStruct {
if line.starts_with("--") {
header = line.to_string();
} else if !header.is_empty() {
self.loaded_items.entry(header.clone()).or_insert_with(Vec::new);
self.loaded_items.get_mut(&header).unwrap().push(line.to_string());
self.loaded_items.entry(header.clone()).or_insert_with(Vec::new).push(line.to_string());
} else {
add_text_to_text_view(
text_view_errors,