Fix problem with improper loading of cached results in duplicate mode (#1086)
* Fix problem with improper loading of cached results in duplicate mode * Dbg
This commit is contained in:
parent
0462324607
commit
e50d930683
12
Changelog.md
12
Changelog.md
|
@ -1,9 +1,17 @@
|
|||
## Version 6.1.0 - ?
|
||||
- BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure - [#1072](https://github.com/qarmin/czkawka/pull/1072)
|
||||
- BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure(all files needs to be hashed again) - [#1072](https://github.com/qarmin/czkawka/pull/1072)
|
||||
- Remove up to 170ms of delay after ending scan - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||
- Added logger with useful info when debugging app (level can be adjusted via e.g. `RUST_LOG=debug` env) - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||
- Core code cleanup - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||
- Core code cleanup - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070), [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||
- Updated list of bad extensions and support for finding invalid jar files - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||
- More default excluded items on Windows(like pagefile) - [#1074](https://github.com/qarmin/czkawka/pull/1074)
|
||||
- Unified printing/saving method to files/terminal and fixed some differences/bugs - [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||
- Uses fun_time library to print how much functions take time - [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||
- Added exporting results into json file format - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||
- Added new test/regression suite for CI - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||
- Added ability to use relative paths - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||
- Fixed stability problem that could remove an invalid file in CLI - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||
- Fixed problem with invalid cache loading - [#1086](https://github.com/qarmin/czkawka/pull/1086)
|
||||
- Fix Windows gui crashes by using gtk 4.6 instead 4.8 or 4.10 - [#992](https://github.com/qarmin/czkawka/pull/992)
|
||||
- Fixed printing info about duplicated music files - [#1016](https://github.com/qarmin/czkawka/pull/1016)
|
||||
- Fixed printing info about duplicated video files - [#1017](https://github.com/qarmin/czkawka/pull/1017)
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
- Temporary Files - Finds temporary files
|
||||
- Similar Images - Finds images which are not exactly the same (different resolution, watermarks)
|
||||
- Similar Videos - Looks for visually similar videos
|
||||
- Same Music - Searches for music with the same artist, album etc.
|
||||
- Same Music - Searches for similar music by tags or by reading content and comparing it
|
||||
- Invalid Symbolic Links - Shows symbolic links which point to non-existent files/directories
|
||||
- Broken Files - Finds files that are invalid or corrupted
|
||||
- Bad Extensions - Lists files whose content does not match their extension
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::fmt::Debug;
|
||||
use std::fs::File;
|
||||
|
@ -424,11 +425,14 @@ impl DuplicateFinder {
|
|||
debug!("prehash_load_cache_at_start - started diff between loaded and prechecked files");
|
||||
for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
|
||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||
// TODO maybe hashset is not needed when using < 4 elements
|
||||
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
||||
// TODO maybe hashmap is not needed when using < 4 elements
|
||||
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||
for file_entry in cached_vec_file_entry {
|
||||
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||
}
|
||||
for file_entry in vec_file_entry {
|
||||
if cached_path_entries.contains(&file_entry.path) {
|
||||
records_already_cached.entry(size).or_default().push(file_entry);
|
||||
if let Some(cached_file_entry) = cached_path_entries.remove(file_entry.path.as_path()) {
|
||||
records_already_cached.entry(size).or_default().push(cached_file_entry);
|
||||
} else {
|
||||
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||
}
|
||||
|
@ -508,7 +512,7 @@ impl DuplicateFinder {
|
|||
debug!("Starting calculating prehash");
|
||||
#[allow(clippy::type_complexity)]
|
||||
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||
.par_iter()
|
||||
.into_par_iter()
|
||||
.map(|(size, vec_file_entry)| {
|
||||
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
|
@ -519,15 +523,16 @@ impl DuplicateFinder {
|
|||
check_was_stopped.store(true, Ordering::Relaxed);
|
||||
return None;
|
||||
}
|
||||
for file_entry in vec_file_entry {
|
||||
match hash_calculation(&mut buffer, file_entry, &check_type, 0) {
|
||||
for mut file_entry in vec_file_entry {
|
||||
match hash_calculation(&mut buffer, &file_entry, &check_type, 0) {
|
||||
Ok(hash_string) => {
|
||||
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry.clone());
|
||||
file_entry.hash = hash_string.clone();
|
||||
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
|
||||
}
|
||||
Err(s) => errors.push(s),
|
||||
}
|
||||
}
|
||||
Some((*size, hashmap_with_hash, errors))
|
||||
Some((size, hashmap_with_hash, errors))
|
||||
})
|
||||
.while_some()
|
||||
.collect();
|
||||
|
@ -581,11 +586,14 @@ impl DuplicateFinder {
|
|||
debug!("full_hashing_load_cache_at_start - started diff between loaded and prechecked files");
|
||||
for (size, mut vec_file_entry) in pre_checked_map {
|
||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||
// TODO maybe hashset is not needed when using < 4 elements
|
||||
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
||||
// TODO maybe hashmap is not needed when using < 4 elements
|
||||
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||
for file_entry in cached_vec_file_entry {
|
||||
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||
}
|
||||
for file_entry in vec_file_entry {
|
||||
if cached_path_entries.contains(&file_entry.path) {
|
||||
records_already_cached.entry(size).or_default().push(file_entry);
|
||||
if let Some(cached_file_entry) = cached_path_entries.remove(file_entry.path.as_path()) {
|
||||
records_already_cached.entry(size).or_default().push(cached_file_entry);
|
||||
} else {
|
||||
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||
}
|
||||
|
|
|
@ -783,7 +783,7 @@ impl SimilarImages {
|
|||
// Validating if group contains duplicated results
|
||||
let mut result_hashset: HashSet<String> = Default::default();
|
||||
let mut found = false;
|
||||
// dbg!(collected_similar_images.len());
|
||||
|
||||
for vec_file_entry in collected_similar_images.values() {
|
||||
if vec_file_entry.is_empty() {
|
||||
println!("Empty group");
|
||||
|
@ -1338,7 +1338,6 @@ mod tests {
|
|||
|
||||
similar_images.find_similar_hashes(None, None);
|
||||
let res = similar_images.get_similar_images();
|
||||
// dbg!(&res);
|
||||
assert!(res.is_empty());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue