Fix improper loading of cached results in duplicate mode (#1086)
* Fix improper loading of cached results in duplicate mode * Dbg
This commit is contained in:
parent
0462324607
commit
e50d930683
12
Changelog.md
12
Changelog.md
|
@ -1,9 +1,17 @@
|
||||||
## Version 6.1.0 - ?
|
## Version 6.1.0 - ?
|
||||||
- BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure - [#1072](https://github.com/qarmin/czkawka/pull/1072)
|
- BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure (all files need to be hashed again) - [#1072](https://github.com/qarmin/czkawka/pull/1072)
|
||||||
- Remove up to 170ms of delay after ending scan - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
- Remove up to 170ms of delay after ending scan - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||||
- Added logger with useful info when debugging app (level can be adjusted via e.g. `RUST_LOG=debug` env) - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
- Added logger with useful info when debugging app (level can be adjusted via e.g. `RUST_LOG=debug` env) - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||||
- Core code cleanup - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
- Core code cleanup - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1070](https://github.com/qarmin/czkawka/pull/1070), [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||||
- Updated list of bad extensions and support for finding invalid jar files - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
- Updated list of bad extensions and support for finding invalid jar files - [#1070](https://github.com/qarmin/czkawka/pull/1070)
|
||||||
|
- More default excluded items on Windows (like pagefile) - [#1074](https://github.com/qarmin/czkawka/pull/1074)
|
||||||
|
- Unified printing/saving method to files/terminal and fixed some differences/bugs - [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||||
|
- Uses the fun_time library to print how long functions take to run - [#1082](https://github.com/qarmin/czkawka/pull/1082)
|
||||||
|
- Added exporting results into json file format - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||||
|
- Added new test/regression suite for CI - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||||
|
- Added ability to use relative paths - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||||
|
- Fixed stability problem, that could remove invalid file in CLI - [#1083](https://github.com/qarmin/czkawka/pull/1083)
|
||||||
|
- Fixed problem with invalid cache loading - [#1086](https://github.com/qarmin/czkawka/pull/1086)
|
||||||
- Fix Windows gui crashes by using gtk 4.6 instead of 4.8 or 4.10 - [#992](https://github.com/qarmin/czkawka/pull/992)
|
- Fix Windows gui crashes by using gtk 4.6 instead of 4.8 or 4.10 - [#992](https://github.com/qarmin/czkawka/pull/992)
|
||||||
- Fixed printing info about duplicated music files - [#1016](https://github.com/qarmin/czkawka/pull/1016)
|
- Fixed printing info about duplicated music files - [#1016](https://github.com/qarmin/czkawka/pull/1016)
|
||||||
- Fixed printing info about duplicated video files - [#1017](https://github.com/qarmin/czkawka/pull/1017)
|
- Fixed printing info about duplicated video files - [#1017](https://github.com/qarmin/czkawka/pull/1017)
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
- Temporary Files - Finds temporary files
|
- Temporary Files - Finds temporary files
|
||||||
- Similar Images - Finds images which are not exactly the same (different resolution, watermarks)
|
- Similar Images - Finds images which are not exactly the same (different resolution, watermarks)
|
||||||
- Similar Videos - Looks for visually similar videos
|
- Similar Videos - Looks for visually similar videos
|
||||||
- Same Music - Searches for music with the same artist, album etc.
|
- Same Music - Searches for similar music by tags or by reading content and comparing it
|
||||||
- Invalid Symbolic Links - Shows symbolic links which point to non-existent files/directories
|
- Invalid Symbolic Links - Shows symbolic links which point to non-existent files/directories
|
||||||
- Broken Files - Finds files that are invalid or corrupted
|
- Broken Files - Finds files that are invalid or corrupted
|
||||||
- Bad Extensions - Lists files whose content does not match their extension
|
- Bad Extensions - Lists files whose content does not match their extension
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::collections::{BTreeMap, HashSet};
|
use std::collections::{BTreeMap, HashSet};
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -424,11 +425,14 @@ impl DuplicateFinder {
|
||||||
debug!("prehash_load_cache_at_start - started diff between loaded and prechecked files");
|
debug!("prehash_load_cache_at_start - started diff between loaded and prechecked files");
|
||||||
for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
|
for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
|
||||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||||
// TODO maybe hashset is not needed when using < 4 elements
|
// TODO maybe hashmap is not needed when using < 4 elements
|
||||||
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||||
|
for file_entry in cached_vec_file_entry {
|
||||||
|
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||||
|
}
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
if cached_path_entries.contains(&file_entry.path) {
|
if let Some(cached_file_entry) = cached_path_entries.remove(file_entry.path.as_path()) {
|
||||||
records_already_cached.entry(size).or_default().push(file_entry);
|
records_already_cached.entry(size).or_default().push(cached_file_entry);
|
||||||
} else {
|
} else {
|
||||||
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||||
}
|
}
|
||||||
|
@ -508,7 +512,7 @@ impl DuplicateFinder {
|
||||||
debug!("Starting calculating prehash");
|
debug!("Starting calculating prehash");
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
|
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||||
.par_iter()
|
.into_par_iter()
|
||||||
.map(|(size, vec_file_entry)| {
|
.map(|(size, vec_file_entry)| {
|
||||||
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
||||||
let mut errors: Vec<String> = Vec::new();
|
let mut errors: Vec<String> = Vec::new();
|
||||||
|
@ -519,15 +523,16 @@ impl DuplicateFinder {
|
||||||
check_was_stopped.store(true, Ordering::Relaxed);
|
check_was_stopped.store(true, Ordering::Relaxed);
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
for file_entry in vec_file_entry {
|
for mut file_entry in vec_file_entry {
|
||||||
match hash_calculation(&mut buffer, file_entry, &check_type, 0) {
|
match hash_calculation(&mut buffer, &file_entry, &check_type, 0) {
|
||||||
Ok(hash_string) => {
|
Ok(hash_string) => {
|
||||||
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry.clone());
|
file_entry.hash = hash_string.clone();
|
||||||
|
hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry);
|
||||||
}
|
}
|
||||||
Err(s) => errors.push(s),
|
Err(s) => errors.push(s),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some((*size, hashmap_with_hash, errors))
|
Some((size, hashmap_with_hash, errors))
|
||||||
})
|
})
|
||||||
.while_some()
|
.while_some()
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -581,11 +586,14 @@ impl DuplicateFinder {
|
||||||
debug!("full_hashing_load_cache_at_start - started diff between loaded and prechecked files");
|
debug!("full_hashing_load_cache_at_start - started diff between loaded and prechecked files");
|
||||||
for (size, mut vec_file_entry) in pre_checked_map {
|
for (size, mut vec_file_entry) in pre_checked_map {
|
||||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||||
// TODO maybe hashset is not needed when using < 4 elements
|
// TODO maybe hashmap is not needed when using < 4 elements
|
||||||
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||||
|
for file_entry in cached_vec_file_entry {
|
||||||
|
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||||
|
}
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
if cached_path_entries.contains(&file_entry.path) {
|
if let Some(cached_file_entry) = cached_path_entries.remove(file_entry.path.as_path()) {
|
||||||
records_already_cached.entry(size).or_default().push(file_entry);
|
records_already_cached.entry(size).or_default().push(cached_file_entry);
|
||||||
} else {
|
} else {
|
||||||
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||||
}
|
}
|
||||||
|
|
|
@ -783,7 +783,7 @@ impl SimilarImages {
|
||||||
// Validating if group contains duplicated results
|
// Validating if group contains duplicated results
|
||||||
let mut result_hashset: HashSet<String> = Default::default();
|
let mut result_hashset: HashSet<String> = Default::default();
|
||||||
let mut found = false;
|
let mut found = false;
|
||||||
// dbg!(collected_similar_images.len());
|
|
||||||
for vec_file_entry in collected_similar_images.values() {
|
for vec_file_entry in collected_similar_images.values() {
|
||||||
if vec_file_entry.is_empty() {
|
if vec_file_entry.is_empty() {
|
||||||
println!("Empty group");
|
println!("Empty group");
|
||||||
|
@ -1338,7 +1338,6 @@ mod tests {
|
||||||
|
|
||||||
similar_images.find_similar_hashes(None, None);
|
similar_images.find_similar_hashes(None, None);
|
||||||
let res = similar_images.get_similar_images();
|
let res = similar_images.get_similar_images();
|
||||||
// dbg!(&res);
|
|
||||||
assert!(res.is_empty());
|
assert!(res.is_empty());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue