diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs
index 7e637f6..a6ceac1 100644
--- a/czkawka_core/src/similar_images.rs
+++ b/czkawka_core/src/similar_images.rs
@@ -451,7 +451,17 @@ impl SimilarImages {
 
                 let image = match image::open(file_entry.path.clone()) {
                     Ok(t) => t,
-                    Err(_inspected) => return Some(None), // Something is wrong with image
+                    // Err(_inspected) => return Some(None), // Something is wrong with image,
+                    // For broken images an empty (all-zero) hash is used, because without it they would be rescanned on every run (the missing cache entry is what triggers the rescan)
+                    // This may (very rarely) cause problems, e.g. when a file was previously unreadable due to lack of permissions but is available now
+                    Err(_inspected) => {
+                        let mut buf = Vec::new();
+                        for _i in 0..(self.hash_size * self.hash_size / 8) {
+                            buf.push(0);
+                        }
+                        file_entry.hash = buf.clone();
+                        return Some(Some((file_entry, buf)));
+                    }
                 };
 
                 let dimensions = image.dimensions();
@@ -463,16 +473,6 @@ impl SimilarImages {
                 let hash = hasher.hash_image(&image);
                 let buf: Vec<u8> = hash.as_bytes().to_vec();
 
-                // Images with hashes with full of 0 or 255 usually means that algorithm fails to decode them because e.g. contains a log of alpha channel
-                {
-                    if buf.iter().all(|e| *e == 0) {
-                        return Some(None);
-                    }
-                    if buf.iter().all(|e| *e == 255) {
-                        return Some(None);
-                    }
-                }
-
                 file_entry.hash = buf.clone();
 
                 Some(Some((file_entry, buf)))
@@ -495,9 +495,12 @@ impl SimilarImages {
         }
 
         for (file_entry, buf) in &vec_file_entry {
-            self.bktree.add(buf.clone());
-            self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
-            self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
+            // Only use non-broken hashes for comparing (hashes of all 0s or all 255s usually mean that the algorithm failed to decode the image, e.g. because it contains a lot of alpha channel)
+            if !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
+                self.bktree.add(buf.clone());
+                self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
+                self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
+            }
         }
 
         if self.use_cache {
@@ -753,9 +756,9 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
 
         string += format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
 
-        for i in 0..file_entry.hash.len() - 1 {
+        for hash in &file_entry.hash {
             string.push_str("//");
-            string.push_str(file_entry.hash[i].to_string().as_str());
+            string.push_str(hash.to_string().as_str());
         }
 
         if let Err(e) = writeln!(writer, "{}", string) {
@@ -800,8 +803,8 @@ fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg:
                     index + 1,
                     line,
                     cache_file.display(),
-                    uuu.len(),
-                    number_of_results + 4
+                    number_of_results + 4,
+                    uuu.len()
                 ));
                 continue;
             }
@@ -820,22 +823,6 @@ fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg:
                     });
             }
 
-            #[cfg(debug_assertions)]
-            {
-                let mut have_at_least: u8 = 0;
-                for i in hash.iter() {
-                    if *i == 0 {
-                        have_at_least += 1;
-                    }
-                }
-                if have_at_least == hash.len() as u8 {
-                    println!("ERROR START - {}", line);
-                    println!("have_at_least == hash.len() as u8");
-                    println!("ERROR END hash.len() - {} == have_at_least - {}", hash.len(), have_at_least);
-                    continue; // Just skip this entry, it is very very unlikelly that something have this hash, but if it has, then just ignore it
-                }
-            }
-
             hashmap_loaded_entries.insert(
                 uuu[0].to_string(),
                 FileEntry {
diff --git a/czkawka_core/src/similar_videos.rs b/czkawka_core/src/similar_videos.rs
index 8834ed8..3087de0 100644
--- a/czkawka_core/src/similar_videos.rs
+++ b/czkawka_core/src/similar_videos.rs
@@ -27,6 +27,8 @@ use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
 
 pub const MAX_TOLERANCE: i32 = 20;
 
+const HASH_SIZE: usize = 19;
+
 #[derive(Debug)]
 pub struct ProgressData {
     pub current_stage: u8,
@@ -41,6 +43,7 @@ pub struct FileEntry {
     pub size: u64,
     pub modified_date: u64,
     pub vhash: VideoHash,
+    pub error: String,
 }
 
 /// Distance metric to use with the BK-tree.
@@ -321,6 +324,7 @@ impl SimilarVideos {
                             } // Permissions Denied
                         },
                         vhash: Default::default(),
+                        error: "".to_string(),
                     };
 
                     self.videos_to_check.insert(current_file_name.to_string_lossy().to_string(), fe);
@@ -400,7 +404,7 @@ impl SimilarVideos {
             progress_thread_handle = thread::spawn(|| {});
         }
         //// PROGRESS THREAD END
-        let old_vec_file_entry: Vec<Result<FileEntry, String>> = non_cached_files_to_check
+        let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
             .par_iter()
             .map(|file_entry| {
                 atomic_file_counter.fetch_add(1, Ordering::Relaxed);
@@ -412,30 +416,25 @@ impl SimilarVideos {
 
                 let vhash = match VideoHash::from_path(&file_entry.path) {
                     Ok(t) => t,
-                    Err(e) => return Some(Err(format!("Failed to hash file, {}", e))),
+                    Err(e) => {
+                        return {
+                            file_entry.error = format!("Failed to hash file, {}", e);
+                            Some(file_entry)
+                        }
+                    }
                 };
 
                 file_entry.vhash = vhash;
 
-                Some(Ok(file_entry))
+                Some(file_entry)
             })
             .while_some()
-            .collect::<Vec<Result<FileEntry, String>>>();
+            .collect::<Vec<FileEntry>>();
 
         // End thread which send info to gui
         progress_thread_run.store(false, Ordering::Relaxed);
         progress_thread_handle.join().unwrap();
 
-        let mut vec_file_entry = Vec::new();
-        for result in old_vec_file_entry {
-            match result {
-                Ok(t) => vec_file_entry.push(t),
-                Err(e) => {
-                    self.text_messages.errors.push(e);
-                }
-            }
-        }
-
         Common::print_time(hash_map_modification, SystemTime::now(), "sort_videos - reading data from files in parallel".to_string());
         let hash_map_modification = SystemTime::now();
@@ -446,9 +445,14 @@ impl SimilarVideos {
         let mut hashmap_with_file_entries: HashMap<String, FileEntry> = Default::default();
         let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
 
-        for i in &vec_file_entry {
-            hashmap_with_file_entries.insert(i.vhash.src_path().to_string_lossy().to_string(), i.clone());
-            vector_of_hashes.push(i.vhash.clone());
+        for file_entry in &vec_file_entry {
+            // A non-empty error means that the video could not be hashed correctly, e.g. because the file is broken
+            if file_entry.error.is_empty() {
+                hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone());
+                vector_of_hashes.push(file_entry.vhash.clone());
+            } else {
+                self.text_messages.errors.push(file_entry.error.clone());
+            }
         }
 
         if self.use_cache {
@@ -614,7 +618,16 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
     for file_entry in hashmap.values() {
         let mut string: String = String::with_capacity(256);
 
-        string += format!("{}//{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.vhash.num_frames(), file_entry.vhash.duration()).as_str();
+        string += format!(
+            "{}//{}//{}//{}//{}//{}",
+            file_entry.path.display(),
+            file_entry.size,
+            file_entry.modified_date,
+            file_entry.vhash.num_frames(),
+            file_entry.vhash.duration(),
+            file_entry.error
+        )
+        .as_str();
 
         for i in file_entry.vhash.hash() {
             string.push_str("//");
@@ -655,15 +668,14 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
             let uuu = line.split("//").collect::<Vec<&str>>();
-            let hash_size = 19; // Hash size + other things
-            if uuu.len() != (hash_size + 5) {
+            if uuu.len() != (HASH_SIZE + 6) {
                 text_messages.warnings.push(format!(
                     "Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
                     index + 1,
                     line,
                     cache_file.display(),
-                    hash_size + 5,
+                    HASH_SIZE + 6,
                     uuu.len(),
                 ));
                 continue;
             }
@@ -671,8 +683,8 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
-            for i in 0..hash_size {
-                hash[i] = match uuu[5 + i as usize].parse::<u64>() {
+            for i in 0..HASH_SIZE {
+                hash[i] = match uuu[6 + i as usize].parse::<u64>() {
                     Ok(t) => t,
                     Err(e) => {
                         text_messages
@@ -706,6 +718,7 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
                     ().unwrap_or(0), uuu[0], hash, uuu[3].parse::().unwrap_or(10)),
+                    error: uuu[5].to_string(),
                 },
             );
         }
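
Note (not part of the patch): a minimal, standalone Rust sketch of the caching pattern the patch applies in similar_images.rs. All names below (compute_hash, hash_or_sentinel, HASH_LEN, the ".broken" suffix) are illustrative stand-ins, not czkawka APIs. A file that fails to hash gets a sentinel all-zero hash, so it still ends up in the cache and is not rehashed on every run; sentinel hashes are then filtered out before similarity comparison, mirroring the new check in the bktree loop.

use std::collections::HashMap;

const HASH_LEN: usize = 8; // e.g. an 8x8-bit perceptual hash -> 8 bytes (illustrative)

// Stand-in for image::open + hasher.hash_image; returns None for unreadable files.
fn compute_hash(path: &str) -> Option<Vec<u8>> {
    if path.ends_with(".broken") {
        None
    } else {
        Some(vec![0x5A; HASH_LEN]) // dummy hash value
    }
}

// On failure, store an all-zero sentinel instead of dropping the entry,
// so the cache remembers the file and does not rehash it on the next run.
fn hash_or_sentinel(path: &str) -> Vec<u8> {
    compute_hash(path).unwrap_or_else(|| vec![0u8; HASH_LEN])
}

// All-zero (or all-255) hashes are treated as "could not be hashed".
fn is_sentinel(hash: &[u8]) -> bool {
    hash.iter().all(|b| *b == 0) || hash.iter().all(|b| *b == 255)
}

fn main() {
    let mut cache: HashMap<String, Vec<u8>> = HashMap::new();
    for path in ["a.jpg", "b.broken", "c.jpg"] {
        cache.insert(path.to_string(), hash_or_sentinel(path));
    }

    // Only non-sentinel hashes take part in similarity comparison.
    let comparable: Vec<&String> = cache
        .iter()
        .filter(|(_, hash)| !is_sentinel(hash.as_slice()))
        .map(|(path, _)| path)
        .collect();

    println!("cached entries: {}", cache.len());          // 3 - the broken file stays cached
    println!("comparable entries: {}", comparable.len()); // 2 - the sentinel is skipped
}

The trade-off, as the patch comment itself notes, is that a file that was only temporarily unreadable (e.g. due to missing permissions) keeps its sentinel entry until the cache is refreshed.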