From 5cc23341f32d9fb0f306b6ad9f3e23009ef2e73b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= Date: Tue, 2 May 2023 12:15:33 +0200 Subject: [PATCH] Similar Images coplexity --- czkawka_core/src/similar_images.rs | 203 +++++++++++++++-------------- 1 file changed, 104 insertions(+), 99 deletions(-) diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs index 4b0c688..0990f39 100644 --- a/czkawka_core/src/similar_images.rs +++ b/czkawka_core/src/similar_images.rs @@ -1,5 +1,5 @@ use std::collections::{BTreeSet, HashMap, HashSet}; -use std::fs::File; +use std::fs::{DirEntry, File, Metadata}; use std::io::Write; use std::io::*; use std::panic; @@ -354,8 +354,7 @@ impl SimilarImages { return (dir_result, warnings, fe_result); }; - // Check every sub folder/file/link etc. - 'dir: for entry in read_dir { + for entry in read_dir { let Some((entry_data,metadata)) = common_get_entry_data_metadata(&entry, &mut warnings, current_folder) else { continue; }; @@ -372,33 +371,7 @@ impl SimilarImages { ); } else if metadata.is_file() { atomic_counter.fetch_add(1, Ordering::Relaxed); - - let Some(file_name_lowercase) = get_lowercase_name(entry_data, &mut warnings) else { - continue 'dir; - }; - - if !self.allowed_extensions.matches_filename(&file_name_lowercase) { - continue 'dir; - } - - // Checking files - if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(¤t_file_name) { - continue 'dir; - } - - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - size: metadata.len(), - dimensions: String::new(), - modified_date: get_modified_time(&metadata, &mut warnings, ¤t_file_name, false), - hash: Vec::new(), - similarity: 0, - }; - - fe_result.push((current_file_name.to_string_lossy().to_string(), fe)); - } + self.add_file_entry(&metadata, current_folder, entry_data, &mut fe_result, &mut warnings); } } (dir_result, warnings, fe_result) @@ -423,6 +396,35 @@ impl SimilarImages { true } + fn add_file_entry(&self, metadata: &Metadata, current_folder: &Path, entry_data: &DirEntry, fe_result: &mut Vec<(String, FileEntry)>, warnings: &mut Vec) { + let Some(file_name_lowercase) = get_lowercase_name(entry_data, warnings) else { + return; + }; + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + return; + } + + // Checking files + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(¤t_file_name) { + return; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + size: metadata.len(), + dimensions: String::new(), + modified_date: get_modified_time(metadata, warnings, ¤t_file_name, false), + hash: Vec::new(), + similarity: 0, + }; + + fe_result.push((current_file_name.to_string_lossy().to_string(), fe)); + } + } + // Cache algorithm: // - Load data from file // - Remove from data to search, already loaded entries from cache(size and modified datamust match) @@ -472,80 +474,13 @@ impl SimilarImages { let mut vec_file_entry: Vec<(FileEntry, Vec)> = non_cached_files_to_check .into_par_iter() - .map(|(_s, mut file_entry)| { + .map(|(_s, file_entry)| { atomic_counter.fetch_add(1, Ordering::Relaxed); if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { check_was_stopped.store(true, Ordering::Relaxed); return None; } - let file_name_lowercase = file_entry.path.to_string_lossy().to_lowercase(); - - let image; - - #[allow(clippy::never_loop)] // Required to implement nice if/else - 'krztyna: loop { - if RAW_IMAGE_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { - image = match get_dynamic_image_from_raw_image(&file_entry.path) { - Some(t) => t, - None => return Some(Some((file_entry, Vec::new()))), - }; - break 'krztyna; - } - - #[cfg(feature = "heif")] - if HEIC_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { - image = match get_dynamic_image_from_heic(&file_entry.path.to_string_lossy()) { - Ok(t) => t, - Err(_) => { - return Some(Some((file_entry, Vec::new()))); - } - }; - break 'krztyna; - } - - // Normal image extension, when any other fail, not using if/else - let result = panic::catch_unwind(|| { - match image::open(file_entry.path.clone()) { - Ok(t) => Ok(t), - // Err(_inspected) => return Some(None), // Something is wrong with image, - // For broken images empty hash is used, because without it will try to resecan files each time when it is called(missing cache file is responsible for it) - // This may cause problems(very rarely), when e.g. file was not available due lack of permissions, but it is available now - Err(_inspected) => Err(()), - } - }); - - // If image crashed during opening, we just skip checking its hash and go on - if let Ok(image_result) = result { - if let Ok(image2) = image_result { - image = image2; - } else { - return Some(Some((file_entry, Vec::new()))); - } - } else { - let message = create_crash_message("Image-rs", &file_entry.path.to_string_lossy(), "https://github.com/image-rs/image/issues"); - println!("{message}"); - return Some(Some((file_entry, Vec::new()))); - } - - break 'krztyna; - } - - let dimensions = image.dimensions(); - - file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1); - - let hasher_config = HasherConfig::new() - .hash_size(self.hash_size as u32, self.hash_size as u32) - .hash_alg(self.hash_alg) - .resize_filter(self.image_filter); - let hasher = hasher_config.to_hasher(); - - let hash = hasher.hash_image(&image); - let buf: Vec = hash.as_bytes().to_vec(); - - file_entry.hash = buf.clone(); - - Some(Some((file_entry, buf))) + Some(Some(self.collect_image_file_entry(file_entry))) }) .while_some() .filter(Option::is_some) @@ -594,6 +529,76 @@ impl SimilarImages { Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files"); true } + fn collect_image_file_entry(&self, mut file_entry: FileEntry) -> (FileEntry, Vec) { + let file_name_lowercase = file_entry.path.to_string_lossy().to_lowercase(); + + let image; + + #[allow(clippy::never_loop)] // Required to implement nice if/else + 'krztyna: loop { + if RAW_IMAGE_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { + image = match get_dynamic_image_from_raw_image(&file_entry.path) { + Some(t) => t, + None => return (file_entry, Vec::new()), + }; + break 'krztyna; + } + + #[cfg(feature = "heif")] + if HEIC_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { + image = match get_dynamic_image_from_heic(&file_entry.path.to_string_lossy()) { + Ok(t) => t, + Err(_) => { + return (file_entry, Vec::new()); + } + }; + break 'krztyna; + } + + // Normal image extension, when any other fail, not using if/else + let result = panic::catch_unwind(|| { + match image::open(file_entry.path.clone()) { + Ok(t) => Ok(t), + // Err(_inspected) => return Some(None), // Something is wrong with image, + // For broken images empty hash is used, because without it will try to resecan files each time when it is called(missing cache file is responsible for it) + // This may cause problems(very rarely), when e.g. file was not available due lack of permissions, but it is available now + Err(_inspected) => Err(()), + } + }); + + // If image crashed during opening, we just skip checking its hash and go on + if let Ok(image_result) = result { + if let Ok(image2) = image_result { + image = image2; + } else { + return (file_entry, Vec::new()); + } + } else { + let message = create_crash_message("Image-rs", &file_entry.path.to_string_lossy(), "https://github.com/image-rs/image/issues"); + println!("{message}"); + return (file_entry, Vec::new()); + } + + break 'krztyna; + } + + let dimensions = image.dimensions(); + + file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1); + + let hasher_config = HasherConfig::new() + .hash_size(self.hash_size as u32, self.hash_size as u32) + .hash_alg(self.hash_alg) + .resize_filter(self.image_filter); + let hasher = hasher_config.to_hasher(); + + let hash = hasher.hash_image(&image); + let buf: Vec = hash.as_bytes().to_vec(); + + file_entry.hash = buf.clone(); + + (file_entry, buf) + } fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) -> bool { if self.image_hashes.is_empty() {