Simplify, fix and improve similar images algorithm (#983)
* Random changer * CD * A1 * Almost? * Nein * Heif * Tests that needs to be fixed * Fixed test * Tests * Fixed image counting in reference folders * Lock * Catch possible more bugs in pdf * Find ever more bugs
This commit is contained in:
parent
04a91aeca7
commit
55b2744bf4
630
Cargo.lock
generated
630
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
12
Changelog.md
12
Changelog.md
|
@ -1,3 +1,15 @@
|
|||
## Version 5.2.0 - ?
|
||||
- Add finding similar audio files by content - [#970](https://github.com/qarmin/czkawka/pull/970)
|
||||
- Allow to find duplicates by name/size at once - [#956](https://github.com/qarmin/czkawka/pull/956)
|
||||
- Fixed bug when cache for music tags not worked - [#970](https://github.com/qarmin/czkawka/pull/970)
|
||||
- Allow to set number of threads from CLI - [#972](https://github.com/qarmin/czkawka/pull/972)
|
||||
- Fix problem with invalid item sorting in bad extensions mode - [#972](https://github.com/qarmin/czkawka/pull/972)
|
||||
- Big refactor/cleaning of code - [#956](https://github.com/qarmin/czkawka/pull/956)/[#970](https://github.com/qarmin/czkawka/pull/970)/[#972](https://github.com/qarmin/czkawka/pull/972)
|
||||
- Use builtin gtk webp loader for previews - [#923](https://github.com/qarmin/czkawka/pull/923)
|
||||
- Fixed docker build - [#947](https://github.com/qarmin/czkawka/pull/947)
|
||||
- Restore snap builds broken since GTk 4 port - [#965](https://github.com/qarmin/czkawka/pull/947)
|
||||
- Instruction how to build native ARM64 binaries on Mac - [#945](https://github.com/qarmin/czkawka/pull/945)/[#971](https://github.com/qarmin/czkawka/pull/971)
|
||||
|
||||
## Version 5.1.0 - 19.02.2023r
|
||||
- Added sort button - [#894](https://github.com/qarmin/czkawka/pull/894)
|
||||
- Allow to set number of thread used to scan - [#839](https://github.com/qarmin/czkawka/pull/839)
|
||||
|
|
|
@ -10,10 +10,10 @@ homepage = "https://github.com/qarmin/czkawka"
|
|||
repository = "https://github.com/qarmin/czkawka"
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.2", features = ["derive"] }
|
||||
clap = { version = "4.3", features = ["derive"] }
|
||||
|
||||
# For enum types
|
||||
image_hasher = "1.1"
|
||||
image_hasher = "1.2"
|
||||
|
||||
[dependencies.czkawka_core]
|
||||
path = "../czkawka_core"
|
||||
|
|
|
@ -19,14 +19,14 @@ crossbeam-channel = "0.5"
|
|||
directories-next = "2.0"
|
||||
|
||||
# Needed by similar images
|
||||
image_hasher = "1.1"
|
||||
image_hasher = "1.2"
|
||||
bk-tree = "0.5"
|
||||
image = "0.24"
|
||||
hamming = "0.1"
|
||||
|
||||
# Needed by same music
|
||||
bitflags = "2.2"
|
||||
lofty = "0.12"
|
||||
bitflags = "2.3"
|
||||
lofty = "0.14"
|
||||
|
||||
# Futures - needed by async progress sender
|
||||
futures = "0.3.28"
|
||||
|
@ -41,11 +41,11 @@ rusty-chromaprint = "0.1"
|
|||
symphonia = { version = "0.5", features = ["all"] }
|
||||
|
||||
# Hashes for duplicate files
|
||||
blake3 = "1.3"
|
||||
blake3 = "1.4"
|
||||
crc32fast = "1.3"
|
||||
xxhash-rust = { version = "0.8", features = ["xxh3"] }
|
||||
|
||||
tempfile = "3.5"
|
||||
tempfile = "3.6"
|
||||
|
||||
# Video Duplicates
|
||||
vid_dup_finder_lib = "0.1"
|
||||
|
@ -59,8 +59,8 @@ serde_json = "1.0"
|
|||
# Language
|
||||
i18n-embed = { version = "0.13", features = ["fluent-system", "desktop-requester"] }
|
||||
i18n-embed-fl = "0.6"
|
||||
rust-embed = "6.6"
|
||||
once_cell = "1.17"
|
||||
rust-embed = "6.7"
|
||||
once_cell = "1.18"
|
||||
|
||||
# Raw image files
|
||||
rawloader = "0.37"
|
||||
|
@ -73,11 +73,12 @@ infer = "0.13"
|
|||
num_cpus = "1.15"
|
||||
|
||||
# Heif/Heic
|
||||
libheif-rs = { version = "0.18.0", optional = true } # Do not upgrade now, since Ubuntu 22.04 not works with newer version
|
||||
libheif-rs = { version = "=0.18.0", optional = true } # Do not upgrade now, since Ubuntu 22.04 not works with newer version
|
||||
libheif-sys = { version = "=1.14.2", optional = true } # 1.14.3 brake compilation on Ubuntu 22.04
|
||||
anyhow = { version = "1.0" }
|
||||
|
||||
state = "0.5"
|
||||
state = "0.6"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
heif = ["dep:libheif-rs"]
|
||||
heif = ["dep:libheif-rs", "dep:libheif-sys"]
|
||||
|
|
|
@ -372,25 +372,29 @@ impl BrokenFiles {
|
|||
|
||||
let mut file_entry_clone = file_entry.clone();
|
||||
let result = panic::catch_unwind(|| {
|
||||
if let Err(e) = FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
|
||||
match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
|
||||
Ok(file) => {
|
||||
for idx in 0..file.num_pages() {
|
||||
if let Err(e) = file.get_page(idx) {
|
||||
let err = validate_pdf_error(&mut file_entry, e);
|
||||
if let PdfError::InvalidPassword = err {
|
||||
return None;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if let PdfError::Io { .. } = e {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut error_string = e.to_string();
|
||||
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
|
||||
if error_string.starts_with("Try at") {
|
||||
if let Some(start_index) = error_string.find("/pdf-") {
|
||||
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
|
||||
}
|
||||
}
|
||||
|
||||
file_entry.error_string = error_string;
|
||||
let error = unpack_pdf_error(e);
|
||||
if let PdfError::InvalidPassword = error {
|
||||
let err = validate_pdf_error(&mut file_entry, e);
|
||||
if let PdfError::InvalidPassword = err {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(file_entry)
|
||||
});
|
||||
if let Ok(pdf_result) = result {
|
||||
|
@ -708,3 +712,16 @@ fn unpack_pdf_error(e: PdfError) -> PdfError {
|
|||
e
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError {
|
||||
let mut error_string = e.to_string();
|
||||
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
|
||||
if error_string.starts_with("Try at") {
|
||||
if let Some(start_index) = error_string.find("/pdf-") {
|
||||
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
|
||||
}
|
||||
}
|
||||
|
||||
file_entry.error_string = error_string;
|
||||
unpack_pdf_error(e)
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ use crate::common_directory::Directories;
|
|||
use crate::common_items::ExcludedItems;
|
||||
use crate::common_traits::ResultEntry;
|
||||
|
||||
static NUMBER_OF_THREADS: state::Storage<usize> = state::Storage::new();
|
||||
static NUMBER_OF_THREADS: state::InitCell<usize> = state::InitCell::new();
|
||||
|
||||
pub fn get_number_of_threads() -> usize {
|
||||
let data = NUMBER_OF_THREADS.get();
|
||||
|
|
|
@ -473,7 +473,7 @@ where
|
|||
}
|
||||
}
|
||||
if counter > 0 {
|
||||
// Do not increase counter one by one in threads, because usually it
|
||||
// Increase counter in batch, because usually it may be slow to add multiple times atomic value
|
||||
atomic_counter.fetch_add(counter, Ordering::Relaxed);
|
||||
}
|
||||
(dir_result, warnings, fe_result, set_as_not_empty_folder_list, folder_entries_list)
|
||||
|
|
|
@ -13,9 +13,9 @@ pub struct Directories {
|
|||
pub excluded_directories: Vec<PathBuf>,
|
||||
pub included_directories: Vec<PathBuf>,
|
||||
pub reference_directories: Vec<PathBuf>,
|
||||
exclude_other_filesystems: Option<bool>,
|
||||
pub exclude_other_filesystems: Option<bool>,
|
||||
#[cfg(target_family = "unix")]
|
||||
included_dev_ids: Vec<u64>,
|
||||
pub included_dev_ids: Vec<u64>,
|
||||
}
|
||||
|
||||
impl Directories {
|
||||
|
|
|
@ -5,8 +5,7 @@ use std::io::*;
|
|||
use std::mem;
|
||||
use std::panic;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use bk_tree::BKTree;
|
||||
|
@ -22,8 +21,8 @@ use serde::{Deserialize, Serialize};
|
|||
#[cfg(feature = "heif")]
|
||||
use crate::common::get_dynamic_image_from_heic;
|
||||
use crate::common::{
|
||||
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, get_number_of_threads, open_cache_folder, prepare_thread_handler_common,
|
||||
send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
|
||||
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
|
||||
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
|
||||
};
|
||||
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
||||
use crate::common_directory::Directories;
|
||||
|
@ -564,81 +563,25 @@ impl SimilarImages {
|
|||
(file_entry, buf)
|
||||
}
|
||||
|
||||
fn compare_hashes(
|
||||
&self,
|
||||
hashes_to_check: &[ImHash],
|
||||
atomic_counter: &Arc<AtomicUsize>,
|
||||
stop_receiver: Option<&Receiver<()>>,
|
||||
check_was_stopped: &AtomicBool,
|
||||
tolerance: u32,
|
||||
hashes_with_multiple_images: &HashSet<ImHash>,
|
||||
all_hashed_images: &HashMap<ImHash, Vec<FileEntry>>,
|
||||
) -> Option<(HashMap<ImHash, u32>, HashMap<ImHash, (ImHash, u32)>)> {
|
||||
let mut hashes_parents: HashMap<ImHash, u32> = Default::default(); // Hashes used as parent (hash, children_number_of_hash)
|
||||
let mut hashes_similarity: HashMap<ImHash, (ImHash, u32)> = Default::default(); // Hashes used as child, (parent_hash, similarity)
|
||||
|
||||
// Sprawdź czy hash nie jest użyty jako master gdzie indziej
|
||||
// Jeśli tak to przejdź do sprawdzania kolejnego elementu
|
||||
// Zweryfikuj czy sprawdzany element ma rodzica
|
||||
// Jeśli ma to sprawdź czy similarity nowego rodzica jest mniejsze niż starego
|
||||
// // Jeśli tak to zmniejsz ilość dzieci starego rodzica, dodaj ilość dzieci w nowym rodzicu i podmień rekord hashes_similarity
|
||||
// // Jeśli nie to dodaj nowy rekord w hashes_similarity jak i hashes_parents z liczbą dzieci równą 1
|
||||
|
||||
for (index, hash_to_check) in hashes_to_check.iter().enumerate() {
|
||||
// Don't check for user stop too often
|
||||
// Also don't add too often data to atomic variable
|
||||
const CYCLES_COUNTER: usize = 0b11_1111;
|
||||
if ((index & CYCLES_COUNTER) == CYCLES_COUNTER) && index != 0 {
|
||||
atomic_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed);
|
||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||
check_was_stopped.store(true, Ordering::Relaxed);
|
||||
return None;
|
||||
}
|
||||
}
|
||||
hashes_parents.insert((*hash_to_check).clone(), 0);
|
||||
|
||||
let mut found_items = self
|
||||
.bktree
|
||||
.find(hash_to_check, tolerance)
|
||||
.filter(|(similarity, _hash)| if self.use_reference_folders { true } else { *similarity != 0 })
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
found_items.sort_unstable_by_key(|f| f.0);
|
||||
|
||||
for (similarity, compared_hash) in found_items {
|
||||
image_to_check(
|
||||
&mut hashes_parents,
|
||||
&mut hashes_similarity,
|
||||
hashes_with_multiple_images,
|
||||
hash_to_check,
|
||||
compared_hash,
|
||||
similarity,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
debug_check_for_duplicated_things(self.use_reference_folders, &hashes_parents, &hashes_similarity, all_hashed_images, "BEFORE");
|
||||
|
||||
Some((hashes_parents, hashes_similarity))
|
||||
}
|
||||
|
||||
fn chunk_hashes(&mut self, all_hashed_images: &HashMap<ImHash, Vec<FileEntry>>, all_hashes: &Vec<ImHash>) -> (Vec<Vec<ImHash>>, HashSet<ImHash>) {
|
||||
let mut hashes_with_multiple_images: HashSet<ImHash> = Default::default(); // Fast way to check if hash have multiple images
|
||||
// Split hashes at 2 parts, base hashes and hashes to compare, 3 argument is set of hashes with multiple images
|
||||
fn split_hashes(&mut self, all_hashed_images: &HashMap<ImHash, Vec<FileEntry>>) -> (Vec<ImHash>, HashSet<ImHash>) {
|
||||
let hashes_with_multiple_images: HashSet<ImHash> = all_hashed_images
|
||||
.iter()
|
||||
.filter_map(|(hash, vec_file_entry)| {
|
||||
if vec_file_entry.len() >= 2 {
|
||||
return Some(hash.clone());
|
||||
};
|
||||
None
|
||||
})
|
||||
.collect();
|
||||
let mut base_hashes = Vec::new(); // Initial hashes
|
||||
if self.use_reference_folders {
|
||||
let mut files_from_referenced_folders: HashMap<ImHash, Vec<FileEntry>> = HashMap::new();
|
||||
let mut normal_files: HashMap<ImHash, Vec<FileEntry>> = HashMap::new();
|
||||
|
||||
let number_of_processors = get_number_of_threads();
|
||||
let chunk_size;
|
||||
|
||||
let mut initial_hashes: Vec<ImHash> = Vec::new();
|
||||
let mut additional_chunk_to_check: Vec<ImHash> = Default::default();
|
||||
|
||||
let mut chunks: Vec<Vec<ImHash>>;
|
||||
if self.use_reference_folders {
|
||||
let reference_directories = self.directories.reference_directories.clone();
|
||||
all_hashed_images.clone().into_iter().for_each(|(hash, vec_file_entry)| {
|
||||
for file_entry in vec_file_entry {
|
||||
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
|
||||
if is_in_reference_folder(&self.directories.reference_directories, &file_entry.path) {
|
||||
files_from_referenced_folders.entry(hash.clone()).or_insert_with(Vec::new).push(file_entry);
|
||||
} else {
|
||||
normal_files.entry(hash.clone()).or_insert_with(Vec::new).push(file_entry);
|
||||
|
@ -646,44 +589,20 @@ impl SimilarImages {
|
|||
}
|
||||
});
|
||||
|
||||
for (hash, vec_files) in normal_files {
|
||||
if vec_files.len() >= 2 {
|
||||
hashes_with_multiple_images.insert(hash.clone());
|
||||
}
|
||||
for hash in normal_files.into_keys() {
|
||||
self.bktree.add(hash);
|
||||
}
|
||||
for (hash, vec_files) in files_from_referenced_folders {
|
||||
if vec_files.len() >= 2 {
|
||||
hashes_with_multiple_images.insert(hash.clone());
|
||||
}
|
||||
initial_hashes.push(hash);
|
||||
}
|
||||
chunk_size = initial_hashes.len() / number_of_processors;
|
||||
|
||||
chunks = if chunk_size > 0 {
|
||||
initial_hashes.chunks(chunk_size).map(<[std::vec::Vec<u8>]>::to_vec).collect::<Vec<_>>()
|
||||
} else {
|
||||
vec![initial_hashes]
|
||||
};
|
||||
} else {
|
||||
for (hash, vec_files) in all_hashed_images {
|
||||
if vec_files.len() >= 2 {
|
||||
additional_chunk_to_check.push(hash.clone());
|
||||
hashes_with_multiple_images.insert(hash.clone());
|
||||
} else {
|
||||
self.bktree.add(hash.clone());
|
||||
for hash in files_from_referenced_folders.into_keys() {
|
||||
base_hashes.push(hash);
|
||||
}
|
||||
}
|
||||
chunk_size = all_hashes.len() / number_of_processors;
|
||||
chunks = if chunk_size > 0 {
|
||||
all_hashes.chunks(chunk_size).map(<[Vec<u8>]>::to_vec).collect::<Vec<_>>()
|
||||
} else {
|
||||
vec![all_hashes.clone()]
|
||||
};
|
||||
chunks.push(additional_chunk_to_check);
|
||||
for original_hash in all_hashed_images.keys() {
|
||||
self.bktree.add(original_hash.clone());
|
||||
}
|
||||
|
||||
(chunks, hashes_with_multiple_images)
|
||||
base_hashes = all_hashed_images.keys().cloned().collect::<Vec<_>>();
|
||||
}
|
||||
(base_hashes, hashes_with_multiple_images)
|
||||
}
|
||||
|
||||
fn collect_hash_compare_result(
|
||||
|
@ -720,7 +639,7 @@ impl SimilarImages {
|
|||
.filter(|e| !is_in_reference_folder(&self.directories.reference_directories, &e.path))
|
||||
.cloned()
|
||||
.collect();
|
||||
for mut fe in &mut vec_fe {
|
||||
for fe in &mut vec_fe {
|
||||
fe.similarity = similarity;
|
||||
}
|
||||
collected_similar_images.get_mut(&parent_hash).unwrap().append(&mut vec_fe);
|
||||
|
@ -737,7 +656,7 @@ impl SimilarImages {
|
|||
|
||||
for (child_hash, (parent_hash, similarity)) in hashes_similarity {
|
||||
let mut vec_fe = all_hashed_images.get(&child_hash).unwrap().clone();
|
||||
for mut fe in &mut vec_fe {
|
||||
for fe in &mut vec_fe {
|
||||
fe.similarity = similarity;
|
||||
}
|
||||
collected_similar_images.get_mut(&parent_hash).unwrap().append(&mut vec_fe);
|
||||
|
@ -745,55 +664,135 @@ impl SimilarImages {
|
|||
}
|
||||
}
|
||||
|
||||
fn check_for_duplicate_hashes(
|
||||
&self,
|
||||
parts: Vec<(HashMap<ImHash, u32>, HashMap<ImHash, (ImHash, u32)>)>,
|
||||
hashes_with_multiple_images: &HashSet<ImHash>,
|
||||
fn compare_hashes_with_non_zero_tolerance(
|
||||
&mut self,
|
||||
all_hashed_images: &HashMap<ImHash, Vec<FileEntry>>,
|
||||
collected_similar_images: &mut HashMap<ImHash, Vec<FileEntry>>,
|
||||
) {
|
||||
let mut hashes_parents: HashMap<ImHash, u32> = Default::default();
|
||||
let mut hashes_similarity: HashMap<ImHash, (ImHash, u32)> = Default::default();
|
||||
let mut iter = parts.into_iter();
|
||||
// At start fill arrays with first item
|
||||
// Normal algorithm would do exactly same thing, but slower, one record after one
|
||||
if let Some((first_hashes_parents, first_hashes_similarity)) = iter.next() {
|
||||
hashes_parents = first_hashes_parents;
|
||||
hashes_similarity = first_hashes_similarity;
|
||||
progress_sender: Option<&UnboundedSender<ProgressData>>,
|
||||
stop_receiver: Option<&Receiver<()>>,
|
||||
tolerance: u32,
|
||||
) -> bool {
|
||||
// Don't use hashes with multiple images in bktree, because they will always be master of group and cannot be find by other hashes
|
||||
let (base_hashes, hashes_with_multiple_images) = self.split_hashes(all_hashed_images);
|
||||
|
||||
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
||||
prepare_thread_handler_common(progress_sender, 2, 2, base_hashes.len(), CheckingMethod::None, self.tool_type);
|
||||
|
||||
let mut hashes_parents: HashMap<ImHash, u32> = Default::default(); // Hashes used as parent (hash, children_number_of_hash)
|
||||
let mut hashes_similarity: HashMap<ImHash, (ImHash, u32)> = Default::default(); // Hashes used as child, (parent_hash, similarity)
|
||||
|
||||
// Check them in chunks, to decrease number of used memory
|
||||
let base_hashes_chunks = base_hashes.chunks(1000);
|
||||
for chunk in base_hashes_chunks {
|
||||
let partial_results = chunk
|
||||
.into_par_iter()
|
||||
.map(|hash_to_check| {
|
||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||
check_was_stopped.store(true, Ordering::Relaxed);
|
||||
return None;
|
||||
}
|
||||
let mut found_items = self
|
||||
.bktree
|
||||
.find(hash_to_check, tolerance)
|
||||
.filter(|(similarity, compared_hash)| {
|
||||
*similarity != 0 && !hashes_parents.contains_key(*compared_hash) && !hashes_with_multiple_images.contains(*compared_hash)
|
||||
})
|
||||
.filter(|(similarity, compared_hash)| {
|
||||
if let Some((_, other_similarity_with_parent)) = hashes_similarity.get(*compared_hash) {
|
||||
// If current hash is more similar to other hash than to current parent hash, then skip check earlier
|
||||
// Because there is no way to be more similar to other hash than to current parent hash
|
||||
if *similarity >= *other_similarity_with_parent {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
found_items.sort_unstable_by_key(|f| f.0);
|
||||
Some((hash_to_check, found_items))
|
||||
})
|
||||
.while_some()
|
||||
.filter(|(original_hash, vec_similar_hashes)| !vec_similar_hashes.is_empty() || hashes_with_multiple_images.contains(*original_hash))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if check_was_stopped.load(Ordering::Relaxed) {
|
||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
|
||||
for (parent_hash, _child_number) in partial_hashes_with_parents {
|
||||
if !hashes_parents.contains_key(&parent_hash) && !hashes_similarity.contains_key(&parent_hash) {
|
||||
hashes_parents.insert(parent_hash, 0);
|
||||
}
|
||||
self.connect_results(partial_results, &mut hashes_parents, &mut hashes_similarity, &hashes_with_multiple_images);
|
||||
}
|
||||
|
||||
for (hash_to_check, (compared_hash, similarity)) in partial_hashes_with_similarity {
|
||||
image_to_check(
|
||||
&mut hashes_parents,
|
||||
&mut hashes_similarity,
|
||||
hashes_with_multiple_images,
|
||||
&hash_to_check,
|
||||
&compared_hash,
|
||||
similarity,
|
||||
);
|
||||
}
|
||||
}
|
||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
||||
|
||||
debug_check_for_duplicated_things(self.use_reference_folders, &hashes_parents, &hashes_similarity, all_hashed_images, "LATTER");
|
||||
self.collect_hash_compare_result(hashes_parents, &hashes_with_multiple_images, all_hashed_images, collected_similar_images, hashes_similarity);
|
||||
|
||||
// Just simple check if all original hashes with multiple entries are available in end results
|
||||
let original_hashes_at_start = hashes_with_multiple_images.len();
|
||||
let original_hashes_in_end_results = hashes_parents
|
||||
.iter()
|
||||
.filter(|(parent_hash, _child_number)| hashes_with_multiple_images.contains(*parent_hash))
|
||||
.count();
|
||||
if !self.use_reference_folders {
|
||||
assert_eq!(original_hashes_at_start, original_hashes_in_end_results);
|
||||
true
|
||||
}
|
||||
|
||||
self.collect_hash_compare_result(hashes_parents, hashes_with_multiple_images, all_hashed_images, collected_similar_images, hashes_similarity);
|
||||
fn connect_results(
|
||||
&self,
|
||||
partial_results: Vec<(&ImHash, Vec<(u32, &ImHash)>)>,
|
||||
hashes_parents: &mut HashMap<ImHash, u32>,
|
||||
hashes_similarity: &mut HashMap<ImHash, (ImHash, u32)>,
|
||||
hashes_with_multiple_images: &HashSet<ImHash>,
|
||||
) {
|
||||
for (original_hash, vec_compared_hashes) in partial_results {
|
||||
let mut number_of_added_child_items = 0;
|
||||
for (similarity, compared_hash) in vec_compared_hashes {
|
||||
// If hash is already in results skip it
|
||||
// This check duplicates check from bktree.find, but it is needed to because when iterating over elements, this structure can change
|
||||
if hashes_parents.contains_key(compared_hash) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If there is already record, with smaller sensitivity, then replace it
|
||||
let mut need_to_add = false;
|
||||
let mut need_to_check = false;
|
||||
|
||||
// TODO consider to replace variables from above with closures
|
||||
// If current checked hash, have parent, first we must check if similarity between them is lower than checked item
|
||||
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(original_hash) {
|
||||
if *current_similarity_with_parent > similarity {
|
||||
need_to_check = true;
|
||||
|
||||
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
|
||||
if hashes_parents.get(current_parent_hash) == Some(&0) && !hashes_with_multiple_images.contains(current_parent_hash) {
|
||||
hashes_parents.remove(current_parent_hash);
|
||||
}
|
||||
hashes_similarity.remove(original_hash).unwrap();
|
||||
}
|
||||
} else {
|
||||
need_to_check = true;
|
||||
}
|
||||
|
||||
if need_to_check {
|
||||
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(compared_hash) {
|
||||
if *other_similarity > similarity {
|
||||
need_to_add = true;
|
||||
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
|
||||
}
|
||||
}
|
||||
// But when there is no record, just add it
|
||||
else {
|
||||
need_to_add = true;
|
||||
}
|
||||
}
|
||||
|
||||
if need_to_add {
|
||||
hashes_similarity.insert(compared_hash.clone(), (original_hash.clone(), similarity));
|
||||
number_of_added_child_items += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if number_of_added_child_items > 0 || hashes_with_multiple_images.contains(original_hash) {
|
||||
hashes_parents.insert((*original_hash).clone(), number_of_added_child_items);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&UnboundedSender<ProgressData>>) -> bool {
|
||||
|
@ -808,55 +807,27 @@ impl SimilarImages {
|
|||
|
||||
let all_hashed_images = mem::take(&mut self.image_hashes);
|
||||
|
||||
let all_hashes: Vec<_> = all_hashed_images.clone().into_keys().collect();
|
||||
|
||||
// Checking entries with tolerance 0 is really easy and fast, because only entries with same hashes needs to be checked
|
||||
if tolerance == 0 {
|
||||
for (hash, vec_file_entry) in all_hashed_images.clone() {
|
||||
for (hash, vec_file_entry) in all_hashed_images {
|
||||
if vec_file_entry.len() >= 2 {
|
||||
collected_similar_images.insert(hash, vec_file_entry);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
||||
prepare_thread_handler_common(progress_sender, 2, 2, all_hashes.len(), CheckingMethod::None, self.tool_type);
|
||||
|
||||
// Don't use hashes with multiple images in bktree, because they will always be master of group and cannot be find by other hashes
|
||||
|
||||
let (chunks, hashes_with_multiple_images) = self.chunk_hashes(&all_hashed_images, &all_hashes);
|
||||
|
||||
let parts: Vec<_> = chunks
|
||||
.into_par_iter()
|
||||
.map(|hashes_to_check| {
|
||||
self.compare_hashes(
|
||||
&hashes_to_check,
|
||||
&atomic_counter,
|
||||
stop_receiver,
|
||||
&check_was_stopped,
|
||||
tolerance,
|
||||
&hashes_with_multiple_images,
|
||||
&all_hashed_images,
|
||||
)
|
||||
})
|
||||
.while_some()
|
||||
.collect();
|
||||
|
||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
||||
|
||||
if check_was_stopped.load(Ordering::Relaxed) {
|
||||
if !self.compare_hashes_with_non_zero_tolerance(&all_hashed_images, &mut collected_similar_images, progress_sender, stop_receiver, tolerance) {
|
||||
return false;
|
||||
}
|
||||
|
||||
self.check_for_duplicate_hashes(parts, &hashes_with_multiple_images, &all_hashed_images, &mut collected_similar_images);
|
||||
}
|
||||
|
||||
self.verify_duplicated_items(&collected_similar_images);
|
||||
|
||||
// Info about hashes is not needed anymore, so we drop this info
|
||||
self.similar_vectors = collected_similar_images.into_values().collect();
|
||||
|
||||
self.exclude_items_with_same_size();
|
||||
|
||||
self.check_for_reference_folders();
|
||||
self.remove_multiple_records_from_reference_folders();
|
||||
|
||||
if self.use_reference_folders {
|
||||
for (_fe, vector) in &self.similar_referenced_vectors {
|
||||
|
@ -896,7 +867,7 @@ impl SimilarImages {
|
|||
}
|
||||
}
|
||||
|
||||
fn check_for_reference_folders(&mut self) {
|
||||
fn remove_multiple_records_from_reference_folders(&mut self) {
|
||||
if self.use_reference_folders {
|
||||
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
|
||||
.into_iter()
|
||||
|
@ -917,12 +888,14 @@ impl SimilarImages {
|
|||
#[allow(dead_code)]
|
||||
#[allow(unreachable_code)]
|
||||
#[allow(unused_variables)]
|
||||
// TODO this probably not works good when reference folders are used
|
||||
pub fn verify_duplicated_items(&self, collected_similar_images: &HashMap<ImHash, Vec<FileEntry>>) {
|
||||
#[cfg(not(debug_assertions))]
|
||||
return;
|
||||
// Validating if group contains duplicated results
|
||||
let mut result_hashset: HashSet<String> = Default::default();
|
||||
let mut found = false;
|
||||
// dbg!(collected_similar_images.len());
|
||||
for vec_file_entry in collected_similar_images.values() {
|
||||
if vec_file_entry.is_empty() {
|
||||
println!("Empty group");
|
||||
|
@ -965,61 +938,6 @@ impl SimilarImages {
|
|||
}
|
||||
}
|
||||
|
||||
fn image_to_check<'a>(
|
||||
hashes_parents: &mut HashMap<ImHash, u32>,
|
||||
hashes_similarity: &mut HashMap<ImHash, (ImHash, u32)>,
|
||||
hashes_with_multiple_images: &HashSet<ImHash>,
|
||||
hash_to_check: &'a ImHash,
|
||||
compared_hash: &'a ImHash,
|
||||
similarity: u32,
|
||||
) {
|
||||
if let Some(children_number) = hashes_parents.get(compared_hash) {
|
||||
if *children_number > 0 || hashes_with_multiple_images.contains(compared_hash) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is already record, with smaller sensitivity, then replace it
|
||||
let mut need_to_add = false;
|
||||
let mut need_to_check = false;
|
||||
|
||||
// TODO consider to replace variables from above with closures
|
||||
// If current checked hash, have parent, first we must check if similarity between them is lower than checked item
|
||||
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
|
||||
if *current_similarity_with_parent > similarity {
|
||||
need_to_check = true;
|
||||
|
||||
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
|
||||
hashes_similarity.remove(hash_to_check).unwrap();
|
||||
}
|
||||
} else {
|
||||
need_to_check = true;
|
||||
}
|
||||
|
||||
if need_to_check {
|
||||
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(compared_hash) {
|
||||
if *other_similarity > similarity {
|
||||
need_to_add = true;
|
||||
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
|
||||
}
|
||||
}
|
||||
// But when there is no record, just add it
|
||||
else {
|
||||
need_to_add = true;
|
||||
}
|
||||
}
|
||||
|
||||
if need_to_add {
|
||||
hashes_similarity.insert(compared_hash.clone(), (hash_to_check.clone(), similarity));
|
||||
|
||||
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
|
||||
*number_of_children += 1;
|
||||
} else {
|
||||
hashes_parents.insert(hash_to_check.clone(), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_in_reference_folder(reference_directories: &[PathBuf], path: &Path) -> bool {
|
||||
reference_directories.iter().any(|e| path.starts_with(e))
|
||||
}
|
||||
|
@ -1216,7 +1134,7 @@ pub fn get_string_from_similarity(similarity: &u32, hash_size: u8) -> String {
|
|||
16 => 1,
|
||||
32 => 2,
|
||||
64 => 3,
|
||||
_ => panic!(),
|
||||
_ => panic!("Invalid hash size {hash_size}"),
|
||||
};
|
||||
|
||||
if *similarity == 0 {
|
||||
|
@ -1382,3 +1300,241 @@ fn debug_check_for_duplicated_things(
|
|||
|
||||
assert!(!found_broken_thing);
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use bk_tree::BKTree;

    use crate::common_directory::Directories;
    use crate::similar_images::{FileEntry, Hamming, SimilarImages};

    #[test]
    fn test_compare_no_images() {
        // With no hashes collected, scanning must produce no similarity groups.
        let mut similar_images = SimilarImages::default();
        similar_images.find_similar_images(None, None);
        assert_eq!(similar_images.get_similar_images().len(), 0);
    }

    #[test]
    fn test_compare_tolerance_0_normal_mode() {
        // Tolerance 0: only byte-identical hashes may be grouped together,
        // so the two distinct hashes must yield exactly two groups.
        let mut similar_images = SimilarImages {
            similarity: 0,
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt");
        let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "cde.txt");
        let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "rrt.txt");
        let fe5 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "bld.txt");
        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1.clone(), fe2.clone()]);
        similar_images.image_hashes.insert(fe3.hash.clone(), vec![fe3.clone(), fe4.clone(), fe5.clone()]);

        similar_images.find_similar_hashes(None, None);
        assert_eq!(similar_images.get_similar_images().len(), 2);
        let first_group = similar_images.get_similar_images()[0].iter().map(|e| &e.path).collect::<Vec<_>>();
        let second_group = similar_images.get_similar_images()[1].iter().map(|e| &e.path).collect::<Vec<_>>();
        // Initial order is not guaranteed, so we need to check both options
        if similar_images.get_similar_images()[0][0].hash == fe1.hash {
            assert_eq!(first_group, vec![&fe1.path, &fe2.path]);
            assert_eq!(second_group, vec![&fe3.path, &fe4.path, &fe5.path]);
        } else {
            assert_eq!(first_group, vec![&fe3.path, &fe4.path, &fe5.path]);
            assert_eq!(second_group, vec![&fe1.path, &fe2.path]);
        }
    }

    #[test]
    fn test_simple_normal_one_group() {
        // Two identical hashes form a single group.
        let mut similar_images = SimilarImages {
            similarity: 1,
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt");

        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1, fe2]);

        similar_images.find_similar_hashes(None, None);
        assert_eq!(similar_images.get_similar_images().len(), 1);
    }

    #[test]
    fn test_simple_normal_one_group_extended() {
        // Hashes 1 and 2 differ in the last byte (0b01 vs 0b10) by 2 bits,
        // which is within tolerance 2, so all three entries join one group.
        let mut similar_images = SimilarImages {
            similarity: 2,
            use_reference_folders: false,
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "bcd.txt");
        let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 2], "rrd.txt");

        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1, fe2]);
        similar_images.image_hashes.insert(fe3.hash.clone(), vec![fe3]);

        similar_images.find_similar_hashes(None, None);
        assert_eq!(similar_images.get_similar_images().len(), 1);
        assert_eq!(similar_images.get_similar_images()[0].len(), 3);
    }

    #[test]
    fn test_simple_referenced_same_group() {
        // Both entries live inside the reference directory, so there is
        // nothing outside it to report — no groups expected.
        let mut similar_images = SimilarImages {
            similarity: 0,
            use_reference_folders: true,
            directories: Directories {
                reference_directories: vec![PathBuf::from("/home/rr/")],
                ..Default::default()
            },
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/bcd.txt");

        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1, fe2]);

        similar_images.find_similar_hashes(None, None);
        assert_eq!(similar_images.get_similar_images().len(), 0);
    }

    #[test]
    fn test_simple_referenced_group_extended() {
        // One entry inside the reference dir, one outside: the outside
        // entry is reported against the reference one.
        let mut similar_images = SimilarImages {
            similarity: 0,
            use_reference_folders: true,
            directories: Directories {
                reference_directories: vec![PathBuf::from("/home/rr/")],
                ..Default::default()
            },
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd.txt");

        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1, fe2]);

        similar_images.find_similar_hashes(None, None);
        assert_eq!(similar_images.get_similar_images_referenced().len(), 1);
        assert_eq!(similar_images.get_similar_images_referenced()[0].1.len(), 1);
    }

    #[test]
    fn test_simple_referenced_group_extended2() {
        // Two entries inside the reference dir, two outside: exactly the
        // two outside entries must appear in the referenced result.
        let mut similar_images = SimilarImages {
            similarity: 0,
            use_reference_folders: true,
            directories: Directories {
                reference_directories: vec![PathBuf::from("/home/rr/")],
                ..Default::default()
            },
            ..Default::default()
        };

        let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc.txt");
        let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/rr/abc2.txt");
        let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd.txt");
        let fe4 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 1], "/home/kk/bcd2.txt");

        similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1, fe2, fe3, fe4]);

        similar_images.find_similar_hashes(None, None);
        let res = similar_images.get_similar_images_referenced();
        assert_eq!(res.len(), 1);
        assert_eq!(res[0].1.len(), 2);
        assert!(res[0].1.iter().all(|e| e.path.starts_with("/home/kk/")));
    }

    #[test]
    fn test_simple_normal_too_small_similarity() {
        // The three hashes differ pairwise by more than 1 bit, so with
        // tolerance 1 no groups may form. Repeated runs guard against
        // nondeterministic grouping order producing a spurious match.
        for _ in 0..50 {
            let mut similar_images = SimilarImages {
                similarity: 1,
                use_reference_folders: false,
                ..Default::default()
            };

            let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b00001], "abc.txt");
            let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b00100], "bcd.txt");
            let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b10000], "rrd.txt");

            similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1]);
            similar_images.image_hashes.insert(fe2.hash.clone(), vec![fe2]);
            similar_images.image_hashes.insert(fe3.hash.clone(), vec![fe3]);

            similar_images.find_similar_hashes(None, None);
            let res = similar_images.get_similar_images();
            assert!(res.is_empty());
        }
    }

    #[test]
    fn test_simple_normal_union_of_similarity() {
        // fe1–fe2 and fe2–fe3 are each within tolerance 4, but fe1–fe3 are
        // not; depending on which pair is merged first the result is either
        // one group of 3 or one group of 2. Both outcomes are accepted.
        for _ in 0..100 {
            let mut similar_images = SimilarImages {
                similarity: 4,
                use_reference_folders: false,
                ..Default::default()
            };

            let fe1 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0000_0001], "abc.txt");
            let fe2 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0000_1111], "bcd.txt");
            let fe3 = create_random_file_entry(vec![1, 1, 1, 1, 1, 1, 1, 0b0111_1111], "rrd.txt");

            similar_images.image_hashes.insert(fe1.hash.clone(), vec![fe1]);
            similar_images.image_hashes.insert(fe2.hash.clone(), vec![fe2]);
            similar_images.image_hashes.insert(fe3.hash.clone(), vec![fe3]);

            similar_images.find_similar_hashes(None, None);
            let res = similar_images.get_similar_images();
            assert_eq!(res.len(), 1);
            let mut path = res[0].iter().map(|e| e.path.to_string_lossy().to_string()).collect::<Vec<_>>();
            path.sort();
            if res[0].len() == 3 {
                assert_eq!(path, vec!["abc.txt".to_string(), "bcd.txt".to_string(), "rrd.txt".to_string()]);
            } else if res[0].len() == 2 {
                assert!(path == vec!["abc.txt".to_string(), "bcd.txt".to_string()] || path == vec!["bcd.txt".to_string(), "rrd.txt".to_string()]);
            } else {
                panic!("Invalid number of items");
            }
        }
    }

    #[test]
    fn test_tolerance() {
        // This test not really tests anything, but shows that current hamming distance works
        // in bits instead of bytes
        // I tried to make it work in bytes, but it was terrible, so Hamming should be really Ok

        // 1 vs 2: 0b01 xor 0b10 = 0b11 -> 2 differing bits
        let fe1 = vec![1, 1, 1, 1, 1, 1, 1, 1];
        let fe2 = vec![1, 1, 1, 1, 1, 1, 1, 2];
        let mut bktree = BKTree::new(Hamming);
        bktree.add(fe1);
        let (similarity, _hash) = bktree.find(&fe2, 100).next().unwrap();
        assert_eq!(similarity, 2);

        // 1 vs 3: 0b01 xor 0b11 = 0b10 -> 1 differing bit
        let fe1 = vec![1, 1, 1, 1, 1, 1, 1, 1];
        let fe2 = vec![1, 1, 1, 1, 1, 1, 1, 3];
        let mut bktree = BKTree::new(Hamming);
        bktree.add(fe1);
        let (similarity, _hash) = bktree.find(&fe2, 100).next().unwrap();
        assert_eq!(similarity, 1);
    }

    /// Builds a minimal `FileEntry` with the given hash and path.
    /// All fields not relevant to similarity grouping are zeroed/empty.
    fn create_random_file_entry(hash: Vec<u8>, name: &str) -> FileEntry {
        FileEntry {
            // `PathBuf::from` accepts `&str` directly — no intermediate
            // `String` allocation needed.
            path: PathBuf::from(name),
            size: 0,
            dimensions: String::new(),
            modified_date: 0,
            hash,
            similarity: 0,
        }
    }
}
|
||||
|
|
|
@ -10,11 +10,11 @@ homepage = "https://github.com/qarmin/czkawka"
|
|||
repository = "https://github.com/qarmin/czkawka"
|
||||
|
||||
[dependencies]
|
||||
gdk4 = "0.6.3"
|
||||
glib = "0.17.9"
|
||||
gdk4 = "0.6"
|
||||
glib = "0.17"
|
||||
|
||||
humansize = "2.1"
|
||||
chrono = "0.4.24"
|
||||
chrono = "0.4.26"
|
||||
|
||||
# Used for sending stop signal across threads
|
||||
crossbeam-channel = "0.5.8"
|
||||
|
@ -35,19 +35,19 @@ image = "0.24"
|
|||
regex = "1.8"
|
||||
|
||||
# To get image_hasher types
|
||||
image_hasher = "1.1"
|
||||
image_hasher = "1.2"
|
||||
|
||||
# Move files to trash
|
||||
trash = "3.0"
|
||||
|
||||
# For moving files(why std::fs doesn't have such features)
|
||||
# For moving files(why std::fs doesn't have such features?)
|
||||
fs_extra = "1.3"
|
||||
|
||||
# Language
|
||||
i18n-embed = { version = "0.13", features = ["fluent-system", "desktop-requester"] }
|
||||
i18n-embed-fl = "0.6"
|
||||
rust-embed = "6.6"
|
||||
once_cell = "1.17"
|
||||
rust-embed = "6.7"
|
||||
once_cell = "1.18"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
winapi = { version = "0.3.9", features = ["combaseapi", "objbase", "shobjidl_core", "windef", "winerror", "wtypesbase", "winuser"] }
|
||||
|
|
|
@ -775,7 +775,18 @@ fn computer_similar_images(
|
|||
|
||||
// Header
|
||||
let (directory, file) = split_path(&base_file_entry.path);
|
||||
similar_images_add_to_list_store(&list_store, &file, &directory, 0, 0, "", 0, 0, true, true);
|
||||
similar_images_add_to_list_store(
|
||||
&list_store,
|
||||
&file,
|
||||
&directory,
|
||||
base_file_entry.size,
|
||||
base_file_entry.modified_date,
|
||||
&base_file_entry.dimensions,
|
||||
0,
|
||||
hash_size,
|
||||
true,
|
||||
true,
|
||||
);
|
||||
for file_entry in &vec_file_entry {
|
||||
let (directory, file) = split_path(&file_entry.path);
|
||||
similar_images_add_to_list_store(
|
||||
|
@ -1396,15 +1407,20 @@ fn similar_images_add_to_list_store(
|
|||
let string_date;
|
||||
let similarity_string;
|
||||
let color = if is_header { HEADER_ROW_COLOR } else { MAIN_ROW_COLOR };
|
||||
|
||||
if is_header {
|
||||
similarity_string = String::new();
|
||||
} else {
|
||||
similarity_string = similar_images::get_string_from_similarity(&similarity, hash_size);
|
||||
};
|
||||
|
||||
if is_header && !is_reference_folder {
|
||||
size_str = String::new();
|
||||
string_date = String::new();
|
||||
similarity_string = String::new();
|
||||
} else {
|
||||
size_str = format_size(size, BINARY);
|
||||
string_date = NaiveDateTime::from_timestamp_opt(modified_date as i64, 0).unwrap().to_string();
|
||||
similarity_string = similar_images::get_string_from_similarity(&similarity, hash_size);
|
||||
};
|
||||
}
|
||||
|
||||
let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [
|
||||
(ColumnsSimilarImages::ActivatableSelectButton as u32, &(!is_header)),
|
||||
|
|
|
@ -311,7 +311,6 @@ fn popover_custom_select_unselect(
|
|||
{
|
||||
let check_button_path = check_button_path.clone();
|
||||
let check_button_name = check_button_name.clone();
|
||||
let check_button_rust_regex = check_button_rust_regex.clone();
|
||||
let entry_path = entry_path.clone();
|
||||
let entry_name = entry_name.clone();
|
||||
let entry_rust_regex = entry_rust_regex.clone();
|
||||
|
|
Loading…
Reference in a new issue