Fix similar images algorithm (#799)
* Fixed missing images with similarity equal to 0 * Checking * Unify algorithm between two functions * Finally fix problem with missing images * Lock and debug * No comment
This commit is contained in:
parent
24b64a32c6
commit
d29935d208
81
Cargo.lock
generated
81
Cargo.lock
generated
|
@ -352,9 +352,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.2.17"
|
||||
version = "3.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29e724a68d9319343bb3328c9cc2dfde263f4b3142ee1059a9980580171c954b"
|
||||
checksum = "b15f2ea93df33549dbe2e8eecd1ca55269d63ae0b3ba1f55db030817d1c2867f"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
|
@ -369,9 +369,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "3.2.17"
|
||||
version = "3.2.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13547f7012c01ab4a0e8f8967730ada8f9fdf419e8b6c792788f39cf4e46eefa"
|
||||
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro-error",
|
||||
|
@ -409,9 +409,9 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
|
|||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.3"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1079fb8528d9f9c888b1e8aa651e6e079ade467323d58f75faf1d30b1808f540"
|
||||
checksum = "dc948ebb96241bb40ab73effeb80d9f93afaad49359d159a5e61be51619fe813"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
@ -660,16 +660,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "exr"
|
||||
version = "1.4.2"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "14cc0e06fb5f67e5d6beadf3a382fec9baca1aa751c6d5368fdeee7e5932c215"
|
||||
checksum = "78c26a90d9dd411a3d119d6f55752fb4c134ca243250c32fb9cab7b2561638d2"
|
||||
dependencies = [
|
||||
"bit_field",
|
||||
"deflate 1.0.0",
|
||||
"flume",
|
||||
"half",
|
||||
"inflate",
|
||||
"lebe",
|
||||
"miniz_oxide",
|
||||
"smallvec",
|
||||
"threadpool",
|
||||
]
|
||||
|
@ -827,9 +826,9 @@ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
|||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab30e97ab6aacfe635fad58f22c2bb06c8b685f7421eb1e064a729e2a5f481fa"
|
||||
checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -842,9 +841,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bfc52cbddcfd745bf1740338492bb0bd83d76c67b445f91c5fb29fae29ecaa1"
|
||||
checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
|
@ -852,15 +851,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2acedae88d38235936c3922476b10fced7b2b68136f5e3c03c2d5be348a1115"
|
||||
checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d11aa21b5b587a64682c0094c2bdd4df0076c5324961a40cc3abd7f37930528"
|
||||
checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
|
@ -869,15 +868,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93a66fc6d035a26a3ae255a6d2bca35eda63ae4c5512bef54449113f7a1228e5"
|
||||
checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0db9cce532b0eae2ccf2766ab246f114b56b9cf6d445e00c2549fbc100ca045d"
|
||||
checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -886,21 +885,21 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca0bae1fe9752cf7fd9b0064c674ae63f97b37bc714d745cbde0afb7ec4e6765"
|
||||
checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "842fc63b931f4056a24d59de13fb1272134ce261816e063e634ad0c15cdc5306"
|
||||
checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.23"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0828a5471e340229c11c77ca80017937ce3c58cb788a17e5f1c2d5c485a9577"
|
||||
checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -1557,9 +1556,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.7"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
|
||||
checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"scopeguard",
|
||||
|
@ -1940,9 +1939,9 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
|
|||
|
||||
[[package]]
|
||||
name = "pest"
|
||||
version = "2.2.1"
|
||||
version = "2.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69486e2b8c2d2aeb9762db7b4e00b0331156393555cff467f4163ff06821eef8"
|
||||
checksum = "4b0560d531d1febc25a3c9398a62a71256c0178f2e3443baedd9ad4bb8c9deb4"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"ucd-trie",
|
||||
|
@ -2352,18 +2351,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.143"
|
||||
version = "1.0.144"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53e8e5d5b70924f74ff5c6d64d9a5acd91422117c60f48c4e07855238a254553"
|
||||
checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.143"
|
||||
version = "1.0.144"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d3d8e8de557aee63c26b85b947f5e59b690d0454c753f3adeb5cd7835ab88391"
|
||||
checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -2372,9 +2371,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.83"
|
||||
version = "1.0.85"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7"
|
||||
checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -2779,9 +2778,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.13"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db76ff9fa4b1458b3c7f077f3ff9887394058460d21e634355b273aaf11eea45"
|
||||
checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"libc",
|
||||
|
@ -3205,5 +3204,5 @@ dependencies = [
|
|||
"hmac",
|
||||
"pbkdf2",
|
||||
"sha1",
|
||||
"time 0.3.13",
|
||||
"time 0.3.14",
|
||||
]
|
||||
|
|
|
@ -729,19 +729,28 @@ impl SimilarImages {
|
|||
};
|
||||
//// PROGRESS THREAD END
|
||||
|
||||
for hash in &all_hashes {
|
||||
self.bktree.add(hash.to_vec());
|
||||
// Don't use hashes with multiple images in bktree, because they will always be master of group and cannot be find by other hashes
|
||||
let mut additional_chunk_to_check: Vec<_> = Default::default();
|
||||
let mut hashes_with_multiple_images: HashSet<_> = Default::default(); // Fast way to check if hash have multiple imaages
|
||||
for (hash, vec_files) in &all_hashed_images {
|
||||
if vec_files.len() >= 2 {
|
||||
additional_chunk_to_check.push(hash);
|
||||
hashes_with_multiple_images.insert(hash);
|
||||
} else {
|
||||
self.bktree.add(hash.to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
let number_of_processors = num_cpus::get();
|
||||
let chunk_size = all_hashes.len() / number_of_processors;
|
||||
let chunks: Vec<_> = if chunk_size > 0 { all_hashes.chunks(chunk_size).collect() } else { vec![&all_hashes] };
|
||||
let mut chunks: Vec<_> = if chunk_size > 0 { all_hashes.chunks(chunk_size).collect() } else { vec![&all_hashes] };
|
||||
chunks.push(&additional_chunk_to_check);
|
||||
|
||||
let parts: Vec<_> = chunks
|
||||
.into_par_iter()
|
||||
.map(|hashes_to_check| {
|
||||
let mut hashes_parents: HashMap<&Vec<u8>, u32> = Default::default(); // Hash used as parent, childrens
|
||||
let mut hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default(); // Hash used as child, (parent_hash,similarity)
|
||||
let mut hashes_parents: HashMap<&Vec<u8>, u32> = Default::default(); // Hashes used as parent (hash, children_number_of_hash)
|
||||
let mut hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default(); // Hashes used as child, (parent_hash, similarity)
|
||||
|
||||
// Sprawdź czy hash nie jest użyty jako master gdzie indziej
|
||||
// Jeśli tak to przejdź do sprawdzania kolejnego elementu
|
||||
|
@ -752,15 +761,16 @@ impl SimilarImages {
|
|||
|
||||
for (index, hash_to_check) in hashes_to_check.iter().enumerate() {
|
||||
// Don't check for user stop too often
|
||||
// Also don't add too ofter data to variables
|
||||
const CYCLES_COUNTER: usize = 50;
|
||||
if index % CYCLES_COUNTER == 0 && index != 0 {
|
||||
// Also don't add too often data to atomic variable
|
||||
const CYCLES_COUNTER: usize = 0b111111;
|
||||
if ((index & CYCLES_COUNTER) == CYCLES_COUNTER) && index != 0 {
|
||||
atomic_mode_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed);
|
||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||
check_was_stopped.store(true, Ordering::Relaxed);
|
||||
return None;
|
||||
}
|
||||
}
|
||||
hashes_parents.insert(hash_to_check, 0);
|
||||
|
||||
let mut found_items = self
|
||||
.bktree
|
||||
|
@ -770,55 +780,15 @@ impl SimilarImages {
|
|||
|
||||
found_items.sort_unstable_by_key(|f| f.0);
|
||||
|
||||
for (similarity, other_hash) in found_items {
|
||||
// SSSTART
|
||||
// Cannot use hash if already is used as master record(have more than 0 children)
|
||||
if let Some(children_number) = hashes_parents.get(other_hash) {
|
||||
if *children_number > 0 {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is already record, with smaller sensitivity, then replace it
|
||||
let mut need_to_add = false;
|
||||
let mut need_to_check = false;
|
||||
|
||||
// TODO replace variables from above with closures
|
||||
// If current checked hash, have parent, first we must check if similarity between them is lower than checked item
|
||||
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
|
||||
if *current_similarity_with_parent > similarity {
|
||||
need_to_check = true;
|
||||
|
||||
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
|
||||
hashes_similarity.remove(hash_to_check).unwrap();
|
||||
}
|
||||
} else {
|
||||
need_to_check = true;
|
||||
}
|
||||
|
||||
if need_to_check {
|
||||
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
|
||||
if *other_similarity > similarity {
|
||||
need_to_add = true;
|
||||
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
|
||||
}
|
||||
}
|
||||
// But when there is no record, just add it
|
||||
else {
|
||||
need_to_add = true
|
||||
}
|
||||
}
|
||||
|
||||
if need_to_add {
|
||||
hashes_similarity.insert(other_hash, (hash_to_check, similarity));
|
||||
|
||||
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
|
||||
*number_of_children += 1;
|
||||
} else {
|
||||
hashes_parents.insert(hash_to_check, 1);
|
||||
}
|
||||
}
|
||||
// ENND
|
||||
for (similarity, compared_hash) in found_items {
|
||||
image_to_check(
|
||||
&mut hashes_parents,
|
||||
&mut hashes_similarity,
|
||||
&hashes_with_multiple_images,
|
||||
hash_to_check,
|
||||
compared_hash,
|
||||
similarity,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -849,66 +819,39 @@ impl SimilarImages {
|
|||
hashes_similarity = first_hashes_similarity;
|
||||
}
|
||||
|
||||
for (_partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
|
||||
for (hash_to_check, (other_hash, similarity)) in partial_hashes_with_similarity {
|
||||
// SSSTART
|
||||
// Cannot use hash if already is used as master record(have more than 0 children)
|
||||
if let Some(children_number) = hashes_parents.get(other_hash) {
|
||||
if *children_number > 0 {
|
||||
continue;
|
||||
}
|
||||
for (partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
|
||||
for (parent_hash, _child_number) in partial_hashes_with_parents {
|
||||
if !hashes_parents.contains_key(parent_hash) && !hashes_similarity.contains_key(parent_hash) {
|
||||
hashes_parents.insert(parent_hash, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// If there is already record, with smaller sensitivity, then replace it
|
||||
let mut need_to_add = false;
|
||||
let mut need_to_check = false;
|
||||
|
||||
// TODO replace variables from above with closures
|
||||
// If current checked hash, have parent, first we must check if similarity between them is lower than checked item
|
||||
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
|
||||
if *current_similarity_with_parent > similarity {
|
||||
need_to_check = true;
|
||||
|
||||
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
|
||||
hashes_similarity.remove(hash_to_check).unwrap();
|
||||
}
|
||||
} else {
|
||||
need_to_check = true;
|
||||
}
|
||||
|
||||
if need_to_check {
|
||||
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
|
||||
if *other_similarity > similarity {
|
||||
need_to_add = true;
|
||||
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
|
||||
}
|
||||
}
|
||||
// But when there is no record, just add it
|
||||
else {
|
||||
need_to_add = true
|
||||
}
|
||||
}
|
||||
|
||||
if need_to_add {
|
||||
hashes_similarity.insert(other_hash, (hash_to_check, similarity));
|
||||
|
||||
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
|
||||
*number_of_children += 1;
|
||||
} else {
|
||||
hashes_parents.insert(hash_to_check, 1);
|
||||
}
|
||||
}
|
||||
// ENND
|
||||
for (hash_to_check, (compared_hash, similarity)) in partial_hashes_with_similarity {
|
||||
image_to_check(
|
||||
&mut hashes_parents,
|
||||
&mut hashes_similarity,
|
||||
&hashes_with_multiple_images,
|
||||
hash_to_check,
|
||||
compared_hash,
|
||||
similarity,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
debug_check_for_duplicated_things(hashes_parents.clone(), hashes_similarity.clone(), all_hashed_images.clone(), "LATTER");
|
||||
|
||||
// Collecting results
|
||||
|
||||
// Just simple check if all original hashes with multiple entries are available in end results
|
||||
let original_hashes_at_start = hashes_with_multiple_images.len();
|
||||
let original_hashes_in_end_results = hashes_parents
|
||||
.iter()
|
||||
.filter(|(parent_hash, _child_number)| hashes_with_multiple_images.contains(*parent_hash))
|
||||
.count();
|
||||
assert_eq!(original_hashes_at_start, original_hashes_in_end_results);
|
||||
// Collecting results to vector
|
||||
for (parent_hash, child_number) in hashes_parents {
|
||||
if child_number > 0 {
|
||||
// If hash contains other hasher OR multiple images are available for checked hash
|
||||
if child_number > 0 || hashes_with_multiple_images.contains(parent_hash) {
|
||||
let vec_fe = all_hashed_images.get(parent_hash).unwrap().clone();
|
||||
collected_similar_images.insert(parent_hash.clone(), vec_fe);
|
||||
}
|
||||
|
@ -1040,6 +983,61 @@ impl SimilarImages {
|
|||
}
|
||||
}
|
||||
|
||||
fn image_to_check<'a>(
|
||||
hashes_parents: &mut HashMap<&'a Vec<u8>, u32>,
|
||||
hashes_similarity: &mut HashMap<&'a Vec<u8>, (&'a Vec<u8>, u32)>,
|
||||
hashes_with_multiple_images: &HashSet<&'a Vec<u8>>,
|
||||
hash_to_check: &'a Vec<u8>,
|
||||
compared_hash: &'a Vec<u8>,
|
||||
similarity: u32,
|
||||
) {
|
||||
if let Some(children_number) = hashes_parents.get(compared_hash) {
|
||||
if *children_number > 0 || hashes_with_multiple_images.contains(compared_hash) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is already record, with smaller sensitivity, then replace it
|
||||
let mut need_to_add = false;
|
||||
let mut need_to_check = false;
|
||||
|
||||
// TODO consider to replace variables from above with closures
|
||||
// If current checked hash, have parent, first we must check if similarity between them is lower than checked item
|
||||
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
|
||||
if *current_similarity_with_parent > similarity {
|
||||
need_to_check = true;
|
||||
|
||||
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
|
||||
hashes_similarity.remove(hash_to_check).unwrap();
|
||||
}
|
||||
} else {
|
||||
need_to_check = true;
|
||||
}
|
||||
|
||||
if need_to_check {
|
||||
if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(compared_hash) {
|
||||
if *other_similarity > similarity {
|
||||
need_to_add = true;
|
||||
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
|
||||
}
|
||||
}
|
||||
// But when there is no record, just add it
|
||||
else {
|
||||
need_to_add = true
|
||||
}
|
||||
}
|
||||
|
||||
if need_to_add {
|
||||
hashes_similarity.insert(compared_hash, (hash_to_check, similarity));
|
||||
|
||||
if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
|
||||
*number_of_children += 1;
|
||||
} else {
|
||||
hashes_parents.insert(hash_to_check, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SimilarImages {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
|
@ -1368,12 +1366,14 @@ fn debug_check_for_duplicated_things(
|
|||
all_hashed_images: HashMap<Vec<u8>, Vec<FileEntry>>,
|
||||
numm: &str,
|
||||
) {
|
||||
let mut found_broken_thing = false;
|
||||
let mut hashmap_hashes: HashSet<_> = Default::default();
|
||||
let mut hashmap_names: HashSet<_> = Default::default();
|
||||
for (hash, number_of_children) in &hashes_parents {
|
||||
if *number_of_children > 0 {
|
||||
if hashmap_hashes.contains(*hash) {
|
||||
println!("------1--HASH--{} {:?}", numm, all_hashed_images.get(*hash).unwrap());
|
||||
found_broken_thing = true;
|
||||
}
|
||||
hashmap_hashes.insert(hash.to_vec());
|
||||
|
||||
|
@ -1381,6 +1381,7 @@ fn debug_check_for_duplicated_things(
|
|||
let name = i.path.to_string_lossy().to_string();
|
||||
if hashmap_names.contains(&name) {
|
||||
println!("------1--NAME--{} {:?}", numm, name);
|
||||
found_broken_thing = true;
|
||||
}
|
||||
hashmap_names.insert(name);
|
||||
}
|
||||
|
@ -1389,6 +1390,7 @@ fn debug_check_for_duplicated_things(
|
|||
for hash in hashes_similarity.keys() {
|
||||
if hashmap_hashes.contains(*hash) {
|
||||
println!("------2--HASH--{} {:?}", numm, all_hashed_images.get(*hash).unwrap());
|
||||
found_broken_thing = true;
|
||||
}
|
||||
hashmap_hashes.insert(hash.to_vec());
|
||||
|
||||
|
@ -1396,8 +1398,13 @@ fn debug_check_for_duplicated_things(
|
|||
let name = i.path.to_string_lossy().to_string();
|
||||
if hashmap_names.contains(&name) {
|
||||
println!("------2--NAME--{} {:?}", numm, name);
|
||||
found_broken_thing = true;
|
||||
}
|
||||
hashmap_names.insert(name);
|
||||
}
|
||||
}
|
||||
|
||||
if found_broken_thing {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue