1
0
Fork 0
mirror of synced 2024-06-27 02:21:05 +12:00

Ending words

This commit is contained in:
Rafał Mikrut 2022-07-02 20:56:00 +02:00
parent 905c257451
commit 55808927d9
2 changed files with 73 additions and 44 deletions

8
Cargo.lock generated
View file

@ -1981,18 +1981,18 @@ dependencies = [
[[package]] [[package]]
name = "pin-project" name = "pin-project"
version = "1.0.10" version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58ad3879ad3baf4e44784bc6a718a8698867bb991f8ce24d1bcbe2cfb4c3a75e" checksum = "78203e83c48cffbe01e4a2d35d566ca4de445d79a85372fc64e378bfc812a260"
dependencies = [ dependencies = [
"pin-project-internal", "pin-project-internal",
] ]
[[package]] [[package]]
name = "pin-project-internal" name = "pin-project-internal"
version = "1.0.10" version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "744b6f092ba29c3650faf274db506afd39944f48420f6c86b17cfe0ee1cb36bb" checksum = "710faf75e1b33345361201d36d04e98ac1ed8909151a017ed384700836104c74"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View file

@ -741,10 +741,10 @@ impl SimilarImages {
// // Jeśli tak to zmniejsz ilość dzieci starego rodzica, dodaj ilość dzieci w nowym rodzicu i podmień rekord hashes_similarity // // Jeśli tak to zmniejsz ilość dzieci starego rodzica, dodaj ilość dzieci w nowym rodzicu i podmień rekord hashes_similarity
// // Jeśli nie to dodaj nowy rekord w hashes_similarity jak i hashes_parents z liczbą dzieci równą 1 // // Jeśli nie to dodaj nowy rekord w hashes_similarity jak i hashes_parents z liczbą dzieci równą 1
for (index, hash_to_check) in hashes_to_check.into_iter().enumerate() { for (index, hash_to_check) in hashes_to_check.iter().enumerate() {
// Don't check for user stop too often // Don't check for user stop too often
// Also don't add data to variables too often // Also don't add data to variables too often
const CYCLES_COUNTER: usize = 100; const CYCLES_COUNTER: usize = 50;
if index % CYCLES_COUNTER == 0 && index != 0 { if index % CYCLES_COUNTER == 0 && index != 0 {
atomic_mode_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed); atomic_mode_counter.fetch_add(CYCLES_COUNTER, Ordering::Relaxed);
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
@ -753,11 +753,6 @@ impl SimilarImages {
} }
} }
// Hash is already used as child
if hashes_similarity.contains_key(hash_to_check) {
continue;
}
let mut found_items = self let mut found_items = self
.bktree .bktree
.find(hash_to_check, tolerance) .find(hash_to_check, tolerance)
@ -767,6 +762,7 @@ impl SimilarImages {
found_items.sort_unstable_by_key(|f| f.0); found_items.sort_unstable_by_key(|f| f.0);
for (similarity, other_hash) in found_items { for (similarity, other_hash) in found_items {
// SSSTART
// Cannot use a hash that is already used as a master record (has more than 0 children) // Cannot use a hash that is already used as a master record (has more than 0 children)
if let Some(children_number) = hashes_parents.get(other_hash) { if let Some(children_number) = hashes_parents.get(other_hash) {
if *children_number > 0 { if *children_number > 0 {
@ -776,15 +772,32 @@ impl SimilarImages {
// If there is already record, with smaller sensitivity, then replace it // If there is already record, with smaller sensitivity, then replace it
let mut need_to_add = false; let mut need_to_add = false;
if let Some((parent_hash, other_similarity)) = hashes_similarity.get(other_hash) { let mut need_to_check = false;
if similarity < *other_similarity {
need_to_add = true; // TODO replace variables from above with closures
*hashes_parents.get_mut(parent_hash).unwrap() -= 1; // If current checked hash, have parent, first we must check if similarity between them is lower than checked item
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
if *current_similarity_with_parent > similarity {
need_to_check = true;
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
hashes_similarity.remove(hash_to_check).unwrap();
} }
} else {
need_to_check = true;
} }
// But when there is no record, just add it
else { if need_to_check {
need_to_add = true if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
if *other_similarity > similarity {
need_to_add = true;
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
}
}
// But when there is no record, just add it
else {
need_to_add = true
}
} }
if need_to_add { if need_to_add {
@ -796,6 +809,7 @@ impl SimilarImages {
hashes_parents.insert(hash_to_check, 1); hashes_parents.insert(hash_to_check, 1);
} }
} }
// ENND
} }
} }
@ -816,25 +830,21 @@ impl SimilarImages {
} }
{ {
let mut new_hashes_parents: HashMap<&Vec<u8>, u32> = Default::default(); let mut hashes_parents: HashMap<&Vec<u8>, u32> = Default::default();
let mut new_hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default(); let mut hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)> = Default::default();
let mut iter = parts.into_iter(); let mut iter = parts.into_iter();
// At start fill arrays with first item // At start fill arrays with first item
// The normal algorithm would do exactly the same thing, but slower, one record after another // The normal algorithm would do exactly the same thing, but slower, one record after another
if let Some((hashes_parents, hashes_similarity)) = iter.next() { if let Some((first_hashes_parents, first_hashes_similarity)) = iter.next() {
new_hashes_parents = hashes_parents; hashes_parents = first_hashes_parents;
new_hashes_similarity = hashes_similarity; hashes_similarity = first_hashes_similarity;
} }
for (_hashes_with_parents, hashes_with_similarity) in iter { for (_partial_hashes_with_parents, partial_hashes_with_similarity) in iter {
for (hash_to_check, (other_hash, similarity)) in hashes_with_similarity { for (hash_to_check, (other_hash, similarity)) in partial_hashes_with_similarity {
// Hash is already used as child // SSSTART
if new_hashes_similarity.contains_key(hash_to_check) {
continue;
}
// Cannot use a hash that is already used as a master record (has more than 0 children) // Cannot use a hash that is already used as a master record (has more than 0 children)
if let Some(children_number) = new_hashes_parents.get(other_hash) { if let Some(children_number) = hashes_parents.get(other_hash) {
if *children_number > 0 { if *children_number > 0 {
continue; continue;
} }
@ -842,42 +852,60 @@ impl SimilarImages {
// If there is already record, with smaller sensitivity, then replace it // If there is already record, with smaller sensitivity, then replace it
let mut need_to_add = false; let mut need_to_add = false;
if let Some((parent_hash, other_similarity)) = new_hashes_similarity.get(other_hash) { let mut need_to_check = false;
if similarity < *other_similarity {
need_to_add = true; // TODO replace variables from above with closures
*new_hashes_parents.get_mut(parent_hash).unwrap() -= 1; // If current checked hash, have parent, first we must check if similarity between them is lower than checked item
if let Some((current_parent_hash, current_similarity_with_parent)) = hashes_similarity.get(hash_to_check) {
if *current_similarity_with_parent > similarity {
need_to_check = true;
*hashes_parents.get_mut(current_parent_hash).unwrap() -= 1;
hashes_similarity.remove(hash_to_check).unwrap();
} }
} else {
need_to_check = true;
} }
// But when there is no record, just add it
else { if need_to_check {
need_to_add = true if let Some((other_parent_hash, other_similarity)) = hashes_similarity.get(other_hash) {
if *other_similarity > similarity {
need_to_add = true;
*hashes_parents.get_mut(other_parent_hash).unwrap() -= 1;
}
}
// But when there is no record, just add it
else {
need_to_add = true
}
} }
if need_to_add { if need_to_add {
new_hashes_similarity.insert(other_hash, (hash_to_check, similarity)); hashes_similarity.insert(other_hash, (hash_to_check, similarity));
if let Some(number_of_children) = new_hashes_parents.get_mut(hash_to_check) { if let Some(number_of_children) = hashes_parents.get_mut(hash_to_check) {
*number_of_children += 1; *number_of_children += 1;
} else { } else {
new_hashes_parents.insert(hash_to_check, 1); hashes_parents.insert(hash_to_check, 1);
} }
} }
// ENND
} }
} }
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
debug_check_for_duplicated_things(new_hashes_parents.clone(), new_hashes_similarity.clone(), all_hashed_images.clone(), "LATTER"); debug_check_for_duplicated_things(hashes_parents.clone(), hashes_similarity.clone(), all_hashed_images.clone(), "LATTER");
// Collecting results // Collecting results
for (parent_hash, child_number) in new_hashes_parents { for (parent_hash, child_number) in hashes_parents {
if child_number > 0 { if child_number > 0 {
let vec_fe = all_hashed_images.get(parent_hash).unwrap().clone(); let vec_fe = all_hashed_images.get(parent_hash).unwrap().clone();
collected_similar_images.insert(parent_hash.clone(), vec_fe); collected_similar_images.insert(parent_hash.clone(), vec_fe);
} }
} }
for (child_hash, (parent_hash, similarity)) in new_hashes_similarity { for (child_hash, (parent_hash, similarity)) in hashes_similarity {
let mut vec_fe = all_hashed_images.get(child_hash).unwrap().clone(); let mut vec_fe = all_hashed_images.get(child_hash).unwrap().clone();
for mut fe in &mut vec_fe { for mut fe in &mut vec_fe {
fe.similarity = similarity; fe.similarity = similarity;
@ -1318,6 +1346,7 @@ pub fn test_image_conversion_speed() {
} }
} }
#[allow(dead_code)]
fn debug_check_for_duplicated_things( fn debug_check_for_duplicated_things(
hashes_parents: HashMap<&Vec<u8>, u32>, hashes_parents: HashMap<&Vec<u8>, u32>,
hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)>, hashes_similarity: HashMap<&Vec<u8>, (&Vec<u8>, u32)>,