Optimize a little image compare algorithm (#528)
* Split checking images at 2 functions * Optimize a little image finding * 1.54.0 farawell
This commit is contained in:
parent
d8700f6e78
commit
7da578fa7f
|
@ -1,4 +1,4 @@
|
||||||
use std::collections::{BTreeMap, BTreeSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::fs::OpenOptions;
|
use std::fs::OpenOptions;
|
||||||
use std::fs::{File, Metadata};
|
use std::fs::{File, Metadata};
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
@ -47,7 +47,6 @@ const LOOP_DURATION: u32 = 200; //ms
|
||||||
|
|
||||||
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Serialize, Deserialize)]
|
||||||
pub enum Similarity {
|
pub enum Similarity {
|
||||||
None,
|
|
||||||
Similar(u32),
|
Similar(u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,7 +238,11 @@ impl SimilarImages {
|
||||||
self.stopped_search = true;
|
self.stopped_search = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if !self.sort_images(stop_receiver, progress_sender) {
|
if !self.hash_images(stop_receiver, progress_sender) {
|
||||||
|
self.stopped_search = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if !self.find_similar_hashes(stop_receiver, progress_sender) {
|
||||||
self.stopped_search = true;
|
self.stopped_search = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -412,7 +415,7 @@ impl SimilarImages {
|
||||||
},
|
},
|
||||||
|
|
||||||
hash: Vec::new(),
|
hash: Vec::new(),
|
||||||
similarity: Similarity::None,
|
similarity: Similarity::Similar(0),
|
||||||
};
|
};
|
||||||
|
|
||||||
fe_result.push((current_file_name.to_string_lossy().to_string(), fe));
|
fe_result.push((current_file_name.to_string_lossy().to_string(), fe));
|
||||||
|
@ -450,7 +453,7 @@ impl SimilarImages {
|
||||||
// - Join already read hashes with hashes which were read from file
|
// - Join already read hashes with hashes which were read from file
|
||||||
// - Join all hashes and save it to file
|
// - Join all hashes and save it to file
|
||||||
|
|
||||||
fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
||||||
let hash_map_modification = SystemTime::now();
|
let hash_map_modification = SystemTime::now();
|
||||||
|
|
||||||
let loaded_hash_map;
|
let loaded_hash_map;
|
||||||
|
@ -579,7 +582,6 @@ impl SimilarImages {
|
||||||
for (file_entry, buf) in &vec_file_entry {
|
for (file_entry, buf) in &vec_file_entry {
|
||||||
// Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. contains a log of alpha channel)
|
// Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. contains a log of alpha channel)
|
||||||
if !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
|
if !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
|
||||||
self.bktree.add(buf.clone());
|
|
||||||
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
|
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
|
||||||
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
|
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
|
||||||
}
|
}
|
||||||
|
@ -595,18 +597,32 @@ impl SimilarImages {
|
||||||
}
|
}
|
||||||
|
|
||||||
Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
|
Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_similar_hashes(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
||||||
let hash_map_modification = SystemTime::now();
|
let hash_map_modification = SystemTime::now();
|
||||||
|
let Similarity::Similar(similarity) = self.similarity;
|
||||||
|
|
||||||
let similarity: u32 = match self.similarity {
|
// Results
|
||||||
Similarity::Similar(k) => k,
|
|
||||||
_ => panic!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();
|
let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();
|
||||||
|
|
||||||
let mut available_hashes = self.image_hashes.clone();
|
let mut temp_hashes = Default::default();
|
||||||
|
mem::swap(&mut temp_hashes, &mut self.image_hashes);
|
||||||
|
|
||||||
let mut this_time_check_hashes;
|
let mut this_time_check_hashes;
|
||||||
let mut master_of_group: BTreeSet<Vec<u8>> = Default::default(); // Lista wszystkich głównych hashy, które odpowiadają za porównywanie
|
let mut master_of_group: HashSet<Vec<u8>> = Default::default(); // Lista wszystkich głównych hashy, które odpowiadają za porównywanie
|
||||||
|
|
||||||
|
let mut available_hashes: HashMap<Vec<u8>, Vec<FileEntry>> = Default::default();
|
||||||
|
for (hash, vec_file_entry) in temp_hashes {
|
||||||
|
// There exists 2 or more hashes with same hash
|
||||||
|
if vec_file_entry.len() >= 2 {
|
||||||
|
collected_similar_images.insert(hash, vec_file_entry);
|
||||||
|
} else {
|
||||||
|
self.bktree.add(hash.clone());
|
||||||
|
available_hashes.insert(hash, vec_file_entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//// PROGRESS THREAD START
|
//// PROGRESS THREAD START
|
||||||
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
||||||
|
@ -636,37 +652,10 @@ impl SimilarImages {
|
||||||
thread::spawn(|| {})
|
thread::spawn(|| {})
|
||||||
};
|
};
|
||||||
//// PROGRESS THREAD END
|
//// PROGRESS THREAD END
|
||||||
|
if similarity >= 1 {
|
||||||
|
for current_similarity in 1..=similarity {
|
||||||
|
this_time_check_hashes = available_hashes.clone();
|
||||||
|
|
||||||
for current_similarity in 0..=similarity {
|
|
||||||
this_time_check_hashes = available_hashes.clone();
|
|
||||||
|
|
||||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
|
||||||
// End thread which send info to gui
|
|
||||||
progress_thread_run.store(false, Ordering::Relaxed);
|
|
||||||
progress_thread_handle.join().unwrap();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (hash, vec_file_entry) in &this_time_check_hashes {
|
|
||||||
atomic_mode_counter.fetch_add(1, Ordering::Relaxed);
|
|
||||||
|
|
||||||
let vector_with_found_similar_hashes = self
|
|
||||||
.bktree
|
|
||||||
.find(hash, similarity)
|
|
||||||
.filter(|r| (r.0 == current_similarity) && !master_of_group.contains(r.1) && available_hashes.contains_key(r.1))
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// Not found any hash with specific distance
|
|
||||||
if vector_with_found_similar_hashes.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This one picture doesn't have similar pictures except self in similarity 0
|
|
||||||
if current_similarity == 0 && vector_with_found_similar_hashes.len() == 1 {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This shouldn't be executed too much times, so it should be quite fast to check this
|
|
||||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||||
// End thread which send info to gui
|
// End thread which send info to gui
|
||||||
progress_thread_run.store(false, Ordering::Relaxed);
|
progress_thread_run.store(false, Ordering::Relaxed);
|
||||||
|
@ -674,43 +663,51 @@ impl SimilarImages {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Jeśli jeszcze nie dodał, to dodaje teraz grupę główną do już obrobionych
|
for (hash, vec_file_entry) in this_time_check_hashes.into_iter() {
|
||||||
if !master_of_group.contains(hash) {
|
atomic_mode_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
master_of_group.insert(hash.clone());
|
|
||||||
collected_similar_images.insert(hash.clone(), Vec::new());
|
|
||||||
|
|
||||||
let mut things: Vec<FileEntry> = vec_file_entry
|
// Finds hashes with specific distance to
|
||||||
.iter()
|
let vector_with_found_similar_hashes = self
|
||||||
.map(|fe| FileEntry {
|
.bktree
|
||||||
path: fe.path.clone(),
|
.find(&hash, similarity)
|
||||||
size: fe.size,
|
.filter(|(similarity, hash)| (*similarity == current_similarity) && !master_of_group.contains(*hash) && available_hashes.contains_key(*hash))
|
||||||
dimensions: fe.dimensions.clone(),
|
.collect::<Vec<_>>();
|
||||||
modified_date: fe.modified_date,
|
|
||||||
hash: fe.hash.clone(),
|
|
||||||
similarity: Similarity::Similar(0),
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
collected_similar_images.get_mut(hash).unwrap().append(&mut things);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Since we checked hash, we don't need to check it again
|
// Not found any hash with specific distance
|
||||||
if current_similarity != 0 {
|
if vector_with_found_similar_hashes.is_empty() {
|
||||||
vector_with_found_similar_hashes.iter().for_each(|e| {
|
continue;
|
||||||
let mut things: Vec<FileEntry> = available_hashes
|
}
|
||||||
.get_mut(e.1)
|
|
||||||
.unwrap()
|
// Current checked hash isn't in any group of similarity, so we create one, because found similar images
|
||||||
.iter()
|
if !master_of_group.contains(&hash) {
|
||||||
.map(|fe| FileEntry {
|
master_of_group.insert(hash.clone());
|
||||||
path: fe.path.clone(),
|
collected_similar_images.insert(hash.clone(), Vec::new());
|
||||||
size: fe.size,
|
|
||||||
dimensions: fe.dimensions.clone(),
|
let mut things: Vec<FileEntry> = vec_file_entry
|
||||||
modified_date: fe.modified_date,
|
.into_iter()
|
||||||
hash: Vec::new(),
|
.map(|mut fe| {
|
||||||
similarity: Similarity::Similar(current_similarity),
|
fe.similarity = Similarity::Similar(0);
|
||||||
|
fe
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect();
|
||||||
collected_similar_images.get_mut(hash).unwrap().append(&mut things);
|
collected_similar_images.get_mut(&hash).unwrap().append(&mut things);
|
||||||
available_hashes.remove(e.1);
|
|
||||||
|
// This shouldn't be executed too much times, so it should be quite fast to check this
|
||||||
|
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||||
|
// End thread which send info to gui
|
||||||
|
progress_thread_run.store(false, Ordering::Relaxed);
|
||||||
|
progress_thread_handle.join().unwrap();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vector_with_found_similar_hashes.iter().for_each(|(_similarity, other_hash)| {
|
||||||
|
let mut vec_fe = available_hashes.remove(*other_hash).unwrap();
|
||||||
|
for fe in &mut vec_fe {
|
||||||
|
fe.similarity = Similarity::Similar(current_similarity)
|
||||||
|
}
|
||||||
|
|
||||||
|
collected_similar_images.get_mut(&hash).unwrap().append(&mut vec_fe);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -719,7 +716,8 @@ impl SimilarImages {
|
||||||
progress_thread_run.store(false, Ordering::Relaxed);
|
progress_thread_run.store(false, Ordering::Relaxed);
|
||||||
progress_thread_handle.join().unwrap();
|
progress_thread_handle.join().unwrap();
|
||||||
|
|
||||||
self.similar_vectors = collected_similar_images.values().cloned().collect();
|
// self.similar_vectors = collected_similar_images.into_values().collect(); // TODO use this in Rust 1.54.0
|
||||||
|
self.similar_vectors = collected_similar_images.values().cloned().collect(); // 1.53.0 version
|
||||||
|
|
||||||
if self.exclude_images_with_same_size {
|
if self.exclude_images_with_same_size {
|
||||||
let mut new_vector = Default::default();
|
let mut new_vector = Default::default();
|
||||||
|
@ -1028,9 +1026,9 @@ pub fn get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> Str
|
||||||
};
|
};
|
||||||
|
|
||||||
match similarity {
|
match similarity {
|
||||||
Similarity::None => {
|
// Similarity::None => {
|
||||||
panic!()
|
// panic!()
|
||||||
}
|
// }
|
||||||
Similarity::Similar(h) => {
|
Similarity::Similar(h) => {
|
||||||
// #[cfg(debug_assertions)]
|
// #[cfg(debug_assertions)]
|
||||||
// {
|
// {
|
||||||
|
|
Loading…
Reference in a new issue