1
0
Fork 0
mirror of synced 2024-04-28 09:33:30 +12:00

Add multithread support to similar image finder (#98)

This commit is contained in:
Rafał Mikrut 2020-11-08 07:41:29 +01:00 committed by GitHub
parent 110d6015bc
commit 2f72dd9d19
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 191 additions and 45 deletions

1
Cargo.lock generated
View file

@ -422,6 +422,7 @@ dependencies = [
"humansize",
"image",
"img_hash",
"rayon",
]
[[package]]

View file

@ -12,7 +12,7 @@ repository = "https://github.com/qarmin/czkawka"
[dependencies]
humansize = "1"
blake3 = "0.3"
#rayon = "1"
rayon = "1"
crossbeam-channel = "0.4.4"

View file

@ -8,6 +8,7 @@ use crossbeam_channel::Receiver;
use humansize::{file_size_opts as options, FileSize};
use image::GenericImageView;
use img_hash::HasherConfig;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs;
use std::fs::{File, Metadata};
@ -58,6 +59,7 @@ pub struct SimilarImages {
image_hashes: HashMap<Node, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
stopped_search: bool,
similarity: Similarity,
images_to_check: Vec<FileEntry>,
}
/// Info struct with helpful information about results
@ -95,6 +97,7 @@ impl SimilarImages {
image_hashes: Default::default(),
stopped_search: false,
similarity: Similarity::High,
images_to_check: vec![],
}
}
@ -231,16 +234,10 @@ impl SimilarImages {
continue 'dir;
}
let image = match image::open(&current_file_name) {
Ok(t) => t,
Err(_) => continue 'dir, // Something is wrong with image
};
let dimensions = image.dimensions();
// Creating new file entry
let fe: FileEntry = FileEntry {
path: current_file_name.clone(),
size: metadata.len(),
dimensions: format!("{}x{}", dimensions.0, dimensions.1),
dimensions: "".to_string(),
modified_date: match metadata.modified() {
Ok(t) => match t.duration_since(UNIX_EPOCH) {
Ok(d) => d.as_secs(),
@ -257,15 +254,8 @@ impl SimilarImages {
similarity: Similarity::None,
};
let hasher = HasherConfig::with_bytes_type::<[u8; 8]>().to_hasher();
let hash = hasher.hash_image(&image);
let mut buf = [0u8; 8];
buf.copy_from_slice(&hash.as_bytes());
self.bktree.add(buf);
self.image_hashes.entry(buf).or_insert_with(Vec::<FileEntry>::new);
self.image_hashes.get_mut(&buf).unwrap().push(fe);
self.images_to_check.push(fe);
self.information.size_of_checked_images += metadata.len();
self.information.number_of_checked_files += 1;
@ -286,6 +276,42 @@ impl SimilarImages {
fn sort_images(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool {
let hash_map_modification = SystemTime::now();
let vec_file_entry = self
.images_to_check
.par_iter()
.map(|file_entry| {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// `break` cannot be used inside this closure; returning None makes `.while_some()` below stop the iteration
return None;
}
let mut file_entry = file_entry.clone();
let image = match image::open(file_entry.path.clone()) {
Ok(t) => t,
Err(_) => return Option::from((file_entry, [0u8; 8], false)), // Something is wrong with image
};
let dimensions = image.dimensions();
file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1);
let hasher = HasherConfig::with_bytes_type::<[u8; 8]>().to_hasher();
let hash = hasher.hash_image(&image);
let mut buf = [0u8; 8];
buf.copy_from_slice(&hash.as_bytes());
Option::from((file_entry, buf, true))
})
.while_some()
.filter(|file_entry| file_entry.2)
.map(|file_entry| (file_entry.0, file_entry.1))
.collect::<Vec<(_, _)>>();
for (file_entry, buf) in vec_file_entry {
self.bktree.add(buf);
self.image_hashes.entry(buf).or_insert_with(Vec::<FileEntry>::new);
self.image_hashes.get_mut(&buf).unwrap().push(file_entry.clone());
}
//let hash_map_modification = SystemTime::now();
let similarity: u64 = match self.similarity {
Similarity::VeryHigh => 0,
@ -327,7 +353,7 @@ impl SimilarImages {
for (similarity, similar_hash) in vector_with_found_similar_hashes.iter() {
if *similarity == 0 && hash == *similar_hash {
// This was already readed before
// This was already read before
continue;
} else if hash == *similar_hash {
panic!("I'm not sure if same hash can have distance > 0");
@ -355,7 +381,10 @@ impl SimilarImages {
hashes_to_check.remove(*similar_hash);
}
}
new_vector.push((*vector_of_similar_images).to_owned());
if vector_of_similar_images.len() > 1 {
// Not sure why this may happen
new_vector.push((*vector_of_similar_images).to_owned());
}
}
self.similar_vectors = new_vector;

View file

@ -745,13 +745,13 @@ Author: Rafał Mikrut
<property name="visible">True</property>
<property name="can_focus">False</property>
<child>
<object class="GtkRadioButton" id="radio_button_name">
<object class="GtkRadioButton" id="radio_button_duplicates_name">
<property name="label" translatable="yes">Name(very fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
@ -760,13 +760,13 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_size">
<object class="GtkRadioButton" id="radio_button_duplicates_size">
<property name="label" translatable="yes">Size(very fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
@ -775,13 +775,13 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hashmb">
<object class="GtkRadioButton" id="radio_button_duplicates_hashmb">
<property name="label" translatable="yes">HashMb(fast)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_hash</property>
<property name="group">radio_button_duplicates_hash</property>
</object>
<packing>
<property name="expand">False</property>
@ -790,7 +790,7 @@ Author: Rafał Mikrut
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_hash">
<object class="GtkRadioButton" id="radio_button_duplicates_hash">
<property name="label" translatable="yes">Hash(slow but accurate)</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
@ -1066,6 +1066,90 @@ Author: Rafał Mikrut
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can_focus">False</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can_focus">False</property>
<property name="label" translatable="yes">Similarity level </property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_small">
<property name="label" translatable="yes">Small</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_medium">
<property name="label" translatable="yes">Medium</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">2</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_high">
<property name="label" translatable="yes">High</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
<property name="group">radio_button_similar_images_very_high</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">3</property>
</packing>
</child>
<child>
<object class="GtkRadioButton" id="radio_button_similar_images_very_high">
<property name="label" translatable="yes">Very High</property>
<property name="visible">True</property>
<property name="can_focus">True</property>
<property name="receives_default">False</property>
<property name="active">True</property>
<property name="draw_indicator">True</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">4</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">1</property>
</packing>
</child>
<child>
<object class="GtkScrolledWindow" id="scrolled_window_similar_images_finder">
<property name="visible">True</property>

View file

@ -28,10 +28,14 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
let entry_same_music_minimal_size = gui_data.entry_same_music_minimal_size.clone();
let entry_allowed_extensions = gui_data.entry_allowed_extensions.clone();
let buttons_names = gui_data.buttons_names.clone();
let radio_button_name = gui_data.radio_button_name.clone();
let radio_button_size = gui_data.radio_button_size.clone();
let radio_button_hashmb = gui_data.radio_button_hashmb.clone();
let radio_button_hash = gui_data.radio_button_hash.clone();
let radio_button_duplicates_name = gui_data.radio_button_duplicates_name.clone();
let radio_button_duplicates_size = gui_data.radio_button_duplicates_size.clone();
let radio_button_duplicates_hashmb = gui_data.radio_button_duplicates_hashmb.clone();
let radio_button_duplicates_hash = gui_data.radio_button_duplicates_hash.clone();
let radio_button_similar_images_small = gui_data.radio_button_similar_images_small.clone();
let radio_button_similar_images_medium = gui_data.radio_button_similar_images_medium.clone();
let radio_button_similar_images_high = gui_data.radio_button_similar_images_high.clone();
let radio_button_similar_images_very_high = gui_data.radio_button_similar_images_very_high.clone();
let entry_duplicate_minimal_size = gui_data.entry_duplicate_minimal_size.clone();
let stop_receiver = gui_data.stop_receiver.clone();
let entry_big_files_number = gui_data.entry_big_files_number.clone();
@ -70,13 +74,13 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
get_list_store(&scrolled_window_duplicate_finder).clear();
let check_method;
if radio_button_name.get_active() {
if radio_button_duplicates_name.get_active() {
check_method = duplicate::CheckingMethod::Name;
} else if radio_button_size.get_active() {
} else if radio_button_duplicates_size.get_active() {
check_method = duplicate::CheckingMethod::Size;
} else if radio_button_hashmb.get_active() {
} else if radio_button_duplicates_hashmb.get_active() {
check_method = duplicate::CheckingMethod::HashMB;
} else if radio_button_hash.get_active() {
} else if radio_button_duplicates_hash.get_active() {
check_method = duplicate::CheckingMethod::Hash;
} else {
panic!("No radio button is pressed");
@ -186,6 +190,19 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
Err(_) => 1024 * 16, // By default
};
let similarity;
if radio_button_similar_images_small.get_active() {
similarity = similar_images::Similarity::Small;
} else if radio_button_similar_images_medium.get_active() {
similarity = similar_images::Similarity::Medium;
} else if radio_button_similar_images_high.get_active() {
similarity = similar_images::Similarity::High;
} else if radio_button_similar_images_very_high.get_active() {
similarity = similar_images::Similarity::VeryHigh;
} else {
panic!("No radio button is pressed");
}
// Find similar images
thread::spawn(move || {
let mut sf = SimilarImages::new();
@ -195,6 +212,7 @@ pub fn connect_button_search(gui_data: &GuiData, sender: Sender<Message>) {
sf.set_recursive_search(recursive_search);
sf.set_excluded_items(excluded_items);
sf.set_minimal_file_size(minimal_file_size);
sf.set_similarity(similarity);
sf.find_similar_images(Option::from(&receiver_stop));
let _ = sender.send(Message::SimilarImages(sf));
});

View file

@ -93,10 +93,15 @@ pub struct GuiData {
//// Radio Buttons
// Duplicates
pub radio_button_name: gtk::RadioButton,
pub radio_button_size: gtk::RadioButton,
pub radio_button_hashmb: gtk::RadioButton,
pub radio_button_hash: gtk::RadioButton,
pub radio_button_duplicates_name: gtk::RadioButton,
pub radio_button_duplicates_size: gtk::RadioButton,
pub radio_button_duplicates_hashmb: gtk::RadioButton,
pub radio_button_duplicates_hash: gtk::RadioButton,
pub radio_button_similar_images_small: gtk::RadioButton,
pub radio_button_similar_images_medium: gtk::RadioButton,
pub radio_button_similar_images_high: gtk::RadioButton,
pub radio_button_similar_images_very_high: gtk::RadioButton,
//// Notebooks
pub notebook_main: gtk::Notebook,
@ -266,10 +271,15 @@ impl GuiData {
let check_button_music_year: gtk::CheckButton = builder.get_object("check_button_music_year").unwrap();
//// Radio Buttons
let radio_button_name: gtk::RadioButton = builder.get_object("radio_button_name").unwrap();
let radio_button_size: gtk::RadioButton = builder.get_object("radio_button_size").unwrap();
let radio_button_hashmb: gtk::RadioButton = builder.get_object("radio_button_hashmb").unwrap();
let radio_button_hash: gtk::RadioButton = builder.get_object("radio_button_hash").unwrap();
let radio_button_duplicates_name: gtk::RadioButton = builder.get_object("radio_button_duplicates_name").unwrap();
let radio_button_duplicates_size: gtk::RadioButton = builder.get_object("radio_button_duplicates_size").unwrap();
let radio_button_duplicates_hashmb: gtk::RadioButton = builder.get_object("radio_button_duplicates_hashmb").unwrap();
let radio_button_duplicates_hash: gtk::RadioButton = builder.get_object("radio_button_duplicates_hash").unwrap();
let radio_button_similar_images_small: gtk::RadioButton = builder.get_object("radio_button_similar_images_small").unwrap();
let radio_button_similar_images_medium: gtk::RadioButton = builder.get_object("radio_button_similar_images_medium").unwrap();
let radio_button_similar_images_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_high").unwrap();
let radio_button_similar_images_very_high: gtk::RadioButton = builder.get_object("radio_button_similar_images_very_high").unwrap();
//// Notebooks
let notebook_main: gtk::Notebook = builder.get_object("notebook_main").unwrap();
@ -363,10 +373,14 @@ impl GuiData {
check_button_music_album_title,
check_button_music_album_artist,
check_button_music_year,
radio_button_name,
radio_button_size,
radio_button_hashmb,
radio_button_hash,
radio_button_duplicates_name,
radio_button_duplicates_size,
radio_button_duplicates_hashmb,
radio_button_duplicates_hash,
radio_button_similar_images_small,
radio_button_similar_images_medium,
radio_button_similar_images_high,
radio_button_similar_images_very_high,
notebook_main,
notebook_upper,
notebook_main_children_names,