1
0
Fork 0
mirror of synced 2024-04-28 01:22:53 +12:00

Add support for multithreading to finding same music (#99)

This commit is contained in:
Rafał Mikrut 2020-11-08 09:10:49 +01:00 committed by GitHub
parent 2f72dd9d19
commit 31e35576ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 26 deletions

View file

@ -111,6 +111,9 @@ cargo run --bin czkawka_cli
## Benchmarks
Since Czkawka is written in Rust and aims to be a faster alternative to FSlint (written in Python), we need to compare the speed of these tools.
Currently, I'm working on multithreading support in Czkawka so benchmarks should be updated in versions 1.4.0+.
Also, dupeGuru will probably have a new 4.0.5 release soon.
I prepared a directory and performed a test without any folder exceptions (I removed all directories from the FSlint and Czkawka tabs other than Include Directory). The directory contained 320004 files and 36902 folders, with 108844 duplicate files in 34475 groups which took 4.53 GB.
I set the minimum file size to check to 1 KB in all programs.

View file

@ -11,6 +11,7 @@ use crate::common_messages::Messages;
use crate::common_traits::*;
use audiotags::Tag;
use crossbeam_channel::Receiver;
use rayon::prelude::*;
use std::collections::HashMap;
#[derive(Eq, PartialEq, Clone, Debug)]
@ -73,6 +74,7 @@ impl Info {
pub struct SameMusic {
text_messages: Messages,
information: Info,
music_to_check: Vec<FileEntry>,
music_entries: Vec<FileEntry>,
duplicated_music_entries: Vec<Vec<FileEntry>>,
directories: Directories,
@ -92,12 +94,13 @@ impl SameMusic {
recursive_search: true,
directories: Directories::new(),
excluded_items: ExcludedItems::new(),
music_entries: vec![],
music_entries: Vec::with_capacity(2048),
delete_method: DeleteMethod::None,
music_similarity: MusicSimilarity::NONE,
stopped_search: false,
minimal_file_size: 1024,
duplicated_music_entries: vec![],
music_to_check: Vec::with_capacity(2048),
}
}
@ -107,6 +110,10 @@ impl SameMusic {
self.stopped_search = true;
return;
}
if !self.check_records_multithreaded(stop_receiver) {
self.stopped_search = true;
return;
}
if !self.check_for_duplicates(stop_receiver) {
self.stopped_search = true;
return;
@ -229,10 +236,8 @@ impl SameMusic {
continue 'dir;
}
let tag = Tag::new().read_from_path(&current_file_name).unwrap();
// Creating new file entry
let fe: FileEntry = FileEntry {
let file_entry: FileEntry = FileEntry {
size: metadata.len(),
path: current_file_name.clone(),
modified_date: match metadata.modified() {
@ -248,30 +253,16 @@ impl SameMusic {
continue 'dir;
} // Permissions Denied
},
title: match tag.title() {
Some(t) => t.to_string(),
None => "".to_string(),
},
artist: match tag.artist() {
Some(t) => t.to_string(),
None => "".to_string(),
},
album_title: match tag.album_title() {
Some(t) => t.to_string(),
None => "".to_string(),
},
album_artist: match tag.album_artist() {
Some(t) => t.to_string(),
None => "".to_string(),
},
year: match tag.year() {
Some(t) => t,
None => 0,
},
title: "".to_string(),
artist: "".to_string(),
album_title: "".to_string(),
album_artist: "".to_string(),
year: 0,
};
// Adding files to Vector
self.music_entries.push(fe);
self.music_to_check.push(file_entry);
self.information.number_of_checked_files += 1;
} else {
@ -285,10 +276,62 @@ impl SameMusic {
}
self.information.number_of_music_entries = self.music_entries.len();
Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
Common::print_time(start_time, SystemTime::now(), "check_files".to_string());
true
}
/// Reads the audio tags of every entry in `music_to_check` in parallel and stores the
/// entries whose tags could be read in `music_entries`.
///
/// Entries whose tags cannot be read (data not in UTF-8, etc.) are dropped.
///
/// `stop_receiver` is polled before each file is processed. Returns `false` when a stop
/// message was received (in that case `music_entries` is left untouched), `true` when all
/// files were processed.
fn check_records_multithreaded(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool {
    use std::sync::atomic::{AtomicBool, Ordering};

    let start_time: SystemTime = SystemTime::now();

    // Set by the worker that receives the stop message, so the abort can be reported to the
    // caller once the parallel iterator has finished. Previously the function returned `true`
    // unconditionally and the caller could never notice the interruption.
    let check_was_stopped = AtomicBool::new(false);

    let vec_file_entry = self
        .music_to_check
        .par_iter()
        .map(|file_entry| {
            // Outer `None` stops the whole iteration (see `while_some` below);
            // inner `None` only skips the current file.
            if check_was_stopped.load(Ordering::Relaxed) {
                return None;
            }
            if stop_receiver.map_or(false, |receiver| receiver.try_recv().is_ok()) {
                check_was_stopped.store(true, Ordering::Relaxed);
                return None;
            }

            let tag = match Tag::new().read_from_path(&file_entry.path) {
                Ok(t) => t,
                Err(_) => return Some(None), // Data not in utf-8, etc.
            };

            // Clone only after the tags were read successfully.
            let mut file_entry = file_entry.clone();
            file_entry.title = tag.title().map(|t| t.to_string()).unwrap_or_default();
            file_entry.artist = tag.artist().map(|t| t.to_string()).unwrap_or_default();
            file_entry.album_title = tag.album_title().map(|t| t.to_string()).unwrap_or_default();
            file_entry.album_artist = tag.album_artist().map(|t| t.to_string()).unwrap_or_default();
            file_entry.year = tag.year().unwrap_or(0);

            Some(Some(file_entry))
        })
        .while_some()
        .filter_map(|file_entry| file_entry)
        .collect::<Vec<_>>();

    // Check if the search was aborted; do not publish partial results.
    if check_was_stopped.load(Ordering::Relaxed) {
        return false;
    }

    // Adding files to Vector
    self.music_entries = vec_file_entry;
    // The entries only become known here, so the statistic has to be refreshed at this point.
    self.information.number_of_music_entries = self.music_entries.len();

    Common::print_time(start_time, SystemTime::now(), "check_records_multithreaded".to_string());
    true
}
fn check_for_duplicates(&mut self, stop_receiver: Option<&Receiver<()>>) -> bool {
if MusicSimilarity::NONE == self.music_similarity {
panic!("This can't be none");