Parallel BFS with Rayon (#502)
* Tester * Parallel BFS with Rayon * Rustfmt * Refactor code * Use `extend` instead of `append` * Remove no longer needed derives
This commit is contained in:
parent
c041ab416e
commit
859e01b30e
|
@ -185,7 +185,7 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
match self.check_method {
|
match self.check_method {
|
||||||
CheckingMethod::Name => {
|
CheckingMethod::Name => {
|
||||||
if !self.check_files_name(stop_receiver, progress_sender) {
|
if !self.check_files_name_threaded(stop_receiver, progress_sender) {
|
||||||
self.stopped_search = true;
|
self.stopped_search = true;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -314,7 +314,7 @@ impl DuplicateFinder {
|
||||||
self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
|
self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
fn check_files_name_threaded(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
||||||
let start_time: SystemTime = SystemTime::now();
|
let start_time: SystemTime = SystemTime::now();
|
||||||
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
|
let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
|
||||||
|
|
||||||
|
@ -363,99 +363,118 @@ impl DuplicateFinder {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
let current_folder = folders_to_check.pop().unwrap();
|
let segments: Vec<_> = folders_to_check
|
||||||
|
.par_iter()
|
||||||
// Read current dir, if permission are denied just go to next
|
.map(|current_folder| {
|
||||||
let read_dir = match fs::read_dir(¤t_folder) {
|
let mut dir_result = vec![];
|
||||||
Ok(t) => t,
|
let mut warnings = vec![];
|
||||||
Err(e) => {
|
let mut fe_result = vec![];
|
||||||
self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
|
// Read current dir, if permission are denied just go to next
|
||||||
continue;
|
let read_dir = match fs::read_dir(¤t_folder) {
|
||||||
} // Permissions denied
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check every sub folder/file/link etc.
|
|
||||||
'dir: for entry in read_dir {
|
|
||||||
let entry_data = match entry {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
|
|
||||||
continue 'dir;
|
|
||||||
} //Permissions denied
|
|
||||||
};
|
|
||||||
let metadata: Metadata = match entry_data.metadata() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
|
|
||||||
continue 'dir;
|
|
||||||
} //Permissions denied
|
|
||||||
};
|
|
||||||
if metadata.is_dir() {
|
|
||||||
if !self.recursive_search {
|
|
||||||
continue 'dir;
|
|
||||||
}
|
|
||||||
|
|
||||||
let next_folder = current_folder.join(entry_data.file_name());
|
|
||||||
if self.directories.is_excluded(&next_folder) {
|
|
||||||
continue 'dir;
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.excluded_items.is_excluded(&next_folder) {
|
|
||||||
continue 'dir;
|
|
||||||
}
|
|
||||||
|
|
||||||
folders_to_check.push(next_folder);
|
|
||||||
} else if metadata.is_file() {
|
|
||||||
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
|
||||||
// let mut have_valid_extension: bool;
|
|
||||||
let file_name_lowercase: String = match entry_data.file_name().into_string() {
|
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(_inspected) => {
|
Err(e) => {
|
||||||
println!("File {:?} has not valid UTF-8 name", entry_data);
|
warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e));
|
||||||
continue 'dir;
|
return (dir_result, warnings, fe_result);
|
||||||
}
|
} // Permissions denied
|
||||||
}
|
};
|
||||||
.to_lowercase();
|
|
||||||
|
|
||||||
// Checking allowed extensions
|
// Check every sub folder/file/link etc.
|
||||||
if !self.allowed_extensions.file_extensions.is_empty() {
|
'dir: for entry in read_dir {
|
||||||
let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str()));
|
let entry_data = match entry {
|
||||||
if !allowed {
|
Ok(t) => t,
|
||||||
// Not an allowed extension, ignore it.
|
Err(e) => {
|
||||||
continue 'dir;
|
warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e));
|
||||||
}
|
continue 'dir;
|
||||||
}
|
} //Permissions denied
|
||||||
// Checking files
|
|
||||||
if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
|
|
||||||
let current_file_name = current_folder.join(entry_data.file_name());
|
|
||||||
if self.excluded_items.is_excluded(¤t_file_name) {
|
|
||||||
continue 'dir;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creating new file entry
|
|
||||||
let fe: FileEntry = FileEntry {
|
|
||||||
path: current_file_name.clone(),
|
|
||||||
size: metadata.len(),
|
|
||||||
modified_date: match metadata.modified() {
|
|
||||||
Ok(t) => match t.duration_since(UNIX_EPOCH) {
|
|
||||||
Ok(d) => d.as_secs(),
|
|
||||||
Err(_inspected) => {
|
|
||||||
self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
|
|
||||||
0
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Err(e) => {
|
|
||||||
self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
|
|
||||||
0
|
|
||||||
} // Permissions Denied
|
|
||||||
},
|
|
||||||
hash: "".to_string(),
|
|
||||||
};
|
};
|
||||||
|
let metadata: Metadata = match entry_data.metadata() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e));
|
||||||
|
continue 'dir;
|
||||||
|
} //Permissions denied
|
||||||
|
};
|
||||||
|
if metadata.is_dir() {
|
||||||
|
if !self.recursive_search {
|
||||||
|
continue 'dir;
|
||||||
|
}
|
||||||
|
|
||||||
// Adding files to BTreeMap
|
let next_folder = current_folder.join(entry_data.file_name());
|
||||||
self.files_with_identical_names.entry(entry_data.file_name().to_string_lossy().to_string()).or_insert_with(Vec::new);
|
if self.directories.is_excluded(&next_folder) {
|
||||||
self.files_with_identical_names.get_mut(&entry_data.file_name().to_string_lossy().to_string()).unwrap().push(fe);
|
continue 'dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.excluded_items.is_excluded(&next_folder) {
|
||||||
|
continue 'dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
dir_result.push(next_folder);
|
||||||
|
} else if metadata.is_file() {
|
||||||
|
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
// let mut have_valid_extension: bool;
|
||||||
|
let file_name_lowercase: String = match entry_data.file_name().into_string() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_inspected) => {
|
||||||
|
println!("File {:?} has not valid UTF-8 name", entry_data);
|
||||||
|
continue 'dir;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.to_lowercase();
|
||||||
|
|
||||||
|
// Checking allowed extensions
|
||||||
|
if !self.allowed_extensions.file_extensions.is_empty() {
|
||||||
|
let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str()));
|
||||||
|
if !allowed {
|
||||||
|
// Not an allowed extension, ignore it.
|
||||||
|
continue 'dir;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Checking files
|
||||||
|
if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) {
|
||||||
|
let current_file_name = current_folder.join(entry_data.file_name());
|
||||||
|
if self.excluded_items.is_excluded(¤t_file_name) {
|
||||||
|
continue 'dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creating new file entry
|
||||||
|
let fe: FileEntry = FileEntry {
|
||||||
|
path: current_file_name.clone(),
|
||||||
|
size: metadata.len(),
|
||||||
|
modified_date: match metadata.modified() {
|
||||||
|
Ok(t) => match t.duration_since(UNIX_EPOCH) {
|
||||||
|
Ok(d) => d.as_secs(),
|
||||||
|
Err(_inspected) => {
|
||||||
|
warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display()));
|
||||||
|
0
|
||||||
|
}
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e));
|
||||||
|
0
|
||||||
|
} // Permissions Denied
|
||||||
|
},
|
||||||
|
hash: "".to_string(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Adding files to BTreeMap
|
||||||
|
fe_result.push((entry_data.file_name().to_string_lossy().to_string(), fe));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
(dir_result, warnings, fe_result)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Advance the frontier
|
||||||
|
folders_to_check.clear();
|
||||||
|
|
||||||
|
// Process collected data
|
||||||
|
for (segment, warnings, fe_result) in segments {
|
||||||
|
folders_to_check.extend(segment);
|
||||||
|
self.text_messages.warnings.extend(warnings);
|
||||||
|
for (name, fe) in fe_result {
|
||||||
|
self.files_with_identical_names.entry(name.clone()).or_insert_with(Vec::new);
|
||||||
|
self.files_with_identical_names.get_mut(&name).unwrap().push(fe);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue