From cf668d02a718db95f6cc5a77a15ea963c972112e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= <41945903+qarmin@users.noreply.github.com> Date: Fri, 17 Dec 2021 19:29:37 +0100 Subject: [PATCH] Implement threaded search in rest of modes(except empty folders) (#504) * Implement threaded search in rest of modes(except empty folders) * Add more tools in gui to use allowed extension * Change progress window to modal * Nazair * Clippy --- czkawka_core/src/big_file.rs | 177 ++++++++------ czkawka_core/src/broken_files.rs | 193 ++++++++------- czkawka_core/src/common_extensions.rs | 38 ++- czkawka_core/src/duplicate.rs | 289 ++++++++++++----------- czkawka_core/src/empty_files.rs | 177 +++++++------- czkawka_core/src/empty_folder.rs | 19 +- czkawka_core/src/invalid_symlinks.rs | 256 +++++++++++--------- czkawka_core/src/same_music.rs | 190 +++++++++------ czkawka_core/src/similar_images.rs | 196 ++++++++------- czkawka_core/src/similar_videos.rs | 194 ++++++++------- czkawka_core/src/temporary.rs | 175 ++++++++------ czkawka_gui/src/connect_button_search.rs | 6 + czkawka_gui/src/gui_progress_dialog.rs | 1 + 13 files changed, 1065 insertions(+), 846 deletions(-) diff --git a/czkawka_core/src/big_file.rs b/czkawka_core/src/big_file.rs index 2a76a8f..53e64d5 100644 --- a/czkawka_core/src/big_file.rs +++ b/czkawka_core/src/big_file.rs @@ -1,5 +1,4 @@ use std::collections::BTreeMap; -use std::ffi::OsStr; use std::fs::{File, Metadata}; use std::io::{BufWriter, Write}; use std::path::PathBuf; @@ -13,6 +12,7 @@ use std::{fs, thread}; use crossbeam_channel::Receiver; use humansize::{file_size_opts as options, FileSize}; +use rayon::prelude::*; use crate::common::Common; use crate::common_directory::Directories; @@ -156,90 +156,116 @@ impl BigFile { } //// PROGRESS THREAD END - while !folders_to_check.is_empty() { if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { - // Be sure that every thread is closed + // End thread which send info to gui 
progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - let read_dir = match fs::read_dir(&current_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - // Extracting file extension - let file_extension = entry_data.path().extension().and_then(OsStr::to_str).map(str::to_lowercase); - - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().map(|e| e.to_lowercase()).any(|e| file_extension == Some(e)); - if !allowed { - // Not an allowed extension, ignore it. 
- continue 'dir; + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(&current_folder) { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - - // Checking expressions - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(&current_file_name) { - continue 'dir; - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - size: metadata.len(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } - }, }; - self.big_files.entry(metadata.len()).or_insert_with(Vec::new); - self.big_files.get_mut(&metadata.len()).unwrap().push(fe); + // Check every sub folder/file/link etc. 
+ 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } + + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(&current_file_name) { + continue 'dir; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + size: metadata.len(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + }; + + fe_result.push((fe.size, fe)); + } + } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process 
collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for (size, fe) in fe_result { + self.big_files.entry(size).or_insert_with(Vec::new); + self.big_files.get_mut(&size).unwrap().push(fe); } } } @@ -345,7 +371,6 @@ impl DebugPrint for BigFile { println!("### Other"); println!("Big files size {} in {} groups", self.information.number_of_real_files, self.big_files.len()); - println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); diff --git a/czkawka_core/src/broken_files.rs b/czkawka_core/src/broken_files.rs index 32be68e..497e1e4 100644 --- a/czkawka_core/src/broken_files.rs +++ b/czkawka_core/src/broken_files.rs @@ -203,101 +203,119 @@ impl BrokenFiles { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(&current_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. 
- 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(&current_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); - - let type_of_file = check_extension_avaibility(&file_name_lowercase); - if type_of_file == TypeOfFile::Unknown { - continue 'dir; - } - - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. 
- continue 'dir; - } - } - - // Checking files - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(&current_file_name) { - continue 'dir; - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, - size: metadata.len(), - type_of_file, - error_string: "".to_string(), }; - // Adding files to Vector - self.files_to_check.insert(fe.path.to_string_lossy().to_string(), fe); + // Check every sub folder/file/link etc. + 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } + + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", 
entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + let type_of_file = check_extension_avaibility(&file_name_lowercase); + if type_of_file == TypeOfFile::Unknown { + continue 'dir; + } + + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(&current_file_name) { + continue 'dir; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + size: metadata.len(), + type_of_file, + error_string: "".to_string(), + }; + + fe_result.push((current_file_name.to_string_lossy().to_string(), fe)); + } + } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for (name, fe) in fe_result { + self.files_to_check.insert(name, fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); @@ -516,7 +534,6 @@ impl DebugPrint for BrokenFiles { println!("### Other"); - println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); diff --git a/czkawka_core/src/common_extensions.rs 
b/czkawka_core/src/common_extensions.rs index 87b1aa3..a36c09f 100644 --- a/czkawka_core/src/common_extensions.rs +++ b/czkawka_core/src/common_extensions.rs @@ -5,7 +5,7 @@ use crate::common_messages::Messages; #[derive(Default)] pub struct Extensions { - pub file_extensions: Vec<String>, + file_extensions: Vec<String>, } impl Extensions { @@ -16,7 +16,7 @@ impl Extensions { /// After, extensions cannot contains any dot, commas etc. pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String, text_messages: &mut Messages) { let start_time: SystemTime = SystemTime::now(); - if allowed_extensions.is_empty() { + if allowed_extensions.trim().is_empty() { return; } allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,hdr,svg"); @@ -24,23 +24,28 @@ impl Extensions { allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm"); allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf"); - let extensions: Vec<String> = allowed_extensions.split(',').map(String::from).collect(); + let extensions: Vec<String> = allowed_extensions.split(',').map(|e| e.trim()).map(String::from).collect(); for mut extension in extensions { - if extension.is_empty() || extension.replace('.', "").trim() == "" { + if extension.is_empty() || extension.replace('.', "").replace(' ', "").trim().is_empty() { continue; } - if extension.starts_with('.') { - extension = extension[1..].to_string(); + if !extension.starts_with('.') { + extension = format!(".{}", extension); } if extension[1..].contains('.') { - text_messages.warnings.push(".".to_string() + extension.as_str() + " is not valid extension(valid extension doesn't have dot inside)"); + text_messages.warnings.push(format!("{} is not valid extension because contains dot inside", extension)); continue; } - if !self.file_extensions.contains(&extension.trim().to_string()) { - self.file_extensions.push(extension.trim().to_string()); + if extension[1..].contains(' ') { + 
text_messages.warnings.push(format!("{} is not valid extension because contains empty space inside", extension)); + continue; + } + + if !self.file_extensions.contains(&extension) { + self.file_extensions.push(extension); } } @@ -49,4 +54,19 @@ impl Extensions { } Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string()); } + + pub fn matches_filename(&self, file_name: &str) -> bool { + assert_eq!(file_name, file_name.to_lowercase()); // TODO comment this this after tests + if !self.file_extensions.is_empty() && !self.file_extensions.iter().any(|e| file_name.ends_with(e)) { + return false; + } + true + } + + pub fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) { + for extension in file_extensions { + assert!(extension.starts_with('.')); + self.file_extensions.push(extension.to_string()); + } + } } diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index a5f6746..14686b2 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -44,33 +44,6 @@ pub enum CheckingMethod { Hash, } -impl MyHasher for blake3::Hasher { - fn update(&mut self, bytes: &[u8]) { - self.update(bytes); - } - fn finalize(&self) -> String { - self.finalize().to_hex().to_string() - } -} - -impl MyHasher for crc32fast::Hasher { - fn update(&mut self, bytes: &[u8]) { - self.write(bytes); - } - fn finalize(&self) -> String { - self.finish().to_string() - } -} - -impl MyHasher for xxhash_rust::xxh3::Xxh3 { - fn update(&mut self, bytes: &[u8]) { - self.write(bytes); - } - fn finalize(&self) -> String { - self.finish().to_string() - } -} - #[derive(PartialEq, Eq, Clone, Debug, Copy)] pub enum HashType { Blake3, @@ -185,7 +158,7 @@ impl DuplicateFinder { match self.check_method { CheckingMethod::Name => { - if !self.check_files_name_threaded(stop_receiver, progress_sender) { + if !self.check_files_name(stop_receiver, progress_sender) { self.stopped_search = true; return; } @@ -314,7 +287,7 @@ impl DuplicateFinder 
{ self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages); } - fn check_files_name_threaded(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) -> bool { + fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) -> bool { let start_time: SystemTime = SystemTime::now(); let mut folders_to_check: Vec<PathBuf> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector @@ -369,13 +342,13 @@ impl DuplicateFinder { let mut dir_result = vec![]; let mut warnings = vec![]; let mut fe_result = vec![]; - // Read current dir, if permission are denied just go to next + // Read current dir childrens let read_dir = match fs::read_dir(&current_folder) { Ok(t) => t, Err(e) => { warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); return (dir_result, warnings, fe_result); - } // Permissions denied + } }; // Check every sub folder/file/link etc. 
@@ -385,14 +358,14 @@ impl DuplicateFinder { Err(e) => { warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); continue 'dir; - } //Permissions denied + } }; let metadata: Metadata = match entry_data.metadata() { Ok(t) => t, Err(e) => { warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); continue 'dir; - } //Permissions denied + } }; if metadata.is_dir() { if !self.recursive_search { @@ -411,32 +384,26 @@ impl DuplicateFinder { dir_result.push(next_folder); } else if metadata.is_file() { atomic_file_counter.fetch_add(1, Ordering::Relaxed); - // let mut have_valid_extension: bool; + let file_name_lowercase: String = match entry_data.file_name().into_string() { Ok(t) => t, Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); continue 'dir; } } .to_lowercase(); - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. 
- continue 'dir; - } + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; } - // Checking files + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { let current_file_name = current_folder.join(entry_data.file_name()); if self.excluded_items.is_excluded(&current_file_name) { continue 'dir; } - // Creating new file entry let fe: FileEntry = FileEntry { path: current_file_name.clone(), size: metadata.len(), @@ -451,12 +418,11 @@ impl DuplicateFinder { Err(e) => { warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); 0 - } // Permissions Denied + } }, hash: "".to_string(), }; - // Adding files to BTreeMap fe_result.push((entry_data.file_name().to_string_lossy().to_string(), fe)); } } @@ -525,7 +491,7 @@ impl DuplicateFinder { let max_stage = match self.check_method { CheckingMethod::Size => 0, CheckingMethod::Hash => 2, - _ => 255, + _ => panic!(), }; progress_thread_handle = thread::spawn(move || loop { progress_send @@ -556,130 +522,143 @@ impl DuplicateFinder { return false; } - let current_folder = folders_to_check.pop().unwrap(); - - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(&current_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. 
- 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue 'dir; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue 'dir; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue 'dir; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) { - continue 'dir; - } - - if self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - // let mut have_valid_extension: bool; - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(&current_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); + }; - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. 
- - continue 'dir; - } - } - // Checking files - if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(&current_file_name) { - continue 'dir; - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - size: metadata.len(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, - hash: "".to_string(), + // Check every sub folder/file/link etc. + 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } - // Adding files to BTreeMap - self.files_with_identical_size.entry(metadata.len()).or_insert_with(Vec::new); - self.files_with_identical_size.get_mut(&metadata.len()).unwrap().push(fe); + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match 
entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(&current_file_name) { + continue 'dir; + } + + // Creating new file entry + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + size: metadata.len(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + hash: "".to_string(), + }; + + fe_result.push(fe); + } + } } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for fe in fe_result { + self.files_with_identical_size.entry(fe.size).or_insert_with(Vec::new); + self.files_with_identical_size.get_mut(&fe.size).unwrap().push(fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); // Create new BTreeMap without single size entries(files have not duplicates) - let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default(); + let mut old_map: BTreeMap<u64, Vec<FileEntry>> = Default::default(); + mem::swap(&mut old_map, &mut self.files_with_identical_size); - for (size, vec) in &self.files_with_identical_size { + for 
(size, vec) in old_map { if vec.len() <= 1 { continue; } let vector; if self.ignore_hard_links { - vector = filter_hard_links(vec); + vector = filter_hard_links(&vec); } else { - vector = vec.clone(); + vector = vec; } if vector.len() > 1 { self.information.number_of_duplicated_files_by_size += vector.len() - 1; self.information.number_of_groups_by_size += 1; self.information.lost_space_by_size += (vector.len() as u64 - 1) * size; - new_map.insert(*size, vector); + self.files_with_identical_size.insert(size, vector); } } - self.files_with_identical_size = new_map; Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string()); true @@ -1134,7 +1113,6 @@ impl DebugPrint for DuplicateFinder { println!("Files list size - {}", self.files_with_identical_size.len()); println!("Hashed Files list size - {}", self.files_with_identical_hashes.len()); - println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); @@ -1550,6 +1528,33 @@ pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache None } +impl MyHasher for blake3::Hasher { + fn update(&mut self, bytes: &[u8]) { + self.update(bytes); + } + fn finalize(&self) -> String { + self.finalize().to_hex().to_string() + } +} + +impl MyHasher for crc32fast::Hasher { + fn update(&mut self, bytes: &[u8]) { + self.write(bytes); + } + fn finalize(&self) -> String { + self.finish().to_string() + } +} + +impl MyHasher for xxhash_rust::xxh3::Xxh3 { + fn update(&mut self, bytes: &[u8]) { + self.write(bytes); + } + fn finalize(&self) -> String { + self.finish().to_string() + } +} + #[cfg(test)] mod tests { use std::fs::{read_dir, File}; diff --git a/czkawka_core/src/empty_files.rs b/czkawka_core/src/empty_files.rs index b3d946c..31c08e5 100644 --- 
a/czkawka_core/src/empty_files.rs +++ b/czkawka_core/src/empty_files.rs @@ -1,3 +1,4 @@ +use rayon::prelude::*; use std::fs::{File, Metadata}; use std::io::prelude::*; use std::io::BufWriter; @@ -166,7 +167,6 @@ impl EmptyFiles { progress_thread_handle = thread::spawn(|| {}); } //// PROGRESS THREAD END - while !folders_to_check.is_empty() { if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { // End thread which send info to gui @@ -174,91 +174,109 @@ impl EmptyFiles { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(&current_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. - 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut 
dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(¤t_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); + }; - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. - continue 'dir; - } - } - // Checking files - if metadata.len() == 0 { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(¤t_file_name) { - continue 'dir; - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, + // Check every sub folder/file/link etc. 
+ 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } - // Adding files to Vector - self.empty_files.push(fe); + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + if metadata.len() == 0 { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(¤t_file_name) { + continue 'dir; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + }; + + fe_result.push(fe); + } + } } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + 
folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for fe in fe_result { + self.empty_files.push(fe); } } } @@ -319,7 +337,6 @@ impl DebugPrint for EmptyFiles { println!("### Other"); println!("Empty list size - {}", self.empty_files.len()); - println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); diff --git a/czkawka_core/src/empty_folder.rs b/czkawka_core/src/empty_folder.rs index c42cf7a..3e453b2 100644 --- a/czkawka_core/src/empty_folder.rs +++ b/czkawka_core/src/empty_folder.rs @@ -196,30 +196,31 @@ impl EmptyFolder { } let current_folder = folders_to_check.pop().unwrap(); // Checked folder may be deleted or we may not have permissions to open it so we assume that this folder is not be empty + // Read current dir childrens let read_dir = match fs::read_dir(¤t_folder) { Ok(t) => t, - Err(_inspected) => { - folders_checked.get_mut(¤t_folder).unwrap().is_empty = FolderEmptiness::No; + Err(e) => { + self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); continue; } }; + // Check every sub folder/file/link etc. 
'dir: for entry in read_dir { let entry_data = match entry { Ok(t) => t, - Err(_inspected) => { - set_as_not_empty_folder(&mut folders_checked, ¤t_folder); + Err(e) => { + self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); continue 'dir; - } //Permissions denied + } }; let metadata: Metadata = match entry_data.metadata() { Ok(t) => t, - Err(_inspected) => { - set_as_not_empty_folder(&mut folders_checked, ¤t_folder); + Err(e) => { + self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); continue 'dir; - } //Permissions denied + } }; - // If child is dir, still folder may be considered as empty if all children are only directories. if metadata.is_dir() { atomic_folder_counter.fetch_add(1, Ordering::Relaxed); let next_folder = current_folder.join(entry_data.file_name()); diff --git a/czkawka_core/src/invalid_symlinks.rs b/czkawka_core/src/invalid_symlinks.rs index 6e00065..9d6b8a1 100644 --- a/czkawka_core/src/invalid_symlinks.rs +++ b/czkawka_core/src/invalid_symlinks.rs @@ -9,6 +9,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::{fs, thread}; use crossbeam_channel::Receiver; +use rayon::prelude::*; use crate::common::Common; use crate::common_directory::Directories; @@ -183,132 +184,152 @@ impl InvalidSymlinks { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(¤t_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. 
- 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - } else if metadata.file_type().is_symlink() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(¤t_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); - - // Checking allowed extensions - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. 
- continue 'dir; - } - } - - // Checking files - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(¤t_file_name) { - continue 'dir; - } - - let mut destination_path = PathBuf::new(); - let type_of_error; - - match current_file_name.read_link() { - Ok(t) => { - destination_path.push(t); - let mut number_of_loop = 0; - let mut current_path = current_file_name.clone(); - loop { - if number_of_loop == 0 && !current_path.exists() { - type_of_error = ErrorType::NonExistentFile; - break; - } - if number_of_loop == MAX_NUMBER_OF_SYMLINK_JUMPS { - type_of_error = ErrorType::InfiniteRecursion; - break; - } - - current_path = match current_path.read_link() { - Ok(t) => t, - Err(_inspected) => { - // Looks that some next symlinks are broken, but we do nothing with it - TODO why they are broken - continue 'dir; - } - }; - - number_of_loop += 1; - } - } - Err(_inspected) => { - // Failed to load info about it - type_of_error = ErrorType::NonExistentFile; - } - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - symlink_path: current_file_name.clone(), - destination_path, - type_of_error, - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, }; - // Adding files to Vector + // Check every sub folder/file/link etc. 
+ 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } + + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + } else if metadata.file_type().is_symlink() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(¤t_file_name) { + continue 'dir; + } + + let mut destination_path = PathBuf::new(); + let type_of_error; + + match current_file_name.read_link() { + Ok(t) => { + destination_path.push(t); + let mut number_of_loop = 0; + let mut current_path = current_file_name.clone(); + loop { + if number_of_loop == 0 && !current_path.exists() { + type_of_error = ErrorType::NonExistentFile; + break; + } + if number_of_loop == MAX_NUMBER_OF_SYMLINK_JUMPS { + type_of_error = ErrorType::InfiniteRecursion; + break; + } + + current_path = match current_path.read_link() { + Ok(t) => t, + Err(_inspected) => { + // Looks that 
some next symlinks are broken, but we do nothing with it - TODO why they are broken + continue 'dir; + } + }; + + number_of_loop += 1; + } + } + Err(_inspected) => { + // Failed to load info about it + type_of_error = ErrorType::NonExistentFile; + } + } + + // Creating new file entry + let fe: FileEntry = FileEntry { + symlink_path: current_file_name.clone(), + destination_path, + type_of_error, + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + }; + + // Adding files to Vector + fe_result.push(fe); + } + } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for fe in fe_result { self.invalid_symlinks.push(fe); } } } + self.information.number_of_invalid_symlinks = self.invalid_symlinks.len(); // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); @@ -366,7 +387,6 @@ impl DebugPrint for InvalidSymlinks { println!("### Other"); println!("Invalid symlinks list size - {}", self.invalid_symlinks.len()); - println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); diff --git a/czkawka_core/src/same_music.rs b/czkawka_core/src/same_music.rs index 5e966cd..32bfad0 100644 --- a/czkawka_core/src/same_music.rs +++ 
b/czkawka_core/src/same_music.rs @@ -15,6 +15,7 @@ use rayon::prelude::*; use crate::common::Common; use crate::common_directory::Directories; +use crate::common_extensions::Extensions; use crate::common_items::ExcludedItems; use crate::common_messages::Messages; use crate::common_traits::*; @@ -88,6 +89,7 @@ pub struct SameMusic { music_entries: Vec, duplicated_music_entries: Vec>, directories: Directories, + allowed_extensions: Extensions, excluded_items: ExcludedItems, minimal_file_size: u64, maximal_file_size: u64, @@ -105,6 +107,7 @@ impl SameMusic { information: Info::new(), recursive_search: true, directories: Directories::new(), + allowed_extensions: Extensions::new(), excluded_items: ExcludedItems::new(), music_entries: Vec::with_capacity(2048), delete_method: DeleteMethod::None, @@ -178,6 +181,9 @@ impl SameMusic { pub fn set_excluded_items(&mut self, excluded_items: Vec) { self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages); } + pub fn set_allowed_extensions(&mut self, allowed_extensions: String) { + self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages); + } pub fn set_music_similarity(&mut self, music_similarity: MusicSimilarity) { self.music_similarity = music_similarity; @@ -195,6 +201,8 @@ impl SameMusic { let start_time: SystemTime = SystemTime::now(); let mut folders_to_check: Vec = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector + self.allowed_extensions.extend_allowed_extensions(&[".mp3", ".flac", ".m4a"]); + // Add root folders for finding for id in &self.directories.included_directories { folders_to_check.push(id.clone()); @@ -229,6 +237,7 @@ impl SameMusic { progress_thread_handle = thread::spawn(|| {}); } //// PROGRESS THREAD END + while !folders_to_check.is_empty() { if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { // End thread which send 
info to gui @@ -236,90 +245,121 @@ impl SameMusic { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(¤t_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. - 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue 'dir; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue 'dir; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue 'dir; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - // Checking files - if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(¤t_file_name) { - continue 'dir; + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(¤t_folder) { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", 
current_folder.display(), e)); + return (dir_result, warnings, fe_result); } + }; - let allowed_extensions = [".mp3", ".flac", ".m4a"]; - - if !allowed_extensions.iter().any(|r| current_file_name.to_string_lossy().ends_with(r)) { - continue 'dir; - } - - // Creating new file entry - let file_entry: FileEntry = FileEntry { - size: metadata.len(), - path: current_file_name.clone(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - continue 'dir; - } // Permissions Denied - }, - title: "".to_string(), - - artist: "".to_string(), - album_title: "".to_string(), - album_artist: "".to_string(), - year: 0, + // Check every sub folder/file/link etc. 
+ 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } - // Adding files to Vector - self.music_to_check.push(file_entry); + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(¤t_file_name) { + continue 'dir; + } + + // Creating new file entry + let fe: FileEntry = FileEntry { + size: metadata.len(), + path: current_file_name.clone(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + 
title: "".to_string(), + + artist: "".to_string(), + album_title: "".to_string(), + album_artist: "".to_string(), + year: 0, + }; + + fe_result.push(fe); + } + } } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for fe in fe_result { + self.music_to_check.push(fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs index 129b6ac..d7f627e 100644 --- a/czkawka_core/src/similar_images.rs +++ b/czkawka_core/src/similar_images.rs @@ -20,6 +20,7 @@ use rayon::prelude::*; use crate::common::Common; use crate::common_directory::Directories; +use crate::common_extensions::Extensions; use crate::common_items::ExcludedItems; use crate::common_messages::Messages; use crate::common_traits::{DebugPrint, PrintResults, SaveResults}; @@ -87,6 +88,7 @@ pub struct SimilarImages { information: Info, text_messages: Messages, directories: Directories, + allowed_extensions: Extensions, excluded_items: ExcludedItems, bktree: BKTree, Hamming>, similar_vectors: Vec>, @@ -128,6 +130,7 @@ impl SimilarImages { text_messages: Messages::new(), directories: Directories::new(), excluded_items: Default::default(), + allowed_extensions: Extensions::new(), bktree: BKTree::new(Hamming), similar_vectors: vec![], recursive_search: true, @@ -194,6 +197,9 @@ impl SimilarImages { pub fn set_recursive_search(&mut self, recursive_search: bool) { self.recursive_search = recursive_search; } + pub fn set_allowed_extensions(&mut self, allowed_extensions: String) { + self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages); + } pub fn set_minimal_file_size(&mut self, 
minimal_file_size: u64) { self.minimal_file_size = match minimal_file_size { @@ -238,6 +244,9 @@ impl SimilarImages { let start_time: SystemTime = SystemTime::now(); let mut folders_to_check: Vec = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector + self.allowed_extensions + .extend_allowed_extensions(&[".jpg", ".jpeg", ".png" /*, ".bmp"*/, ".tiff", ".tif", ".tga", ".ff" /*, ".gif"*/, ".jif", ".jfi" /*, ".webp"*/]); // webp cannot be seen in preview, gif needs to be enabled after releasing image crate 0.24.0, bmp needs to be fixed in image crate + // Add root folders for finding for id in &self.directories.included_directories { folders_to_check.push(id.clone()); @@ -280,100 +289,119 @@ impl SimilarImages { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(¤t_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. 
- 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) { - continue 'dir; - } - - if self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(¤t_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); + }; - // Checking allowed image extensions - let allowed_image_extensions = [".jpg", ".jpeg", ".png" /*, ".bmp"*/, ".tiff", ".tif", ".tga", ".ff" /*, ".gif"*/, ".jif", ".jfi" /*, ".webp"*/]; // webp cannot be seen in preview, gif needs to be enabled after releasing image crate 0.24.0, bmp needs to be fixed in image crate - if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(e)) { - continue 'dir; - } - - // Checking files - if 
(self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(¤t_file_name) { - continue 'dir; - } - - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - size: metadata.len(), - dimensions: "".to_string(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, - - hash: Vec::new(), - similarity: Similarity::None, + // Check every sub folder/file/link etc. + 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } - self.images_to_check.insert(current_file_name.to_string_lossy().to_string(), fe); + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid 
UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + // Checking files + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(¤t_file_name) { + continue 'dir; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + size: metadata.len(), + dimensions: "".to_string(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + }, + + hash: Vec::new(), + similarity: Similarity::None, + }; + + fe_result.push((current_file_name.to_string_lossy().to_string(), fe)); + } + } } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for (name, fe) in fe_result { + self.images_to_check.insert(name, fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); diff --git a/czkawka_core/src/similar_videos.rs b/czkawka_core/src/similar_videos.rs index 20b1ac3..ad8338b 100644 --- a/czkawka_core/src/similar_videos.rs +++ b/czkawka_core/src/similar_videos.rs @@ -201,6 +201,9 @@ impl SimilarVideos { let start_time: SystemTime = SystemTime::now(); let mut folders_to_check: Vec = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough 
to store most of paths without needing to resize vector + self.allowed_extensions + .extend_allowed_extensions(&[".mp4", ".mpv", ".flv", ".mp4a", ".webm", ".mpg", ".mp2", ".mpeg", ".m4p", ".m4v", ".avi", ".wmv", ".qt", ".mov", ".swf", ".mkv"]); + // Add root folders for finding for id in &self.directories.included_directories { folders_to_check.push(id.clone()); @@ -243,106 +246,117 @@ impl SimilarVideos { progress_thread_handle.join().unwrap(); return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(¤t_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. - 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) { - continue 'dir; - } - - if self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let 
mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(&current_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); + }; - if !self.allowed_extensions.file_extensions.is_empty() { - let allowed = self.allowed_extensions.file_extensions.iter().any(|e| file_name_lowercase.ends_with((".".to_string() + e.to_lowercase().as_str()).as_str())); - if !allowed { - // Not an allowed extension, ignore it. - continue 'dir; - } - } - - // Checking allowed video extensions - let allowed_video_extensions = [".mp4", ".mpv", ".flv", ".mp4a", ".webm", ".mpg", ".mp2", ".mpeg", ".m4p", ".m4v", ".avi", ".wmv", ".qt", ".mov", ".swf", ".mkv"]; - if !allowed_video_extensions.iter().any(|e| file_name_lowercase.ends_with(e)) { - continue 'dir; - } - - // Checking files - if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { - let current_file_name = current_folder.join(entry_data.file_name()); - if self.excluded_items.is_excluded(&current_file_name) { - continue 'dir; - } - - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - size: metadata.len(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, - vhash: Default::default(), - error: "".to_string(), + // Check every sub folder/file/link etc. 
+ 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } - self.videos_to_check.insert(current_file_name.to_string_lossy().to_string(), fe); + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !self.allowed_extensions.matches_filename(&file_name_lowercase) { + continue 'dir; + } + + // Checking files + if (self.minimal_file_size..=self.maximal_file_size).contains(&metadata.len()) { + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(&current_file_name) { + continue 'dir; + } + + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + size: metadata.len(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } + 
}, + vhash: Default::default(), + error: "".to_string(), + }; + + fe_result.push((current_file_name.to_string_lossy().to_string(), fe)); + } + } } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for (name, fe) in fe_result { + self.videos_to_check.insert(name, fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); diff --git a/czkawka_core/src/temporary.rs b/czkawka_core/src/temporary.rs index 42e19e1..5fec89d 100644 --- a/czkawka_core/src/temporary.rs +++ b/czkawka_core/src/temporary.rs @@ -9,6 +9,7 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::{fs, thread}; use crossbeam_channel::Receiver; +use rayon::prelude::*; use crate::common::Common; use crate::common_directory::Directories; @@ -166,89 +167,113 @@ impl Temporary { return false; } - let current_folder = folders_to_check.pop().unwrap(); - // Read current dir, if permission are denied just go to next - let read_dir = match fs::read_dir(&current_folder) { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); - continue; - } // Permissions denied - }; - - // Check every sub folder/file/link etc. 
- 'dir: for entry in read_dir { - let entry_data = match entry { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - let metadata: Metadata = match entry_data.metadata() { - Ok(t) => t, - Err(e) => { - self.text_messages.warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); - continue; - } //Permissions denied - }; - if metadata.is_dir() { - if !self.recursive_search { - continue; - } - - let next_folder = current_folder.join(entry_data.file_name()); - if self.directories.is_excluded(&next_folder) || self.excluded_items.is_excluded(&next_folder) { - continue 'dir; - } - - folders_to_check.push(next_folder); - } else if metadata.is_file() { - atomic_file_counter.fetch_add(1, Ordering::Relaxed); - let file_name_lowercase: String = match entry_data.file_name().into_string() { + let segments: Vec<_> = folders_to_check + .par_iter() + .map(|current_folder| { + let mut dir_result = vec![]; + let mut warnings = vec![]; + let mut fe_result = vec![]; + // Read current dir childrens + let read_dir = match fs::read_dir(&current_folder) { Ok(t) => t, - Err(_inspected) => { - println!("File {:?} has not valid UTF-8 name", entry_data); - continue 'dir; + Err(e) => { + warnings.push(format!("Cannot open dir {}, reason {}", current_folder.display(), e)); + return (dir_result, warnings, fe_result); } - } - .to_lowercase(); - - // Temporary files which needs to have dot in name(not sure if exists without dot) - let temporary_with_dot = ["#", "thumbs.db", ".bak", "~", ".tmp", ".temp", ".ds_store", ".crdownload", ".part", ".cache", ".dmp", ".download", ".partial"]; - - if !file_name_lowercase.contains('.') || !temporary_with_dot.iter().any(|f| file_name_lowercase.ends_with(f)) { - continue 'dir; - } - // Checking files - let current_file_name = current_folder.join(entry_data.file_name()); - if 
self.excluded_items.is_excluded(&current_file_name) { - continue 'dir; - } - - // Creating new file entry - let fe: FileEntry = FileEntry { - path: current_file_name.clone(), - modified_date: match metadata.modified() { - Ok(t) => match t.duration_since(UNIX_EPOCH) { - Ok(d) => d.as_secs(), - Err(_inspected) => { - self.text_messages.warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); - 0 - } - }, - Err(e) => { - self.text_messages.warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); - 0 - } // Permissions Denied - }, }; - // Adding files to Vector + // Check every sub folder/file/link etc. + 'dir: for entry in read_dir { + let entry_data = match entry { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read entry in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + let metadata: Metadata = match entry_data.metadata() { + Ok(t) => t, + Err(e) => { + warnings.push(format!("Cannot read metadata in dir {}, reason {}", current_folder.display(), e)); + continue 'dir; + } + }; + if metadata.is_dir() { + if !self.recursive_search { + continue 'dir; + } + + let next_folder = current_folder.join(entry_data.file_name()); + if self.directories.is_excluded(&next_folder) { + continue 'dir; + } + + if self.excluded_items.is_excluded(&next_folder) { + continue 'dir; + } + + dir_result.push(next_folder); + } else if metadata.is_file() { + atomic_file_counter.fetch_add(1, Ordering::Relaxed); + + let file_name_lowercase: String = match entry_data.file_name().into_string() { + Ok(t) => t, + Err(_inspected) => { + warnings.push(format!("File {:?} has not valid UTF-8 name", entry_data)); + continue 'dir; + } + } + .to_lowercase(); + + if !["#", "thumbs.db", ".bak", "~", ".tmp", ".temp", ".ds_store", ".crdownload", ".part", ".cache", ".dmp", ".download", ".partial"] + .iter() + .any(|f| file_name_lowercase.ends_with(f)) + { + continue 'dir; + 
} + let current_file_name = current_folder.join(entry_data.file_name()); + if self.excluded_items.is_excluded(&current_file_name) { + continue 'dir; + } + + // Creating new file entry + let fe: FileEntry = FileEntry { + path: current_file_name.clone(), + modified_date: match metadata.modified() { + Ok(t) => match t.duration_since(UNIX_EPOCH) { + Ok(d) => d.as_secs(), + Err(_inspected) => { + warnings.push(format!("File {} seems to be modified before Unix Epoch.", current_file_name.display())); + 0 + } + }, + Err(e) => { + warnings.push(format!("Unable to get modification date from file {}, reason {}", current_file_name.display(), e)); + 0 + } // Permissions Denied + }, + }; + + fe_result.push(fe); + } + } + (dir_result, warnings, fe_result) + }) + .collect(); + + // Advance the frontier + folders_to_check.clear(); + + // Process collected data + for (segment, warnings, fe_result) in segments { + folders_to_check.extend(segment); + self.text_messages.warnings.extend(warnings); + for fe in fe_result { self.temporary_files.push(fe); } } } + // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); diff --git a/czkawka_gui/src/connect_button_search.rs b/czkawka_gui/src/connect_button_search.rs index 0dd054f..11f366d 100644 --- a/czkawka_gui/src/connect_button_search.rs +++ b/czkawka_gui/src/connect_button_search.rs @@ -231,6 +231,7 @@ pub fn connect_button_search( bf.set_excluded_directory(excluded_directories); bf.set_recursive_search(recursive_search); bf.set_excluded_items(excluded_items); + bf.set_allowed_extensions(allowed_extensions); bf.set_number_of_files_to_check(numbers_of_files_to_check); bf.find_big_files(Some(&stop_receiver), Some(&futures_sender_big_file)); let _ = glib_stop_sender.send(Message::BigFiles(bf)); @@ -299,6 +300,7 @@ pub fn connect_button_search( sf.set_hash_alg(hash_alg); sf.set_hash_size(hash_size); sf.set_image_filter(image_filter); + 
sf.set_allowed_extensions(allowed_extensions); sf.set_delete_outdated_cache(delete_outdated_cache); sf.set_exclude_images_with_same_size(ignore_same_size); sf.find_similar_images(Some(&stop_receiver), Some(&futures_sender_similar_images)); @@ -332,6 +334,7 @@ pub fn connect_button_search( sf.set_excluded_items(excluded_items); sf.set_minimal_file_size(minimal_file_size); sf.set_maximal_file_size(maximal_file_size); + sf.set_allowed_extensions(allowed_extensions); sf.set_use_cache(use_cache); sf.set_tolerance(tolerance); sf.set_delete_outdated_cache(delete_outdated_cache); @@ -380,6 +383,7 @@ pub fn connect_button_search( mf.set_excluded_items(excluded_items); mf.set_minimal_file_size(minimal_file_size); mf.set_maximal_file_size(maximal_file_size); + mf.set_allowed_extensions(allowed_extensions); mf.set_recursive_search(recursive_search); mf.set_music_similarity(music_similarity); mf.set_approximate_comparison(approximate_comparison); @@ -408,6 +412,7 @@ pub fn connect_button_search( isf.set_excluded_directory(excluded_directories); isf.set_recursive_search(recursive_search); isf.set_excluded_items(excluded_items); + isf.set_allowed_extensions(allowed_extensions); isf.find_invalid_links(Some(&stop_receiver), Some(&futures_sender_invalid_symlinks)); let _ = glib_stop_sender.send(Message::InvalidSymlinks(isf)); }); @@ -429,6 +434,7 @@ pub fn connect_button_search( br.set_recursive_search(recursive_search); br.set_excluded_items(excluded_items); br.set_use_cache(use_cache); + br.set_allowed_extensions(allowed_extensions); br.find_broken_files(Some(&stop_receiver), Some(&futures_sender_broken_files)); let _ = glib_stop_sender.send(Message::BrokenFiles(br)); }); diff --git a/czkawka_gui/src/gui_progress_dialog.rs b/czkawka_gui/src/gui_progress_dialog.rs index a79ebf3..c7cbf96 100644 --- a/czkawka_gui/src/gui_progress_dialog.rs +++ b/czkawka_gui/src/gui_progress_dialog.rs @@ -27,6 +27,7 @@ impl GuiProgressDialog { let window_progress: gtk::Dialog = 
builder.object("window_progress").unwrap(); window_progress.set_transient_for(Some(window_main)); + window_progress.set_modal(true); window_progress.set_title("Czkawka"); let progress_bar_current_stage: gtk::ProgressBar = builder.object("progress_bar_current_stage").unwrap();