use std::collections::{BTreeSet, HashMap}; use std::fs::File; use std::io::prelude::*; use std::io::BufWriter; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use std::thread::sleep; use std::time::{Duration, SystemTime}; use std::{mem, thread}; use crossbeam_channel::Receiver; use mime_guess::get_mime_extensions; use rayon::prelude::*; use crate::common::{Common, LOOP_DURATION}; use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData}; use crate::common_directory::Directories; use crate::common_extensions::Extensions; use crate::common_items::ExcludedItems; use crate::common_messages::Messages; use crate::common_traits::*; static DISABLED_EXTENSIONS: &[&str] = &["file", "cache", "bak"]; // Such files can have any type inside // This adds several workarounds for bugs/invalid recognizing types by external libraries // ("real_content_extension", "current_file_extension") static WORKAROUNDS: &[(&str, &str)] = &[ // Wine/Windows ("der", "cat"), ("exe", "acm"), ("exe", "ax"), ("exe", "bck"), ("exe", "com"), ("exe", "cpl"), ("exe", "dll"), ("exe", "dll16"), ("exe", "drv"), ("exe", "drv16"), ("exe", "ds"), ("exe", "efi"), ("exe", "exe16"), ("exe", "fon"), // Type of font or something else ("exe", "mod16"), ("exe", "msstyles"), ("exe", "mui"), ("exe", "orig"), ("exe", "signed"), ("exe", "sys"), ("exe", "tlb"), ("exe", "vxd"), ("exe", "winmd"), ("xml", "adml"), ("xml", "manifest"), ("xml", "mum"), // Other ("gz", "blend"), // Blender ("gz", "crate"), // Cargo ("gz", "svgz"), // Archive svg ("gz", "tgz"), // Archive ("html", "md"), // Markdown ("jpg", "jfif"), // Photo format ("mobi", "azw3"), // Ebook format ("mpg", "vob"), // Weddings in parts have usually vob extension ("obj", "bin"), // Multiple apps, Czkawka, Nvidia, Windows ("obj", "o"), // Compilators ("odp", "otp"), // LibreOffice ("ods", "ots"), // Libreoffice ("odt", "ott"), // Libreoffice ("ogg", "ogv"), // Audio format ("pptx", "ppsx"), // Powerpoint ("sh", "bash"), // Linux ("sh", "pl"), // Gnome/Linux ("sh", "pm"), // Gnome/Linux ("sh", "py"), // Python ("sh", "pyx"), // Python ("sh", "rs"), // Rust ("sh", "sample"), // Git ("xml", "bsp"), // Quartus ("xml", "cbp"), // CodeBlocks config ("xml", "cfg"), // Multiple apps - Godot ("xml", "cmb"), // Cambalache ("xml", "conf"), // Multiple apps - Python ("xml", "config"), // Multiple apps - QT Creator ("xml", "dae"), // 3D models ("xml", "docbook"), // ("xml", "fb2"), // ("xml", "gir"), // GTK ("xml", "glade"), // Glade ("xml", "iml"), // Intelij Idea ("xml", "kdenlive"), // KDenLive ("xml", "lang"), // ? ("xml", "policy"), // SystemD ("xml", "qsys"), // Quartus ("xml", "sopcinfo"), // Quartus ("xml", "svg"), // SVG ("xml", "ui"), // Cambalache, Glade ("xml", "user"), // Qtcreator ("xml", "vbox"), // VirtualBox ("xml", "vbox-prev"), // VirtualBox ("xml", "vcproj"), // VisualStudio ("xml", "xba"), // Libreoffice ("xml", "xcd"), // Libreoffice files ("zip", "apk"), // Android apk ("zip", "cbr"), // Comics ("zip", "dat"), // Multiple - python, brave ("zip", "doc"), // Word ("zip", "docx"), // Word ("zip", "jar"), // Java ("zip", "kra"), // Krita ("zip", "nupkg"), // Nuget packages ("zip", "odg"), // Libreoffice ("zip", "pptx"), // Powerpoint ("zip", "whl"), // Python packages ("zip", "xpi"), // Firefox extensions ("zip", "zcos"), // Scilab // Probably invalid ("html", "svg"), ("xml", "html"), // Probably bug in external library ("msi", "doc"), // Not sure whe doc is not recognized ("exe", "xls"), // Not sure whe xls is not recognized ]; #[derive(Clone)] pub struct BadFileEntry { pub path: PathBuf, pub modified_date: u64, pub size: u64, pub current_extension: String, pub proper_extensions: String, } /// Info struck with helpful information's about results #[derive(Default)] pub struct Info { pub number_of_files_with_bad_extension: usize, } impl Info { pub fn new() -> Self { Default::default() } } pub struct BadExtensions { text_messages: Messages, information: Info, files_to_check: Vec, bad_extensions_files: Vec, directories: Directories, allowed_extensions: Extensions, excluded_items: ExcludedItems, minimal_file_size: u64, maximal_file_size: u64, recursive_search: bool, stopped_search: bool, save_also_as_json: bool, include_files_without_extension: bool, } impl BadExtensions { pub fn new() -> Self { Self { text_messages: Messages::new(), information: Info::new(), recursive_search: true, allowed_extensions: Extensions::new(), directories: Directories::new(), excluded_items: ExcludedItems::new(), files_to_check: Default::default(), stopped_search: false, minimal_file_size: 8192, maximal_file_size: u64::MAX, bad_extensions_files: Default::default(), save_also_as_json: false, include_files_without_extension: true, } } pub fn find_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) { self.directories.optimize_directories(self.recursive_search, &mut self.text_messages); if !self.check_files(stop_receiver, progress_sender) { self.stopped_search = true; return; } if !self.look_for_bad_extensions_files(stop_receiver, progress_sender) { self.stopped_search = true; return; } self.debug_print(); } pub fn get_stopped_search(&self) -> bool { self.stopped_search } pub const fn get_bad_extensions_files(&self) -> &Vec { &self.bad_extensions_files } pub fn set_maximal_file_size(&mut self, maximal_file_size: u64) { self.maximal_file_size = match maximal_file_size { 0 => 1, t => t, }; } pub fn set_minimal_file_size(&mut self, minimal_file_size: u64) { self.minimal_file_size = match minimal_file_size { 0 => 1, t => t, }; } pub const fn get_text_messages(&self) -> &Messages { &self.text_messages } pub const fn get_information(&self) -> &Info { &self.information } pub fn set_save_also_as_json(&mut self, save_also_as_json: bool) { self.save_also_as_json = save_also_as_json; } pub fn set_recursive_search(&mut self, recursive_search: bool) { self.recursive_search = recursive_search; } pub fn set_included_directory(&mut self, included_directory: Vec) -> bool { self.directories.set_included_directory(included_directory, &mut self.text_messages) } pub fn set_excluded_directory(&mut self, excluded_directory: Vec) { self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages); } pub fn set_allowed_extensions(&mut self, allowed_extensions: String) { self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages); } pub fn set_excluded_items(&mut self, excluded_items: Vec) { self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages); } fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) -> bool { let result = DirTraversalBuilder::new() .root_dirs(self.directories.included_directories.clone()) .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .minimal_file_size(self.minimal_file_size) .maximal_file_size(self.maximal_file_size) .directories(self.directories.clone()) .allowed_extensions(self.allowed_extensions.clone()) .excluded_items(self.excluded_items.clone()) .recursive_search(self.recursive_search) .build() .run(); match result { DirTraversalResult::SuccessFiles { start_time, grouped_file_entries, warnings, } => { if let Some(files_to_check) = grouped_file_entries.get(&()) { self.files_to_check = files_to_check.clone(); } self.text_messages.warnings.extend(warnings); Common::print_time(start_time, SystemTime::now(), "check_files".to_string()); true } DirTraversalResult::SuccessFolders { .. } => { unreachable!() } DirTraversalResult::Stopped => false, } } fn look_for_bad_extensions_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender>) -> bool { let system_time = SystemTime::now(); let include_files_without_extension = self.include_files_without_extension; let check_was_stopped = AtomicBool::new(false); // Used for breaking from GUI and ending check thread //// PROGRESS THREAD START let progress_thread_run = Arc::new(AtomicBool::new(true)); let atomic_file_counter = Arc::new(AtomicUsize::new(0)); let progress_thread_handle = if let Some(progress_sender) = progress_sender { let progress_send = progress_sender.clone(); let progress_thread_run = progress_thread_run.clone(); let atomic_file_counter = atomic_file_counter.clone(); let entries_to_check = self.files_to_check.len(); thread::spawn(move || loop { progress_send .unbounded_send(ProgressData { checking_method: CheckingMethod::None, current_stage: 1, max_stage: 1, entries_checked: atomic_file_counter.load(Ordering::Relaxed) as usize, entries_to_check, }) .unwrap(); if !progress_thread_run.load(Ordering::Relaxed) { break; } sleep(Duration::from_millis(LOOP_DURATION as u64)); }) } else { thread::spawn(|| {}) }; let mut files_to_check = Default::default(); mem::swap(&mut files_to_check, &mut self.files_to_check); //// PROGRESS THREAD END let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default(); for (proper, found) in WORKAROUNDS { // This should be enabled when items will have only 1 possible workaround items // if hashmap_workarounds.contains_key(found) { // panic!("Already have {} key", found); // } hashmap_workarounds.entry(found).or_insert_with(Vec::new).push(proper); } self.bad_extensions_files = files_to_check .into_par_iter() .map(|file_entry| { atomic_file_counter.fetch_add(1, Ordering::Relaxed); if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { check_was_stopped.store(true, Ordering::Relaxed); return None; } // Check what exactly content file contains let kind = match infer::get_from_path(&file_entry.path) { Ok(k) => match k { Some(t) => t, None => return Some(None), }, Err(_) => return Some(None), }; let proper_extension = kind.extension(); // Extract current extension from file let current_extension; if let Some(extension) = file_entry.path.extension() { let extension = extension.to_string_lossy().to_lowercase(); if DISABLED_EXTENSIONS.contains(&extension.as_str()) { return Some(None); } // Text longer than 10 characters is not considered as extension if extension.len() > 10 { current_extension = "".to_string(); } else { current_extension = extension; } } else { current_extension = "".to_string(); } // Already have proper extension, no need to do more things if current_extension == proper_extension { return Some(None); } // Check for all extensions that file can use(not sure if it is worth to do it) let mut all_available_extensions: BTreeSet<&str> = Default::default(); let think_extension = match current_extension.is_empty() { true => "".to_string(), false => { for mim in mime_guess::from_ext(proper_extension) { if let Some(all_ext) = get_mime_extensions(&mim) { for ext in all_ext { all_available_extensions.insert(ext); } } } // Workarounds if let Some(vec_pre) = hashmap_workarounds.get(current_extension.as_str()) { for pre in vec_pre { if all_available_extensions.contains(pre) { all_available_extensions.insert(current_extension.as_str()); break; } } } let mut guessed_multiple_extensions = format!("({}) - ", proper_extension); for ext in &all_available_extensions { guessed_multiple_extensions.push_str(ext); guessed_multiple_extensions.push(','); } guessed_multiple_extensions.pop(); guessed_multiple_extensions } }; if all_available_extensions.is_empty() { // Not found any extension return Some(None); } else if current_extension.is_empty() { if !include_files_without_extension { return Some(None); } } else if all_available_extensions.take(¤t_extension.as_str()).is_some() { // Found proper extension return Some(None); } Some(Some(BadFileEntry { path: file_entry.path, modified_date: file_entry.modified_date, size: file_entry.size, current_extension, proper_extensions: think_extension, })) }) .while_some() .filter(|file_entry| file_entry.is_some()) .map(|file_entry| file_entry.unwrap()) .collect::>(); // End thread which send info to gui progress_thread_run.store(false, Ordering::Relaxed); progress_thread_handle.join().unwrap(); // Break if stop was clicked if check_was_stopped.load(Ordering::Relaxed) { return false; } self.information.number_of_files_with_bad_extension = self.bad_extensions_files.len(); Common::print_time(system_time, SystemTime::now(), "sort_images - reading data from files in parallel".to_string()); // Clean unused data self.files_to_check = Default::default(); true } } impl Default for BadExtensions { fn default() -> Self { Self::new() } } impl DebugPrint for BadExtensions { #[allow(dead_code)] #[allow(unreachable_code)] /// Debugging printing - only available on debug build fn debug_print(&self) { #[cfg(not(debug_assertions))] { return; } println!("---------------DEBUG PRINT---------------"); println!("### Information's"); println!("Errors size - {}", self.text_messages.errors.len()); println!("Warnings size - {}", self.text_messages.warnings.len()); println!("Messages size - {}", self.text_messages.messages.len()); println!("### Other"); println!("Excluded items - {:?}", self.excluded_items.items); println!("Included directories - {:?}", self.directories.included_directories); println!("Excluded directories - {:?}", self.directories.excluded_directories); println!("Recursive search - {}", self.recursive_search); println!("-----------------------------------------"); } } impl SaveResults for BadExtensions { fn save_results_to_file(&mut self, file_name: &str) -> bool { let start_time: SystemTime = SystemTime::now(); let file_name: String = match file_name { "" => "results.txt".to_string(), k => k.to_string(), }; let file_handler = match File::create(&file_name) { Ok(t) => t, Err(e) => { self.text_messages.errors.push(format!("Failed to create file {}, reason {}", file_name, e)); return false; } }; let mut writer = BufWriter::new(file_handler); if let Err(e) = writeln!( writer, "Results of searching {:?} with excluded directories {:?} and excluded items {:?}", self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items ) { self.text_messages.errors.push(format!("Failed to save results to file {}, reason {}", file_name, e)); return false; } if !self.bad_extensions_files.is_empty() { writeln!(writer, "Found {} files with invalid extension.", self.information.number_of_files_with_bad_extension).unwrap(); for file_entry in self.bad_extensions_files.iter() { writeln!(writer, "{}", file_entry.path.display()).unwrap(); } } else { write!(writer, "Not found any files with invalid extension.").unwrap(); } Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string()); true } } impl PrintResults for BadExtensions { /// Print information's about duplicated entries /// Only needed for CLI fn print_results(&self) { let start_time: SystemTime = SystemTime::now(); println!("Found {} files with invalid extension.\n", self.information.number_of_files_with_bad_extension); for file_entry in self.bad_extensions_files.iter() { println!("{}", file_entry.path.display()); } Common::print_time(start_time, SystemTime::now(), "print_entries".to_string()); } }