1
0
Fork 0
mirror of synced 2024-04-28 17:42:26 +12:00

Add big files finding to CLI

This commit is contained in:
Rafał Mikrut 2020-09-26 13:48:53 +02:00
parent d17f2d3ef1
commit 5f643d4356
3 changed files with 172 additions and 19 deletions

View file

@ -30,7 +30,12 @@ fn main() {
process::exit(1); process::exit(1);
} }
if arguments[arguments.len() - 1].argument != Option::None { if arguments[arguments.len() - 1].argument != Option::None {
println!("FATAL ERROR: Trying set second parameter {}, but only one is supported", argument); // This may be changed in future to support 2 or more attributes with space println!(
"FATAL ERROR: Trying set second parameter \"{}\" for \"{}\" which already have this parameter \"{}\" ",
argument,
arguments[arguments.len() - 1].command,
arguments[arguments.len() - 1].argument.as_ref().unwrap()
); // This may be changed in future to support 2 or more attributes with space
process::exit(1); process::exit(1);
} }
let last_element = arguments.len() - 1; let last_element = arguments.len() - 1;
@ -91,8 +96,6 @@ fn main() {
} }
if ArgumentsPair::has_command(&arguments, "-k") { if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false)); df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
} else {
df.set_excluded_items("DEFAULT".to_string());
} }
if ArgumentsPair::has_command(&arguments, "-o") { if ArgumentsPair::has_command(&arguments, "-o") {
@ -179,6 +182,63 @@ fn main() {
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed #[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
ef.print_empty_folders(); ef.print_empty_folders();
} }
"--b" => {
let mut bf = big_file::BigFile::new();
if ArgumentsPair::has_command(&arguments, "-i") {
bf.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
}
if ArgumentsPair::has_command(&arguments, "-e") {
bf.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
}
if ArgumentsPair::has_command(&arguments, "-s") {
let number_of_files = match ArgumentsPair::get_argument(&arguments, "-s", false).parse::<usize>() {
Ok(t) => {
if t == 0 {
println!("ERROR: Minimum one biggest file must be showed..");
1
} else {
t
}
}
Err(_) => {
println!("FATAL ERROR: \"{}\" is not valid number of files to show(allowed range <1,usize::max>)", ArgumentsPair::get_argument(&arguments, "-s", false));
process::exit(1);
}
};
bf.set_number_of_files_to_check(number_of_files);
}
if ArgumentsPair::has_command(&arguments, "-x") {
bf.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x", false));
}
if ArgumentsPair::has_command(&arguments, "-k") {
bf.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
}
if ArgumentsPair::has_command(&arguments, "-o") {
bf.set_recursive_search(false);
}
bf.find_big_files();
#[allow(clippy::collapsible_if)]
if ArgumentsPair::has_command(&arguments, "-f") {
if !bf.save_results_to_file(&ArgumentsPair::get_argument(&arguments, "-f", false)) {
bf.get_text_messages().print_messages();
process::exit(1);
}
}
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
bf.print_duplicated_entries();
bf.get_text_messages().print_messages();
}
"--version" | "v" => { "--version" | "v" => {
println!("Czkawka CLI {}", CZKAWKA_VERSION); println!("Czkawka CLI {}", CZKAWKA_VERSION);
process::exit(0); process::exit(0);
@ -203,7 +263,7 @@ Usage of Czkawka:
czkawka czkawka
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = "DEFAULT"] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicates files --d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicates files
-i directory_to_search - list of directories which should will be searched like /home/rafal -i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search. -e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow) -k excluded_items - list of excluded items which contains * wildcard(may be slow)
@ -230,6 +290,15 @@ Usage of Czkawka:
Usage example: Usage example:
czkawka --e -i "/home/rafal/rr, /home/gateway" -e "/home/rafal/rr/2" -delete czkawka --e -i "/home/rafal/rr, /home/gateway" -e "/home/rafal/rr/2" -delete
--b <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s number_of_files = 50] [-x allowed_extension = ""] [-o] [-f file_to_save = "results.txt"]
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
-o - this options prevents from recursive check of folders
-f file_to_save - saves results to file
-s number_of_files - number of showed the biggest files.
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
--version / --v - prints program name and version --version / --v - prints program name and version
"### "###

View file

@ -1,7 +1,9 @@
use crate::common::{Common, Messages}; use crate::common::{Common, Messages};
use humansize::{file_size_opts as options, FileSize};
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fs; use std::fs;
use std::fs::Metadata; use std::fs::{File, Metadata};
use std::io::Write;
use std::path::Path; use std::path::Path;
use std::time::SystemTime; use std::time::SystemTime;
@ -20,6 +22,7 @@ pub struct Info {
pub number_of_ignored_files: usize, pub number_of_ignored_files: usize,
pub number_of_ignored_things: usize, pub number_of_ignored_things: usize,
pub taken_space: u64, pub taken_space: u64,
pub number_of_real_files: usize,
} }
impl Info { impl Info {
pub fn new() -> Info { pub fn new() -> Info {
@ -29,6 +32,7 @@ impl Info {
number_of_ignored_files: 0, number_of_ignored_files: 0,
number_of_ignored_things: 0, number_of_ignored_things: 0,
taken_space: 0, taken_space: 0,
number_of_real_files: 0,
} }
} }
} }
@ -62,7 +66,7 @@ impl BigFile {
excluded_directories: vec![], excluded_directories: vec![],
included_directories: vec![], included_directories: vec![],
allowed_extensions: vec![], allowed_extensions: vec![],
recursive_search: false, recursive_search: true,
number_of_files_to_check: 50, number_of_files_to_check: 50,
} }
} }
@ -76,9 +80,59 @@ impl BigFile {
pub fn find_big_files(&mut self) { pub fn find_big_files(&mut self) {
self.optimize_directories(); self.optimize_directories();
self.look_for_big_files();
self.debug_print(); self.debug_print();
} }
/// Enables or disables recursive searching.
///
/// Only stores the flag in `self.recursive_search`; the CLI passes `false`
/// when the `-o` option is given.
/// NOTE(review): how the flag is honoured during directory traversal is
/// not visible here - confirm in `look_for_big_files`.
pub fn set_recursive_search(&mut self, recursive_search: bool) {
self.recursive_search = recursive_search;
}
/// Saves the list of the biggest files to a text file.
///
/// An empty `file_name` falls back to `results.txt`. The file starts with a
/// header describing the searched and excluded directories and the excluded
/// items, followed by one line per file, largest first.
///
/// Returns `true` when everything was written. On any failure (creating the
/// file or writing to it) an error is pushed to `self.text_messages.errors`
/// and `false` is returned instead of panicking.
pub fn save_results_to_file(&mut self, file_name: &str) -> bool {
    let start_time: SystemTime = SystemTime::now();
    let file_name: &str = if file_name.is_empty() { "results.txt" } else { file_name };

    let mut file = match File::create(file_name) {
        Ok(t) => t,
        Err(_) => {
            self.text_messages.errors.push("Failed to create file ".to_string() + file_name);
            return false;
        }
    };

    // All writes are grouped in one fallible closure so that every I/O error
    // ends up in the same error path below instead of an `unwrap()` panic.
    let write_result = (|| -> std::io::Result<()> {
        writeln!(
            file,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.included_directories, self.excluded_directories, self.excluded_items
        )?;

        if self.information.number_of_real_files != 0 {
            writeln!(file, "{} the biggest files.\n", self.information.number_of_real_files)?;
            // The map is ordered by size ascending, so iterate in reverse to list the biggest first.
            for (size, files) in self.big_files.iter().rev() {
                for file_entry in files {
                    // Formatting an unsigned size cannot fail, so this `unwrap` is safe.
                    writeln!(file, "{} ({}) - {}", size.file_size(options::BINARY).unwrap(), size, file_entry.path)?;
                }
            }
        } else {
            write!(file, "Not found any files.")?;
        }
        Ok(())
    })();

    if write_result.is_err() {
        self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name);
        return false;
    }

    Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
    true
}
/// List of allowed extensions, only files with this extensions will be checking if are duplicates /// List of allowed extensions, only files with this extensions will be checking if are duplicates
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) { pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
let start_time: SystemTime = SystemTime::now(); let start_time: SystemTime = SystemTime::now();
@ -116,7 +170,6 @@ impl BigFile {
Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string()); Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string());
} }
fn look_for_big_files(&mut self) { fn look_for_big_files(&mut self) {
let start_time: SystemTime = SystemTime::now(); let start_time: SystemTime = SystemTime::now();
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
@ -256,15 +309,15 @@ impl BigFile {
// //
let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default(); let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
let mut number_of_files: usize = 0;
for (size, vector) in &self.big_files { for (size, vector) in self.big_files.iter().rev() {
if number_of_files < self.number_of_files_to_check { if self.information.number_of_real_files < self.number_of_files_to_check {
for file in vector { for file in vector {
if number_of_files < self.number_of_files_to_check { if self.information.number_of_real_files < self.number_of_files_to_check {
new_map.entry(*size).or_insert_with(Vec::new); new_map.entry(*size).or_insert_with(Vec::new);
new_map.get_mut(size).unwrap().push(file.clone()); new_map.get_mut(size).unwrap().push(file.clone());
number_of_files += 1; self.information.taken_space += size;
self.information.number_of_real_files += 1;
} else { } else {
break; break;
} }
@ -278,19 +331,33 @@ impl BigFile {
Common::print_time(start_time, SystemTime::now(), "look_for_big_files".to_string()); Common::print_time(start_time, SystemTime::now(), "look_for_big_files".to_string());
} }
/// Debug print
#[allow(dead_code)] #[allow(dead_code)]
#[allow(unreachable_code)] #[allow(unreachable_code)]
/// Debugging printing - only available on debug build
fn debug_print(&self) { fn debug_print(&self) {
#[cfg(not(debug_assertions))] #[cfg(not(debug_assertions))]
{ {
return; return;
} }
println!("---------------DEBUG PRINT---------------"); println!("---------------DEBUG PRINT---------------");
// println!("Number of all checked folders - {}", self.information.number_of_checked_folders); println!("### Information's");
// println!("Number of empty folders - {}", self.information.number_of_empty_folders);
// println!("Included directories - {:?}", self.included_directories); println!("Errors size - {}", self.text_messages.errors.len());
println!("Warnings size - {}", self.text_messages.warnings.len());
println!("Messages size - {}", self.text_messages.messages.len());
println!("Number of checked files - {}", self.information.number_of_checked_files);
println!("Number of checked folders - {}", self.information.number_of_checked_folders);
println!("Number of ignored files - {}", self.information.number_of_ignored_files);
println!("Number of ignored things(like symbolic links) - {}", self.information.number_of_ignored_things);
println!("### Other");
println!("Big files size {} in {} groups", self.information.number_of_real_files, self.big_files.len());
println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items);
println!("Included directories - {:?}", self.included_directories);
println!("Excluded directories - {:?}", self.excluded_directories);
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Number of files to check - {:?}", self.number_of_files_to_check);
println!("-----------------------------------------"); println!("-----------------------------------------");
} }
@ -298,6 +365,20 @@ impl BigFile {
self.number_of_files_to_check = number_of_files_to_check; self.number_of_files_to_check = number_of_files_to_check;
} }
/// Prints the found biggest files to standard output, largest first.
/// Used only by the CLI.
pub fn print_duplicated_entries(&self) {
    let timer_start: SystemTime = SystemTime::now();

    let total_space = self.information.taken_space.file_size(options::BINARY).unwrap();
    println!("Found {} files which take {}:", self.information.number_of_real_files, total_space);

    // Flatten the size -> files map into (size, file) pairs, biggest sizes first.
    let ordered_entries = self
        .big_files
        .iter()
        .rev()
        .flat_map(|(size, entries)| entries.iter().map(move |entry| (size, entry)));

    // TODO Align all to same width
    for (size, entry) in ordered_entries {
        println!("{} ({}) - {}", size.file_size(options::BINARY).unwrap(), size, entry.path);
    }

    Common::print_time(timer_start, SystemTime::now(), "print_duplicated_entries".to_string());
}
/// Setting excluded items which needs to contains * wildcrard /// Setting excluded items which needs to contains * wildcrard
/// Are a lot of slower than absolute path, so it should be used to heavy /// Are a lot of slower than absolute path, so it should be used to heavy
pub fn set_excluded_items(&mut self, mut excluded_items: String) { pub fn set_excluded_items(&mut self, mut excluded_items: String) {

View file

@ -8,14 +8,14 @@ use std::time::{SystemTime, UNIX_EPOCH};
use crate::common::{Common, Messages}; use crate::common::{Common, Messages};
#[derive(PartialEq, Eq, Clone)] #[derive(PartialEq, Eq, Clone, Debug)]
pub enum CheckingMethod { pub enum CheckingMethod {
None, None,
Size, Size,
Hash, Hash,
} }
#[derive(Eq, PartialEq, Clone)] #[derive(Eq, PartialEq, Clone, Debug)]
pub enum DeleteMethod { pub enum DeleteMethod {
None, None,
AllExceptNewest, AllExceptNewest,
@ -678,9 +678,12 @@ impl DuplicateFinder {
println!("Hashed Files list size - {}", self.files_with_identical_hashes.len()); println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
println!("Allowed extensions - {:?}", self.allowed_extensions); println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items); println!("Excluded items - {:?}", self.excluded_items);
println!("Included directories - {:?}", self.included_directories);
println!("Excluded directories - {:?}", self.excluded_directories); println!("Excluded directories - {:?}", self.excluded_directories);
println!("Recursive search - {}", self.recursive_search.to_string()); println!("Recursive search - {}", self.recursive_search.to_string());
println!("Minimum file size - {:?}", self.min_file_size); println!("Minimum file size - {:?}", self.min_file_size);
println!("Checking Method - {:?}", self.check_method);
println!("Delete Method - {:?}", self.delete_method);
println!("-----------------------------------------"); println!("-----------------------------------------");
} }