Add big files finding to CLI

This commit is contained in:
Rafał Mikrut 2020-09-26 13:48:53 +02:00
parent d17f2d3ef1
commit 5f643d4356
3 changed files with 172 additions and 19 deletions

View File

@ -30,7 +30,12 @@ fn main() {
process::exit(1);
}
if arguments[arguments.len() - 1].argument != Option::None {
println!("FATAL ERROR: Trying set second parameter {}, but only one is supported", argument); // This may be changed in future to support 2 or more attributes with space
println!(
"FATAL ERROR: Trying set second parameter \"{}\" for \"{}\" which already have this parameter \"{}\" ",
argument,
arguments[arguments.len() - 1].command,
arguments[arguments.len() - 1].argument.as_ref().unwrap()
); // This may be changed in future to support 2 or more attributes with space
process::exit(1);
}
let last_element = arguments.len() - 1;
@ -91,8 +96,6 @@ fn main() {
}
if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
} else {
df.set_excluded_items("DEFAULT".to_string());
}
if ArgumentsPair::has_command(&arguments, "-o") {
@ -179,6 +182,63 @@ fn main() {
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
ef.print_empty_folders();
}
"--b" => {
let mut bf = big_file::BigFile::new();
if ArgumentsPair::has_command(&arguments, "-i") {
bf.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
}
if ArgumentsPair::has_command(&arguments, "-e") {
bf.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
}
if ArgumentsPair::has_command(&arguments, "-s") {
let number_of_files = match ArgumentsPair::get_argument(&arguments, "-s", false).parse::<usize>() {
Ok(t) => {
if t == 0 {
println!("ERROR: Minimum one biggest file must be showed..");
1
} else {
t
}
}
Err(_) => {
println!("FATAL ERROR: \"{}\" is not valid number of files to show(allowed range <1,usize::max>)", ArgumentsPair::get_argument(&arguments, "-s", false));
process::exit(1);
}
};
bf.set_number_of_files_to_check(number_of_files);
}
if ArgumentsPair::has_command(&arguments, "-x") {
bf.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x", false));
}
if ArgumentsPair::has_command(&arguments, "-k") {
bf.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
}
if ArgumentsPair::has_command(&arguments, "-o") {
bf.set_recursive_search(false);
}
bf.find_big_files();
#[allow(clippy::collapsible_if)]
if ArgumentsPair::has_command(&arguments, "-f") {
if !bf.save_results_to_file(&ArgumentsPair::get_argument(&arguments, "-f", false)) {
bf.get_text_messages().print_messages();
process::exit(1);
}
}
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
bf.print_duplicated_entries();
bf.get_text_messages().print_messages();
}
"--version" | "v" => {
println!("Czkawka CLI {}", CZKAWKA_VERSION);
process::exit(0);
@ -203,7 +263,7 @@ Usage of Czkawka:
czkawka
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = "DEFAULT"] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicates files
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicates files
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
@ -230,6 +290,15 @@ Usage of Czkawka:
Usage example:
czkawka --e -i "/home/rafal/rr, /home/gateway" -e "/home/rafal/rr/2" -delete
--b <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s number_of_files = 50] [-x allowed_extension = ""] [-o] [-f file_to_save = "results.txt"]
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
-o - this options prevents from recursive check of folders
-f file_to_save - saves results to file
-s number_of_files - number of showed the biggest files.
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
--version / --v - prints program name and version
"###

View File

@ -1,7 +1,9 @@
use crate::common::{Common, Messages};
use humansize::{file_size_opts as options, FileSize};
use std::collections::BTreeMap;
use std::fs;
use std::fs::Metadata;
use std::fs::{File, Metadata};
use std::io::Write;
use std::path::Path;
use std::time::SystemTime;
@ -20,6 +22,7 @@ pub struct Info {
pub number_of_ignored_files: usize,
pub number_of_ignored_things: usize,
pub taken_space: u64,
pub number_of_real_files: usize,
}
impl Info {
pub fn new() -> Info {
@ -29,6 +32,7 @@ impl Info {
number_of_ignored_files: 0,
number_of_ignored_things: 0,
taken_space: 0,
number_of_real_files: 0,
}
}
}
@ -62,7 +66,7 @@ impl BigFile {
excluded_directories: vec![],
included_directories: vec![],
allowed_extensions: vec![],
recursive_search: false,
recursive_search: true,
number_of_files_to_check: 50,
}
}
@ -76,9 +80,59 @@ impl BigFile {
/// Runs the complete big-file search.
///
/// The steps run in a fixed order: the configured directories are optimized first,
/// then the scan itself is performed, and finally the debug summary is printed.
pub fn find_big_files(&mut self) {
    self.optimize_directories();
    self.look_for_big_files();
    self.debug_print();
}
/// Stores the `recursive_search` flag on this finder.
///
/// The CLI passes `false` here when the `-o` option is given.
pub fn set_recursive_search(&mut self, recursive_search: bool) {
    self.recursive_search = recursive_search;
}
/// Saves the search results to the file `file_name`.
///
/// An empty `file_name` falls back to `results.txt`. The report starts with a header listing
/// the included directories, excluded directories and excluded items. It is followed either by
/// the found files (walking `big_files` in reverse key order, i.e. largest size first) or by a
/// short note when nothing was found.
///
/// Returns `true` when the report was written. When the file cannot be created or written,
/// an error is pushed to `self.text_messages.errors` and `false` is returned; I/O failures
/// never panic.
pub fn save_results_to_file(&mut self, file_name: &str) -> bool {
    let start_time: SystemTime = SystemTime::now();
    let file_name: String = match file_name {
        "" => "results.txt".to_string(),
        k => k.to_string(),
    };

    let mut file = match File::create(&file_name) {
        Ok(t) => t,
        Err(_) => {
            self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
            return false;
        }
    };

    // Assemble the whole report in memory first so that there is exactly one fallible write
    // below, handled in one place instead of several `unwrap()` calls.
    let mut report = format!(
        "Results of searching {:?} with excluded directories {:?} and excluded items {:?}\n",
        self.included_directories, self.excluded_directories, self.excluded_items
    );

    if self.information.number_of_real_files != 0 {
        report.push_str(&format!("{} the biggest files.\n\n", self.information.number_of_real_files));
        for (size, files) in self.big_files.iter().rev() {
            for file_entry in files {
                // NOTE(review): humansize is expected to fail only for negative values, which an
                // unsigned size cannot hold - confirm against the humansize version in use.
                report.push_str(&format!("{} ({}) - {}\n", size.file_size(options::BINARY).unwrap(), size, file_entry.path));
            }
        }
    } else {
        report.push_str("Not found any files.");
    }

    if file.write_all(report.as_bytes()).is_err() {
        self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
        return false;
    }

    Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
    true
}
/// List of allowed extensions; only files with these extensions will be checked
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
let start_time: SystemTime = SystemTime::now();
@ -116,7 +170,6 @@ impl BigFile {
Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string());
}
fn look_for_big_files(&mut self) {
let start_time: SystemTime = SystemTime::now();
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
@ -256,15 +309,15 @@ impl BigFile {
//
let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
let mut number_of_files: usize = 0;
for (size, vector) in &self.big_files {
if number_of_files < self.number_of_files_to_check {
for (size, vector) in self.big_files.iter().rev() {
if self.information.number_of_real_files < self.number_of_files_to_check {
for file in vector {
if number_of_files < self.number_of_files_to_check {
if self.information.number_of_real_files < self.number_of_files_to_check {
new_map.entry(*size).or_insert_with(Vec::new);
new_map.get_mut(size).unwrap().push(file.clone());
number_of_files += 1;
self.information.taken_space += size;
self.information.number_of_real_files += 1;
} else {
break;
}
@ -278,19 +331,33 @@ impl BigFile {
Common::print_time(start_time, SystemTime::now(), "look_for_big_files".to_string());
}
/// Prints collected statistics and the current configuration to stdout.
///
/// Only produces output in debug builds; when `debug_assertions` is disabled the
/// function returns immediately.
#[allow(dead_code)]
fn debug_print(&self) {
    // Guard clause: nothing is printed outside of debug builds.
    if !cfg!(debug_assertions) {
        return;
    }

    let messages = &self.text_messages;
    let info = &self.information;

    println!("---------------DEBUG PRINT---------------");

    println!("### Information's");
    println!("Errors size - {}", messages.errors.len());
    println!("Warnings size - {}", messages.warnings.len());
    println!("Messages size - {}", messages.messages.len());
    println!("Number of checked files - {}", info.number_of_checked_files);
    println!("Number of checked folders - {}", info.number_of_checked_folders);
    println!("Number of ignored files - {}", info.number_of_ignored_files);
    println!("Number of ignored things(like symbolic links) - {}", info.number_of_ignored_things);

    println!("### Other");
    println!("Big files size {} in {} groups", info.number_of_real_files, self.big_files.len());
    println!("Allowed extensions - {:?}", self.allowed_extensions);
    println!("Excluded items - {:?}", self.excluded_items);
    println!("Included directories - {:?}", self.included_directories);
    println!("Excluded directories - {:?}", self.excluded_directories);
    println!("Recursive search - {}", self.recursive_search);
    println!("Number of files to check - {:?}", self.number_of_files_to_check);

    println!("-----------------------------------------");
}
@ -298,6 +365,20 @@ impl BigFile {
self.number_of_files_to_check = number_of_files_to_check;
}
/// Prints the found files to stdout - only needed for the CLI.
///
/// A summary line with the number of files and their total size comes first, then one
/// line per file. `big_files` is walked in reverse key order, so the largest sizes are
/// printed first.
pub fn print_duplicated_entries(&self) {
    let start_time: SystemTime = SystemTime::now();

    let total_size = self.information.taken_space.file_size(options::BINARY).unwrap();
    println!("Found {} files which take {}:", self.information.number_of_real_files, total_size);

    // Flatten the size -> files groups into a single stream of (size, entry) pairs.
    let sized_entries = self.big_files.iter().rev().flat_map(|(size, vector)| vector.iter().map(move |entry| (size, entry)));

    // TODO Align all to same width
    for (size, entry) in sized_entries {
        println!("{} ({}) - {}", size.file_size(options::BINARY).unwrap(), size, entry.path);
    }

    Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
}
/// Setting excluded items which need to contain the * wildcard
/// They are a lot slower than absolute paths, so they should not be used too heavily
pub fn set_excluded_items(&mut self, mut excluded_items: String) {

View File

@ -8,14 +8,14 @@ use std::time::{SystemTime, UNIX_EPOCH};
use crate::common::{Common, Messages};
#[derive(PartialEq, Eq, Clone)]
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum CheckingMethod {
None,
Size,
Hash,
}
#[derive(Eq, PartialEq, Clone)]
#[derive(Eq, PartialEq, Clone, Debug)]
pub enum DeleteMethod {
None,
AllExceptNewest,
@ -678,9 +678,12 @@ impl DuplicateFinder {
println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items);
println!("Included directories - {:?}", self.included_directories);
println!("Excluded directories - {:?}", self.excluded_directories);
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Minimum file size - {:?}", self.min_file_size);
println!("Checking Method - {:?}", self.check_method);
println!("Delete Method - {:?}", self.delete_method);
println!("-----------------------------------------");
}