1
0
Fork 0
mirror of synced 2024-04-24 15:52:07 +12:00

Big code refactoring

This commit is contained in:
Rafał Mikrut 2020-09-26 16:52:13 +02:00
parent 3cff32c031
commit 8161a1c59b
13 changed files with 764 additions and 1050 deletions

View file

@ -1,3 +1,4 @@
use czkawka_core::common_traits::*;
use czkawka_core::*;
use std::{env, process};
@ -64,13 +65,13 @@ fn main() {
let mut df = duplicate::DuplicateFinder::new();
if ArgumentsPair::has_command(&arguments, "-i") {
df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
df.set_included_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
}
if ArgumentsPair::has_command(&arguments, "-e") {
df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
df.set_excluded_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
}
if ArgumentsPair::has_command(&arguments, "-s") {
@ -148,7 +149,7 @@ fn main() {
}
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
df.print_duplicated_entries();
df.print_results();
df.get_text_messages().print_messages();
}
@ -159,7 +160,7 @@ fn main() {
let mut ef = empty_folder::EmptyFolder::new();
if ArgumentsPair::has_command(&arguments, "-i") {
ef.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
ef.set_included_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
@ -180,19 +181,19 @@ fn main() {
}
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
ef.print_empty_folders();
ef.print_results();
}
"--b" => {
let mut bf = big_file::BigFile::new();
if ArgumentsPair::has_command(&arguments, "-i") {
bf.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
bf.set_included_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
}
if ArgumentsPair::has_command(&arguments, "-e") {
bf.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
bf.set_excluded_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
}
if ArgumentsPair::has_command(&arguments, "-s") {
@ -235,7 +236,7 @@ fn main() {
}
#[cfg(not(debug_assertions))] // This will show too much probably unnecessary data to debug, comment line only if needed
bf.print_duplicated_entries();
bf.print_results();
bf.get_text_messages().print_messages();
}

View file

@ -1,10 +1,14 @@
use crate::common::{Common, Messages};
use crate::common::Common;
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
use humansize::{file_size_opts as options, FileSize};
use std::collections::BTreeMap;
use std::fs;
use std::fs::{File, Metadata};
use std::io::Write;
use std::path::Path;
use std::time::SystemTime;
#[derive(Clone)]
@ -48,10 +52,9 @@ pub struct BigFile {
text_messages: Messages,
information: Info,
big_files: BTreeMap<u64, Vec<FileEntry>>,
excluded_items: Vec<String>,
excluded_directories: Vec<String>,
included_directories: Vec<String>,
allowed_extensions: Vec<String>,
excluded_items: ExcludedItems,
directories: Directories,
allowed_extensions: Extensions,
recursive_search: bool,
number_of_files_to_check: usize,
}
@ -62,10 +65,9 @@ impl BigFile {
text_messages: Default::default(),
information: Info::new(),
big_files: Default::default(),
excluded_items: vec![],
excluded_directories: vec![],
included_directories: vec![],
allowed_extensions: vec![],
excluded_items: ExcludedItems::new(),
directories: Directories::new(),
allowed_extensions: Extensions::new(),
recursive_search: true,
number_of_files_to_check: 50,
}
@ -88,86 +90,9 @@ impl BigFile {
self.recursive_search = recursive_search;
}
/// Saving results to provided file
pub fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
return false;
}
};
match file.write_all(
format!(
"Results of searching {:?} with excluded directories {:?} and excluded items {:?}\n",
self.included_directories, self.excluded_directories, self.excluded_items
)
.as_bytes(),
) {
Ok(_) => (),
Err(_) => {
self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return false;
}
}
if self.information.number_of_real_files != 0 {
file.write_all(format!("{} the biggest files.\n\n", self.information.number_of_real_files).as_bytes()).unwrap();
for (size, files) in self.big_files.iter().rev() {
for file_entry in files {
file.write_all(format!("{} ({}) - {}\n", size.file_size(options::BINARY).unwrap(), size, file_entry.path.clone()).as_bytes()).unwrap();
}
}
} else {
file.write_all(b"Not found any empty folders.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
true
}
/// Sets the list of allowed extensions; only files with one of these extensions will be checked.
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
let start_time: SystemTime = SystemTime::now();
if allowed_extensions.is_empty() {
return;
}
allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg");
allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");
allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf");
let extensions: Vec<String> = allowed_extensions.split(',').map(String::from).collect();
for mut extension in extensions {
if extension == "" || extension.replace('.', "").trim() == "" {
continue;
}
if extension.starts_with('.') {
extension = extension[1..].to_string();
}
if extension[1..].contains('.') {
self.text_messages.warnings.push(".".to_string() + extension.as_str() + " is not valid extension(valid extension doesn't have dot inside)");
continue;
}
if !self.allowed_extensions.contains(&extension.trim().to_string()) {
self.allowed_extensions.push(extension.trim().to_string());
}
}
if self.allowed_extensions.is_empty() {
self.text_messages.messages.push("No valid extensions were provided, so allowing all extensions by default.".to_string());
}
Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string());
pub fn set_allowed_extensions(&mut self, allowed_extensions: String) {
self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages);
}
fn look_for_big_files(&mut self) {
@ -175,7 +100,7 @@ impl BigFile {
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
// Add root folders for finding
for id in &self.included_directories {
for id in &self.directories.included_directories {
folders_to_check.push(id.to_string());
}
self.information.number_of_checked_folders += folders_to_check.len();
@ -221,7 +146,7 @@ impl BigFile {
let mut is_excluded_dir = false;
next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
for ed in &self.excluded_directories {
for ed in &self.directories.excluded_directories {
if next_folder == *ed {
is_excluded_dir = true;
break;
@ -229,7 +154,7 @@ impl BigFile {
}
if !is_excluded_dir {
let mut found_expression: bool = false;
for expression in &self.excluded_items {
for expression in &self.excluded_items.items {
if Common::regex_check(expression, &next_folder) {
found_expression = true;
break;
@ -245,9 +170,9 @@ impl BigFile {
let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
// Checking allowed extensions
if !self.allowed_extensions.is_empty() {
if !self.allowed_extensions.file_extensions.is_empty() {
have_valid_extension = false;
for i in &self.allowed_extensions {
for i in &self.allowed_extensions.file_extensions {
if file_name_lowercase.ends_with((".".to_string() + i.to_lowercase().as_str()).as_str()) {
have_valid_extension = true;
break;
@ -263,7 +188,7 @@ impl BigFile {
// Checking expressions
let mut found_expression: bool = false;
for expression in &self.excluded_items {
for expression in &self.excluded_items.items {
if Common::regex_check(expression, &current_file_name) {
found_expression = true;
break;
@ -307,7 +232,6 @@ impl BigFile {
}
}
//
let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
for (size, vector) in self.big_files.iter().rev() {
@ -331,6 +255,39 @@ impl BigFile {
Common::print_time(start_time, SystemTime::now(), "look_for_big_files".to_string());
}
pub fn set_number_of_files_to_check(&mut self, number_of_files_to_check: usize) {
self.number_of_files_to_check = number_of_files_to_check;
}
/// Sets the excluded items; every expression must contain the `*` wildcard.
/// Wildcard matching is a lot slower than comparing absolute paths, so it should not be used too heavily.
pub fn set_excluded_items(&mut self, excluded_items: String) {
self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
}
/// Remove unused entries when included or excluded overlaps with each other or are duplicated etc.
fn optimize_directories(&mut self) {
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
}
/// Setting include directories, at least one must be provided
pub fn set_included_directory(&mut self, included_directory: String) {
self.directories.set_included_directory(included_directory, &mut self.text_messages);
}
/// Setting absolute path to exclude
pub fn set_excluded_directory(&mut self, excluded_directory: String) {
self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
}
}
impl Default for BigFile {
fn default() -> Self {
Self::new()
}
}
impl DebugPrint for BigFile {
#[allow(dead_code)]
#[allow(unreachable_code)]
/// Debugging printing - only available on debug build
@ -352,22 +309,63 @@ impl BigFile {
println!("### Other");
println!("Big files size {} in {} groups", self.information.number_of_real_files, self.big_files.len());
println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items);
println!("Included directories - {:?}", self.included_directories);
println!("Excluded directories - {:?}", self.excluded_directories);
println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions);
println!("Excluded items - {:?}", self.excluded_items.items);
println!("Included directories - {:?}", self.directories.included_directories);
println!("Excluded directories - {:?}", self.directories.excluded_directories);
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Number of files to check - {:?}", self.number_of_files_to_check);
println!("-----------------------------------------");
}
}
impl SaveResults for BigFile {
/// Saving results to provided file
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
pub fn set_number_of_files_to_check(&mut self, number_of_files_to_check: usize) {
self.number_of_files_to_check = number_of_files_to_check;
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
return false;
}
};
match file.write_all(
format!(
"Results of searching {:?} with excluded directories {:?} and excluded items {:?}\n",
self.directories.included_directories, self.directories.excluded_directories, self.excluded_items.items
)
.as_bytes(),
) {
Ok(_) => (),
Err(_) => {
self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return false;
}
}
if self.information.number_of_real_files != 0 {
file.write_all(format!("{} the biggest files.\n\n", self.information.number_of_real_files).as_bytes()).unwrap();
for (size, files) in self.big_files.iter().rev() {
for file_entry in files {
file.write_all(format!("{} ({}) - {}\n", size.file_size(options::BINARY).unwrap(), size, file_entry.path.clone()).as_bytes()).unwrap();
}
}
} else {
file.write_all(b"Not found any empty folders.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
true
}
/// Print information's about duplicated entries
/// Only needed for CLI
pub fn print_duplicated_entries(&self) {
}
impl PrintResults for BigFile {
fn print_results(&self) {
let start_time: SystemTime = SystemTime::now();
println!("Found {} files which take {}:", self.information.number_of_real_files, self.information.taken_space.file_size(options::BINARY).unwrap());
for (size, vector) in self.big_files.iter().rev() {
@ -378,273 +376,4 @@ impl BigFile {
}
Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
}
/// Setting excluded items which needs to contains * wildcrard
/// Are a lot of slower than absolute path, so it should be used to heavy
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
let start_time: SystemTime = SystemTime::now();
if excluded_items.is_empty() {
return;
}
excluded_items = excluded_items.replace("\"", "");
let expressions: Vec<String> = excluded_items.split(',').map(String::from).collect();
let mut checked_expressions: Vec<String> = Vec::new();
for expression in expressions {
let expression: String = expression.trim().to_string();
if expression == "" {
continue;
}
if expression == "DEFAULT" {
// TODO add more files by default
checked_expressions.push("*/.git/*".to_string());
continue;
}
if !expression.contains('*') {
self.text_messages.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + expression.as_str());
continue;
}
checked_expressions.push(expression);
}
self.excluded_items = checked_expressions;
Common::print_time(start_time, SystemTime::now(), "set_excluded_items".to_string());
}
/// Remove unused entries when included or excluded overlaps with each other or are duplicated etc.
fn optimize_directories(&mut self) -> bool {
let start_time: SystemTime = SystemTime::now();
let mut optimized_included: Vec<String> = Vec::<String>::new();
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
// Remove duplicated entries like: "/", "/"
self.excluded_directories.sort();
self.included_directories.sort();
self.excluded_directories.dedup();
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
if self.recursive_search {
// This is only point which can't be done when recursive search is disabled.
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;
for ed_help in &self.excluded_directories {
if ed_checked == ed_help {
// We checking same element
continue;
}
if ed_checked.starts_with(ed_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_excluded.push(ed_checked.to_string());
}
}
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id_checked.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
}
// Remove include directories which are inside any exclude directory
for id in &self.included_directories {
let mut is_inside: bool = false;
for ed in &self.excluded_directories {
if id.starts_with(ed) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
// Remove non existed directories
for id in &self.included_directories {
let path = Path::new(id);
if path.exists() {
optimized_included.push(id.to_string());
}
}
for ed in &self.excluded_directories {
let path = Path::new(ed);
if path.exists() {
optimized_excluded.push(ed.to_string());
}
}
self.included_directories = optimized_included;
// optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
// Excluded paths must are inside include path, because
for ed in &self.excluded_directories {
let mut is_inside: bool = false;
for id in &self.included_directories {
if ed.starts_with(id) {
is_inside = true;
break;
}
}
if is_inside {
optimized_excluded.push(ed.to_string());
}
}
self.excluded_directories = optimized_excluded;
// optimized_excluded = Vec::<String>::new();
if self.included_directories.is_empty() {
self.text_messages.errors.push("Optimize Directories ERROR: Excluded directories overlaps all included directories.".to_string());
return false;
}
// Not needed, but better is to have sorted everything
self.excluded_directories.sort();
self.included_directories.sort();
Common::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
true
}
/// Setting include directories, at least one must be provided
pub fn set_include_directory(&mut self, mut include_directory: String) -> bool {
let start_time: SystemTime = SystemTime::now();
if include_directory.is_empty() {
self.text_messages.errors.push("At least one directory must be provided".to_string());
return false;
}
include_directory = include_directory.replace("\"", "");
let directories: Vec<String> = include_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory.contains('*') {
self.text_messages.warnings.push("Include Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
self.text_messages.warnings.push("Include Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
self.text_messages.warnings.push("Include Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
self.text_messages.warnings.push("Include Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory + "/");
} else {
checked_directories.push(directory);
}
}
if checked_directories.is_empty() {
self.text_messages.errors.push("Include Directory ERROR: Not found even one correct path to include which is required.".to_string());
return false;
}
self.included_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
true
}
/// Setting absolute path to exclude
pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
let start_time: SystemTime = SystemTime::now();
if exclude_directory.is_empty() {
return;
}
exclude_directory = exclude_directory.replace("\"", "");
let directories: Vec<String> = exclude_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory == "/" {
self.text_messages.errors.push("Exclude Directory ERROR: Excluding / is pointless, because it means that no files will be scanned.".to_string());
break;
}
if directory.contains('*') {
self.text_messages.warnings.push("Exclude Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
self.text_messages.warnings.push("Exclude Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
self.text_messages.warnings.push("Exclude Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
self.text_messages.warnings.push("Exclude Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory.trim().to_string() + "/");
} else {
checked_directories.push(directory.trim().to_string());
}
}
self.excluded_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
}
}
impl Default for BigFile {
fn default() -> Self {
Self::new()
}
}

View file

@ -104,54 +104,6 @@ impl Common {
}
}
pub struct Messages {
pub messages: Vec<String>,
pub warnings: Vec<String>,
pub errors: Vec<String>,
}
impl Messages {
pub fn new() -> Messages {
Messages {
messages: vec![],
warnings: vec![],
errors: vec![],
}
}
pub fn print_messages(&self) {
if !self.messages.is_empty() {
println!("-------------------------------MESSAGES--------------------------------");
for i in &self.messages {
println!("{}", i);
}
println!("---------------------------END OF MESSAGES-----------------------------");
}
if !self.warnings.is_empty() {
println!("-------------------------------WARNINGS--------------------------------");
for i in &self.warnings {
println!("{}", i);
}
println!("---------------------------END OF WARNINGS-----------------------------");
}
if !self.errors.is_empty() {
println!("--------------------------------ERRORS---------------------------------");
for i in &self.errors {
println!("{}", i);
}
println!("----------------------------END OF ERRORS------------------------------");
}
}
}
impl Default for Messages {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod test {
use crate::common::Common;

View file

@ -0,0 +1,252 @@
use crate::common::Common;
use crate::common_messages::Messages;
use std::path::Path;
use std::time::SystemTime;
/// Included and excluded directory lists used by a scan.
///
/// The lists are filled by `set_included_directory` / `set_excluded_directory` and pruned by
/// `optimize_directories`; those setters store absolute paths that end with `/`.
pub struct Directories {
/// Directories that are skipped while scanning.
pub excluded_directories: Vec<String>,
/// Directories used as starting points of the scan.
pub included_directories: Vec<String>,
}
impl Directories {
pub fn new() -> Directories {
Directories {
excluded_directories: vec![],
included_directories: vec![],
}
}
/// Setting include directories, at least one must be provided
pub fn set_included_directory(&mut self, mut included_directory: String, text_messages: &mut Messages) -> bool {
let start_time: SystemTime = SystemTime::now();
if included_directory.is_empty() {
text_messages.errors.push("At least one directory must be provided".to_string());
return false;
}
included_directory = included_directory.replace("\"", "");
let directories: Vec<String> = included_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory.contains('*') {
text_messages.warnings.push("Include Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
text_messages.warnings.push("Include Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
text_messages.warnings.push("Include Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
text_messages.warnings.push("Include Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory + "/");
} else {
checked_directories.push(directory);
}
}
if checked_directories.is_empty() {
text_messages.errors.push("Include Directory ERROR: Not found even one correct path to include which is required.".to_string());
return false;
}
self.included_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_included_directory".to_string());
true
}
/// Setting absolute path to exclude
pub fn set_excluded_directory(&mut self, mut excluded_directory: String, text_messages: &mut Messages) {
let start_time: SystemTime = SystemTime::now();
if excluded_directory.is_empty() {
return;
}
excluded_directory = excluded_directory.replace("\"", "");
let directories: Vec<String> = excluded_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory == "/" {
text_messages.errors.push("Exclude Directory ERROR: Excluding / is pointless, because it means that no files will be scanned.".to_string());
break;
}
if directory.contains('*') {
text_messages.warnings.push("Exclude Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
text_messages.warnings.push("Exclude Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
text_messages.warnings.push("Exclude Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
text_messages.warnings.push("Exclude Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory.trim().to_string() + "/");
} else {
checked_directories.push(directory.trim().to_string());
}
}
self.excluded_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_excluded_directory".to_string());
}
/// Remove unused entries when included or excluded overlaps with each other or are duplicated etc.
pub fn optimize_directories(&mut self, recursive_search: bool, text_messages: &mut Messages) -> bool {
let start_time: SystemTime = SystemTime::now();
let mut optimized_included: Vec<String> = Vec::<String>::new();
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
// Remove duplicated entries like: "/", "/"
self.excluded_directories.sort();
self.included_directories.sort();
self.excluded_directories.dedup();
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
if recursive_search {
// This is only point which can't be done when recursive search is disabled.
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;
for ed_help in &self.excluded_directories {
if ed_checked == ed_help {
// We checking same element
continue;
}
if ed_checked.starts_with(ed_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_excluded.push(ed_checked.to_string());
}
}
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id_checked.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
}
// Remove include directories which are inside any exclude directory
for id in &self.included_directories {
let mut is_inside: bool = false;
for ed in &self.excluded_directories {
if id.starts_with(ed) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
// Remove non existed directories
for id in &self.included_directories {
let path = Path::new(id);
if path.exists() {
optimized_included.push(id.to_string());
}
}
for ed in &self.excluded_directories {
let path = Path::new(ed);
if path.exists() {
optimized_excluded.push(ed.to_string());
}
}
self.included_directories = optimized_included;
// optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
// Excluded paths must are inside include path, because
for ed in &self.excluded_directories {
let mut is_inside: bool = false;
for id in &self.included_directories {
if ed.starts_with(id) {
is_inside = true;
break;
}
}
if is_inside {
optimized_excluded.push(ed.to_string());
}
}
self.excluded_directories = optimized_excluded;
// optimized_excluded = Vec::<String>::new();
if self.included_directories.is_empty() {
text_messages.errors.push("Optimize Directories ERROR: Excluded directories overlaps all included directories.".to_string());
return false;
}
// Not needed, but better is to have sorted everything
self.excluded_directories.sort();
self.included_directories.sort();
Common::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
true
}
}
impl Default for Directories {
fn default() -> Self {
Self::new()
}
}

View file

@ -0,0 +1,54 @@
use crate::common::Common;
use crate::common_messages::Messages;
use std::time::SystemTime;
/// List of file extensions accepted by a scan.
pub struct Extensions {
/// Allowed extensions; `set_allowed_extensions` stores them without the leading dot.
/// Callers in this project treat an empty list as "no extension filtering".
pub file_extensions: Vec<String>,
}
impl Extensions {
    /// Creates an instance with an empty extension list.
    pub fn new() -> Extensions {
        Extensions { file_extensions: vec![] }
    }

    /// Parses a comma separated list of allowed extensions and appends the valid ones to
    /// `file_extensions` (duplicates are skipped).
    ///
    /// The group names `IMAGE`, `VIDEO`, `MUSIC` and `TEXT` are expanded to predefined extension
    /// lists. Every entry is trimmed and one leading dot is removed, so `jpg`, `.jpg` and ` .jpg`
    /// are equivalent. Entries consisting only of dots/whitespace are ignored; entries that still
    /// contain a dot are skipped with a warning in `text_messages`.
    ///
    /// An empty input is a no-op. If no extension is stored after parsing, an informational
    /// message is recorded.
    pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String, text_messages: &mut Messages) {
        let start_time: SystemTime = SystemTime::now();

        if allowed_extensions.is_empty() {
            return;
        }

        allowed_extensions = allowed_extensions
            .replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg")
            .replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp")
            .replace("MUSIC", "mp3,flac,ogg,tta,wma,webm")
            .replace("TEXT", "txt,doc,docx,odt,rtf");

        for raw_extension in allowed_extensions.split(',') {
            // Trim before any other check so surrounding whitespace cannot hide a leading dot.
            let trimmed = raw_extension.trim();
            if trimmed.chars().all(|c| c == '.' || c.is_whitespace()) {
                continue;
            }

            // `strip_prefix` works on whole characters; the former byte slicing (`[1..]`)
            // panicked when the entry started with a multi-byte character.
            let extension = trimmed.strip_prefix('.').unwrap_or(trimmed).trim();

            if extension.contains('.') {
                text_messages.warnings.push(".".to_string() + extension + " is not valid extension(valid extension doesn't have dot inside)");
                continue;
            }

            if !self.file_extensions.iter().any(|known| known == extension) {
                self.file_extensions.push(extension.to_string());
            }
        }

        if self.file_extensions.is_empty() {
            text_messages.messages.push("No valid extensions were provided, so allowing all extensions by default.".to_string());
        }

        Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string());
    }
}
impl Default for Extensions {
fn default() -> Self {
Self::new()
}
}

View file

@ -0,0 +1,52 @@
use crate::common::Common;
use crate::common_messages::Messages;
use std::time::SystemTime;
/// Wildcard expressions describing paths which should be left out of a scan.
pub struct ExcludedItems {
    /// Accepted expressions; each one contains at least one `*`.
    pub items: Vec<String>,
}

impl ExcludedItems {
    /// Creates an empty set of excluded expressions.
    pub fn new() -> ExcludedItems {
        ExcludedItems { items: Vec::new() }
    }

    /// Sets the excluded items; every expression needs to contain the `*` wildcard.
    /// Matching expressions is a lot slower than comparing absolute paths, so it should not be used too heavily.
    ///
    /// `excluded_items` is a comma separated list (double quotes are removed). The keyword
    /// `DEFAULT` expands to the built-in expression, entries without `*` are dropped with a
    /// warning pushed to `text_messages`. An empty string keeps the current items.
    pub fn set_excluded_items(&mut self, mut excluded_items: String, text_messages: &mut Messages) {
        let start_time: SystemTime = SystemTime::now();
        if excluded_items.is_empty() {
            return;
        }

        excluded_items = excluded_items.replace("\"", "");

        let mut accepted: Vec<String> = Vec::new();
        for candidate in excluded_items.split(',').map(str::trim) {
            match candidate {
                // Empty entries (e.g. produced by ",,") are ignored without any message
                "" => {}
                // TODO add more files by default
                "DEFAULT" => accepted.push("*/.git/*".to_string()),
                pattern if pattern.contains('*') => accepted.push(pattern.to_string()),
                rejected => text_messages.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + rejected),
            }
        }

        // The previous list is replaced, not extended
        self.items = accepted;
        Common::print_time(start_time, SystemTime::now(), "set_excluded_items".to_string());
    }
}

impl Default for ExcludedItems {
    fn default() -> ExcludedItems {
        ExcludedItems::new()
    }
}

View file

@ -0,0 +1,47 @@
/// Collects texts produced while working, grouped by severity.
pub struct Messages {
    /// Purely informational texts.
    pub messages: Vec<String>,
    /// Problems which did not stop the operation.
    pub warnings: Vec<String>,
    /// Problems which made an operation fail.
    pub errors: Vec<String>,
}

impl Messages {
    /// Creates a collector without any stored text.
    pub fn new() -> Messages {
        Messages {
            messages: Vec::new(),
            warnings: Vec::new(),
            errors: Vec::new(),
        }
    }

    /// Prints every non-empty group to stdout, framed by a header and a footer line.
    /// Groups are printed in the order: messages, warnings, errors.
    pub fn print_messages(&self) {
        let sections = [
            (
                &self.messages,
                "-------------------------------MESSAGES--------------------------------",
                "---------------------------END OF MESSAGES-----------------------------",
            ),
            (
                &self.warnings,
                "-------------------------------WARNINGS--------------------------------",
                "---------------------------END OF WARNINGS-----------------------------",
            ),
            (
                &self.errors,
                "--------------------------------ERRORS---------------------------------",
                "----------------------------END OF ERRORS------------------------------",
            ),
        ];

        for (entries, header, footer) in sections.iter() {
            // Empty groups are not printed at all, not even their frame
            if entries.is_empty() {
                continue;
            }
            println!("{}", header);
            entries.iter().for_each(|line| println!("{}", line));
            println!("{}", footer);
        }
    }
}

impl Default for Messages {
    fn default() -> Messages {
        Messages::new()
    }
}

View file

@ -0,0 +1,9 @@
/// Implemented by the finder types to print their internal state for debugging.
pub trait DebugPrint {
/// Prints settings and collected statistics of the implementor to stdout.
fn debug_print(&self);
}
/// Implemented by the finder types to store their results in a text file.
pub trait SaveResults {
/// Saves the results to `file_name` and reports success as `bool`.
///
/// The implementations in this crate use `results.txt` when `file_name` is empty and
/// return `false` after pushing an error message when the file cannot be created or written.
fn save_results_to_file(&mut self, file_name: &str) -> bool;
}
/// Implemented by the finder types to show their results on the terminal.
pub trait PrintResults {
/// Prints the found entries; only needed by the CLI.
fn print_results(&self);
}

View file

@ -3,10 +3,14 @@ use std::collections::{BTreeMap, HashMap};
use std::fs;
use std::fs::{File, Metadata};
use std::io::prelude::*;
use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};
use crate::common::{Common, Messages};
use crate::common::Common;
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::*;
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum CheckingMethod {
@ -82,10 +86,9 @@ pub struct DuplicateFinder {
information: Info,
files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
allowed_extensions: Vec<String>,
excluded_items: Vec<String>,
excluded_directories: Vec<String>,
included_directories: Vec<String>,
directories: Directories,
allowed_extensions: Extensions,
excluded_items: ExcludedItems,
recursive_search: bool,
min_file_size: u64,
check_method: CheckingMethod,
@ -95,21 +98,31 @@ pub struct DuplicateFinder {
impl DuplicateFinder {
pub fn new() -> DuplicateFinder {
DuplicateFinder {
text_messages: Default::default(),
text_messages: Messages::new(),
information: Info::new(),
files_with_identical_size: Default::default(),
files_with_identical_hashes: Default::default(),
excluded_items: vec![],
excluded_directories: vec![],
included_directories: vec![],
recursive_search: true,
allowed_extensions: vec![],
allowed_extensions: Extensions::new(),
check_method: CheckingMethod::None,
delete_method: DeleteMethod::None,
min_file_size: 1024,
directories: Directories::new(),
excluded_items: ExcludedItems::new(),
}
}
/// Finds duplicates and saves the results to internal struct variables.
///
/// Steps, in order: optimize the included/excluded directories, group files by size,
/// optionally compare hashes, delete files according to the delete method and finally
/// print debug information.
///
/// NOTE(review): `delete_files` panics when `check_method` is `CheckingMethod::None`,
/// which is the value set by `new()`, so a checking method has to be chosen before calling this.
pub fn find_duplicates(&mut self) {
// The returned bool (false when no included directory is left) is ignored here
self.directories.optimize_directories(self.recursive_search, &mut self.text_messages);
self.check_files_size();
// Hashing is only done on request, after the size check has run
if self.check_method == CheckingMethod::Hash {
self.check_files_hash();
}
self.delete_files();
self.debug_print();
}
/// Returns the files grouped by size (key: file size, value: files having that size).
pub fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
&self.files_with_identical_size
}
@ -142,192 +155,19 @@ impl DuplicateFinder {
self.recursive_search = recursive_search;
}
/// Finding duplicates, save results to internal struct variables
pub fn find_duplicates(&mut self) {
self.optimize_directories();
self.check_files_size();
if self.check_method == CheckingMethod::Hash {
self.check_files_hash();
}
self.delete_files();
self.debug_print();
pub fn set_included_directory(&mut self, included_directory: String) -> bool {
self.directories.set_included_directory(included_directory, &mut self.text_messages)
}
/// Setting excluded items which needs to contains * wildcrard
/// Are a lot of slower than absolute path, so it should be used to heavy
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
let start_time: SystemTime = SystemTime::now();
if excluded_items.is_empty() {
return;
}
excluded_items = excluded_items.replace("\"", "");
let expressions: Vec<String> = excluded_items.split(',').map(String::from).collect();
let mut checked_expressions: Vec<String> = Vec::new();
for expression in expressions {
let expression: String = expression.trim().to_string();
if expression == "" {
continue;
}
if expression == "DEFAULT" {
// TODO add more files by default
checked_expressions.push("*/.git/*".to_string());
continue;
}
if !expression.contains('*') {
self.text_messages.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + expression.as_str());
continue;
}
checked_expressions.push(expression);
}
self.excluded_items = checked_expressions;
Common::print_time(start_time, SystemTime::now(), "set_excluded_items".to_string());
pub fn set_excluded_directory(&mut self, excluded_directory: String) {
self.directories.set_excluded_directory(excluded_directory, &mut self.text_messages);
}
pub fn set_allowed_extensions(&mut self, allowed_extensions: String) {
self.allowed_extensions.set_allowed_extensions(allowed_extensions, &mut self.text_messages);
}
/// List of allowed extensions, only files with this extensions will be checking if are duplicates
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
let start_time: SystemTime = SystemTime::now();
if allowed_extensions.is_empty() {
return;
}
allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg");
allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");
allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf");
let extensions: Vec<String> = allowed_extensions.split(',').map(String::from).collect();
for mut extension in extensions {
if extension == "" || extension.replace('.', "").trim() == "" {
continue;
}
if extension.starts_with('.') {
extension = extension[1..].to_string();
}
if extension[1..].contains('.') {
self.text_messages.warnings.push(".".to_string() + extension.as_str() + " is not valid extension(valid extension doesn't have dot inside)");
continue;
}
if !self.allowed_extensions.contains(&extension.trim().to_string()) {
self.allowed_extensions.push(extension.trim().to_string());
}
}
if self.allowed_extensions.is_empty() {
self.text_messages.messages.push("No valid extensions were provided, so allowing all extensions by default.".to_string());
}
Common::print_time(start_time, SystemTime::now(), "set_allowed_extensions".to_string());
}
/// Setting include directories, at least one must be provided
pub fn set_include_directory(&mut self, mut include_directory: String) -> bool {
let start_time: SystemTime = SystemTime::now();
if include_directory.is_empty() {
self.text_messages.errors.push("At least one directory must be provided".to_string());
return false;
}
include_directory = include_directory.replace("\"", "");
let directories: Vec<String> = include_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory.contains('*') {
self.text_messages.warnings.push("Include Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
self.text_messages.warnings.push("Include Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
self.text_messages.warnings.push("Include Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
self.text_messages.warnings.push("Include Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory + "/");
} else {
checked_directories.push(directory);
}
}
if checked_directories.is_empty() {
self.text_messages.errors.push("Include Directory ERROR: Not found even one correct path to include which is required.".to_string());
return false;
}
self.included_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
true
}
/// Setting absolute path to exclude
pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
let start_time: SystemTime = SystemTime::now();
if exclude_directory.is_empty() {
return;
}
exclude_directory = exclude_directory.replace("\"", "");
let directories: Vec<String> = exclude_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory == "/" {
self.text_messages.errors.push("Exclude Directory ERROR: Excluding / is pointless, because it means that no files will be scanned.".to_string());
break;
}
if directory.contains('*') {
self.text_messages.warnings.push("Exclude Directory Warning: Wildcards in path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !directory.starts_with('/') {
self.text_messages.warnings.push("Exclude Directory Warning: Relative path are not supported, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).exists() {
self.text_messages.warnings.push("Exclude Directory Warning: Provided folder path must exits, ignoring ".to_string() + directory.as_str());
continue;
}
if !Path::new(&directory).is_dir() {
self.text_messages.warnings.push("Exclude Directory Warning: Provided path must point at the directory, ignoring ".to_string() + directory.as_str());
continue;
}
// directory must end with /, due to possibility of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory.trim().to_string() + "/");
} else {
checked_directories.push(directory.trim().to_string());
}
}
self.excluded_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
pub fn set_excluded_items(&mut self, excluded_items: String) {
self.excluded_items.set_excluded_items(excluded_items, &mut self.text_messages);
}
/// Read file length and puts it to different boxes(each for different lengths)
@ -338,7 +178,7 @@ impl DuplicateFinder {
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
// Add root folders for finding
for id in &self.included_directories {
for id in &self.directories.included_directories {
folders_to_check.push(id.to_string());
}
self.information.number_of_checked_folders += folders_to_check.len();
@ -384,7 +224,7 @@ impl DuplicateFinder {
let mut is_excluded_dir = false;
next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
for ed in &self.excluded_directories {
for ed in &self.directories.excluded_directories {
if next_folder == *ed {
is_excluded_dir = true;
break;
@ -392,7 +232,7 @@ impl DuplicateFinder {
}
if !is_excluded_dir {
let mut found_expression: bool = false;
for expression in &self.excluded_items {
for expression in &self.excluded_items.items {
if Common::regex_check(expression, &next_folder) {
found_expression = true;
break;
@ -408,9 +248,9 @@ impl DuplicateFinder {
let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
// Checking allowed extensions
if !self.allowed_extensions.is_empty() {
if !self.allowed_extensions.file_extensions.is_empty() {
have_valid_extension = false;
for i in &self.allowed_extensions {
for i in &self.allowed_extensions.file_extensions {
if file_name_lowercase.ends_with((".".to_string() + i.to_lowercase().as_str()).as_str()) {
have_valid_extension = true;
break;
@ -426,7 +266,7 @@ impl DuplicateFinder {
// Checking expressions
let mut found_expression: bool = false;
for expression in &self.excluded_items {
for expression in &self.excluded_items.items {
if Common::regex_check(expression, &current_file_name) {
found_expression = true;
break;
@ -488,87 +328,6 @@ impl DuplicateFinder {
Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
}
/// Saving results to provided file
pub fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
return false;
}
};
match file.write_all(format!("Results of searching in {:?}\n", self.included_directories).as_bytes()) {
Ok(_) => (),
Err(_) => {
self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return false;
}
}
if !self.files_with_identical_size.is_empty() {
file.write_all(b"-------------------------------------------------Files with same size-------------------------------------------------\n").unwrap();
file.write_all(
("Found ".to_string()
+ self.information.number_of_duplicated_files_by_size.to_string().as_str()
+ " duplicated files which in "
+ self.information.number_of_groups_by_size.to_string().as_str()
+ " groups which takes "
+ self.information.lost_space_by_size.file_size(options::BINARY).unwrap().as_str()
+ ".\n")
.as_bytes(),
)
.unwrap();
for (size, files) in self.files_with_identical_size.iter().rev() {
file.write_all(b"\n---- Size ").unwrap();
file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
file.write_all((" - ".to_string() + files.len().to_string().as_str() + " files").as_bytes()).unwrap();
file.write_all(b"\n").unwrap();
for file_entry in files {
file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
}
}
if !self.files_with_identical_hashes.is_empty() {
file.write_all(b"-------------------------------------------------Files with same hashes-------------------------------------------------\n").unwrap();
file.write_all(
("Found ".to_string()
+ self.information.number_of_duplicated_files_by_hash.to_string().as_str()
+ " duplicated files which in "
+ self.information.number_of_groups_by_hash.to_string().as_str()
+ " groups which takes "
+ self.information.lost_space_by_hash.file_size(options::BINARY).unwrap().as_str()
+ ".\n")
.as_bytes(),
)
.unwrap();
for (size, files) in self.files_with_identical_hashes.iter().rev() {
for vector in files {
file.write_all(b"\n---- Size ").unwrap();
file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
file.write_all((" - ".to_string() + vector.len().to_string().as_str() + " files").as_bytes()).unwrap();
file.write_all(b"\n").unwrap();
for file_entry in vector {
file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
}
}
}
}
} else {
file.write_all(b"Not found any empty folders.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
true
}
/// The slowest checking type, which must be applied after checking for size
fn check_files_hash(&mut self) {
let start_time: SystemTime = SystemTime::now();
@ -631,6 +390,46 @@ impl DuplicateFinder {
Common::print_time(start_time, SystemTime::now(), "check_files_hash".to_string());
}
/// Function to delete files, from filed before BTreeMap
/// Using another function to delete files to avoid duplicates data
fn delete_files(&mut self) {
let start_time: SystemTime = SystemTime::now();
match self.check_method {
CheckingMethod::Hash => {
for entry in &self.files_with_identical_hashes {
for vector in entry.1 {
let tuple: (u64, usize, usize) = delete_files(&vector, &self.delete_method, &mut self.text_messages.warnings);
self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
}
}
}
CheckingMethod::Size => {
for entry in &self.files_with_identical_size {
let tuple: (u64, usize, usize) = delete_files(&entry.1, &self.delete_method, &mut self.text_messages.warnings);
self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
}
}
CheckingMethod::None => {
//Just do nothing
panic!("Checking method should never be none.");
}
}
Common::print_time(start_time, SystemTime::now(), "delete_files".to_string());
}
}
impl Default for DuplicateFinder {
fn default() -> Self {
Self::new()
}
}
impl DebugPrint for DuplicateFinder {
#[allow(dead_code)]
#[allow(unreachable_code)]
/// Debugging printing - only available on debug build
@ -676,20 +475,102 @@ impl DuplicateFinder {
println!("Files list size - {}", self.files_with_identical_size.len());
println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items);
println!("Included directories - {:?}", self.included_directories);
println!("Excluded directories - {:?}", self.excluded_directories);
println!("Allowed extensions - {:?}", self.allowed_extensions.file_extensions);
println!("Excluded items - {:?}", self.excluded_items.items);
println!("Included directories - {:?}", self.directories.included_directories);
println!("Excluded directories - {:?}", self.directories.excluded_directories);
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Minimum file size - {:?}", self.min_file_size);
println!("Checking Method - {:?}", self.check_method);
println!("Delete Method - {:?}", self.delete_method);
println!("-----------------------------------------");
}
}
impl SaveResults for DuplicateFinder {
    /// Saves the found duplicates to a text file.
    ///
    /// * `file_name` - destination path; `results.txt` is used when it is empty.
    ///
    /// Returns `true` when the complete report was written. When the file cannot be
    /// created or any write fails, a message is pushed to `text_messages.errors` and
    /// `false` is returned (write errors no longer cause a panic).
    fn save_results_to_file(&mut self, file_name: &str) -> bool {
        // Writes the whole report; every I/O error is handed back to the caller.
        fn write_report(finder: &DuplicateFinder, file: &mut File) -> std::io::Result<()> {
            writeln!(file, "Results of searching in {:?}", finder.directories.included_directories)?;

            if finder.files_with_identical_size.is_empty() {
                // Previously this wrote the text of the empty folder finder
                file.write_all(b"Not found any duplicates.")?;
                return Ok(());
            }

            file.write_all(b"-------------------------------------------------Files with same size-------------------------------------------------\n")?;
            writeln!(
                file,
                "Found {} duplicated files which in {} groups which takes {}.",
                finder.information.number_of_duplicated_files_by_size,
                finder.information.number_of_groups_by_size,
                finder.information.lost_space_by_size.file_size(options::BINARY).unwrap()
            )?;
            // Biggest files first
            for (size, files) in finder.files_with_identical_size.iter().rev() {
                write!(file, "\n---- Size {} ({}) - {} files\n", size.file_size(options::BINARY).unwrap(), size, files.len())?;
                for file_entry in files {
                    writeln!(file, "{}", file_entry.path)?;
                }
            }

            // Hash results only exist when the size check found something, so they are written only in that case
            if !finder.files_with_identical_hashes.is_empty() {
                file.write_all(b"-------------------------------------------------Files with same hashes-------------------------------------------------\n")?;
                writeln!(
                    file,
                    "Found {} duplicated files which in {} groups which takes {}.",
                    finder.information.number_of_duplicated_files_by_hash,
                    finder.information.number_of_groups_by_hash,
                    finder.information.lost_space_by_hash.file_size(options::BINARY).unwrap()
                )?;
                for (size, groups) in finder.files_with_identical_hashes.iter().rev() {
                    for group in groups {
                        write!(file, "\n---- Size {} ({}) - {} files\n", size.file_size(options::BINARY).unwrap(), size, group.len())?;
                        for file_entry in group {
                            writeln!(file, "{}", file_entry.path)?;
                        }
                    }
                }
            }
            Ok(())
        }

        let start_time: SystemTime = SystemTime::now();
        let file_name: String = match file_name {
            "" => "results.txt".to_string(),
            k => k.to_string(),
        };

        let mut file = match File::create(&file_name) {
            Ok(t) => t,
            Err(_) => {
                self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
                return false;
            }
        };

        if write_report(self, &mut file).is_err() {
            self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
            return false;
        }

        Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
        true
    }
}
impl PrintResults for DuplicateFinder {
/// Print information's about duplicated entries
/// Only needed for CLI
pub fn print_duplicated_entries(&self) {
fn print_results(&self) {
let start_time: SystemTime = SystemTime::now();
let mut number_of_files: u64 = 0;
let mut number_of_groups: u64 = 0;
@ -744,168 +625,6 @@ impl DuplicateFinder {
}
Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
}
/// Remove unused entries when included or excluded overlaps with each other or are duplicated etc.
fn optimize_directories(&mut self) -> bool {
let start_time: SystemTime = SystemTime::now();
let mut optimized_included: Vec<String> = Vec::<String>::new();
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
// Remove duplicated entries like: "/", "/"
self.excluded_directories.sort();
self.included_directories.sort();
self.excluded_directories.dedup();
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
if self.recursive_search {
// This is only point which can't be done when recursive search is disabled.
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;
for ed_help in &self.excluded_directories {
if ed_checked == ed_help {
// We checking same element
continue;
}
if ed_checked.starts_with(ed_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_excluded.push(ed_checked.to_string());
}
}
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id_checked.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
}
// Remove include directories which are inside any exclude directory
for id in &self.included_directories {
let mut is_inside: bool = false;
for ed in &self.excluded_directories {
if id.starts_with(ed) {
is_inside = true;
break;
}
}
if !is_inside {
optimized_included.push(id.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
// Remove non existed directories
for id in &self.included_directories {
let path = Path::new(id);
if path.exists() {
optimized_included.push(id.to_string());
}
}
for ed in &self.excluded_directories {
let path = Path::new(ed);
if path.exists() {
optimized_excluded.push(ed.to_string());
}
}
self.included_directories = optimized_included;
// optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
// Excluded paths must are inside include path, because
for ed in &self.excluded_directories {
let mut is_inside: bool = false;
for id in &self.included_directories {
if ed.starts_with(id) {
is_inside = true;
break;
}
}
if is_inside {
optimized_excluded.push(ed.to_string());
}
}
self.excluded_directories = optimized_excluded;
// optimized_excluded = Vec::<String>::new();
if self.included_directories.is_empty() {
self.text_messages.errors.push("Optimize Directories ERROR: Excluded directories overlaps all included directories.".to_string());
return false;
}
// Not needed, but better is to have sorted everything
self.excluded_directories.sort();
self.included_directories.sort();
Common::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
true
}
/// Function to delete files, from filed before BTreeMap
/// Using another function to delete files to avoid duplicates data
fn delete_files(&mut self) {
let start_time: SystemTime = SystemTime::now();
match self.check_method {
CheckingMethod::Hash => {
for entry in &self.files_with_identical_hashes {
for vector in entry.1 {
let tuple: (u64, usize, usize) = delete_files(&vector, &self.delete_method, &mut self.text_messages.warnings);
self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
}
}
}
CheckingMethod::Size => {
for entry in &self.files_with_identical_size {
let tuple: (u64, usize, usize) = delete_files(&entry.1, &self.delete_method, &mut self.text_messages.warnings);
self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
}
}
CheckingMethod::None => {
//Just do nothing
panic!("Checking method should never be none.");
}
}
Common::print_time(start_time, SystemTime::now(), "delete_files".to_string());
}
}
impl Default for DuplicateFinder {
fn default() -> Self {
Self::new()
}
}
/// Functions to remove slice(vector) of files with provided method

View file

@ -1,10 +1,12 @@
use crate::common::{Common, Messages};
use crate::common::Common;
use crate::common_directory::Directories;
use crate::common_messages::Messages;
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
use std::collections::BTreeMap;
use std::fs;
use std::fs::{File, Metadata};
use std::io::Write;
use std::path::Path;
use std::time::SystemTime;
use std::{fs, process};
/// Enum with values which show if folder is empty.
/// In function "optimize_folders" automatically "Maybe" is changed to "Yes", so it is not necessery to put it here
@ -28,7 +30,7 @@ pub struct EmptyFolder {
delete_folders: bool,
text_messages: Messages,
empty_folder_list: BTreeMap<String, FolderEntry>, // Path, FolderEntry
included_directories: Vec<String>,
directories: Directories,
}
/// Info struck with helpful information's about results
@ -57,9 +59,9 @@ impl EmptyFolder {
EmptyFolder {
information: Default::default(),
delete_folders: false,
text_messages: Default::default(),
text_messages: Messages::new(),
empty_folder_list: Default::default(),
included_directories: vec![],
directories: Directories::new(),
}
}
@ -76,9 +78,9 @@ impl EmptyFolder {
/// Public function used by CLI to search for empty folders
pub fn find_empty_folders(&mut self) {
self.optimize_directories();
self.directories.optimize_directories(true, &mut self.text_messages);
self.check_for_empty_folders(true);
self.check_for_empty_folders(false); // Not needed for CLI, but it is better to check this
self.check_for_empty_folders(false); // Not needed for CLI, but it is better to check this again, because maybe empty folder stops to be empty
self.optimize_folders();
if self.delete_folders {
self.delete_empty_folders();
@ -90,43 +92,6 @@ impl EmptyFolder {
self.delete_folders = delete_folder;
}
pub fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
return false;
}
};
match file.write_all(format!("Results of searching in {:?}\n", self.included_directories).as_bytes()) {
Ok(_) => (),
Err(_) => {
self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return false;
}
}
if !self.empty_folder_list.is_empty() {
file.write_all(b"-------------------------------------------------Empty folder list-------------------------------------------------\n").unwrap();
file.write_all(("Found ".to_string() + self.information.number_of_empty_folders.to_string().as_str() + " empty folders which in " + ".\n").as_bytes())
.unwrap();
for name in self.empty_folder_list.keys() {
file.write_all((name.clone() + "\n").as_bytes()).unwrap();
}
} else {
file.write_all(b"Not found any empty folders.").unwrap();
}
Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
true
}
/// Clean directory tree
/// If directory contains only 2 empty folders, then this directory should be removed instead two empty folders inside because it will produce another empty folder.
fn optimize_folders(&mut self) {
@ -157,7 +122,7 @@ impl EmptyFolder {
if initial_checking {
// Add root folders for finding
for id in &self.included_directories {
for id in &self.directories.included_directories {
folders_checked.insert(
id.clone(),
FolderEntry {
@ -285,17 +250,18 @@ impl EmptyFolder {
Common::print_time(start_time, SystemTime::now(), "delete_files".to_string());
}
/// Prints basic info about empty folders // TODO print better
pub fn print_empty_folders(&self) {
if !self.empty_folder_list.is_empty() {
println!("Found {} empty folders", self.empty_folder_list.len());
}
for i in &self.empty_folder_list {
println!("{}", i.0);
}
/// Sets the directories that will be searched.
///
/// The raw `included_directory` string is forwarded unchanged to
/// `self.directories.set_included_directory`, together with a mutable
/// reference to `self.text_messages`; any validation happens in that helper.
///
/// NOTE(review): this comment used to say the path "needs to be relative",
/// which looks inverted (absolute paths are presumably required) - confirm
/// against the `directories` helper.
pub fn set_included_directory(&mut self, included_directory: String) {
self.directories.set_included_directory(included_directory, &mut self.text_messages);
}
}
/// `Default` for `EmptyFolder` is the same value produced by `EmptyFolder::new()`.
impl Default for EmptyFolder {
/// Builds the default instance by delegating to `Self::new()`.
fn default() -> Self {
Self::new()
}
}
/// Debug print
impl DebugPrint for EmptyFolder {
#[allow(dead_code)]
#[allow(unreachable_code)]
fn debug_print(&self) {
@ -307,128 +273,56 @@ impl EmptyFolder {
println!("---------------DEBUG PRINT---------------");
println!("Number of all checked folders - {}", self.information.number_of_checked_folders);
println!("Number of empty folders - {}", self.information.number_of_empty_folders);
println!("Included directories - {:?}", self.included_directories);
println!("Included directories - {:?}", self.directories.included_directories);
println!("-----------------------------------------");
}
// TODO maybe move this and one from duplicated finder to one common class to avoid duplicating code
/// Optimize include and exclude directories by removing duplicates etc.
fn optimize_directories(&mut self) {
}
impl SaveResults for EmptyFolder {
fn save_results_to_file(&mut self, file_name: &str) -> bool {
let start_time: SystemTime = SystemTime::now();
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
let mut optimized_included: Vec<String> = Vec::<String>::new();
// Remove duplicated entries like: "/", "/"
self.included_directories.sort();
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
let mut is_inside: bool;
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
}
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.text_messages.errors.push("Failed to create file ".to_string() + file_name.as_str());
return false;
}
if !is_inside {
optimized_included.push(id_checked.to_string());
};
match file.write_all(format!("Results of searching in {:?}\n", self.directories.included_directories).as_bytes()) {
Ok(_) => (),
Err(_) => {
self.text_messages.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return false;
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
// Remove non existed directories
for id in &self.included_directories {
let path = Path::new(id);
if path.exists() {
optimized_included.push(id.to_string());
if !self.empty_folder_list.is_empty() {
file.write_all(b"-------------------------------------------------Empty folder list-------------------------------------------------\n").unwrap();
file.write_all(("Found ".to_string() + self.information.number_of_empty_folders.to_string().as_str() + " empty folders which in " + ".\n").as_bytes())
.unwrap();
for name in self.empty_folder_list.keys() {
file.write_all((name.clone() + "\n").as_bytes()).unwrap();
}
} else {
file.write_all(b"Not found any empty folders.").unwrap();
}
self.included_directories = optimized_included;
//optimized_included = Vec::<String>::new();
if self.included_directories.is_empty() {
println!("Optimize Directories ERROR: Excluded directories overlaps all included directories.");
process::exit(1);
}
// Not needed, but better is to have sorted everything
self.included_directories.sort();
Common::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
}
/// Set include dir which needs to be relative, exists etc.
pub fn set_include_directory(&mut self, mut include_directory: String) {
let start_time: SystemTime = SystemTime::now();
if include_directory.is_empty() {
println!("At least one directory must be provided");
}
include_directory = include_directory.replace("\"", "");
let directories: Vec<String> = include_directory.split(',').map(String::from).collect();
let mut checked_directories: Vec<String> = Vec::new();
for directory in directories {
let directory: String = directory.trim().to_string();
if directory == "" {
continue;
}
if directory == "/" {
println!("Using / is probably not good idea, you may go out of ram.");
}
if directory.contains('*') {
println!("Include Directory ERROR: Wildcards are not supported, ignoring path {}.", directory);
continue;
}
if directory.starts_with('~') {
println!("Include Directory ERROR: ~ in path isn't supported, ignoring path {}.", directory);
continue;
}
if !directory.starts_with('/') {
println!("Include Directory ERROR: Relative path are not supported, ignoring path {}.", directory);
continue;
}
if !Path::new(&directory).exists() {
println!("Include Directory ERROR: Path {} doesn't exists.", directory);
continue;
}
if !Path::new(&directory).is_dir() {
println!("Include Directory ERROR: {} isn't folder.", directory);
continue;
}
// directory must end with /, due to possiblity of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
if !directory.ends_with('/') {
checked_directories.push(directory + "/");
} else {
checked_directories.push(directory);
}
}
if checked_directories.is_empty() {
println!("Not found even one correct path to include.");
process::exit(1);
}
self.included_directories = checked_directories;
Common::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
Common::print_time(start_time, SystemTime::now(), "save_results_to_file".to_string());
true
}
}
impl Default for EmptyFolder {
fn default() -> Self {
Self::new()
impl PrintResults for EmptyFolder {
    /// Writes the collected empty folders to standard output.
    ///
    /// When at least one folder was collected, a header line with the total
    /// count is printed first; afterwards every entry's key (the folder path)
    /// is printed on its own line. Nothing is printed when the list is empty.
    // TODO print better
    fn print_results(&self) {
        let folders = &self.empty_folder_list;

        // The header is emitted only when there is something to report.
        match folders.len() {
            0 => {}
            count => println!("Found {} empty folders", count),
        }

        for (folder_path, _) in folders {
            println!("{}", folder_path);
        }
    }
}

View file

@ -3,5 +3,10 @@ pub mod duplicate;
pub mod empty_folder;
pub mod common;
pub mod common_directory;
pub mod common_extensions;
pub mod common_items;
pub mod common_messages;
pub mod common_traits;
pub const CZKAWKA_VERSION: &str = "0.1.3";

View file

@ -3,7 +3,7 @@ use humansize::{file_size_opts as options, FileSize};
extern crate gtk;
use chrono::NaiveDateTime;
use czkawka_core::common::Messages;
use czkawka_core::common_messages::Messages;
use czkawka_core::duplicate::CheckingMethod;
use czkawka_core::empty_folder::EmptyFolder;
use duplicate::DuplicateFinder;
@ -259,8 +259,8 @@ fn main() {
let mut df = DuplicateFinder::new();
let check_method = duplicate::CheckingMethod::Hash; // TODO
{
df.set_include_directory(get_string_from_list_store(&scrolled_window_included_directories));
df.set_exclude_directory(get_string_from_list_store(&scrolled_window_excluded_directories));
df.set_included_directory(get_string_from_list_store(&scrolled_window_included_directories));
df.set_excluded_directory(get_string_from_list_store(&scrolled_window_excluded_directories));
df.set_excluded_items(entry_excluded_items.get_text().as_str().to_string());
df.set_allowed_extensions(entry_allowed_extensions.get_text().as_str().to_string());
df.set_min_file_size(match entry_duplicate_minimal_size.get_text().as_str().parse::<u64>() {
@ -401,7 +401,7 @@ fn main() {
// TODO Change to proper value
let mut ef = EmptyFolder::new();
ef.set_include_directory("/home/rafal/Pulpit".to_string());
ef.set_included_directory("/home/rafal/Pulpit".to_string());
ef.set_delete_folder(false);
ef.find_empty_folders();

View file

@ -19,7 +19,7 @@ fn main() {
.child(TextBlock::new().text("Include Directory:").v_align("center").h_align("start").attach(Grid::column(0)).attach(Grid::row(0)).build(ctx))
.child(
TextBox::new()
.id("include_directory")
.id("included_directory")
.water_mark("Include Directory")
.v_align("center")
.h_align("start")
@ -31,7 +31,7 @@ fn main() {
.child(TextBlock::new().text("Exclude Directory:").v_align("center").h_align("start").attach(Grid::column(0)).attach(Grid::row(1)).build(ctx))
.child(
TextBox::new()
.id("exclude_directory")
.id("excluded_directory")
.water_mark("Exclude Directory")
.v_align("center")
.h_align("start")