From 10642548f90e3a363025462d3a92a8b5b6cbf2da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mikrut?=
Date: Sat, 29 Aug 2020 16:12:20 +0200
Subject: [PATCH] Beginning of adding allowed extension support.

---
 Cargo.lock       |  43 +++++
 Cargo.toml       |   3 +-
 src/duplicate.rs | 481 +++++++++++++++++++++++++++++------------------
 src/main.rs      |  34 +++-
 4 files changed, 371 insertions(+), 190 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d5b7912..0d81469 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,5 +1,14 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+[[package]]
+name = "aho-corasick"
+version = "0.7.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "arrayref"
 version = "0.3.6"
@@ -115,6 +124,7 @@ dependencies = [
  "blake3",
  "multimap",
  "rayon",
+ "regex",
 ]

 [[package]]
@@ -169,6 +179,12 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"

+[[package]]
+name = "memchr"
+version = "2.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
+
 [[package]]
 name = "memoffset"
 version = "0.5.5"
@@ -222,6 +238,24 @@ dependencies = [
  "num_cpus",
 ]

+[[package]]
+name = "regex"
+version = "1.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+ "thread_local",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
+
 [[package]]
 name = "scopeguard"
 version = "1.1.0"
@@ -240,6 +274,15 @@ version = "2.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "502d53007c02d7605a05df1c1a73ee436952781653da5d0bf57ad608f66932c1"

+[[package]]
+name = "thread_local"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
+dependencies = [
+ "lazy_static",
+]
+
 [[package]]
 name = "typenum"
 version = "1.12.0"
diff --git a/Cargo.toml b/Cargo.toml
index b7755e8..480bf9a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,4 +7,5 @@ edition = "2018"
 [dependencies]
 rayon = "1.4.0"
 blake3 = "0.3.6"
-multimap = "0.8.2"
\ No newline at end of file
+multimap = "0.8.2"
+regex = "1.3.9"
\ No newline at end of file
diff --git a/src/duplicate.rs b/src/duplicate.rs
index c639ca3..5f81275 100644
--- a/src/duplicate.rs
+++ b/src/duplicate.rs
@@ -1,204 +1,100 @@
 // TODO: make sure we have sufficient permissions to read and delete files
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::fs::{File, Metadata};
-use std::hash::Hash;
 use std::io::prelude::*;
 use std::path::Path;
 use std::time::SystemTime;
 use std::{fs, process};

+const MIN_FILE_SIZE: u64 = 1000;
+
+#[derive(PartialEq)]
+#[allow(dead_code)]
+pub enum CheckingMethod {
+    SIZE,
+    HASH,
+}
+
 pub struct DuplicateFinder {
     number_of_checked_files: usize,
+    number_of_ignored_files: usize,
     number_of_checked_folders: usize,
     number_of_ignored_things: usize,
     number_of_duplicated_files: usize,
-    // files : Vec<Vec<FileEntry>>,
-    files_size: HashMap<u64, Vec<FileEntry>>,
-    // files_hashes: HashMap<[u8], Vec<FileEntry>>,
-    // duplicated_entries // Same as files, but only with 2+ entries
-    // files : Vec<FileEntry>,
+    files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
+    files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
+    allowed_extensions: Vec<String>, // jpg, jpeg, mp4
+    // excluded_items: Vec<String>,
     excluded_directories: Vec<String>,
     included_directories: Vec<String>,
-    // ignored_extensions: Vec<String>,
-    // allowed_extensions: Vec<String>,
-    // ignored_file_names: Vec<String>, // TODO Regex Support
-    // allowed_file_names: Vec<String>, // TODO Regex Support
+    min_file_size: u64,
 }

 impl DuplicateFinder {
     pub fn new() -> DuplicateFinder {
         DuplicateFinder {
             number_of_checked_files: 0,
+            number_of_ignored_files: 0,
             number_of_checked_folders: 0,
             number_of_ignored_things: 0,
             number_of_duplicated_files: 0,
-            files_size: Default::default(),
-            // files_hashes: Default::default(),
+            files_with_identical_size: Default::default(),
+            files_with_identical_hashes: Default::default(),
+            // excluded_items: vec![],
             excluded_directories: vec![],
             included_directories: vec![],
-            // ignored_extensions: vec![],
-            // allowed_extensions: vec![],
-            // ignored_file_names: vec![],
-            // allowed_file_names: vec![]
+            min_file_size: 0,
+            allowed_extensions: vec![],
         }
     }

-    // TODO - Still isn't used but it will probably be required with the GUI
-    // pub fn clear(&mut self) {
-    //
-    //     self.number_of_checked_files = 0;
-    //     self.number_of_checked_folders = 0;
-    //     self.number_of_ignored_things = 0;
-    //     self.number_of_files_which_has_duplicated_entries = 0;
-    //     self.number_of_duplicated_files = 0;
-    //     self.files_size.clear();
-    //     self.excluded_directories.clear();
-    //     self.included_directories.clear();
-    // }
-    pub fn find_duplicates_by_size(&mut self) {
-        // TODO add multithread checking for file hash
-        //let mut path;
-        let start_time: SystemTime = SystemTime::now();
-        let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 16); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
-        // Add root folders for finding
-        for id in &self.included_directories {
-            folders_to_check.push(id.to_string());
+    pub fn find_duplicates(mut self, check_method: CheckingMethod) {
+        self.optimize_directories();
+        self.debug_print();
+        self.check_files_size();
+        self.remove_files_with_unique_size();
+        if check_method == CheckingMethod::HASH {
+            self.check_files_hash();
         }
+        // self.print_duplicated_entries(check_method);
+    }

-        let mut current_folder: String;
-        let mut next_folder: String;
-        while !folders_to_check.is_empty() {
-            current_folder = folders_to_check.pop().unwrap();
+    pub fn set_min_file_size(&mut self, min_size: u64) {
+        self.min_file_size = min_size;
+    }

-            let read_dir = fs::read_dir(&current_folder);
-            let read_dir = match read_dir {
-                Ok(t) => t,
-                _ => continue,
-            };
-            for entry in read_dir {
-                let entry_data = entry.unwrap();
-                let metadata: Metadata = entry_data.metadata().unwrap();
-                if metadata.is_dir() {
-                    let mut is_excluded_dir = false;
-                    next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
-                    for ed in &self.excluded_directories {
-                        if next_folder == ed.to_string() {
-                            is_excluded_dir = true;
-                            break;
-                        }
-                    }
-                    if !is_excluded_dir {
-                        folders_to_check.push(next_folder);
-                    }
-                    self.number_of_checked_folders += 1;
+    pub fn set_excluded_items(&mut self, _excluded_items: String) {
+        // TODO Still don't know how exactly to parse this
+        // Things like /.git/ should be hidden by default with the help of patterns like *.git*
+    }
+    pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
+        if allowed_extensions.is_empty() {
+            println!("No allowed extensions were provided, so all extensions are allowed");
+            return;
+        }
+        allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg");
+        allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
+        allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");

-                    //println!("Directory\t - {:?}", next_folder); // DEBUG
-                } else if metadata.is_file() {
-                    let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap();
-                    // println!("File\t\t - {:?}", current_file_name); // DEBUG
-                    //file_to_check
-                    let fe: FileEntry = FileEntry {
-                        path: current_file_name,
-                        size: metadata.len(),
-                        created_date: metadata.created().unwrap(),
-                        modified_date: metadata.modified().unwrap(),
-                    };
-                    if !self.files_size.contains_key(&metadata.len()) {
-                        self.files_size.insert(metadata.len(), Vec::new());
-                    }
-                    self.files_size.get_mut(&metadata.len()).unwrap().push(fe);
-
-                    self.number_of_checked_files += 1;
-                } else {
-                    // Probably this is symbolic links so we are free to ignore this
-                    // println!("Found another type of file {} {:?}","".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
-                    self.number_of_ignored_things += 1;
+        let extensions: Vec<String> = allowed_extensions.split(',').map(String::from).collect();
+        for mut extension in extensions {
+            if extension.trim().is_empty() {
+                continue;
+            }
+            if extension.contains('.') {
+                if !extension.starts_with('.') {
+                    println!("{} is not a valid extension (a valid extension doesn't have a dot inside)", extension);
                 }
+                extension = extension.replace('.', "");
             }
-        }
-        self.debug_print();
-        DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates".to_string());
-        //println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
-    }
-    // pub fn save_to_file(&self) {}

-    /// Remove files which have unique size
-    pub fn remove_files_with_unique_size(&mut self) {
-        let start_time: SystemTime = SystemTime::now();
-        self.debug_print();
-        let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();
-        self.number_of_duplicated_files = 0;
-
-        for entry in &self.files_size {
-            if entry.1.len() > 1 {
-                self.number_of_duplicated_files += entry.1.len() - 1;
-                new_hashmap.insert(*entry.0, entry.1.clone());
-            }
+            self.allowed_extensions.push(extension.trim().to_string());
         }
-        self.files_size = new_hashmap;
-
-        self.debug_print();
-        DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_files".to_string());
-    }
-
-    /// Should be slower than checking in different ways, but still needs to be checked
-    pub fn find_duplicates_by_hashing(mut self) {
-        let start_time: SystemTime = SystemTime::now();
-        let mut file_handler: File;
-
-        for entry in self.files_size {
-            let mut hashes: Vec<String> = Vec::new();
-            if entry.1.len() > 5 {
-                println!("{}", entry.1.len());
-            }
-
-            for file_entry in entry.1.iter().enumerate() {
-                file_handler = match File::open(&file_entry.1.path) {
-                    Ok(T) => T,
-                    Err(_) => {
-                        // Removing File may happens,so we should handle this
-                        hashes.push("".to_owned());
-                        continue;
-                    }
-                };
-
-                let mut hasher: blake3::Hasher = blake3::Hasher::new();
-                let mut buffer = [0u8; 16384];
-                loop {
-                    let n = file_handler.read(&mut buffer).unwrap();
-                    if n == 0 {
-                        break;
-                    }
-                    hasher.update(&buffer[..n]);
-                }
-                //println!("{}", hasher.finalize().to_hex().to_string());
-            }
+        if self.allowed_extensions.len() == 0 {
+            println!("No valid extensions were provided, so allowing all extensions by default.");
+        }
+    }
-        DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_hashing".to_string());
-    }

-    // /// I'mm not sure about performance, so maybe I
-    // pub fn find_small_duplicates_by_hashing(mut self){
-    //     let start_time: SystemTime = SystemTime::now();
-    //     let size_limit_for_small_files u64 = // 16 MB
-    //     let mut new_hashmap
-    //
-    //     DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_comparting_begin_bytes_of_file".to_string());
-    // }
-
-    pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
-        println!(
-            "Execution of function \"{}\" took {:?}",
-            function_name,
-            end_time.duration_since(start_time).expect("Time cannot go reverse.")
-        );
-    }
-
-    /// Setting include directories, panics when there is not directories available
     pub fn set_include_directory(&mut self, mut include_directory: String) {
-        let start_time: SystemTime = SystemTime::now();
+        // let start_time: SystemTime = SystemTime::now();

         if include_directory.is_empty() {
             println!("At least one directory must be provided")
@@ -225,7 +121,7 @@ impl DuplicateFinder {
             process::exit(1);
         }
         if !Path::new(&directory).exists() {
-            println!("Include Directory ERROR: Path {} doens't exists.", directory);
+            println!("Include Directory ERROR: Path {} doesn't exist.", directory);
             process::exit(1);
         }
         if !Path::new(&directory).exists() {
@@ -235,9 +131,9 @@ impl DuplicateFinder {

         // directory must end with /, due to the possibility of an incorrect assumption that e.g. /home/rafal is a top folder of /home/rafalinho
         if !directory.ends_with('/') {
-            checked_directories.push(directory + "/");
+            checked_directories.push(directory.trim().to_string() + "/");
         } else {
-            checked_directories.push(directory);
+            checked_directories.push(directory.trim().to_string());
         }
     }

@@ -248,11 +144,11 @@ impl DuplicateFinder {
         self.included_directories = checked_directories;

-        DuplicateFinder::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
+        //DuplicateFinder::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
     }

     pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
-        let start_time: SystemTime = SystemTime::now();
+        //let start_time: SystemTime = SystemTime::now();
         if exclude_directory.is_empty() {
             return;
         }
@@ -278,7 +174,7 @@ impl DuplicateFinder {
             process::exit(1);
         }
         if !Path::new(&directory).exists() {
-            println!("Exclude Directory ERROR: Path {} doens't exists.", directory);
+            println!("Exclude Directory ERROR: Path {} doesn't exist.", directory);
             process::exit(1);
         }
         if !Path::new(&directory).exists() {
@@ -288,33 +184,258 @@ impl DuplicateFinder {

         // directory must end with /, due to the possibility of an incorrect assumption that e.g. /home/rafal is a top folder of /home/rafalinho
         if !directory.ends_with('/') {
-            checked_directories.push(directory + "/");
+            checked_directories.push(directory.trim().to_string() + "/");
         } else {
-            checked_directories.push(directory);
+            checked_directories.push(directory.trim().to_string());
+            }
+        }
+        println!("{:?}", checked_directories);
+        self.excluded_directories = checked_directories;
+
+        //DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
+    }
+
+    // TODO - Still isn't used but it will probably be required with the GUI
+    // pub fn clear(&mut self) {
+    //
+    //     self.number_of_checked_files = 0;
+    //     self.number_of_checked_folders = 0;
+    //     self.number_of_ignored_things = 0;
+    //     self.number_of_files_which_has_duplicated_entries = 0;
+    //     self.number_of_duplicated_files = 0;
+    //     self.files_size.clear();
+    //     self.excluded_directories.clear();
+    //     self.included_directories.clear();
+    // }
+    fn check_files_size(&mut self) {
+        // TODO maybe add multithreaded checking of file hashes
+        let start_time: SystemTime = SystemTime::now();
+        let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2); // This should be small enough not to make too big a difference
+        // and big enough to store most of the paths without needing to resize the vector
+
+        // Add root folders for finding
+        for id in &self.included_directories {
+            folders_to_check.push(id.to_string());
+        }
+
+        let mut current_folder: String;
+        let mut next_folder: String;
+        while !folders_to_check.is_empty() {
+            current_folder = folders_to_check.pop().unwrap();
+
+            let read_dir = fs::read_dir(&current_folder);
+            let read_dir = match read_dir {
+                Ok(t) => t,
+                _ => continue,
+            };
+            for entry in read_dir {
+                let entry_data = entry.unwrap();
+                let metadata: Metadata = entry_data.metadata().unwrap();
+                if metadata.is_dir() {
+                    let mut is_excluded_dir = false;
+                    next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
+                    for ed in &self.excluded_directories {
+                        if next_folder == *ed {
+                            is_excluded_dir = true;
+                            break;
+                        }
+                    }
+                    if !is_excluded_dir {
+                        folders_to_check.push(next_folder);
+                    }
+                    self.number_of_checked_folders += 1;
+
+                    //println!("Directory\t - {:?}", next_folder); // DEBUG
+                } else if metadata.is_file() {
+                    if metadata.len() >= MIN_FILE_SIZE {
+                        let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap();
+                        // println!("File\t\t - {:?}", current_file_name); // DEBUG
+                        //file_to_check
+                        let fe: FileEntry = FileEntry {
+                            path: current_file_name,
+                            size: metadata.len(),
+                            created_date: metadata.created().unwrap(),
+                            modified_date: metadata.modified().unwrap(),
+                        };
+                        // // self.files_with_identical_size.entry from below should be faster according to clippy
+                        // if !self.files_with_identical_size.contains_key(&metadata.len()) {
+                        //     self.files_with_identical_size.insert(metadata.len(), Vec::new());
+                        // }
+                        self.files_with_identical_size.entry(metadata.len()).or_insert_with(Vec::new);
+
+                        self.files_with_identical_size.get_mut(&metadata.len()).unwrap().push(fe);
+
+                        self.number_of_checked_files += 1;
+                    } else {
+                        self.number_of_ignored_files += 1;
+                    }
+                } else {
+                    // This is probably a symbolic link, so we are free to ignore it
+                    // println!("Found another type of file {} {:?}","".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
+                    self.number_of_ignored_things += 1;
+                }
+            }
+        }
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
+        //println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
+    }
+    // pub fn save_results_to_file(&self) {}
+
+    /// Remove files which have a unique size
+    fn remove_files_with_unique_size(&mut self) {
+        let start_time: SystemTime = SystemTime::now();
+        self.debug_print();
+        let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();
+
+        self.number_of_duplicated_files = 0;
+
+        for entry in &self.files_with_identical_size {
+            if entry.1.len() > 1 {
+                self.number_of_duplicated_files += entry.1.len() - 1;
+                new_hashmap.insert(*entry.0, entry.1.clone());
             }
         }
-        self.excluded_directories = checked_directories;
+        self.files_with_identical_size = new_hashmap;

-        DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "remove_files_with_unique_size".to_string());
     }

-    pub fn debug_print(&self) {
-        println!("---------------DEBUG PRINT---------------");
-        println!("Number of all checked files - {}", self.number_of_checked_files);
-        println!("Number of all checked folders - {}", self.number_of_checked_folders);
-        println!("Number of all ignored things - {}", self.number_of_ignored_things);
-        println!("Number of duplicated files - {}", self.number_of_duplicated_files);
-        println!("Files list - {}", self.files_size.len());
-        println!("Excluded directories - {:?}", self.excluded_directories);
-        println!("Included directories - {:?}", self.included_directories);
-        println!("-----------------------------------------");
+    /// Should be slower than checking in different ways, but still needs to be checked
+    fn check_files_hash(&mut self) {
+        let start_time: SystemTime = SystemTime::now();
+        let mut file_handler: File;
+        let mut hashmap_with_hash: HashMap<String, Vec<FileEntry>>;
+
+        for entry in &self.files_with_identical_size {
+            hashmap_with_hash = Default::default();
+
+            for file_entry in entry.1.iter().enumerate() {
+                file_handler = match File::open(&file_entry.1.path) {
+                    Ok(t) => t,
+                    Err(_) => {
+                        continue;
+                    }
+                };
+
+                let mut hasher: blake3::Hasher = blake3::Hasher::new();
+                let mut buffer = [0u8; 16384];
+                loop {
+                    let n = file_handler.read(&mut buffer).unwrap();
+                    if n == 0 {
+                        break;
+                    }
+                    hasher.update(&buffer[..n]);
+                }
+                let hash_string: String = hasher.finalize().to_hex().to_string();
+                hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
+                hashmap_with_hash.get_mut(&*hash_string).unwrap().push(file_entry.1.to_owned());
+            }
+            for hash_entry in hashmap_with_hash {
+                if hash_entry.1.len() > 1 {
+                    self.files_with_identical_hashes.entry(*entry.0).or_insert_with(Vec::new);
+                    self.files_with_identical_hashes.get_mut(entry.0).unwrap().push(hash_entry.1);
+                    // self.files_with_identical_hashes.insert(*entry.0, hash_entry.1);
+                }
+            }
+        }
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "check_files_hash".to_string());
+    }
+    // /// I'm not sure about performance, so maybe I
+    // pub fn find_small_duplicates_by_hashing(mut self){
+    //     let start_time: SystemTime = SystemTime::now();
+    //     let size_limit_for_small_files: u64 = // 16 MB
+    //     let mut new_hashmap
+    //
+    //     DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_comparting_begin_bytes_of_file".to_string());
+    // }
+
+    fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
+        println!(
+            "Execution of function \"{}\" took {:?}",
+            function_name,
+            end_time.duration_since(start_time).expect("Time cannot go backwards.")
reverse.") + ); + } + + /// Setting include directories, panics when there is not directories available + + + fn debug_print(&self) { + // println!("---------------DEBUG PRINT---------------"); + // println!("Number of all checked files - {}", self.number_of_checked_files); + // println!("Number of all ignored files - {}", self.number_of_ignored_files); + // println!("Number of all checked folders - {}", self.number_of_checked_folders); + // println!("Number of all ignored things - {}", self.number_of_ignored_things); + // println!("Number of duplicated files - {}", self.number_of_duplicated_files); + // let mut file_size : u64 = 0; + // for i in &self.files_with_identical_size{ + // file_size += i.1.len() as u64; + // } + // println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size); + // let mut hashed_file_size : u64 = 0; + // for i in &self.files_with_identical_hashes{ + // for j in i.1{ + // hashed_file_size += j.len() as u64; + // } + // } + // println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size); + // println!("Excluded directories - {:?}", self.excluded_directories); + // println!("Included directories - {:?}", self.included_directories); + // println!("-----------------------------------------"); + } + + #[allow(dead_code)] + fn print_duplicated_entries(&self, check_method: CheckingMethod) { + let start_time: SystemTime = SystemTime::now(); + let mut number_of_files: u64 = 0; + let mut number_of_groups: u64 = 0; + + match check_method { + CheckingMethod::HASH => { + for i in &self.files_with_identical_hashes { + for j in i.1 { + number_of_files += j.len() as u64; + number_of_groups += 1; + } + } + println!("Found {} files in {} groups with same content:", number_of_files, number_of_groups); + for i in &self.files_with_identical_hashes { + println!("Size - {}", i.0); + for j in i.1 { + for k in j { + println!("{}", k.path); + } + println!("----"); + } + println!(); + } + } + CheckingMethod::SIZE => { + for i in &self.files_with_identical_size { + number_of_files += i.1.len() as u64; + number_of_groups += 1; + } + println!("Found {} files in {} groups with same size(may have different content):", number_of_files, number_of_groups); + for i in &self.files_with_identical_size { + println!("Size - {}", i.0); + for j in i.1 { + println!("{}", j.path); + } + println!(); + } + } + } + DuplicateFinder::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string()); } /// Remove unused entries when included or excluded overlaps with each other or are duplicated /// ``` /// let df : DuplicateFinder = saf /// ``` - pub fn optimize_directories(&mut self) { + fn optimize_directories(&mut self) { let start_time: SystemTime = SystemTime::now(); let mut optimized_included: Vec = Vec::::new(); diff --git a/src/main.rs b/src/main.rs index fbcf61f..1f8ba22 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,20 +18,36 @@ fn main() { match arguments[1].as_ref() { "-d" | "-duplicate_finder" => { if arguments.len() != 4 { - println!("Duplicate Finder must be executed with exactly 3 arguments"); + println!("FATAL ERROR: Duplicate Finder must be executed with exactly 3 arguments"); process::exit(1); } let mut df = duplicate::DuplicateFinder::new(); - df.set_exclude_directory(arguments[3].to_string()); - df.set_include_directory(arguments[2].to_string()); - df.optimize_directories(); - df.debug_print(); - df.find_duplicates_by_size(); - df.remove_files_with_unique_size(); - df.find_duplicates_by_hashing(); + 
+            df.set_include_directory(arguments[2].clone());
+
+            if arguments.len() > 3 {
+                df.set_exclude_directory(arguments[3].clone());
+            }
+            if arguments.len() > 4 {
+                let min_size = match arguments[4].parse::<u64>() {
+                    Ok(t) => t,
+                    Err(_) => {
+                        println!("FATAL ERROR: Cannot parse \"{}\" to u64", arguments[4]);
+                        process::exit(1);
+                    }
+                };
+                df.set_min_file_size(min_size);
+            }
+            if arguments.len() > 5 {
+                df.set_allowed_extensions(arguments[5].clone());
+            }
+            if arguments.len() > 6 {
+                df.set_excluded_items(arguments[6].clone());
+            }
+
+            df.find_duplicates(duplicate::CheckingMethod::SIZE);
         }
-        argum => println!("{} argument is not supported, check help for more info.", argum),
+        argum => println!("\"{}\" argument is not supported, check help for more info.", argum),
     };
 }
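---

Usage note: a minimal sketch of how the reworked pipeline is meant to be driven
after this patch. It mirrors the flow in the new main.rs; the paths, size
threshold, and extension list below are illustrative only, not part of the patch:

    use duplicate::{CheckingMethod, DuplicateFinder};

    fn example() {
        let mut df = DuplicateFinder::new();
        df.set_include_directory("/home/user/Pictures/".to_string()); // required
        df.set_exclude_directory("/home/user/Pictures/cache/".to_string()); // optional
        df.set_min_file_size(1000); // optional, in bytes
        df.set_allowed_extensions("IMAGE,VIDEO,pdf".to_string()); // IMAGE/VIDEO/MUSIC expand to predefined extension lists
        df.set_excluded_items(String::new()); // optional; parsing is still a TODO
        df.find_duplicates(CheckingMethod::SIZE); // consumes df, so it must be the last call
    }

From the command line the same flow corresponds to something like
"<binary> -d <include_dirs> <exclude_dirs> <min_file_size> <allowed_extensions> <excluded_items>",
where everything after <include_dirs> is optional.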