Beginning of adding allowed extension support.

2020-08-29 16:12:20 +02:00 · 2020-08-29 16:12:20 +02:00 · 10642548f9
parent ba8121a334
commit 10642548f9
4 changed files with 371 additions and 190 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1,5 +1,14 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+[[package]]
+name = "aho-corasick"
+version = "0.7.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "043164d8ba5c4c3035fec9bbee8647c0261d788f3474306f93bb65901cae0e86"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "arrayref"
 version = "0.3.6"
@ -115,6 +124,7 @@ dependencies = [
 "blake3",
 "multimap",
 "rayon",
+ "regex",
 ]

 [[package]]
@ -169,6 +179,12 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"

+[[package]]
+name = "memchr"
+version = "2.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
+
 [[package]]
 name = "memoffset"
 version = "0.5.5"
@ -222,6 +238,24 @@ dependencies = [
 "num_cpus",
 ]

+[[package]]
+name = "regex"
+version = "1.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+ "thread_local",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
+
 [[package]]
 name = "scopeguard"
 version = "1.1.0"
@ -240,6 +274,15 @@ version = "2.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "502d53007c02d7605a05df1c1a73ee436952781653da5d0bf57ad608f66932c1"

+[[package]]
+name = "thread_local"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
+dependencies = [
+ "lazy_static",
+]
+
 [[package]]
 name = "typenum"
 version = "1.12.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -7,4 +7,5 @@ edition = "2018"
 [dependencies]
 rayon = "1.4.0"
 blake3 = "0.3.6"
-multimap = "0.8.2"
+multimap = "0.8.2"
+regex = "1.3.9"
--- a/src/duplicate.rs
+++ b/src/duplicate.rs
@ -1,204 +1,100 @@
 // TODO: make sure the program has sufficient permissions to read and delete files
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::fs::{File, Metadata};
-use std::hash::Hash;
 use std::io::prelude::*;
 use std::path::Path;
 use std::time::SystemTime;
 use std::{fs, process};

+const MIN_FILE_SIZE: u64 = 1000;
+
+/// Selects how far `DuplicateFinder::find_duplicates` goes when comparing files.
+#[derive(PartialEq)]
+// `main.rs` only passes `SIZE` at this stage, so `HASH` would otherwise trigger a
+// dead-code warning.
+#[allow(dead_code)]
+pub enum CheckingMethod {
+    /// Group files by size only; the hashing pass is skipped.
+    SIZE,
+    /// After grouping by size, additionally run `check_files_hash` on the remaining groups.
+    HASH,
+}
+
+/// State of a single duplicate search: configuration, counters and collected results.
 pub struct DuplicateFinder {
+    /// Incremented in `check_files_size` for every regular file that is recorded.
    number_of_checked_files: usize,
+    /// Incremented in `check_files_size` for every regular file that is below the size limit.
+    number_of_ignored_files: usize,
+    /// Incremented in `check_files_size` for every directory entry that is encountered.
    number_of_checked_folders: usize,
+    /// Incremented in `check_files_size` for entries that are neither a directory nor a file.
    number_of_ignored_things: usize,
+    /// Recomputed in `remove_files_with_unique_size` as the sum of `len - 1` over all size groups.
    number_of_duplicated_files: usize,
-    // files : Vec<HashMap<FileEntry, Vec<FileEntry>>>,
-    files_size: HashMap<u64, Vec<FileEntry>>,
-    // files_hashes: HashMap<[u8],Vec<FileEntry>>,
-    // duplicated_entries // Same as files, but only with 2+ entries
-    // files : Vec<Vec<FileEntry>>,
+    /// Files grouped by their size (the key is `metadata.len()`).
+    files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
+    /// Keyed by file size; every inner `Vec` is one group of files that produced the same hash.
+    files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
+    /// Filled by `set_allowed_extensions`; not yet consulted by the scanning code in this file.
+    allowed_extensions: Vec<String>, // jpg, jpeg, mp4
+    // excluded_items: Vec<String>,
+    /// Directories that `check_files_size` does not descend into (compared as exact strings).
    excluded_directories: Vec<String>,
+    /// Root directories from which `check_files_size` starts walking.
    included_directories: Vec<String>,
-    // ignored_extensions: Vec<String>,
-    // allowed_extensions: Vec<String>,
-    // ignored_file_names: Vec<String>, // TODO Regex Support
-    // allowed_file_names: Vec<String>, // TODO Regex Support
+    /// Value stored by `set_min_file_size`.
+    min_file_size: u64,
 }

 impl DuplicateFinder {
+    /// Creates a finder with all counters at zero, empty result maps, no included or excluded
+    /// directories, no allowed extensions and `min_file_size` set to 0.
    pub fn new() -> DuplicateFinder {
        DuplicateFinder {
            number_of_checked_files: 0,
+            number_of_ignored_files: 0,
            number_of_checked_folders: 0,
            number_of_ignored_things: 0,
            number_of_duplicated_files: 0,
-            files_size: Default::default(),
-            // files_hashes: Default::default(),
+            files_with_identical_size: Default::default(),
+            files_with_identical_hashes: Default::default(),
+            // excluded_items: vec![],
            excluded_directories: vec![],
            included_directories: vec![],
-            // ignored_extensions: vec![],
-            // allowed_extensions: vec![],
-            // ignored_file_names: vec![],
-            // allowed_file_names: vec![]
+            min_file_size: 0,
+            allowed_extensions: vec![],
        }
    }
-    // TODO - Still isn't used but it will be probably required with GUI
-    // pub fn clear(&mut self) {
-    //
-    //     self.number_of_checked_files = 0;
-    //     self.number_of_checked_folders = 0;
-    //     self.number_of_ignored_things = 0;
-    //     self.number_of_files_which_has_duplicated_entries = 0;
-    //     self.number_of_duplicated_files = 0;
-    //     self.files_sizeclear();
-    //     self.excluded_directories.clear();
-    //     self.included_directories.clear();
-    // }
-    pub fn find_duplicates_by_size(&mut self) {
-        // TODO add multithread checking for file hash
-        //let mut path;
-        let start_time: SystemTime = SystemTime::now();
-        let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 16); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector

-        // Add root folders for finding
-        for id in &self.included_directories {
-            folders_to_check.push(id.to_string());
+    /// Runs the whole search pipeline and consumes the finder.
+    ///
+    /// Steps, in order: normalise the directory lists, collect files grouped by size, drop the
+    /// sizes that occur only once and, when `check_method` is `CheckingMethod::HASH`, hash the
+    /// remaining files.
+    ///
+    /// NOTE(review): the call that prints the result is commented out below and `self` is taken
+    /// by value, so the collected groups are currently dropped without being reported.
+    pub fn find_duplicates(mut self, check_method: CheckingMethod) {
+        self.optimize_directories();
+        self.debug_print();
+        self.check_files_size();
+        self.remove_files_with_unique_size();
+        if check_method == CheckingMethod::HASH {
+            self.check_files_hash();
        }
+        // self.print_duplicated_entries(check_method);
+    }

-        let mut current_folder: String;
-        let mut next_folder: String;
-        while !folders_to_check.is_empty() {
-            current_folder = folders_to_check.pop().unwrap();
+    /// Stores `min_size` in the `min_file_size` field.
+    ///
+    /// `main.rs` parses the value from a command line argument as `u64`.
+    pub fn set_min_file_size(&mut self, min_size: u64) {
+        self.min_file_size = min_size;
+    }

-            let read_dir = fs::read_dir(&current_folder);
-            let read_dir = match read_dir {
-                Ok(t) => t,
-                _ => continue,
-            };
-            for entry in read_dir {
-                let entry_data = entry.unwrap();
-                let metadata: Metadata = entry_data.metadata().unwrap();
-                if metadata.is_dir() {
-                    let mut is_excluded_dir = false;
-                    next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
-                    for ed in &self.excluded_directories {
-                        if next_folder == ed.to_string() {
-                            is_excluded_dir = true;
-                            break;
-                        }
-                    }
-                    if !is_excluded_dir {
-                        folders_to_check.push(next_folder);
-                    }
-                    self.number_of_checked_folders += 1;
+    /// Placeholder for configuring excluded items; the argument is currently ignored and the
+    /// function body is empty.
+    pub fn set_excluded_items(&mut self, _excluded_items: String) {
+        // TODO Still don't know how to exactly parse this
+        // Things like /.git/ should be by default hidden with help of this *.git*
+    }
+    /// Parses a comma-separated list of file extensions and appends the valid ones to
+    /// `allowed_extensions`.
+    ///
+    /// * The keywords `IMAGE`, `VIDEO` and `MUSIC` are expanded to predefined extension lists.
+    /// * Surrounding whitespace is removed and one leading dot is tolerated (`.jpg` == `jpg`).
+    /// * Empty entries (e.g. from `jpg,,png`) are skipped silently.
+    /// * Entries that still contain a dot (e.g. `tar.gz`) are reported and skipped instead of
+    ///   being stored in a mangled form.
+    ///
+    /// When nothing valid is provided a message is printed and the list stays unchanged, which
+    /// the messages describe as "all extensions allowed".
+    pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
+        if allowed_extensions.is_empty() {
+            println!("No allowed extension was provided, so all are allowed");
+            // Nothing to parse; returning here also avoids storing an empty extension.
+            return;
+        }
+        // Expand the group keywords before splitting, so their members are validated as well.
+        allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg");
+        allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
+        allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");

-                //println!("Directory\t - {:?}", next_folder); // DEBUG
-                } else if metadata.is_file() {
-                    let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap();
-                    // println!("File\t\t - {:?}", current_file_name); // DEBUG
-                    //file_to_check
-                    let fe: FileEntry = FileEntry {
-                        path: current_file_name,
-                        size: metadata.len(),
-                        created_date: metadata.created().unwrap(),
-                        modified_date: metadata.modified().unwrap(),
-                    };
-                    if !self.files_size.contains_key(&metadata.len()) {
-                        self.files_size.insert(metadata.len(), Vec::new());
-                    }
-                    self.files_size.get_mut(&metadata.len()).unwrap().push(fe);
-
-                    self.number_of_checked_files += 1;
-                } else {
-                    // Probably this is symbolic links so we are free to ignore this
-                    // println!("Found another type of file {} {:?}","".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
-                    self.number_of_ignored_things += 1;
+        for raw_extension in allowed_extensions.split(',') {
+            // Trim first, so that an entry such as " .png" is recognised as starting with a dot.
+            let trimmed = raw_extension.trim();
+            // '.' is a single byte, so slicing at index 1 cannot split a character.
+            let extension = if trimmed.starts_with('.') { &trimmed[1..] } else { trimmed };
+            if extension.is_empty() {
+                continue;
+            }
+            if extension.contains('.') {
+                println!("{} is not valid extension(valid extension doesn't have dot inside)", trimmed);
+                continue;
            }
-        }
-        self.debug_print();
-        DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates".to_string());
-        //println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
-    }
-    // pub fn save_to_file(&self) {}

-    /// Remove files which have unique size
-    pub fn remove_files_with_unique_size(&mut self) {
-        let start_time: SystemTime = SystemTime::now();
-        self.debug_print();
-        let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();

-        self.number_of_duplicated_files = 0;
-
-        for entry in &self.files_size {
-            if entry.1.len() > 1 {
-                self.number_of_duplicated_files += entry.1.len() - 1;
-                new_hashmap.insert(*entry.0, entry.1.clone());
-            }
+            self.allowed_extensions.push(extension.to_string());
        }

-        self.files_size = new_hashmap;
-
-        self.debug_print();
-        DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_files".to_string());
-    }
-
-    /// Should be slower than checking in different ways, but still needs to be checked
-    pub fn find_duplicates_by_hashing(mut self) {
-        let start_time: SystemTime = SystemTime::now();
-        let mut file_handler: File;
-
-        for entry in self.files_size {
-            let mut hashes: Vec<String> = Vec::new();
-            if entry.1.len() > 5 {
-                println!("{}", entry.1.len());
-            }
-
-            for file_entry in entry.1.iter().enumerate() {
-                file_handler = match File::open(&file_entry.1.path) {
-                    Ok(T) => T,
-                    Err(_) => {
-                        // Removing File may happens,so we should handle this
-                        hashes.push("".to_owned());
-                        continue;
-                    }
-                };
-
-                let mut hasher: blake3::Hasher = blake3::Hasher::new();
-                let mut buffer = [0u8; 16384];
-                loop {
-                    let n = file_handler.read(&mut buffer).unwrap();
-                    if n == 0 {
-                        break;
-                    }
-                    hasher.update(&buffer[..n]);
-                }
-                //println!("{}", hasher.finalize().to_hex().to_string());
-            }
+        if self.allowed_extensions.is_empty() {
+            println!("No valid extensions were provided, so allowing all extensions by default.");
        }

-        DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_hashing".to_string());
    }
-    // /// I'mm not sure about performance, so maybe I
-    // pub fn find_small_duplicates_by_hashing(mut self){
-    //     let start_time: SystemTime = SystemTime::now();
-    //     let size_limit_for_small_files u64 =  // 16 MB
-    //     let mut new_hashmap
-    //
-    //     DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_comparting_begin_bytes_of_file".to_string());
-    // }
-
-    pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
-        println!(
-            "Execution of function \"{}\" took {:?}",
-            function_name,
-            end_time.duration_since(start_time).expect("Time cannot go reverse.")
-        );
-    }
-
-    /// Setting include directories, panics when there is not directories available
    pub fn set_include_directory(&mut self, mut include_directory: String) {
-        let start_time: SystemTime = SystemTime::now();
+        // let start_time: SystemTime = SystemTime::now();

        if include_directory.is_empty() {
            println!("At least one directory must be provided")
@ -225,7 +121,7 @@ impl DuplicateFinder {
                process::exit(1);
            }
            if !Path::new(&directory).exists() {
-                println!("Include Directory ERROR: Path {} doens't exists.", directory);
+                println!("Include Directory ERROR: Path {} doesn't exists.", directory);
                process::exit(1);
            }
            if !Path::new(&directory).exists() {
@ -235,9 +131,9 @@ impl DuplicateFinder {

            // directory must end with /, due to possiblity of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
            if !directory.ends_with('/') {
-                checked_directories.push(directory + "/");
+                checked_directories.push(directory.trim().to_string() + "/");
            } else {
-                checked_directories.push(directory);
+                checked_directories.push(directory.trim().to_string());
            }
        }

@ -248,11 +144,11 @@ impl DuplicateFinder {

        self.included_directories = checked_directories;

-        DuplicateFinder::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
+        //DuplicateFinder::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
    }

    pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
-        let start_time: SystemTime = SystemTime::now();
+        //let start_time: SystemTime = SystemTime::now();
        if exclude_directory.is_empty() {
            return;
        }
@ -278,7 +174,7 @@ impl DuplicateFinder {
                process::exit(1);
            }
            if !Path::new(&directory).exists() {
-                println!("Exclude Directory ERROR: Path {} doens't exists.", directory);
+                println!("Exclude Directory ERROR: Path {} doesn't exists.", directory);
                process::exit(1);
            }
            if !Path::new(&directory).exists() {
@ -288,33 +184,258 @@ impl DuplicateFinder {

            // directory must end with /, due to possiblity of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
            if !directory.ends_with('/') {
-                checked_directories.push(directory + "/");
+                checked_directories.push(directory.trim().to_string() + "/");
            } else {
-                checked_directories.push(directory);
+                checked_directories.push(directory.trim().to_string());
+            }
+        }
+        println!("{:?}", checked_directories);
+        self.excluded_directories = checked_directories;
+
+        //DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
+    }
+
+    // TODO - Still isn't used but it will be probably required with GUI
+    // pub fn clear(&mut self) {
+    //
+    //     self.number_of_checked_files = 0;
+    //     self.number_of_checked_folders = 0;
+    //     self.number_of_ignored_things = 0;
+    //     self.number_of_files_which_has_duplicated_entries = 0;
+    //     self.number_of_duplicated_files = 0;
+    //     self.files_sizeclear();
+    //     self.excluded_directories.clear();
+    //     self.included_directories.clear();
+    // }
+    /// Walks every included directory (iteratively, using an explicit stack of folders) and
+    /// groups the regular files it finds by size in `files_with_identical_size`.
+    ///
+    /// * Directories whose path equals an entry of `excluded_directories` are not descended into.
+    /// * Files smaller than the effective minimum size are only counted in
+    ///   `number_of_ignored_files`. The minimum is the value given to `set_min_file_size`, or
+    ///   `MIN_FILE_SIZE` when none was set (0).
+    /// * Entries that cannot be read, have a non-UTF-8 name, or are neither a directory nor a
+    ///   regular file are counted in `number_of_ignored_things` instead of aborting the scan.
+    fn check_files_size(&mut self) {
+        // TODO maybe add multithread checking for file hash
+        let start_time: SystemTime = SystemTime::now();
+
+        // 0 is the value `new()` assigns, i.e. "not configured": keep the built-in default then.
+        let min_file_size = if self.min_file_size > 0 { self.min_file_size } else { MIN_FILE_SIZE };
+
+        // Pre-allocated so that typical directory trees do not force the stack to grow.
+        let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 2);
+
+        // Add root folders for finding
+        folders_to_check.extend(self.included_directories.iter().cloned());
+
+        while let Some(current_folder) = folders_to_check.pop() {
+            let read_dir = match fs::read_dir(&current_folder) {
+                Ok(t) => t,
+                Err(_) => continue,
+            };
+            for entry in read_dir {
+                // An entry can disappear or become unreadable while the directory is scanned.
+                let entry_data = match entry {
+                    Ok(t) => t,
+                    Err(_) => {
+                        self.number_of_ignored_things += 1;
+                        continue;
+                    }
+                };
+                let metadata: Metadata = match entry_data.metadata() {
+                    Ok(t) => t,
+                    Err(_) => {
+                        self.number_of_ignored_things += 1;
+                        continue;
+                    }
+                };
+                // Paths are stored as `String`, so names that are not valid UTF-8 cannot be kept.
+                let file_name = match entry_data.file_name().into_string() {
+                    Ok(t) => t,
+                    Err(_) => {
+                        self.number_of_ignored_things += 1;
+                        continue;
+                    }
+                };
+
+                if metadata.is_dir() {
+                    // Folder paths always end with '/', matching the stored directory lists.
+                    let next_folder = format!("{}{}/", current_folder, file_name);
+                    if !self.excluded_directories.contains(&next_folder) {
+                        folders_to_check.push(next_folder);
+                    }
+                    self.number_of_checked_folders += 1;
+                } else if metadata.is_file() {
+                    if metadata.len() >= min_file_size {
+                        // Timestamps are not available on every platform/filesystem; fall back
+                        // instead of panicking so the file still takes part in the comparison.
+                        let modified_date = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
+                        let fe: FileEntry = FileEntry {
+                            path: format!("{}{}", current_folder, file_name),
+                            size: metadata.len(),
+                            created_date: metadata.created().unwrap_or(modified_date),
+                            modified_date,
+                        };
+                        // Single lookup: create the group on first use and append to it.
+                        self.files_with_identical_size.entry(metadata.len()).or_insert_with(Vec::new).push(fe);
+
+                        self.number_of_checked_files += 1;
+                    } else {
+                        self.number_of_ignored_files += 1;
+                    }
+                } else {
+                    // Probably this is symbolic links so we are free to ignore this
+                    self.number_of_ignored_things += 1;
+                }
+            }
+        }
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
+    }
+    // pub fn save_results_to_file(&self) {}
+
+    /// Removes every size group that contains a single file and recomputes
+    /// `number_of_duplicated_files`.
+    ///
+    /// In each remaining group one file counts as the original, so a group of `n` files
+    /// contributes `n - 1` duplicates.
+    fn remove_files_with_unique_size(&mut self) {
+        let start_time: SystemTime = SystemTime::now();
+        self.debug_print();
+
+        let mut duplicated_files: usize = 0;
+        for files in self.files_with_identical_size.values() {
+            if files.len() > 1 {
+                duplicated_files += files.len() - 1;
            }
        }

-        self.excluded_directories = checked_directories;
+        self.number_of_duplicated_files = duplicated_files;
+        // `retain` filters the map in place, so no `FileEntry` list has to be cloned.
+        self.files_with_identical_size.retain(|_, files| files.len() > 1);

-        DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "remove_files_with_unique_size".to_string());
    }

-    pub fn debug_print(&self) {
-        println!("---------------DEBUG PRINT---------------");
-        println!("Number of all checked files - {}", self.number_of_checked_files);
-        println!("Number of all checked folders - {}", self.number_of_checked_folders);
-        println!("Number of all ignored things - {}", self.number_of_ignored_things);
-        println!("Number of duplicated files - {}", self.number_of_duplicated_files);
-        println!("Files list - {}", self.files_size.len());
-        println!("Excluded directories - {:?}", self.excluded_directories);
-        println!("Included directories - {:?}", self.included_directories);
-        println!("-----------------------------------------");
+    /// Hashes (BLAKE3) every file that shares its size with another file and stores the groups
+    /// with identical hashes in `files_with_identical_hashes`, keyed by file size.
+    ///
+    /// Should be slower than checking in different ways, but still needs to be checked.
+    ///
+    /// Files that cannot be opened or read (for example because they were removed in the
+    /// meantime) are skipped instead of aborting the whole run.
+    fn check_files_hash(&mut self) {
+        let start_time: SystemTime = SystemTime::now();
+        // One read buffer reused for every file.
+        let mut buffer = [0u8; 16384];
+
+        for (size, files) in &self.files_with_identical_size {
+            let mut files_by_hash: HashMap<String, Vec<FileEntry>> = HashMap::new();
+
+            'files: for file_entry in files {
+                let mut file_handler = match File::open(&file_entry.path) {
+                    Ok(t) => t,
+                    Err(_) => continue,
+                };
+
+                let mut hasher: blake3::Hasher = blake3::Hasher::new();
+                loop {
+                    match file_handler.read(&mut buffer) {
+                        Ok(0) => break,
+                        Ok(n) => {
+                            hasher.update(&buffer[..n]);
+                        }
+                        // An interrupted read is not a failure; simply try again.
+                        Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+                        // A partially read file would yield a wrong hash, so drop the file.
+                        Err(_) => continue 'files,
+                    }
+                }
+                let hash_string: String = hasher.finalize().to_hex().to_string();
+                files_by_hash.entry(hash_string).or_insert_with(Vec::new).push(file_entry.clone());
+            }
+            // Only hashes shared by at least two files describe duplicates.
+            for (_, same_content) in files_by_hash {
+                if same_content.len() > 1 {
+                    self.files_with_identical_hashes.entry(*size).or_insert_with(Vec::new).push(same_content);
+                }
+            }
+        }
+        self.debug_print();
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "check_files_hash".to_string());
+    }
+    // /// I'mm not sure about performance, so maybe I
+    // pub fn find_small_duplicates_by_hashing(mut self){
+    //     let start_time: SystemTime = SystemTime::now();
+    //     let size_limit_for_small_files u64 =  // 16 MB
+    //     let mut new_hashmap
+    //
+    //     DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_comparting_begin_bytes_of_file".to_string());
+    // }
+
+    /// Prints how long the function called `function_name` took, measured from `start_time`
+    /// to `end_time`.
+    ///
+    /// `SystemTime` is not monotonic: if the system clock is set back between the two
+    /// measurements, `duration_since` fails. That case is reported as a zero duration instead
+    /// of panicking, because timing output is purely informational.
+    fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
+        let elapsed = end_time.duration_since(start_time).unwrap_or_default();
+        println!("Execution of function \"{}\" took {:?}", function_name, elapsed);
+    }
+
+    /// Debug helper meant to print the counters, result sizes and directory lists.
+    ///
+    /// Currently a no-op: every statement in the body is commented out, so the calls to this
+    /// function print nothing.
+    fn debug_print(&self) {
+        // println!("---------------DEBUG PRINT---------------");
+        // println!("Number of all checked files - {}", self.number_of_checked_files);
+        // println!("Number of all ignored files - {}", self.number_of_ignored_files);
+        // println!("Number of all checked folders - {}", self.number_of_checked_folders);
+        // println!("Number of all ignored things - {}", self.number_of_ignored_things);
+        // println!("Number of duplicated files - {}", self.number_of_duplicated_files);
+        // let mut file_size : u64 = 0;
+        // for i in &self.files_with_identical_size{
+        //     file_size += i.1.len() as u64;
+        // }
+        // println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size);
+        // let mut hashed_file_size : u64 = 0;
+        // for i in &self.files_with_identical_hashes{
+        //     for j in i.1{
+        //         hashed_file_size += j.len() as u64;
+        //     }
+        // }
+        // println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size);
+        // println!("Excluded directories - {:?}", self.excluded_directories);
+        // println!("Included directories - {:?}", self.included_directories);
+        // println!("-----------------------------------------");
+    }
+
+    /// Prints a summary line followed by every group of duplicated files.
+    ///
+    /// With `CheckingMethod::HASH` the groups come from `files_with_identical_hashes` (several
+    /// groups may share one size and are separated by `----`); with `CheckingMethod::SIZE` they
+    /// come from `files_with_identical_size`.
+    #[allow(dead_code)]
+    fn print_duplicated_entries(&self, check_method: CheckingMethod) {
+        let start_time: SystemTime = SystemTime::now();
+
+        match check_method {
+            CheckingMethod::HASH => {
+                let by_hash = &self.files_with_identical_hashes;
+                let number_of_groups: u64 = by_hash.values().map(|groups| groups.len() as u64).sum();
+                let number_of_files: u64 = by_hash.values().flatten().map(|group| group.len() as u64).sum();
+                println!("Found {} files in {} groups with same content:", number_of_files, number_of_groups);
+
+                for (size, groups) in by_hash {
+                    println!("Size - {}", size);
+                    for group in groups {
+                        for file in group {
+                            println!("{}", file.path);
+                        }
+                        println!("----");
+                    }
+                    println!();
+                }
+            }
+            CheckingMethod::SIZE => {
+                let by_size = &self.files_with_identical_size;
+                let number_of_groups: u64 = by_size.len() as u64;
+                let number_of_files: u64 = by_size.values().map(|files| files.len() as u64).sum();
+                println!("Found {} files in {} groups with same size(may have different content):", number_of_files, number_of_groups);
+
+                for (size, files) in by_size {
+                    println!("Size - {}", size);
+                    for file in files {
+                        println!("{}", file.path);
+                    }
+                    println!();
+                }
+            }
+        }
+        DuplicateFinder::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
    }
    /// Remove unused entries when included or excluded overlaps with each other or are duplicated
    /// ```
    /// let df : DuplicateFinder = saf
    /// ```
-    pub fn optimize_directories(&mut self) {
+    fn optimize_directories(&mut self) {
        let start_time: SystemTime = SystemTime::now();

        let mut optimized_included: Vec<String> = Vec::<String>::new();
--- a/src/main.rs
+++ b/src/main.rs
@ -18,20 +18,36 @@ fn main() {
    match arguments[1].as_ref() {
        "-d" | "-duplicate_finder" => {
            if arguments.len() != 4 {
-                println!("Duplicate Finder must be executed with exactly 3 arguments");
+                println!("FATAL ERROR: Duplicate Finder must be executed with exactly 3 arguments");
                process::exit(1);
            }

            let mut df = duplicate::DuplicateFinder::new();
-            df.set_exclude_directory(arguments[3].to_string());
-            df.set_include_directory(arguments[2].to_string());
-            df.optimize_directories();
-            df.debug_print();
-            df.find_duplicates_by_size();
-            df.remove_files_with_unique_size();
-            df.find_duplicates_by_hashing();
+            df.set_include_directory(arguments[2].clone());
+
+            if arguments.len() > 3 {
+                df.set_exclude_directory(arguments[3].clone());
+            }
+            if arguments.len() > 4 {
+                let min_size = match arguments[4].parse::<u64>() {
+                    Ok(t) => t,
+                    Err(_) => {
+                        println!("FATAL ERROR: Cannot parse \"{}\" to u64", arguments[4]);
+                        process::exit(1);
+                    }
+                };
+                df.set_min_file_size(min_size);
+            }
+            if arguments.len() > 5 {
+                df.set_allowed_extensions(arguments[5].clone());
+            }
+            if arguments.len() > 6 {
+                df.set_excluded_items(arguments[6].clone());
+            }
+
+            df.find_duplicates(duplicate::CheckingMethod::SIZE);
        }
-        argum => println!("{} argument is not supported, check help for more info.", argum),
+        argum => println!("\"{}\" argument is not supported, check help for more info.", argum),
    };
 }