From 752ee0d7c755821f6fac43abce71d2a0efc198ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= <mikrutrafal54@gmail.com>
Date: Thu, 27 Aug 2020 18:57:56 +0200
Subject: [PATCH] Added simple files adding to HashMap

---
Review notes (not applied by `git am`):
- Reconstructed from an HTML-mangled copy: restored `&current_folder` (was eaten as the
  `&curren;` entity) and all generic parameter lists stripped as pseudo-tags.
- Fixed on added lines only: second validation in set_include_directory /
  set_exclude_directory used `.exists()` twice; the "isn't a folder" branch now uses
  `.is_dir()`. Message/comment typos on added lines corrected. Context and removed
  lines are kept byte-identical to the pre-image, including their existing typos.

 .rustfmt.toml    |   2 +-
 src/duplicate.rs | 157 +++++++++++++++++++++++++++++++++++++++++++----
 src/main.rs      |   1 +
 3 files changed, 146 insertions(+), 14 deletions(-)

diff --git a/.rustfmt.toml b/.rustfmt.toml
index 59b0665..68405c5 100644
--- a/.rustfmt.toml
+++ b/.rustfmt.toml
@@ -1,4 +1,4 @@
 newline_style = "Unix"
-max_width = 120
+max_width = 200
 remove_nested_parens = true
 
diff --git a/src/duplicate.rs b/src/duplicate.rs
index 6fd191d..778e651 100644
--- a/src/duplicate.rs
+++ b/src/duplicate.rs
@@ -1,13 +1,20 @@
 // Todo, należy upewnić się, że ma wystarczające uprawnienia do odczytu i usuwania
 use std::collections::HashMap;
-use std::process;
+use std::fs::Metadata;
+use std::path::Path;
+use std::time::SystemTime;
+use std::{fs, process};
 
 pub struct DuplicateFinder {
     number_of_checked_files: u64,
+    number_of_checked_folders: u64,
+    number_of_ignored_things: u64,
     number_of_files_which_has_duplicated_entries: u64,
     number_of_duplicated_files: u64,
     // files : Vec<Vec<Vec<FileEntry>>>,
-    files: HashMap<u64, Vec<FileEntry>>,
+    files: HashMap<u64, FileEntry>,
+    files_with_duplicated_entries: HashMap<u64, FileEntry>,
+    // duplicated_entries // Same as files, but only with 2+ entries
     // files : Vec<Vec<FileEntry>>,
     excluded_directories: Vec<String>,
     included_directories: Vec<String>,
@@ -17,22 +24,88 @@ impl DuplicateFinder {
     pub fn new() -> DuplicateFinder {
         DuplicateFinder {
             number_of_checked_files: 0,
+            number_of_checked_folders: 0,
+            number_of_ignored_things: 0,
             number_of_files_which_has_duplicated_entries: 0,
             number_of_duplicated_files: 0,
             files: Default::default(),
+            files_with_duplicated_entries: Default::default(),
             excluded_directories: vec![],
             included_directories: vec![],
         }
     }
     // pub fn clear(&mut self) {
     //     self.number_of_checked_files = 0;
+    //     self.number_of_checked_folders = 0;
+    //     self.number_of_ignored_things = 0;
     //     self.number_of_files_which_has_duplicated_entries = 0;
     //     self.number_of_duplicated_files = 0;
     //     self.files.clear();
     //     self.excluded_directories.clear();
     //     self.included_directories.clear();
     // }
-    // pub fn find_duplicates(&mut self) {}
+    pub fn find_duplicates(&mut self) {
+        //let mut path;
+        let start_time: SystemTime = SystemTime::now();
+        let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 16); // This should be small enough to not see too big a difference and big enough to store most of paths without needing to resize vector
+
+        // Add root folders for finding
+        for id in &self.included_directories {
+            folders_to_check.push(id.to_string());
+        }
+
+        let mut current_folder: String;
+        let mut next_folder: String;
+        while !folders_to_check.is_empty() {
+            current_folder = folders_to_check.pop().unwrap();
+
+            let read_dir = fs::read_dir(&current_folder);
+            let read_dir = match read_dir {
+                Ok(t) => t,
+                _ => continue,
+            };
+            for entry in read_dir {
+                let entry_data = entry.unwrap();
+                let metadata: Metadata = entry_data.metadata().unwrap();
+                if metadata.is_dir() {
+                    let mut is_excluded_dir = false;
+                    next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
+                    for ed in &self.excluded_directories {
+                        if next_folder == ed.to_string() {
+                            is_excluded_dir = true;
+                            break;
+                        }
+                    }
+                    if !is_excluded_dir {
+                        folders_to_check.push(next_folder);
+                    }
+                    self.number_of_checked_folders += 1;
+
+                    //println!("Directory\t - {:?}", next_folder); // DEBUG
+                } else if metadata.is_file() {
+                    let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap();
+                    //file_to_check
+                    let fe: FileEntry = FileEntry {
+                        path: current_file_name,
+                        size: metadata.len(),
+                        created_date: metadata.created().unwrap(),
+                        modified_date: metadata.modified().unwrap(),
+                    };
+                    self.files.insert(metadata.len(), fe);
+
+                    self.number_of_checked_files += 1;
+                    // println!("File\t\t - {:?}", current_file); // DEBUG
+                } else {
+                    // Probably this is a symbolic link, so we are free to ignore it
+                    // println!("Found another type of file {} {:?}", "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
+                    self.number_of_ignored_things += 1;
+                }
+            }
+        }
+        self.debug_print();
+        let end_time: SystemTime = SystemTime::now();
+        println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
+    }
     // pub fn save_to_file(&self) {}
 
     /// Setting include directories, panics when there is not directories available
@@ -61,6 +134,14 @@
             println!("Include Directory ERROR: Relative path are not supported.");
             process::exit(1);
         }
+        if !Path::new(&directory).exists() {
+            println!("Include Directory ERROR: Path {} doesn't exist.", directory);
+            process::exit(1);
+        }
+        if !Path::new(&directory).is_dir() {
+            println!("Include Directory ERROR: {} isn't a folder.", directory);
+            process::exit(1);
+        }
 
         // directory must end with /, due to possiblity of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
         if !directory.ends_with("/") {
@@ -105,6 +186,14 @@
             println!("Exclude Directory ERROR: Relative path are not supported.");
             process::exit(1);
         }
+        if !Path::new(&directory).exists() {
+            println!("Exclude Directory ERROR: Path {} doesn't exist.", directory);
+            process::exit(1);
+        }
+        if !Path::new(&directory).is_dir() {
+            println!("Exclude Directory ERROR: {} isn't a folder.", directory);
+            process::exit(1);
+        }
 
         // directory must end with /, due to possiblity of incorrect assumption, that e.g. /home/rafal is top folder to /home/rafalinho
         if !directory.ends_with("/") {
@@ -122,10 +211,9 @@
     pub fn debug_print(&self) {
         println!("---------------DEBUG PRINT---------------");
         println!("Number of all checked files - {}", self.number_of_checked_files);
-        println!(
-            "Number of all files with duplicates - {}",
-            self.number_of_files_which_has_duplicated_entries
-        );
+        println!("Number of all checked folders - {}", self.number_of_checked_folders);
+        println!("Number of all ignored things - {}", self.number_of_ignored_things);
+        println!("Number of all files with duplicates - {}", self.number_of_files_which_has_duplicated_entries);
         println!("Number of duplicated files - {}", self.number_of_duplicated_files);
         println!("Files list - {}", self.files.len());
         println!("Excluded directories - {:?}", self.excluded_directories);
@@ -186,19 +274,60 @@
         self.included_directories = optimized_included;
         optimized_included = Vec::<String>::new();
         self.excluded_directories = optimized_excluded;
-        // optimized_excluded = Vec::<String>::new();
+        optimized_excluded = Vec::<String>::new();
 
         // Remove include directories which are inside any exclude directory
-        for ed in &self.excluded_directories {
-            for id in &self.included_directories {
+        for id in &self.included_directories {
+            let mut is_inside: bool = false;
+            for ed in &self.excluded_directories {
                 if id.starts_with(ed) {
-                    continue;
+                    is_inside = true;
+                    break;
                 }
+            }
+            if !is_inside {
                 optimized_included.push(id.to_string());
             }
         }
+        self.included_directories = optimized_included;
+        optimized_included = Vec::<String>::new();
+
+        // Remove non-existing directories
+        for id in &self.included_directories {
+            let path = Path::new(id);
+            if path.exists() {
+                optimized_included.push(id.to_string());
+            }
+        }
+
+        for ed in &self.excluded_directories {
+            let path = Path::new(ed);
+            if path.exists() {
+                optimized_excluded.push(ed.to_string());
+            }
+        }
+
         self.included_directories = optimized_included;
         // optimized_included = Vec::<String>::new();
+        self.excluded_directories = optimized_excluded;
+        optimized_excluded = Vec::<String>::new();
+
+        // Excluded paths must be inside an include path, because TODO
+        for ed in &self.excluded_directories {
+            let mut is_inside: bool = false;
+            for id in &self.included_directories {
+                if ed.starts_with(id) {
+                    is_inside = true;
+                    break;
+                }
+            }
+            if is_inside {
+                optimized_excluded.push(ed.to_string());
+            }
+        }
+
+        self.excluded_directories = optimized_excluded;
+        // optimized_excluded = Vec::<String>::new();
 
         if self.included_directories.len() == 0 {
             println!("Optimize Directories ERROR: Excluded directories overlaps all included directories.");
@@ -212,6 +341,8 @@
 }
 
 struct FileEntry {
-    file_path: String,
-    file_size: u64,
+    pub path: String,
+    pub size: u64,
+    pub created_date: SystemTime,
+    pub modified_date: SystemTime,
 }
diff --git a/src/main.rs b/src/main.rs
index c3b06e1..c40d998 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -27,6 +27,7 @@ fn main() {
             df.set_include_directory(arguments[2].to_string());
             df.optimize_directories();
             df.debug_print();
+            df.find_duplicates();
         }
         argum => println!("{} argument is not supported, check help for more info.", argum),
     };