Added basic hashes
This commit is contained in:
parent
752ee0d7c7
commit
ba8121a334
248
Cargo.lock
generated
248
Cargo.lock
generated
|
@ -1,5 +1,253 @@
|
||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
[[package]]
|
||||||
|
name = "arrayref"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayvec"
|
||||||
|
version = "0.5.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "blake3"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ce4f9586c9a3151c4b49b19e82ba163dd073614dd057e53c969e1a4db5b52720"
|
||||||
|
dependencies = [
|
||||||
|
"arrayref",
|
||||||
|
"arrayvec",
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"constant_time_eq",
|
||||||
|
"crypto-mac",
|
||||||
|
"digest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.0.59"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "66120af515773fb005778dc07c261bd201ec8ce50bd6e7144c927753fe013381"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "0.1.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "constant_time_eq"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-channel"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ee0cc8804d5393478d743b035099520087a5186f3b93fa58cec08fa62407b6"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.7.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9f02af974daeee82218205558e51ec8768b48cf524bd01d550abe5573a608285"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"maybe-uninit",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"cfg-if",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"lazy_static",
|
||||||
|
"maybe-uninit",
|
||||||
|
"memoffset",
|
||||||
|
"scopeguard",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"cfg-if",
|
||||||
|
"lazy_static",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crypto-mac"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
"subtle",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "czkawka"
|
name = "czkawka"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"blake3",
|
||||||
|
"multimap",
|
||||||
|
"rayon",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "digest"
|
||||||
|
version = "0.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.6.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd56b59865bce947ac5958779cfa508f6c3b9497cc762b7e24a12d11ccde2c4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "generic-array"
|
||||||
|
version = "0.14.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
|
||||||
|
dependencies = [
|
||||||
|
"typenum",
|
||||||
|
"version_check",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.1.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3deed196b6e7f9e44a2ae8d94225d80302d81208b1bb673fd21fe634645c85a9"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lazy_static"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.76"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "755456fae044e6fa1ebbbd1b3e902ae19e73097ed4ed87bb79934a867c007bc3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "maybe-uninit"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memoffset"
|
||||||
|
version = "0.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "multimap"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1255076139a83bb467426e7f8d0134968a8118844faa755985e077cf31850333"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num_cpus"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cfd016f0c045ad38b5251be2c9c0ab806917f82da4d36b2a327e5166adad9270"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "91739a34c4355b5434ce54c9086c5895604a9c278586d1f1aa95e04f66b525a0"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-channel",
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
"lazy_static",
|
||||||
|
"num_cpus",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scopeguard"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.115"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e54c9a88f2da7238af84b5101443f0c0d0a3bbdc455e34a5c9497b1903ed55d5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "subtle"
|
||||||
|
version = "2.2.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "502d53007c02d7605a05df1c1a73ee436952781653da5d0bf57ad608f66932c1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "typenum"
|
||||||
|
version = "1.12.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "version_check"
|
||||||
|
version = "0.9.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
[package]
|
[package]
|
||||||
name = "czkawka"
|
name = "czkawka"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
authors = ["Rafał Mikrut <mikrutrafal54@gmail.com>"]
|
authors = ["Rafał Mikrut <mikrutrafal@protonmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
rayon = "1.4.0"
|
||||||
|
blake3 = "0.3.6"
|
||||||
|
multimap = "0.8.2"
|
178
src/duplicate.rs
178
src/duplicate.rs
|
@ -1,23 +1,28 @@
|
||||||
// TODO: make sure the process has sufficient permissions to read and delete files
|
// TODO: make sure the process has sufficient permissions to read and delete files
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fs::Metadata;
|
use std::fs::{File, Metadata};
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::io::prelude::*;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
use std::{fs, process};
|
use std::{fs, process};
|
||||||
|
|
||||||
pub struct DuplicateFinder {
|
pub struct DuplicateFinder {
|
||||||
number_of_checked_files: u64,
|
number_of_checked_files: usize,
|
||||||
number_of_checked_folders: u64,
|
number_of_checked_folders: usize,
|
||||||
number_of_ignored_things: u64,
|
number_of_ignored_things: usize,
|
||||||
number_of_files_which_has_duplicated_entries: u64,
|
number_of_duplicated_files: usize,
|
||||||
number_of_duplicated_files: u64,
|
|
||||||
// files : Vec<HashMap<FileEntry, Vec<FileEntry>>>,
|
// files : Vec<HashMap<FileEntry, Vec<FileEntry>>>,
|
||||||
files: HashMap<u64, FileEntry>,
|
files_size: HashMap<u64, Vec<FileEntry>>,
|
||||||
files_with_duplicated_entries: HashMap<u64, FileEntry>,
|
// files_hashes: HashMap<[u8],Vec<FileEntry>>,
|
||||||
// duplicated_entries // Same as files, but only with 2+ entries
|
// duplicated_entries // Same as files, but only with 2+ entries
|
||||||
// files : Vec<Vec<FileEntry>>,
|
// files : Vec<Vec<FileEntry>>,
|
||||||
excluded_directories: Vec<String>,
|
excluded_directories: Vec<String>,
|
||||||
included_directories: Vec<String>,
|
included_directories: Vec<String>,
|
||||||
|
// ignored_extensions: Vec<String>,
|
||||||
|
// allowed_extensions: Vec<String>,
|
||||||
|
// ignored_file_names: Vec<String>, // TODO Regex Support
|
||||||
|
// allowed_file_names: Vec<String>, // TODO Regex Support
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DuplicateFinder {
|
impl DuplicateFinder {
|
||||||
|
@ -26,25 +31,31 @@ impl DuplicateFinder {
|
||||||
number_of_checked_files: 0,
|
number_of_checked_files: 0,
|
||||||
number_of_checked_folders: 0,
|
number_of_checked_folders: 0,
|
||||||
number_of_ignored_things: 0,
|
number_of_ignored_things: 0,
|
||||||
number_of_files_which_has_duplicated_entries: 0,
|
|
||||||
number_of_duplicated_files: 0,
|
number_of_duplicated_files: 0,
|
||||||
files: Default::default(),
|
files_size: Default::default(),
|
||||||
files_with_duplicated_entries: Default::default(),
|
// files_hashes: Default::default(),
|
||||||
excluded_directories: vec![],
|
excluded_directories: vec![],
|
||||||
included_directories: vec![],
|
included_directories: vec![],
|
||||||
|
// ignored_extensions: vec![],
|
||||||
|
// allowed_extensions: vec![],
|
||||||
|
// ignored_file_names: vec![],
|
||||||
|
// allowed_file_names: vec![]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// TODO - Still isn't used but it will be probably required with GUI
|
||||||
// pub fn clear(&mut self) {
|
// pub fn clear(&mut self) {
|
||||||
|
//
|
||||||
// self.number_of_checked_files = 0;
|
// self.number_of_checked_files = 0;
|
||||||
// self.number_of_checked_folders = 0;
|
// self.number_of_checked_folders = 0;
|
||||||
// self.number_of_ignored_things = 0;
|
// self.number_of_ignored_things = 0;
|
||||||
// self.number_of_files_which_has_duplicated_entries = 0;
|
// self.number_of_files_which_has_duplicated_entries = 0;
|
||||||
// self.number_of_duplicated_files = 0;
|
// self.number_of_duplicated_files = 0;
|
||||||
// self.files.clear();
|
// self.files_size.clear();
|
||||||
// self.excluded_directories.clear();
|
// self.excluded_directories.clear();
|
||||||
// self.included_directories.clear();
|
// self.included_directories.clear();
|
||||||
// }
|
// }
|
||||||
pub fn find_duplicates(&mut self) {
|
pub fn find_duplicates_by_size(&mut self) {
|
||||||
|
// TODO add multithread checking for file hash
|
||||||
//let mut path;
|
//let mut path;
|
||||||
let start_time: SystemTime = SystemTime::now();
|
let start_time: SystemTime = SystemTime::now();
|
||||||
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 16); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
|
let mut folders_to_check: Vec<String> = Vec::with_capacity(1024 * 16); // This should be small enough too not see to big difference and big enough to store most of paths without needing to resize vector
|
||||||
|
@ -84,6 +95,7 @@ impl DuplicateFinder {
|
||||||
//println!("Directory\t - {:?}", next_folder); // DEBUG
|
//println!("Directory\t - {:?}", next_folder); // DEBUG
|
||||||
} else if metadata.is_file() {
|
} else if metadata.is_file() {
|
||||||
let current_file_name = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap();
|
let current_file_name = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap();
|
||||||
|
// println!("File\t\t - {:?}", current_file_name); // DEBUG
|
||||||
//file_to_check
|
//file_to_check
|
||||||
let fe: FileEntry = FileEntry {
|
let fe: FileEntry = FileEntry {
|
||||||
path: current_file_name,
|
path: current_file_name,
|
||||||
|
@ -91,10 +103,12 @@ impl DuplicateFinder {
|
||||||
created_date: metadata.created().unwrap(),
|
created_date: metadata.created().unwrap(),
|
||||||
modified_date: metadata.modified().unwrap(),
|
modified_date: metadata.modified().unwrap(),
|
||||||
};
|
};
|
||||||
self.files.insert(metadata.len(), fe);
|
if !self.files_size.contains_key(&metadata.len()) {
|
||||||
|
self.files_size.insert(metadata.len(), Vec::new());
|
||||||
|
}
|
||||||
|
self.files_size.get_mut(&metadata.len()).unwrap().push(fe);
|
||||||
|
|
||||||
self.number_of_checked_files += 1;
|
self.number_of_checked_files += 1;
|
||||||
// println!("File\t\t - {:?}", current_file); // DEBUG
|
|
||||||
} else {
|
} else {
|
||||||
// Probably this is symbolic links so we are free to ignore this
|
// Probably this is symbolic links so we are free to ignore this
|
||||||
// println!("Found another type of file {} {:?}","".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
|
// println!("Found another type of file {} {:?}","".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap(), metadata) //DEBUG
|
||||||
|
@ -103,34 +117,110 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.debug_print();
|
self.debug_print();
|
||||||
let end_time: SystemTime = SystemTime::now();
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates".to_string());
|
||||||
println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
|
//println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
|
||||||
}
|
}
|
||||||
// pub fn save_to_file(&self) {}
|
// pub fn save_to_file(&self) {}
|
||||||
|
|
||||||
|
/// Remove files which have unique size
|
||||||
|
pub fn remove_files_with_unique_size(&mut self) {
|
||||||
|
let start_time: SystemTime = SystemTime::now();
|
||||||
|
self.debug_print();
|
||||||
|
let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();
|
||||||
|
|
||||||
|
self.number_of_duplicated_files = 0;
|
||||||
|
|
||||||
|
for entry in &self.files_size {
|
||||||
|
if entry.1.len() > 1 {
|
||||||
|
self.number_of_duplicated_files += entry.1.len() - 1;
|
||||||
|
new_hashmap.insert(*entry.0, entry.1.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.files_size = new_hashmap;
|
||||||
|
|
||||||
|
self.debug_print();
|
||||||
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_files".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Should be slower than checking in different ways, but still needs to be checked
|
||||||
|
pub fn find_duplicates_by_hashing(mut self) {
|
||||||
|
let start_time: SystemTime = SystemTime::now();
|
||||||
|
let mut file_handler: File;
|
||||||
|
|
||||||
|
for entry in self.files_size {
|
||||||
|
let mut hashes: Vec<String> = Vec::new();
|
||||||
|
if entry.1.len() > 5 {
|
||||||
|
println!("{}", entry.1.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
for file_entry in entry.1.iter().enumerate() {
|
||||||
|
file_handler = match File::open(&file_entry.1.path) {
|
||||||
|
Ok(T) => T,
|
||||||
|
Err(_) => {
|
||||||
|
// Removing File may happens,so we should handle this
|
||||||
|
hashes.push("".to_owned());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut hasher: blake3::Hasher = blake3::Hasher::new();
|
||||||
|
let mut buffer = [0u8; 16384];
|
||||||
|
loop {
|
||||||
|
let n = file_handler.read(&mut buffer).unwrap();
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hasher.update(&buffer[..n]);
|
||||||
|
}
|
||||||
|
//println!("{}", hasher.finalize().to_hex().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_hashing".to_string());
|
||||||
|
}
|
||||||
|
// /// I'm not sure about performance, so maybe I
|
||||||
|
// pub fn find_small_duplicates_by_hashing(mut self){
|
||||||
|
// let start_time: SystemTime = SystemTime::now();
|
||||||
|
// let size_limit_for_small_files u64 = // 16 MB
|
||||||
|
// let mut new_hashmap
|
||||||
|
//
|
||||||
|
// DuplicateFinder::print_time(start_time, SystemTime::now(), "find_duplicates_by_comparting_begin_bytes_of_file".to_string());
|
||||||
|
// }
|
||||||
|
|
||||||
|
/// Prints how long the named function took to execute.
///
/// * `start_time` - moment at which the measured function started.
/// * `end_time` - moment at which the measured function finished.
/// * `function_name` - name shown in the printed message.
///
/// `SystemTime` is not monotonic: if the system clock was moved backwards
/// between the two samples, `end_time` is earlier than `start_time`. In that
/// case a note is printed instead of panicking.
pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
    match end_time.duration_since(start_time) {
        Ok(elapsed) => println!("Execution of function \"{}\" took {:?}", function_name, elapsed),
        Err(_) => println!(
            "Execution of function \"{}\" took an unknown amount of time (system clock went backwards)",
            function_name
        ),
    }
}
|
||||||
|
|
||||||
/// Setting include directories, panics when there is not directories available
|
/// Setting include directories, panics when there is not directories available
|
||||||
pub fn set_include_directory(&mut self, mut include_directory: String) {
|
pub fn set_include_directory(&mut self, mut include_directory: String) {
|
||||||
if include_directory.len() == 0 {
|
let start_time: SystemTime = SystemTime::now();
|
||||||
|
|
||||||
|
if include_directory.is_empty() {
|
||||||
println!("At least one directory must be provided")
|
println!("At least one directory must be provided")
|
||||||
}
|
}
|
||||||
|
|
||||||
include_directory = include_directory.replace("\"", "");
|
include_directory = include_directory.replace("\"", "");
|
||||||
let directories: Vec<String> = include_directory.split(",").map(String::from).collect();
|
let directories: Vec<String> = include_directory.split(',').map(String::from).collect();
|
||||||
let mut checked_directories: Vec<String> = Vec::new();
|
let mut checked_directories: Vec<String> = Vec::new();
|
||||||
|
|
||||||
for directory in directories {
|
for directory in directories {
|
||||||
if directory == "/" {
|
if directory == "/" {
|
||||||
println!("Using / is probably not good idea, you may go out of ram.");
|
println!("Using / is probably not good idea, you may go out of ram.");
|
||||||
}
|
}
|
||||||
if directory.contains("*") {
|
if directory.contains('*') {
|
||||||
println!("Include Directory ERROR: Wildcards are not supported, please don't use it.");
|
println!("Include Directory ERROR: Wildcards are not supported, please don't use it.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
if directory.starts_with("~") {
|
if directory.starts_with('~') {
|
||||||
println!("Include Directory ERROR: ~ in path isn't supported.");
|
println!("Include Directory ERROR: ~ in path isn't supported.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
if !directory.starts_with("/") {
|
if !directory.starts_with('/') {
|
||||||
println!("Include Directory ERROR: Relative path are not supported.");
|
println!("Include Directory ERROR: Relative path are not supported.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
@ -144,45 +234,46 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The directory must end with '/', otherwise e.g. /home/rafal could wrongly be treated as a parent folder of /home/rafalinho
|
// The directory must end with '/', otherwise e.g. /home/rafal could wrongly be treated as a parent folder of /home/rafalinho
|
||||||
if !directory.ends_with("/") {
|
if !directory.ends_with('/') {
|
||||||
checked_directories.push(directory + "/");
|
checked_directories.push(directory + "/");
|
||||||
} else {
|
} else {
|
||||||
checked_directories.push(directory);
|
checked_directories.push(directory);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if checked_directories.len() == 0 {
|
if checked_directories.is_empty() {
|
||||||
println!("Not found even one correct path to include.");
|
println!("Not found even one correct path to include.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.included_directories = checked_directories;
|
self.included_directories = checked_directories;
|
||||||
|
|
||||||
println!("Included directories - {:?}", self.included_directories);
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "set_include_directory".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
|
pub fn set_exclude_directory(&mut self, mut exclude_directory: String) {
|
||||||
if exclude_directory.len() == 0 {
|
let start_time: SystemTime = SystemTime::now();
|
||||||
|
if exclude_directory.is_empty() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
exclude_directory = exclude_directory.replace("\"", "");
|
exclude_directory = exclude_directory.replace("\"", "");
|
||||||
let directories: Vec<String> = exclude_directory.split(",").map(String::from).collect();
|
let directories: Vec<String> = exclude_directory.split(',').map(String::from).collect();
|
||||||
let mut checked_directories: Vec<String> = Vec::new();
|
let mut checked_directories: Vec<String> = Vec::new();
|
||||||
|
|
||||||
for directory in directories {
|
for directory in directories {
|
||||||
if directory == "/" {
|
if directory == "/" {
|
||||||
println!("Exclude Directory ERROR: Excluding / is pointless, because it means that no files will be scanned.");
|
println!("Exclude Directory ERROR: Excluding / is pointless, because it means that no files will be scanned.");
|
||||||
}
|
}
|
||||||
if directory.contains("*") {
|
if directory.contains('*') {
|
||||||
println!("Exclude Directory ERROR: Wildcards are not supported, please don't use it.");
|
println!("Exclude Directory ERROR: Wildcards are not supported, please don't use it.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
if directory.starts_with("~") {
|
if directory.starts_with('~') {
|
||||||
println!("Exclude Directory ERROR: ~ in path isn't supported.");
|
println!("Exclude Directory ERROR: ~ in path isn't supported.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
if !directory.starts_with("/") {
|
if !directory.starts_with('/') {
|
||||||
println!("Exclude Directory ERROR: Relative path are not supported.");
|
println!("Exclude Directory ERROR: Relative path are not supported.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
@ -196,7 +287,7 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The directory must end with '/', otherwise e.g. /home/rafal could wrongly be treated as a parent folder of /home/rafalinho
|
// The directory must end with '/', otherwise e.g. /home/rafal could wrongly be treated as a parent folder of /home/rafalinho
|
||||||
if !directory.ends_with("/") {
|
if !directory.ends_with('/') {
|
||||||
checked_directories.push(directory + "/");
|
checked_directories.push(directory + "/");
|
||||||
} else {
|
} else {
|
||||||
checked_directories.push(directory);
|
checked_directories.push(directory);
|
||||||
|
@ -205,7 +296,7 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
self.excluded_directories = checked_directories;
|
self.excluded_directories = checked_directories;
|
||||||
|
|
||||||
println!("Excluded directories - {:?}", &self.excluded_directories);
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn debug_print(&self) {
|
pub fn debug_print(&self) {
|
||||||
|
@ -213,9 +304,8 @@ impl DuplicateFinder {
|
||||||
println!("Number of all checked files - {}", self.number_of_checked_files);
|
println!("Number of all checked files - {}", self.number_of_checked_files);
|
||||||
println!("Number of all checked folders - {}", self.number_of_checked_folders);
|
println!("Number of all checked folders - {}", self.number_of_checked_folders);
|
||||||
println!("Number of all ignored things - {}", self.number_of_ignored_things);
|
println!("Number of all ignored things - {}", self.number_of_ignored_things);
|
||||||
println!("Number of all files with duplicates - {}", self.number_of_files_which_has_duplicated_entries);
|
|
||||||
println!("Number of duplicated files - {}", self.number_of_duplicated_files);
|
println!("Number of duplicated files - {}", self.number_of_duplicated_files);
|
||||||
println!("Files list - {}", self.files.len());
|
println!("Files list - {}", self.files_size.len());
|
||||||
println!("Excluded directories - {:?}", self.excluded_directories);
|
println!("Excluded directories - {:?}", self.excluded_directories);
|
||||||
println!("Included directories - {:?}", self.included_directories);
|
println!("Included directories - {:?}", self.included_directories);
|
||||||
println!("-----------------------------------------");
|
println!("-----------------------------------------");
|
||||||
|
@ -225,6 +315,8 @@ impl DuplicateFinder {
|
||||||
/// let df : DuplicateFinder = saf
|
/// let df : DuplicateFinder = saf
|
||||||
/// ```
|
/// ```
|
||||||
pub fn optimize_directories(&mut self) {
|
pub fn optimize_directories(&mut self) {
|
||||||
|
let start_time: SystemTime = SystemTime::now();
|
||||||
|
|
||||||
let mut optimized_included: Vec<String> = Vec::<String>::new();
|
let mut optimized_included: Vec<String> = Vec::<String>::new();
|
||||||
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
|
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
|
||||||
// Remove duplicated entries like: "/", "/"
|
// Remove duplicated entries like: "/", "/"
|
||||||
|
@ -249,7 +341,7 @@ impl DuplicateFinder {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if is_inside == false {
|
if !is_inside {
|
||||||
optimized_excluded.push(ed_checked.to_string());
|
optimized_excluded.push(ed_checked.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -266,7 +358,7 @@ impl DuplicateFinder {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if is_inside == false {
|
if !is_inside {
|
||||||
optimized_included.push(id_checked.to_string());
|
optimized_included.push(id_checked.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -329,7 +421,7 @@ impl DuplicateFinder {
|
||||||
self.excluded_directories = optimized_excluded;
|
self.excluded_directories = optimized_excluded;
|
||||||
// optimized_excluded = Vec::<String>::new();
|
// optimized_excluded = Vec::<String>::new();
|
||||||
|
|
||||||
if self.included_directories.len() == 0 {
|
if self.included_directories.is_empty() {
|
||||||
println!("Optimize Directories ERROR: Excluded directories overlaps all included directories.");
|
println!("Optimize Directories ERROR: Excluded directories overlaps all included directories.");
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
@ -337,12 +429,24 @@ impl DuplicateFinder {
|
||||||
// Not needed, but better is to have sorted everything
|
// Not needed, but better is to have sorted everything
|
||||||
self.excluded_directories.sort();
|
self.excluded_directories.sort();
|
||||||
self.included_directories.sort();
|
self.included_directories.sort();
|
||||||
|
DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
struct FileEntry {
|
struct FileEntry {
|
||||||
pub path: String,
|
pub path: String,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub created_date: SystemTime,
|
pub created_date: SystemTime,
|
||||||
pub modified_date: SystemTime,
|
pub modified_date: SystemTime,
|
||||||
}
|
}
|
||||||
|
impl FileEntry {
|
||||||
|
// pub fn return_copy(&self) -> FileEntry {
|
||||||
|
// let new_copy : FileEntry = FileEntry{
|
||||||
|
// path: self.path.to_string(),
|
||||||
|
// size: self.size,
|
||||||
|
// created_date: self.created_date,
|
||||||
|
// modified_date: self.modified_date
|
||||||
|
// };
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
|
@ -27,7 +27,9 @@ fn main() {
|
||||||
df.set_include_directory(arguments[2].to_string());
|
df.set_include_directory(arguments[2].to_string());
|
||||||
df.optimize_directories();
|
df.optimize_directories();
|
||||||
df.debug_print();
|
df.debug_print();
|
||||||
df.find_duplicates();
|
df.find_duplicates_by_size();
|
||||||
|
df.remove_files_with_unique_size();
|
||||||
|
df.find_duplicates_by_hashing();
|
||||||
}
|
}
|
||||||
argum => println!("{} argument is not supported, check help for more info.", argum),
|
argum => println!("{} argument is not supported, check help for more info.", argum),
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue