
Improved searching and printing of duplicated entries,

Saving to file
Rafał Mikrut 2020-09-15 19:17:13 +02:00
parent c28b5a8a2f
commit 0f06ed636a
5 changed files with 159 additions and 90 deletions


@@ -1,4 +1,14 @@
## Version 0.10
## Version 0.1.1
- Added images to readme
- Better GTK buttons
- Basic search in GTK
- Cleaned core of println calls
- Core functions no longer use process::exit (everything is handled via messages/errors/warnings; see the sketch after this changelog excerpt)
- Added support for non-recursive search
- Improved finding number and size of duplicated files
- Saving results to file
## Version 0.1.0
- Initial Version
- Duplicate file finder
- Empty folder finder
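
The "messages/errors/warnings" approach mentioned in the 0.1.1 list above is what the core changes in this commit rely on: the Info struct in duplicate.rs keeps errors, warnings and messages vectors, and functions push into them instead of calling process::exit. A minimal sketch of that pattern (names here are hypothetical, not the exact Czkawka API):

#[derive(Default)]
struct Messages {
    errors: Vec<String>,
    warnings: Vec<String>,
    messages: Vec<String>,
}

#[derive(Default)]
struct Finder {
    infos: Messages,
}

impl Finder {
    // Instead of println! followed by process::exit(1), the problem is only
    // recorded; the CLI or GUI front end decides later how to report it.
    fn set_excluded_items(&mut self, expression: &str) {
        if !expression.contains('*') {
            self.infos
                .warnings
                .push(format!("Excluded Items Warning: Wildcard * is required in expression, ignoring {}", expression));
        }
    }
}

fn main() {
    let mut finder = Finder::default();
    finder.set_excluded_items("no-wildcard-here");
    for warning in &finder.infos.warnings {
        eprintln!("WARNING: {}", warning);
    }
}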


@@ -61,9 +61,7 @@ fn main() {
let mut check_method: duplicate::CheckingMethod = duplicate::CheckingMethod::HASH;
if ArgumentsPair::has_command(&arguments, "-i") {
if !df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false)) {
process::exit(1);
}
df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
@@ -95,7 +93,10 @@ fn main() {
}
if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
} else {
df.set_excluded_items("DEFAULT".to_string());
}
if ArgumentsPair::has_command(&arguments, "-o") {
df.set_recursive_search(false);
}
@@ -136,6 +137,10 @@ fn main() {
df.find_duplicates(&check_method, &delete_method);
if ArgumentsPair::has_command(&arguments, "-f") {
df.save_results_to_file(&ArgumentsPair::get_argument(&arguments, "-f", false));
}
print_infos(df.get_infos());
}
"--h" | "--help" => {
@@ -179,11 +184,12 @@ Usage of Czkawka:
czkawka --help
czkawka
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-delete = "aeo"] - search for duplicate files
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = "DEFAULT"] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-f file_to_save = "results.txt"] [-delete = "aeo"] - search for duplicate files
-i directory_to_search - list of directories which will be searched, e.g. /home/rafal
-e exclude_directories - list of directories which will be excluded from the search.
-k excluded_items - list of excluded items, which must contain the * wildcard (may be slow)
-o non_recursive - this option prevents recursive checking of folders
-o - this option prevents recursive checking of folders
-f file_to_save - saves results to a file
-s min_size - minimum size of checked files in bytes; assigning a bigger value may speed up searching.
-x allowed_extension - list of checked extensions, e.g. "jpg,mp4" will allow checking "book.jpg" and "car.mp4" but not "roman.png". There are also helpful macros which make it easy to use a typical extension set, like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
-l type_of_search - allows choosing between the fastest method, which takes into account only size, and a more accurate one, which checks whether file content is the same (hashes).
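
For illustration only (the path and values below are made up, not from this commit), a run combining the new -f and -o options could look like:

czkawka --d -i /home/rafal -s 4096 -f results.txt -o

This would search /home/rafal without descending into subfolders, consider only files of at least 4096 bytes, and write the findings to results.txt.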


@@ -36,17 +36,16 @@ impl Common {
pub fn delete_one_entry(entry: &str) -> String {
let path: &Path = Path::new(entry);
let mut warning: String = String::from("");
if path.is_dir() {
match fs::remove_dir_all(&entry) {
Ok(_) => (),
Err(_) => warning = "Failed to remove folder ".to_owned() + entry,
}
} else {
match fs::remove_file(&entry) {
Ok(_) => (),
Err(_) => warning = "Failed to remove file ".to_owned() + entry,
}
if path.is_dir() {
match fs::remove_dir_all(&entry) {
Ok(_) => (),
Err(_) => warning = "Failed to remove folder ".to_owned() + entry,
}
} else {
match fs::remove_file(&entry) {
Ok(_) => (),
Err(_) => warning = "Failed to remove file ".to_owned() + entry,
}
}
warning
}


@@ -34,13 +34,13 @@ struct FileEntry {
/// Struct with required information's to work
pub struct DuplicateFinder {
infos: Info,
files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
allowed_extensions: Vec<String>,
excluded_items: Vec<String>,
excluded_directories: Vec<String>,
included_directories: Vec<String>,
recursive_search : bool,
recursive_search: bool,
min_file_size: u64,
}
@@ -53,8 +53,12 @@ pub struct Info {
pub number_of_checked_folders: usize,
pub number_of_ignored_files: usize,
pub number_of_ignored_things: usize,
pub number_of_duplicated_files: usize,
pub lost_space: u64,
pub number_of_groups_by_size: usize,
pub number_of_duplicated_files_by_size: usize,
pub number_of_groups_by_hash: usize,
pub number_of_duplicated_files_by_hash: usize,
pub lost_space_by_size: u64,
pub lost_space_by_hash: u64,
pub number_of_removed_files: usize,
pub number_of_failed_to_remove_files: usize,
pub gained_space: u64,
@@ -69,8 +73,12 @@ impl Info {
number_of_ignored_files: 0,
number_of_checked_folders: 0,
number_of_ignored_things: 0,
number_of_duplicated_files: 0,
lost_space: 0,
number_of_groups_by_size: 0,
number_of_duplicated_files_by_size: 0,
number_of_groups_by_hash: 0,
number_of_duplicated_files_by_hash: 0,
lost_space_by_size: 0,
lost_space_by_hash: 0,
number_of_removed_files: 0,
number_of_failed_to_remove_files: 0,
gained_space: 0,
@@ -109,7 +117,6 @@ impl DuplicateFinder {
if *check_method == CheckingMethod::HASH {
self.check_files_hash();
}
self.calculate_lost_space(check_method);
self.delete_files(check_method, delete_method);
self.debug_print();
}
@@ -118,8 +125,8 @@ impl DuplicateFinder {
self.min_file_size = min_size;
}
pub fn set_recursive_search(&mut self, reqursive_search : bool){
self.recursive_search = reqursive_search;
pub fn set_recursive_search(&mut self, recursive_search: bool) {
self.recursive_search = recursive_search;
}
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
// let start_time: SystemTime = SystemTime::now();
@@ -138,6 +145,11 @@ impl DuplicateFinder {
if expression == "" {
continue;
}
if expression == "DEFAULT" {
// TODO add more files by default
checked_expressions.push("*/.git/*".to_string());
continue;
}
if !expression.contains('*') {
self.infos.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + &*expression);
continue;
@@ -282,25 +294,6 @@ impl DuplicateFinder {
//Common::print_time(start_time, SystemTime::now(), "set_exclude_directory".to_string());
}
fn calculate_lost_space(&mut self, check_method: &CheckingMethod) {
let mut bytes: u64 = 0;
match check_method {
CheckingMethod::SIZE => {
for i in &self.files_with_identical_size {
bytes += i.0 * (i.1.len() as u64 - 1);
}
}
CheckingMethod::HASH => {
for i in &self.files_with_identical_hashes {
for j in i.1 {
bytes += i.0 * (j.len() as u64 - 1);
}
}
}
}
self.infos.lost_space = bytes;
}
fn check_files_size(&mut self) {
// TODO maybe add multithreading checking for file hash
@@ -311,6 +304,7 @@ impl DuplicateFinder {
for id in &self.included_directories {
folders_to_check.push(id.to_string());
}
self.infos.number_of_checked_folders += folders_to_check.len();
let mut current_folder: String;
let mut next_folder: String;
@@ -340,12 +334,13 @@ impl DuplicateFinder {
} //Permissions denied
};
if metadata.is_dir() {
self.infos.number_of_checked_folders += 1;
// if entry_data.file_name().into_string().is_err() { // Probably this can be removed, if crash still will be happens, then uncomment this line
// self.infos.warnings.push("Cannot read folder name in dir ".to_string() + &*current_folder);
// continue; // Permissions denied
// }
if !self.recursive_search{
if !self.recursive_search {
continue;
}
@@ -371,7 +366,6 @@ impl DuplicateFinder {
}
folders_to_check.push(next_folder);
}
self.infos.number_of_checked_folders += 1;
} else if metadata.is_file() {
let mut have_valid_extension: bool;
let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
@@ -441,23 +435,77 @@ impl DuplicateFinder {
Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
//println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
}
// pub fn save_results_to_file(&self) {} // TODO Saving results to files
pub fn save_results_to_file(&mut self, file_name: &str) {
let file_name: String = match file_name {
"" => "results.txt".to_string(),
k => k.to_string(),
};
let mut file = match File::create(&file_name) {
Ok(t) => t,
Err(_) => {
self.infos.errors.push("Failed to create file ".to_string() + file_name.as_str());
return;
}
};
match file.write_all(b"Results of searching\n\n") {
Ok(_) => (),
Err(_) => {
self.infos.errors.push("Failed to save results to file ".to_string() + file_name.as_str());
return;
}
}
if !self.files_with_identical_size.is_empty() {
file.write_all(b"-------------------------------------------------Files with same size-------------------------------------------------\n").unwrap();
file.write_all(("Found ".to_string() + self.infos.number_of_duplicated_files_by_size.to_string().as_str() + " duplicated files which in " + self.files_with_identical_size.len().to_string().as_str() + " groups.\n").as_bytes())
.unwrap();
for (size, files) in self.files_with_identical_size.iter().rev() {
file.write_all(b"\n---- Size ").unwrap();
file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
file.write_all(b"\n").unwrap();
for file_entry in files {
file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
}
}
}
if !self.files_with_identical_hashes.is_empty() {
file.write_all(b"-------------------------------------------------Files with same hashes-------------------------------------------------\n").unwrap();
file.write_all(("Found ".to_string() + self.infos.number_of_duplicated_files_by_size.to_string().as_str() + " duplicated files which in " + self.files_with_identical_hashes.len().to_string().as_str() + " groups.\n").as_bytes())
.unwrap();
for (size, files) in self.files_with_identical_hashes.iter().rev() {
for vector in files {
file.write_all(b"\n---- Size ").unwrap();
file.write_all(size.file_size(options::BINARY).unwrap().as_bytes()).unwrap();
file.write_all((" (".to_string() + size.to_string().as_str() + ")").as_bytes()).unwrap();
file.write_all(b"\n").unwrap();
for file_entry in vector {
file.write_all((file_entry.path.clone() + "\n").as_bytes()).unwrap();
}
}
}
}
}
/// Remove files which have unique size
fn remove_files_with_unique_size(&mut self) {
let start_time: SystemTime = SystemTime::now();
let mut new_hashmap: HashMap<u64, Vec<FileEntry>> = Default::default();
let mut new_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
self.infos.number_of_duplicated_files = 0;
self.infos.number_of_duplicated_files_by_size = 0;
for entry in &self.files_with_identical_size {
if entry.1.len() > 1 {
self.infos.number_of_duplicated_files += entry.1.len() - 1;
new_hashmap.insert(*entry.0, entry.1.clone());
for (size, vector) in &self.files_with_identical_size {
if vector.len() > 1 {
self.infos.number_of_duplicated_files_by_size += vector.len() - 1;
self.infos.number_of_groups_by_size += 1;
self.infos.lost_space_by_size += (vector.len() as u64 - 1) * size;
new_map.insert(*size, vector.clone());
}
}
self.files_with_identical_size = new_hashmap;
self.files_with_identical_size = new_map;
Common::print_time(start_time, SystemTime::now(), "remove_files_with_unique_size".to_string());
}
@@ -492,7 +540,7 @@ impl DuplicateFinder {
error_reading_file = true;
break;
}
}; //.unwrap();
};
if n == 0 {
break;
}
@@ -511,6 +559,15 @@ impl DuplicateFinder {
}
}
}
for (size, vector) in &self.files_with_identical_hashes {
for vec_file_entry in vector {
self.infos.number_of_duplicated_files_by_hash += vec_file_entry.len() - 1;
self.infos.number_of_groups_by_hash += 1;
self.infos.lost_space_by_hash += (vec_file_entry.len() as u64 - 1) * size;
}
}
Common::print_time(start_time, SystemTime::now(), "check_files_hash".to_string());
}
@@ -518,30 +575,32 @@ impl DuplicateFinder {
/// Setting include directories, panics when there is not directories available
fn debug_print(&self) {
println!("---------------DEBUG PRINT---------------");
println!("Recursive search - {}", self.recursive_search.to_string());
println!("### Infos");
println!("Errors size - {}", self.infos.errors.len());
println!("Warnings size - {}", self.infos.warnings.len());
println!("Messages size - {}", self.infos.messages.len());
println!("Number of checked files - {}", self.infos.number_of_checked_files);
println!("Number of checked folders - {}", self.infos.number_of_checked_folders);
println!("Number of ignored files - {}", self.infos.number_of_ignored_files);
println!("Number of ignored things(like symbolic links) - {}", self.infos.number_of_ignored_things);
println!("Number of duplicated files - {}", self.infos.number_of_duplicated_files);
let mut file_size: u64 = 0;
for i in &self.files_with_identical_size {
file_size += i.1.len() as u64;
}
println!("Files list size - {} ({})", self.files_with_identical_size.len(), file_size);
let mut hashed_file_size: u64 = 0;
for i in &self.files_with_identical_hashes {
for j in i.1 {
hashed_file_size += j.len() as u64;
}
}
println!("Hashed Files list size - {} ({})", self.files_with_identical_hashes.len(), hashed_file_size);
println!("Number of duplicated files by size(in groups) - {} ({})", self.infos.number_of_duplicated_files_by_size, self.infos.number_of_groups_by_size);
println!("Number of duplicated files by hash(in groups) - {} ({})", self.infos.number_of_duplicated_files_by_hash, self.infos.number_of_groups_by_hash);
println!("Lost space by size - {} ({} bytes)", self.infos.lost_space_by_size.file_size(options::BINARY).unwrap(), self.infos.lost_space_by_size);
println!("Lost space by hash - {} ({} bytes)", self.infos.lost_space_by_hash.file_size(options::BINARY).unwrap(), self.infos.lost_space_by_hash);
println!("Gained space by removing duplicated entries - {} ({} bytes)", self.infos.gained_space.file_size(options::BINARY).unwrap(), self.infos.gained_space);
println!("Number of removed files - {}", self.infos.number_of_removed_files);
println!("Number of failed to remove files - {}", self.infos.number_of_failed_to_remove_files);
println!("Lost space - {} ({} bytes)", self.infos.lost_space.file_size(options::BINARY).unwrap(), self.infos.lost_space);
println!("Gained space by removing duplicated entries - {} ({} bytes)", self.infos.gained_space.file_size(options::BINARY).unwrap(), self.infos.gained_space);
println!("### Other");
println!("Files list size - {}", self.files_with_identical_size.len());
println!("Hashed Files list size - {}", self.files_with_identical_hashes.len());
println!("Allowed extensions - {:?}", self.allowed_extensions);
println!("Excluded items - {:?}", self.excluded_items);
println!("Excluded directories - {:?}", self.excluded_directories);
println!("Included directories - {:?}", self.included_directories);
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Minimum file size - {:?}", self.min_file_size);
println!("-----------------------------------------");
}
@@ -563,7 +622,7 @@ impl DuplicateFinder {
"Found {} duplicated files in {} groups with same content which took {}:",
number_of_files,
number_of_groups,
self.infos.lost_space.file_size(options::BINARY).unwrap()
self.infos.lost_space_by_size.file_size(options::BINARY).unwrap()
);
for (key, vector) in self.files_with_identical_hashes.iter().rev() {
println!("Size - {}", key.file_size(options::BINARY).unwrap());
@@ -585,7 +644,7 @@ impl DuplicateFinder {
"Found {} files in {} groups with same size(may have different content) which took {}:",
number_of_files,
number_of_groups,
self.infos.lost_space.file_size(options::BINARY).unwrap()
self.infos.lost_space_by_size.file_size(options::BINARY).unwrap()
);
for i in &self.files_with_identical_size {
println!("Size - {}", i.0);
@@ -614,7 +673,8 @@ impl DuplicateFinder {
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
if self.recursive_search { // This is only point which can't be done when recursive search is disabled.
if self.recursive_search {
// This is only point which can't be done when recursive search is disabled.
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;


@@ -21,15 +21,11 @@ fn main() {
// Buttons State
let mut hashmap_buttons : HashMap<&str,bool> = Default::default();
for i in ["duplicate","empty_folder"].iter() {
hashmap_buttons.insert(i,false);
let mut hashmap_buttons: HashMap<&str, bool> = Default::default();
for i in ["duplicate", "empty_folder"].iter() {
hashmap_buttons.insert(i, false);
}
// let mut hashmap_buttons : HashMap<&str,bool> = Default::default();
// let mut buttons_state : HashMap<&str,HashMap<&str,bool>> = Default::default();
// for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
@@ -39,9 +35,7 @@ fn main() {
// for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
// buttons_state.insert(i,hashmap_buttons.clone());
// }
// buttons_state.insert(hashmap_buttons.clone());
// buttons_state.insert(hashmap_buttons.clone());
// GUI Notepad Buttons
@@ -113,7 +107,7 @@ fn main() {
{
// Connect Buttons
// let buttons_search = buttons_search.clone();
let buttons_search_clone = buttons_search.clone();
// let info_entry = info_entry.clone();
buttons_search.connect_clicked(move |_| {
@@ -128,7 +122,7 @@ fn main() {
df.set_allowed_extensions("".to_owned());
df.set_min_file_size(1000); // TODO Change to proper value
df.find_duplicates(&CheckingMethod::HASH, &DeleteMethod::None);
let infos = df.get_infos();
let _infos = df.get_infos();
info_entry.set_text("Found TODO duplicates files in TODO groups which took TODO GB/MB/KB/B");
@@ -138,7 +132,7 @@ fn main() {
buttons_delete.show();
//
buttons_search.show();
buttons_search_clone.show();
buttons_stop.hide();
buttons_resume.hide();
buttons_pause.hide();