1
0
Fork 0
mirror of synced 2024-05-05 21:13:41 +12:00

Added several options to delete files in different ways.

This commit is contained in:
Rafał Mikrut 2020-08-31 16:15:56 +02:00
parent 1095d719d0
commit bbf3606f4f
2 changed files with 188 additions and 95 deletions

View file

@ -4,7 +4,7 @@ use std::collections::{BTreeMap, HashMap};
use std::fs::{File, Metadata}; use std::fs::{File, Metadata};
use std::io::prelude::*; use std::io::prelude::*;
use std::path::Path; use std::path::Path;
use std::time::SystemTime; use std::time::{SystemTime, UNIX_EPOCH};
use std::{fs, process}; use std::{fs, process};
#[derive(PartialEq)] #[derive(PartialEq)]
@ -15,13 +15,21 @@ pub enum CheckingMethod {
// TODO // TODO
#[allow(dead_code)] #[allow(dead_code)]
pub enum TypeOfDelete { #[derive(Eq, PartialEq)]
AllExceptRandom, // Choose one random file from duplicates which won't be deleted pub enum DeleteMethod {
None,
AllExceptNewest, AllExceptNewest,
AllExceptOldest, AllExceptOldest,
OneOldest, OneOldest,
OneNewest, OneNewest,
OneRandom }
#[derive(Clone)]
struct FileEntry {
pub path: String,
pub size: u64,
pub created_date: SystemTime,
pub modified_date: SystemTime,
} }
pub struct DuplicateFinder { pub struct DuplicateFinder {
@ -59,19 +67,17 @@ impl DuplicateFinder {
} }
} }
pub fn find_duplicates(mut self, check_method: CheckingMethod, delete_files: bool) { pub fn find_duplicates(mut self, check_method: &CheckingMethod, delete_method: &DeleteMethod) {
self.optimize_directories(); self.optimize_directories();
self.debug_print(); self.debug_print();
self.check_files_size(); self.check_files_size();
self.remove_files_with_unique_size(); self.remove_files_with_unique_size();
if check_method == CheckingMethod::HASH { if *check_method == CheckingMethod::HASH {
self.check_files_hash(); self.check_files_hash();
} }
self.calculate_lost_space(&check_method); self.calculate_lost_space(check_method);
self.print_duplicated_entries(&check_method); self.print_duplicated_entries(check_method);
if delete_files { self.delete_files(check_method, delete_method);
self.delete_files(&check_method);
}
} }
pub fn set_min_file_size(&mut self, min_size: u64) { pub fn set_min_file_size(&mut self, min_size: u64) {
@ -610,34 +616,23 @@ impl DuplicateFinder {
DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_directories".to_string()); DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
} }
fn delete_files(&mut self, check_method: &CheckingMethod) { fn delete_files(&mut self, check_method: &CheckingMethod, delete_method: &DeleteMethod) {
if *delete_method == DeleteMethod::None {
return;
}
let start_time: SystemTime = SystemTime::now(); let start_time: SystemTime = SystemTime::now();
let mut errors: Vec<String> = Vec::new(); let mut errors: Vec<String> = Vec::new();
match check_method { match check_method {
CheckingMethod::HASH => { CheckingMethod::HASH => {
for entry in &self.files_with_identical_hashes { for entry in &self.files_with_identical_hashes {
for vector in entry.1 { for vector in entry.1 {
for files in vector.iter().enumerate() { delete_files(&vector, &delete_method, &mut errors);
if files.0 != 0 {
match fs::remove_file(&files.1.path) {
Ok(_) => (),
Err(_) => errors.push(files.1.path.clone()),
};
}
}
} }
} }
} }
CheckingMethod::SIZE => { CheckingMethod::SIZE => {
for entry in &self.files_with_identical_size { for entry in &self.files_with_identical_size {
for files in entry.1.iter().enumerate() { delete_files(&entry.1, &delete_method, &mut errors);
if files.0 != 0 {
match fs::remove_file(&files.1.path) {
Ok(_) => (),
Err(_) => errors.push(files.1.path.clone()),
};
}
}
} }
} }
} }
@ -647,11 +642,73 @@ impl DuplicateFinder {
DuplicateFinder::print_time(start_time, SystemTime::now(), "delete_files".to_string()); DuplicateFinder::print_time(start_time, SystemTime::now(), "delete_files".to_string());
} }
} }
fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, errors: &mut Vec<String>) {
#[derive(Clone)] assert!(vector.len() > 1, "Vector length must be bigger than 1(This should be done in previous steps).");
struct FileEntry { let mut q_index: usize = 0;
pub path: String, let mut q_time: u64 = 0;
pub size: u64, match delete_method {
pub created_date: SystemTime, DeleteMethod::OneOldest => {
pub modified_date: SystemTime, for files in vector.iter().enumerate() {
let time_since_epoch = files.1.created_date.duration_since(UNIX_EPOCH).expect("Invalid file date").as_secs();
if q_time == 0 || q_time > time_since_epoch {
q_time = time_since_epoch;
q_index = files.0;
}
}
match fs::remove_file(vector[q_index].path.clone()) {
Ok(_) => (),
Err(_) => errors.push(vector[q_index].path.clone()),
};
}
DeleteMethod::OneNewest => {
for files in vector.iter().enumerate() {
let time_since_epoch = files.1.created_date.duration_since(UNIX_EPOCH).expect("Invalid file date").as_secs();
if q_time == 0 || q_time < time_since_epoch {
q_time = time_since_epoch;
q_index = files.0;
}
}
match fs::remove_file(vector[q_index].path.clone()) {
Ok(_) => (),
Err(_) => errors.push(vector[q_index].path.clone()),
};
}
DeleteMethod::AllExceptOldest => {
for files in vector.iter().enumerate() {
let time_since_epoch = files.1.created_date.duration_since(UNIX_EPOCH).expect("Invalid file date").as_secs();
if q_time == 0 || q_time > time_since_epoch {
q_time = time_since_epoch;
q_index = files.0;
}
}
for files in vector.iter().enumerate() {
if q_index != files.0 {
match fs::remove_file(vector[files.0].path.clone()) {
Ok(_) => (),
Err(_) => errors.push(vector[files.0].path.clone()),
};
}
}
}
DeleteMethod::AllExceptNewest => {
for files in vector.iter().enumerate() {
let time_since_epoch = files.1.created_date.duration_since(UNIX_EPOCH).expect("Invalid file date").as_secs();
if q_time == 0 || q_time < time_since_epoch {
q_time = time_since_epoch;
q_index = files.0;
}
}
for files in vector.iter().enumerate() {
if q_index != files.0 {
match fs::remove_file(vector[files.0].path.clone()) {
Ok(_) => (),
Err(_) => errors.push(vector[files.0].path.clone()),
};
}
}
}
DeleteMethod::None => {
panic!();
}
};
} }

View file

@ -18,37 +18,38 @@ fn main() {
// Assigning commands with arguments // Assigning commands with arguments
let mut arguments: Vec<ArgumentsPair> = Vec::new(); let mut arguments: Vec<ArgumentsPair> = Vec::new();
let mut can_pass_argument: bool = false; for argument in all_arguments {
for argument in 0..all_arguments.len() { if argument.starts_with("--") {
if all_arguments[argument].starts_with("--") { commands_arguments.push(argument);
commands_arguments.push(all_arguments[argument].clone()); } else if argument.starts_with('-') {
} else if all_arguments[argument].starts_with('-') { let a: ArgumentsPair = ArgumentsPair {
if argument + 1 < all_arguments.len() { command: argument,
if all_arguments[argument + 1].starts_with("--") || all_arguments[argument + 1].starts_with('-') { argument: Option::None,
println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]); };
process::exit(1); arguments.push(a);
} else { } else {
let a: ArgumentsPair = ArgumentsPair { if arguments.is_empty() {
command: all_arguments[argument].clone(), println!("FATAL ERROR: Trying to use {} without any arguments(like -i -e -delete)", argument);
argument: all_arguments[argument + 1].clone(),
};
arguments.push(a);
can_pass_argument = true;
}
} else {
println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]);
process::exit(1); process::exit(1);
} }
} else if !can_pass_argument { if arguments[arguments.len() - 1].argument != Option::None {
println!("FATAL ERROR: Argument \"{}\" is not linked to any command", all_arguments[argument]); println!("FATAL ERROR: Trying set second parameter {}, but only one is supported", argument); // This may be changed in future to support 2 or more attributes with space
process::exit(1); process::exit(1);
} else { }
can_pass_argument = false; let last_element = arguments.len() - 1;
arguments[last_element].argument = Option::from(argument);
} }
} }
for a in &arguments { for a in &arguments {
println!("Argument number {} - {}", a.command, a.argument); println!(
"Argument number {} - {}",
a.command,
match &a.argument {
Some(t) => t.clone(),
None => "MISSING_ARGUMENT".to_string(),
}
);
} }
if commands_arguments.is_empty() { if commands_arguments.is_empty() {
@ -61,20 +62,23 @@ fn main() {
let mut check_method: duplicate::CheckingMethod = duplicate::CheckingMethod::HASH; let mut check_method: duplicate::CheckingMethod = duplicate::CheckingMethod::HASH;
if ArgumentsPair::has_command(&arguments, "-i") { if ArgumentsPair::has_command(&arguments, "-i") {
df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i")); df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else { } else {
println!("FATAL ERROR: Parameter -i with set of included files is required."); println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1); process::exit(1);
} }
if ArgumentsPair::has_command(&arguments, "-e") { if ArgumentsPair::has_command(&arguments, "-e") {
df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e")); df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
} }
if ArgumentsPair::has_command(&arguments, "-s") { if ArgumentsPair::has_command(&arguments, "-s") {
let min_size = match ArgumentsPair::get_argument(&arguments, "-s").parse::<u64>() { let min_size = match ArgumentsPair::get_argument(&arguments, "-s", false).parse::<u64>() {
Ok(t) => t, Ok(t) => t,
Err(_) => { Err(_) => {
println!("FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)", ArgumentsPair::get_argument(&arguments, "-s")); println!(
"FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)",
ArgumentsPair::get_argument(&arguments, "-s", false)
);
process::exit(1); process::exit(1);
} }
}; };
@ -82,22 +86,47 @@ fn main() {
} }
if ArgumentsPair::has_command(&arguments, "-x") { if ArgumentsPair::has_command(&arguments, "-x") {
df.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x")); df.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x", false));
} }
if ArgumentsPair::has_command(&arguments, "-k") { if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k")); df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
} }
if ArgumentsPair::has_command(&arguments, "-l") { if ArgumentsPair::has_command(&arguments, "-l") {
if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "size" { let argument_name = ArgumentsPair::get_argument(&arguments, "-l", false).to_lowercase();
if argument_name == "size" {
check_method = duplicate::CheckingMethod::SIZE; check_method = duplicate::CheckingMethod::SIZE;
} else if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "hash" { } else if argument_name == "hash" {
check_method = duplicate::CheckingMethod::HASH; check_method = duplicate::CheckingMethod::HASH;
} else { } else {
println!("-l can only have values hash or size"); println!("-l can only have values hash or size");
process::exit(1); process::exit(1);
} }
} }
df.find_duplicates(check_method, ArgumentsPair::has_command(&arguments, "--delete"));
let mut delete_method: duplicate::DeleteMethod = duplicate::DeleteMethod::None;
if ArgumentsPair::has_command(&arguments, "-delete") {
delete_method = duplicate::DeleteMethod::AllExceptOldest;
let argument_name = ArgumentsPair::get_argument(&arguments, "-delete", true).to_lowercase();
if argument_name == "aen" {
delete_method = duplicate::DeleteMethod::AllExceptNewest;
} else if argument_name == "aeo" {
delete_method = duplicate::DeleteMethod::AllExceptOldest;
} else if argument_name == "on" {
delete_method = duplicate::DeleteMethod::OneNewest;
} else if argument_name == "oo" {
delete_method = duplicate::DeleteMethod::OneOldest;
} else if argument_name == "" {
// Nothing to do choosing default one
} else {
println!(
"Invalid argument {} for command -delete, available arguments - aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest)",
argument_name
);
process::exit(1);
}
}
df.find_duplicates(&check_method, &delete_method);
} }
"--h" | "--help" => { "--h" | "--help" => {
print_help(); print_help();
@ -110,36 +139,36 @@ fn main() {
} }
fn print_help() { fn print_help() {
println!();
println!("Usage of Czkawka:");
println!("czkawka <option> <>");
println!("# Main arguments:");
println!(" --h / --help - prints help, also works without any arguments");
println!(" Usage example:");
println!(" czkawka --help");
println!(" czkawka");
println!();
println!( println!(
" --d - <-i directory_to_search> [-e exclude_directories = \"\"] [-s min_size = 1024] [-x allowed_extension = \"\"] [-l type_of_search = \"hash\"] [--delete] - search for duplicates files" r###"
Usage of Czkawka:
## Main arguments:
--h / --help - prints help, also works without any arguments
Usage example:
czkawka --help
czkawka
--d - <-i directory_to_search> [-e exclude_directories = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = all_] - search for duplicates files
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png.There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
-k type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).
-delete - removing file except one.
Usage example:
czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -k "size" -delete
czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -k "hash"
czkawka --d -i "/etc/" --delete
--e - option to find and delete empty folders
"###
); );
println!(" -i directory_to_search - list of directories which should will be searched like /home/rafal");
println!(" -e exclude_directories - list of directories which will be excluded from search.");
println!(" -s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.");
println!(" -x allowed_extension - list of checked extension, e.g. \"jpg,mp4\" will allow to check \"book.jpg\" and \"car.mp4\" but not roman.png.There are also helpful macros which allow to easy use a typcal extension like IMAGE(\"jpg,kra,gif,png,bmp,tiff,webp,hdr,svg\") or TEXT(\"txt,doc,docx,odt,rtf\")");
println!(" -k type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).");
println!(" -delete - removing file except one.");
println!(" Usage example:");
println!(" czkawka --d -i \"/home/rafal/,/home/szczekacz\" -e \"/home/rafal/Pulpit,/home/rafal/Obrazy\" -s 25 -x \"7z,rar,IMAGE\" -k \"size\" -delete");
println!(" czkawka --d -i \"/etc/,/mnt/Miecz\" -s 1000 -x \"VIDEO\" -k \"hash\"");
println!(" czkawka --d -i \"/etc/\" --delete");
println!();
println!(" --e - option to find and delete empty folders");
println!();
} }
struct ArgumentsPair { struct ArgumentsPair {
command: String, command: String,
argument: String, argument: Option<String>,
} }
impl ArgumentsPair { impl ArgumentsPair {
@ -151,10 +180,17 @@ impl ArgumentsPair {
} }
false false
} }
pub fn get_argument(ar: &[ArgumentsPair], command: &str) -> String { pub fn get_argument(ar: &[ArgumentsPair], command: &str, can_be_empty: bool) -> String {
for a in ar { for a in ar {
if a.command == command { if a.command == command {
return a.argument.clone(); if !can_be_empty && a.argument == Option::None {
println!("FATAL ERROR: {} commands should have argument passed", command);
process::exit(1);
}
return match &a.argument {
Some(t) => t.clone(),
None => "".to_string(),
};
} }
} }
panic!("FATAL ERROR: Get argument should always return value"); panic!("FATAL ERROR: Get argument should always return value");