1
0
Fork 0
mirror of synced 2024-04-26 08:42:07 +12:00

Added several options to delete files in different ways.

This commit is contained in:
Rafał Mikrut 2020-08-31 16:15:56 +02:00
parent 1095d719d0
commit bbf3606f4f
2 changed files with 188 additions and 95 deletions

View file

@ -4,7 +4,7 @@ use std::collections::{BTreeMap, HashMap};
use std::fs::{File, Metadata};
use std::io::prelude::*;
use std::path::Path;
use std::time::SystemTime;
use std::time::{SystemTime, UNIX_EPOCH};
use std::{fs, process};
#[derive(PartialEq)]
@ -15,13 +15,21 @@ pub enum CheckingMethod {
// TODO
#[allow(dead_code)]
pub enum TypeOfDelete {
AllExceptRandom, // Choose one random file from duplicates which won't be deleted
/// Strategy used to decide which files of a duplicate group get deleted.
///
/// "Oldest" and "newest" refer to the `created_date` of each file entry.
#[derive(Eq, PartialEq)]
pub enum DeleteMethod {
/// Delete nothing; the deletion step returns before touching any file.
None,
/// Delete every file in a group except the most recently created one.
AllExceptNewest,
/// Delete every file in a group except the earliest created one.
AllExceptOldest,
/// Delete only the earliest created file of a group.
OneOldest,
/// Delete only the most recently created file of a group.
OneNewest,
// NOTE(review): no code visible in this change handles this variant -
// presumably "delete one randomly chosen file"; confirm before relying on it.
OneRandom
}
/// Metadata of a single file that takes part in duplicate detection.
#[derive(Clone)]
struct FileEntry {
// Location of the file; this is the string handed to `fs::remove_file` on deletion.
pub path: String,
// Presumably the file size in bytes - TODO confirm against the code that fills it.
pub size: u64,
// Creation timestamp; the deletion strategies rank files by this value.
pub created_date: SystemTime,
// Presumably the last modification timestamp; not read by the deletion code shown here.
pub modified_date: SystemTime,
}
pub struct DuplicateFinder {
@ -59,19 +67,17 @@ impl DuplicateFinder {
}
}
pub fn find_duplicates(mut self, check_method: CheckingMethod, delete_files: bool) {
pub fn find_duplicates(mut self, check_method: &CheckingMethod, delete_method: &DeleteMethod) {
self.optimize_directories();
self.debug_print();
self.check_files_size();
self.remove_files_with_unique_size();
if check_method == CheckingMethod::HASH {
if *check_method == CheckingMethod::HASH {
self.check_files_hash();
}
self.calculate_lost_space(&check_method);
self.print_duplicated_entries(&check_method);
if delete_files {
self.delete_files(&check_method);
}
self.calculate_lost_space(check_method);
self.print_duplicated_entries(check_method);
self.delete_files(check_method, delete_method);
}
pub fn set_min_file_size(&mut self, min_size: u64) {
@ -610,34 +616,23 @@ impl DuplicateFinder {
DuplicateFinder::print_time(start_time, SystemTime::now(), "optimize_directories".to_string());
}
fn delete_files(&mut self, check_method: &CheckingMethod) {
fn delete_files(&mut self, check_method: &CheckingMethod, delete_method: &DeleteMethod) {
if *delete_method == DeleteMethod::None {
return;
}
let start_time: SystemTime = SystemTime::now();
let mut errors: Vec<String> = Vec::new();
match check_method {
CheckingMethod::HASH => {
for entry in &self.files_with_identical_hashes {
for vector in entry.1 {
for files in vector.iter().enumerate() {
if files.0 != 0 {
match fs::remove_file(&files.1.path) {
Ok(_) => (),
Err(_) => errors.push(files.1.path.clone()),
};
}
}
delete_files(&vector, &delete_method, &mut errors);
}
}
}
CheckingMethod::SIZE => {
for entry in &self.files_with_identical_size {
for files in entry.1.iter().enumerate() {
if files.0 != 0 {
match fs::remove_file(&files.1.path) {
Ok(_) => (),
Err(_) => errors.push(files.1.path.clone()),
};
}
}
delete_files(&entry.1, &delete_method, &mut errors);
}
}
}
@ -647,11 +642,73 @@ impl DuplicateFinder {
DuplicateFinder::print_time(start_time, SystemTime::now(), "delete_files".to_string());
}
}
#[derive(Clone)]
struct FileEntry {
pub path: String,
pub size: u64,
pub created_date: SystemTime,
pub modified_date: SystemTime,
/// Deletes files from a single group of duplicates according to `delete_method`.
///
/// Files are ranked by `created_date`; when several files share the extreme
/// date, the one that appears first in `vector` counts as the oldest/newest.
///
/// * `vector` - one group of duplicated files; must hold at least two entries.
/// * `delete_method` - strategy selecting which entries are removed.
///   `DeleteMethod::None` removes nothing.
/// * `errors` - receives the path of every file that could not be removed.
///
/// # Panics
///
/// Panics when `vector` has fewer than two entries, or when `delete_method`
/// is `DeleteMethod::OneRandom`, which is not implemented.
fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, errors: &mut Vec<String>) {
    assert!(vector.len() > 1, "Vector length must be bigger than 1(This should be done in previous steps).");

    match delete_method {
        DeleteMethod::OneOldest => {
            let oldest = index_by_creation_date(vector, std::cmp::Ordering::Less);
            remove_or_record_error(&vector[oldest], errors);
        }
        DeleteMethod::OneNewest => {
            let newest = index_by_creation_date(vector, std::cmp::Ordering::Greater);
            remove_or_record_error(&vector[newest], errors);
        }
        DeleteMethod::AllExceptOldest => {
            let kept = index_by_creation_date(vector, std::cmp::Ordering::Less);
            remove_all_except(vector, kept, errors);
        }
        DeleteMethod::AllExceptNewest => {
            let kept = index_by_creation_date(vector, std::cmp::Ordering::Greater);
            remove_all_except(vector, kept, errors);
        }
        // "Delete nothing" is a valid request, not a programming error.
        DeleteMethod::None => {}
        // Fail before touching any file instead of guessing what to delete.
        DeleteMethod::OneRandom => {
            unimplemented!("DeleteMethod::OneRandom is not supported yet");
        }
    }
}

/// Returns the index of the first entry whose `created_date` is the extreme one:
/// the oldest for `Ordering::Less`, the newest for `Ordering::Greater`.
fn index_by_creation_date(files: &[FileEntry], wanted: std::cmp::Ordering) -> usize {
    let mut best = 0;
    for (index, file) in files.iter().enumerate().skip(1) {
        // `SystemTime` is totally ordered, so timestamps are compared directly:
        // no sentinel value, no panic for pre-epoch dates, no truncation to seconds.
        if file.created_date.cmp(&files[best].created_date) == wanted {
            best = index;
        }
    }
    best
}

/// Removes every entry except the one at index `kept`.
fn remove_all_except(files: &[FileEntry], kept: usize, errors: &mut Vec<String>) {
    for (index, file) in files.iter().enumerate() {
        if index != kept {
            remove_or_record_error(file, errors);
        }
    }
}

/// Removes `file` from disk; on failure its path is appended to `errors`.
fn remove_or_record_error(file: &FileEntry, errors: &mut Vec<String>) {
    if fs::remove_file(&file.path).is_err() {
        errors.push(file.path.clone());
    }
}

View file

@ -18,37 +18,38 @@ fn main() {
// Assigning commands with arguments
let mut arguments: Vec<ArgumentsPair> = Vec::new();
let mut can_pass_argument: bool = false;
for argument in 0..all_arguments.len() {
if all_arguments[argument].starts_with("--") {
commands_arguments.push(all_arguments[argument].clone());
} else if all_arguments[argument].starts_with('-') {
if argument + 1 < all_arguments.len() {
if all_arguments[argument + 1].starts_with("--") || all_arguments[argument + 1].starts_with('-') {
println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]);
process::exit(1);
} else {
let a: ArgumentsPair = ArgumentsPair {
command: all_arguments[argument].clone(),
argument: all_arguments[argument + 1].clone(),
};
arguments.push(a);
can_pass_argument = true;
}
} else {
println!("FATAL ERROR: Missing argument for {}", all_arguments[argument]);
for argument in all_arguments {
if argument.starts_with("--") {
commands_arguments.push(argument);
} else if argument.starts_with('-') {
let a: ArgumentsPair = ArgumentsPair {
command: argument,
argument: Option::None,
};
arguments.push(a);
} else {
if arguments.is_empty() {
println!("FATAL ERROR: Trying to use {} without any arguments(like -i -e -delete)", argument);
process::exit(1);
}
} else if !can_pass_argument {
println!("FATAL ERROR: Argument \"{}\" is not linked to any command", all_arguments[argument]);
process::exit(1);
} else {
can_pass_argument = false;
if arguments[arguments.len() - 1].argument != Option::None {
println!("FATAL ERROR: Trying set second parameter {}, but only one is supported", argument); // This may be changed in future to support 2 or more attributes with space
process::exit(1);
}
let last_element = arguments.len() - 1;
arguments[last_element].argument = Option::from(argument);
}
}
for a in &arguments {
println!("Argument number {} - {}", a.command, a.argument);
println!(
"Argument number {} - {}",
a.command,
match &a.argument {
Some(t) => t.clone(),
None => "MISSING_ARGUMENT".to_string(),
}
);
}
if commands_arguments.is_empty() {
@ -61,20 +62,23 @@ fn main() {
let mut check_method: duplicate::CheckingMethod = duplicate::CheckingMethod::HASH;
if ArgumentsPair::has_command(&arguments, "-i") {
df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i"));
df.set_include_directory(ArgumentsPair::get_argument(&arguments, "-i", false));
} else {
println!("FATAL ERROR: Parameter -i with set of included files is required.");
process::exit(1);
}
if ArgumentsPair::has_command(&arguments, "-e") {
df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e"));
df.set_exclude_directory(ArgumentsPair::get_argument(&arguments, "-e", false));
}
if ArgumentsPair::has_command(&arguments, "-s") {
let min_size = match ArgumentsPair::get_argument(&arguments, "-s").parse::<u64>() {
let min_size = match ArgumentsPair::get_argument(&arguments, "-s", false).parse::<u64>() {
Ok(t) => t,
Err(_) => {
println!("FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)", ArgumentsPair::get_argument(&arguments, "-s"));
println!(
"FATAL ERROR: \"{}\" is not valid file size(allowed range <0,u64::max>)",
ArgumentsPair::get_argument(&arguments, "-s", false)
);
process::exit(1);
}
};
@ -82,22 +86,47 @@ fn main() {
}
if ArgumentsPair::has_command(&arguments, "-x") {
df.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x"));
df.set_allowed_extensions(ArgumentsPair::get_argument(&arguments, "-x", false));
}
if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k"));
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
}
if ArgumentsPair::has_command(&arguments, "-l") {
if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "size" {
let argument_name = ArgumentsPair::get_argument(&arguments, "-l", false).to_lowercase();
if argument_name == "size" {
check_method = duplicate::CheckingMethod::SIZE;
} else if ArgumentsPair::get_argument(&arguments, "-l").to_lowercase() == "hash" {
} else if argument_name == "hash" {
check_method = duplicate::CheckingMethod::HASH;
} else {
println!("-l can only have values hash or size");
process::exit(1);
}
}
df.find_duplicates(check_method, ArgumentsPair::has_command(&arguments, "--delete"));
let mut delete_method: duplicate::DeleteMethod = duplicate::DeleteMethod::None;
if ArgumentsPair::has_command(&arguments, "-delete") {
delete_method = duplicate::DeleteMethod::AllExceptOldest;
let argument_name = ArgumentsPair::get_argument(&arguments, "-delete", true).to_lowercase();
if argument_name == "aen" {
delete_method = duplicate::DeleteMethod::AllExceptNewest;
} else if argument_name == "aeo" {
delete_method = duplicate::DeleteMethod::AllExceptOldest;
} else if argument_name == "on" {
delete_method = duplicate::DeleteMethod::OneNewest;
} else if argument_name == "oo" {
delete_method = duplicate::DeleteMethod::OneOldest;
} else if argument_name == "" {
// Nothing to do choosing default one
} else {
println!(
"Invalid argument {} for command -delete, available arguments - aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest)",
argument_name
);
process::exit(1);
}
}
df.find_duplicates(&check_method, &delete_method);
}
"--h" | "--help" => {
print_help();
@ -110,36 +139,36 @@ fn main() {
}
/// Prints the command line usage summary to standard output.
///
/// The option letters listed here mirror the ones looked up in `main`:
/// `-l` selects the search type, `-k` passes excluded items and `-delete`
/// (single dash, optional argument) selects the deletion strategy.
fn print_help() {
    println!(
        r###"
Usage of Czkawka:

## Main arguments:
  --h / --help - prints help, also works without any arguments
    Usage example:
      czkawka --help
      czkawka

  --d - <-i directory_to_search> [-e exclude_directories = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-k excluded_items] [-l type_of_search = "hash"] [-delete [method = "aeo"]] - search for duplicates files
    -i directory_to_search - list of directories which will be searched, like /home/rafal
    -e exclude_directories - list of directories which will be excluded from search.
    -s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
    -x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easily use a typical extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
    -k excluded_items - list of excluded items.
    -l type_of_search - allows to use the fastest method ("size"), which takes into account only size, or the more accurate one ("hash"), which checks if file content is the same.
    -delete - deletes found duplicates, an optional argument selects which ones: aen(All except newest one), aeo(All except oldest one, used by default), on(Only one newest), oo(Only one oldest).
    Usage example:
      czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -l "size" -delete
      czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash"
      czkawka --d -i "/etc/" -delete "aen"

  --e - option to find and delete empty folders
"###
    );
}
/// A single-dash command line option together with its optional value.
struct ArgumentsPair {
    /// The option itself, including the leading dash (e.g. `-i`).
    command: String,
    /// The value that followed the option, or `None` when it was given without one.
    argument: Option<String>,
}
impl ArgumentsPair {
@ -151,10 +180,17 @@ impl ArgumentsPair {
}
false
}
pub fn get_argument(ar: &[ArgumentsPair], command: &str) -> String {
pub fn get_argument(ar: &[ArgumentsPair], command: &str, can_be_empty: bool) -> String {
for a in ar {
if a.command == command {
return a.argument.clone();
if !can_be_empty && a.argument == Option::None {
println!("FATAL ERROR: {} commands should have argument passed", command);
process::exit(1);
}
return match &a.argument {
Some(t) => t.clone(),
None => "".to_string(),
};
}
}
panic!("FATAL ERROR: Get argument should always return value");