Added support for excluded items with wildcard *
This commit is contained in:
parent
a08b4549ba
commit
4b68330393
|
@ -1,3 +1,4 @@
|
|||
use czkawka_core::duplicate::Info;
|
||||
use czkawka_core::{duplicate, empty_folder};
|
||||
use std::{env, process};
|
||||
|
||||
|
@ -132,37 +133,7 @@ fn main() {
|
|||
|
||||
df.find_duplicates(&check_method, &delete_method);
|
||||
|
||||
let info = df.get_infos();
|
||||
|
||||
if !info.messages.is_empty() {
|
||||
println!("-------------------------------MESSAGES--------------------------------");
|
||||
}
|
||||
for i in &info.messages {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !info.messages.is_empty() {
|
||||
println!("---------------------------END OF MESSAGES-----------------------------");
|
||||
}
|
||||
|
||||
if !info.warnings.is_empty() {
|
||||
println!("-------------------------------WARNINGS--------------------------------");
|
||||
}
|
||||
for i in &info.warnings {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !info.warnings.is_empty() {
|
||||
println!("---------------------------END OF WARNINGS-----------------------------");
|
||||
}
|
||||
|
||||
if !info.errors.is_empty() {
|
||||
println!("--------------------------------ERRORS---------------------------------");
|
||||
}
|
||||
for i in &info.errors {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !info.errors.is_empty() {
|
||||
println!("----------------------------END OF ERRORS------------------------------");
|
||||
}
|
||||
print_infos(df.get_infos());
|
||||
}
|
||||
"--h" | "--help" => {
|
||||
print_help();
|
||||
|
@ -205,16 +176,18 @@ Usage of Czkawka:
|
|||
czkawka --help
|
||||
czkawka
|
||||
|
||||
--d <-i directory_to_search> [-e exclude_directories = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files
|
||||
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files
|
||||
-i directory_to_search - list of directories which should will be searched like /home/rafal
|
||||
-e exclude_directories - list of directories which will be excluded from search.
|
||||
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
|
||||
-s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
|
||||
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png.There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
|
||||
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
|
||||
-l type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).
|
||||
-delete - delete found files, by default remove all except the most oldest one, it can take arguments: aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest)
|
||||
Usage example:
|
||||
czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -l "size" -delete
|
||||
czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash"
|
||||
czkawka --d -i "/var/" -k "/var/l*b/,/var/lo*,*tmp"
|
||||
czkawka --d -i "/etc/" -delete "aeo"
|
||||
|
||||
--e <-i directory_to_search> [-e exclude_directories = ""] [-delete] - option to find and delete empty folders
|
||||
|
@ -225,6 +198,38 @@ Usage of Czkawka:
|
|||
"###
|
||||
);
|
||||
}
|
||||
/// Printing infos about warnings, messages and errors
|
||||
fn print_infos(infos: &Info) {
|
||||
if !infos.messages.is_empty() {
|
||||
println!("-------------------------------MESSAGES--------------------------------");
|
||||
}
|
||||
for i in &infos.messages {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !infos.messages.is_empty() {
|
||||
println!("---------------------------END OF MESSAGES-----------------------------");
|
||||
}
|
||||
|
||||
if !infos.warnings.is_empty() {
|
||||
println!("-------------------------------WARNINGS--------------------------------");
|
||||
}
|
||||
for i in &infos.warnings {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !infos.warnings.is_empty() {
|
||||
println!("---------------------------END OF WARNINGS-----------------------------");
|
||||
}
|
||||
|
||||
if !infos.errors.is_empty() {
|
||||
println!("--------------------------------ERRORS---------------------------------");
|
||||
}
|
||||
for i in &infos.errors {
|
||||
println!("{}", i);
|
||||
}
|
||||
if !infos.errors.is_empty() {
|
||||
println!("----------------------------END OF ERRORS------------------------------");
|
||||
}
|
||||
}
|
||||
|
||||
struct ArgumentsPair {
|
||||
command: String,
|
||||
|
|
|
@ -5,7 +5,6 @@ authors = ["Rafał Mikrut <mikrutrafal54@gmail.com>"]
|
|||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
humansize = "1.1.0"
|
||||
humansize = "1"
|
||||
blake3 = "0.3.6"
|
||||
#rayon = "1.4.0"
|
||||
#regex = "1.3.9"
|
||||
#rayon = "1"
|
|
@ -5,9 +5,85 @@ use std::time::SystemTime;
|
|||
pub struct Common();
|
||||
impl Common {
|
||||
/// Prints how long the function named `function_name` took to run, i.e. the
/// duration elapsed from `start_time` to `end_time`.
///
/// The body starts with an early-return guard on a hard-coded condition, which
/// acts as a manual switch for silencing this output.
///
/// # Panics
///
/// Panics with "Time cannot go reverse." when `duration_since` fails, i.e. when
/// `end_time` is earlier than `start_time`.
pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
|
||||
if true {
|
||||
if false {
|
||||
return;
|
||||
}
|
||||
println!("Execution of function \"{}\" took {:?}", function_name, end_time.duration_since(start_time).expect("Time cannot go reverse."));
|
||||
}
|
||||
|
||||
/// Checks whether `directory` matches the wildcard `expression`.
///
/// `*` in `expression` stands for any (possibly empty) run of characters; every
/// other character must appear literally. The literal fragments between the
/// wildcards must occur in `directory` in order and without overlapping. When
/// `expression` does not start (or end) with `*`, `directory` must start (or
/// end) with the first (or last) fragment.
///
/// Returns `false` — after printing a notice to stdout — when `expression`
/// contains no `*` at all, and `false` when it consists only of `*` characters.
pub fn regex_check(expression: &str, directory: &str) -> bool {
    if !expression.contains('*') {
        println!("Expression should have *");
        return false;
    }

    // Literal fragments of the expression; consecutive wildcards collapse into one.
    let splits: Vec<&str> = expression.split('*').filter(|part| !part.is_empty()).collect();
    let (first, last) = match (splits.first(), splits.last()) {
        (Some(first), Some(last)) => (*first, *last),
        // Expression made only of `*`
        _ => return false,
    };

    // `git*` shouldn't be true for `/gitsfafasfs`
    if !expression.starts_with('*') && !directory.starts_with(first) {
        return false;
    }
    // `*home` shouldn't be true for `/homeowner`
    if !expression.ends_with('*') && !directory.ends_with(last) {
        return false;
    }

    // Walk through the fragments left to right. Each search resumes right after
    // the END of the previous match, so fragments can neither overlap nor be
    // found out of order, and the slice start never leaves `directory`.
    let mut search_from: usize = 0;
    for part in &splits {
        match directory[search_from..].find(part) {
            Some(offset) => search_from += offset + part.len(),
            None => return false,
        }
    }
    true
}
|
||||
}
|
||||
#[cfg(test)]
mod test {
    use crate::common::Common;

    /// Table-driven check of `Common::regex_check`: every pair in `accepted`
    /// must match, every pair in `rejected` must not.
    #[test]
    fn test_regex() {
        let accepted = [
            ("*home*", "/home/rafal"),
            ("*home", "/home"),
            ("*home/", "/home/"),
            ("*home/*", "/home/"),
            ("*.git*", "/home/.git"),
            ("*/home/rafal*rafal*rafal*rafal*", "/home/rafal/rafalrafalrafal"),
        ];
        for (expression, directory) in accepted.iter() {
            assert!(Common::regex_check(expression, directory));
        }

        let rejected = [
            ("*home", "/home/"),
            ("*home", "/homefasfasfasfasf/"),
            ("*home", "/homefasfasfasfasf"),
            ("rafal*afal*fal", "rafal"),
            ("AAAAAAAA****", "/AAAAAAAAAAAAAAAAA"),
            ("*.git/*", "/home/.git"),
            ("*home/*koc", "/koc/home/"),
            ("*home/", "/home"),
            ("*TTT", "/GGG"),
            ("AAA", "AAA"),
        ];
        for (expression, directory) in rejected.iter() {
            assert!(!Common::regex_check(expression, directory));
        }
    }
}
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
// TODO when using GUI all or most println!() should be used as variables passed by argument
|
||||
use humansize::{file_size_opts as options, FileSize};
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::fs;
|
||||
|
@ -38,7 +37,7 @@ pub struct DuplicateFinder {
|
|||
files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
|
||||
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
|
||||
allowed_extensions: Vec<String>, // jpg, jpeg, mp4
|
||||
// excluded_items: Vec<String>, // TODO, support for e.g. */.git/*
|
||||
excluded_items: Vec<String>, // TODO, support for e.g. */.git/*
|
||||
excluded_directories: Vec<String>,
|
||||
included_directories: Vec<String>,
|
||||
min_file_size: u64,
|
||||
|
@ -90,7 +89,7 @@ impl DuplicateFinder {
|
|||
infos: Info::new(),
|
||||
files_with_identical_size: Default::default(),
|
||||
files_with_identical_hashes: Default::default(),
|
||||
// excluded_items: vec![],
|
||||
excluded_items: vec![],
|
||||
excluded_directories: vec![],
|
||||
included_directories: vec![],
|
||||
min_file_size: 1024,
|
||||
|
@ -117,9 +116,32 @@ impl DuplicateFinder {
|
|||
self.min_file_size = min_size;
|
||||
}
|
||||
|
||||
pub fn set_excluded_items(&mut self, _excluded_items: String) {
|
||||
// TODO Still don't know how to exactly parse this
|
||||
// Things like /.git/ should be by default hidden with help of this *.git*
|
||||
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
|
||||
// let start_time: SystemTime = SystemTime::now();
|
||||
|
||||
if excluded_items.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
excluded_items = excluded_items.replace("\"", "");
|
||||
let expressions: Vec<String> = excluded_items.split(',').map(String::from).collect();
|
||||
let mut checked_expressions: Vec<String> = Vec::new();
|
||||
|
||||
for expression in expressions {
|
||||
let expression: String = expression.trim().to_string();
|
||||
|
||||
if expression == "" {
|
||||
continue;
|
||||
}
|
||||
if !expression.contains('*') {
|
||||
self.infos.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + &*expression);
|
||||
continue;
|
||||
}
|
||||
|
||||
checked_expressions.push(expression);
|
||||
}
|
||||
|
||||
self.excluded_items = checked_expressions;
|
||||
}
|
||||
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
|
||||
if allowed_extensions.is_empty() {
|
||||
|
@ -320,6 +342,7 @@ impl DuplicateFinder {
|
|||
|
||||
let mut is_excluded_dir = false;
|
||||
next_folder = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap() + "/";
|
||||
|
||||
for ed in &self.excluded_directories {
|
||||
if next_folder == *ed {
|
||||
is_excluded_dir = true;
|
||||
|
@ -327,6 +350,16 @@ impl DuplicateFinder {
|
|||
}
|
||||
}
|
||||
if !is_excluded_dir {
|
||||
let mut found_expression: bool = false;
|
||||
for expression in &self.excluded_items {
|
||||
if Common::regex_check(expression, &next_folder) {
|
||||
found_expression = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found_expression {
|
||||
break;
|
||||
}
|
||||
folders_to_check.push(next_folder);
|
||||
}
|
||||
self.infos.number_of_checked_folders += 1;
|
||||
|
@ -334,6 +367,7 @@ impl DuplicateFinder {
|
|||
let mut have_valid_extension: bool;
|
||||
let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
|
||||
|
||||
// Checking allowed extensions
|
||||
if !self.allowed_extensions.is_empty() {
|
||||
have_valid_extension = false;
|
||||
for i in &self.allowed_extensions {
|
||||
|
@ -346,9 +380,23 @@ impl DuplicateFinder {
|
|||
have_valid_extension = true;
|
||||
}
|
||||
|
||||
// Checking files
|
||||
if metadata.len() >= self.min_file_size && have_valid_extension {
|
||||
let current_file_name = "".to_owned() + ¤t_folder + &entry_data.file_name().into_string().unwrap();
|
||||
|
||||
// Checking expressions
|
||||
let mut found_expression: bool = false;
|
||||
for expression in &self.excluded_items {
|
||||
if Common::regex_check(expression, ¤t_file_name) {
|
||||
found_expression = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found_expression {
|
||||
break;
|
||||
}
|
||||
|
||||
// Creating new file entry
|
||||
let fe: FileEntry = FileEntry {
|
||||
path: current_file_name.clone(),
|
||||
size: metadata.len(),
|
||||
|
@ -384,7 +432,7 @@ impl DuplicateFinder {
|
|||
Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
|
||||
//println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
|
||||
}
|
||||
// pub fn save_results_to_file(&self) {}
|
||||
// pub fn save_results_to_file(&self) {} // TODO Saving results to files
|
||||
|
||||
/// Remove files which have unique size
|
||||
fn remove_files_with_unique_size(&mut self) {
|
||||
|
@ -541,9 +589,6 @@ impl DuplicateFinder {
|
|||
Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
|
||||
}
|
||||
/// Remove unused entries when included or excluded overlaps with each other or are duplicated
|
||||
/// ```
|
||||
// let df : DuplicateFinder = saf
|
||||
/// ```
|
||||
fn optimize_directories(&mut self) -> bool {
|
||||
let start_time: SystemTime = SystemTime::now();
|
||||
|
||||
|
|
Loading…
Reference in a new issue