1
0
Fork 0
mirror of synced 2024-04-27 17:22:13 +12:00

Added support for excluded items with wildcard *

This commit is contained in:
Rafał Mikrut 2020-09-11 22:32:17 +02:00
parent a08b4549ba
commit 4b68330393
4 changed files with 172 additions and 47 deletions

View file

@ -1,3 +1,4 @@
use czkawka_core::duplicate::Info;
use czkawka_core::{duplicate, empty_folder};
use std::{env, process};
@ -132,37 +133,7 @@ fn main() {
df.find_duplicates(&check_method, &delete_method);
let info = df.get_infos();
if !info.messages.is_empty() {
println!("-------------------------------MESSAGES--------------------------------");
}
for i in &info.messages {
println!("{}", i);
}
if !info.messages.is_empty() {
println!("---------------------------END OF MESSAGES-----------------------------");
}
if !info.warnings.is_empty() {
println!("-------------------------------WARNINGS--------------------------------");
}
for i in &info.warnings {
println!("{}", i);
}
if !info.warnings.is_empty() {
println!("---------------------------END OF WARNINGS-----------------------------");
}
if !info.errors.is_empty() {
println!("--------------------------------ERRORS---------------------------------");
}
for i in &info.errors {
println!("{}", i);
}
if !info.errors.is_empty() {
println!("----------------------------END OF ERRORS------------------------------");
}
print_infos(df.get_infos());
}
"--h" | "--help" => {
print_help();
@ -205,16 +176,18 @@ Usage of Czkawka:
czkawka --help
czkawka
--d <-i directory_to_search> [-e exclude_directories = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
-s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png.There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
-l type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).
-delete - delete found files, by default remove all except the most oldest one, it can take arguments: aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest)
Usage example:
czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -l "size" -delete
czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash"
czkawka --d -i "/var/" -k "/var/l*b/,/var/lo*,*tmp"
czkawka --d -i "/etc/" -delete "aeo"
--e <-i directory_to_search> [-e exclude_directories = ""] [-delete] - option to find and delete empty folders
@ -225,6 +198,38 @@ Usage of Czkawka:
"###
);
}
/// Prints the messages, warnings and errors gathered in `infos`.
///
/// Each non-empty group is written to stdout framed by an opening and a closing banner;
/// a group with no entries prints nothing at all.
fn print_infos(infos: &Info) {
    if !infos.messages.is_empty() {
        println!("-------------------------------MESSAGES--------------------------------");
        for message in &infos.messages {
            println!("{}", message);
        }
        println!("---------------------------END OF MESSAGES-----------------------------");
    }

    if !infos.warnings.is_empty() {
        println!("-------------------------------WARNINGS--------------------------------");
        for warning in &infos.warnings {
            println!("{}", warning);
        }
        println!("---------------------------END OF WARNINGS-----------------------------");
    }

    if !infos.errors.is_empty() {
        println!("--------------------------------ERRORS---------------------------------");
        for error in &infos.errors {
            println!("{}", error);
        }
        println!("----------------------------END OF ERRORS------------------------------");
    }
}
struct ArgumentsPair {
command: String,

View file

@ -5,7 +5,6 @@ authors = ["Rafał Mikrut <mikrutrafal54@gmail.com>"]
edition = "2018"
[dependencies]
humansize = "1.1.0"
humansize = "1"
blake3 = "0.3.6"
#rayon = "1.4.0"
#regex = "1.3.9"
#rayon = "1"

View file

@ -5,9 +5,85 @@ use std::time::SystemTime;
pub struct Common();
impl Common {
/// Prints how long the function named `function_name` took to run.
///
/// The reported duration is `end_time.duration_since(start_time)`; the `expect` below
/// panics with "Time cannot go reverse." when `end_time` is earlier than `start_time`.
pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
// NOTE(review): hard-coded switch in front of the timing output. Two alternative
// conditions (`if true` / `if false`) appear here — confirm which one is meant to be active.
if true {
if false {
return;
}
println!("Execution of function \"{}\" took {:?}", function_name, end_time.duration_since(start_time).expect("Time cannot go reverse."));
}
/// Checks whether `directory` matches the wildcard `expression`.
///
/// `*` stands for any (possibly empty) run of characters; every other character must
/// match literally. Unless the expression starts with `*`, the first literal fragment
/// must be a prefix of `directory`; unless it ends with `*`, the last literal fragment
/// must be a suffix. Fragments have to appear in order and must not overlap.
///
/// Returns `false` (and prints a notice to stdout) when `expression` contains no `*`,
/// and `false` when the expression consists only of `*` characters.
pub fn regex_check(expression: &str, directory: &str) -> bool {
    if !expression.contains('*') {
        println!("Expression should have *");
        return false;
    }

    // Literal fragments between wildcards; consecutive `*` produce empty pieces we ignore.
    let fragments: Vec<&str> = expression.split('*').filter(|fragment| !fragment.is_empty()).collect();
    let (first, last) = match (fragments.first(), fragments.last()) {
        (Some(first), Some(last)) => (*first, *last),
        _ => return false,
    };

    // `git*` shouldn't be true for `/gitsfafasfs`
    if !expression.starts_with('*') && !directory.starts_with(first) {
        return false;
    }
    // `*home` shouldn't be true for `/homeowner`
    if !expression.ends_with('*') && !directory.ends_with(last) {
        return false;
    }

    // Walk the fragments left to right. `cursor` always points just past the previously
    // matched fragment, so matches can neither overlap nor run past the end of
    // `directory`, and the slice below always starts on a character boundary.
    let mut cursor: usize = 0;
    for &fragment in &fragments {
        match directory[cursor..].find(fragment) {
            Some(offset) => cursor += offset + fragment.len(),
            None => return false,
        }
    }
    true
}
}
#[cfg(test)]
mod test {
    use crate::common::Common;

    /// Table-driven check of `Common::regex_check`: every pair in `matching` must be
    /// accepted and every pair in `rejected` must be refused.
    #[test]
    fn test_regex() {
        // (expression, directory) pairs that must match.
        let matching = [
            ("*home*", "/home/rafal"),
            ("*home", "/home"),
            ("*home/", "/home/"),
            ("*home/*", "/home/"),
            ("*.git*", "/home/.git"),
            ("*/home/rafal*rafal*rafal*rafal*", "/home/rafal/rafalrafalrafal"),
        ];
        // (expression, directory) pairs that must not match.
        let rejected = [
            ("*home", "/home/"),
            ("*home", "/homefasfasfasfasf/"),
            ("*home", "/homefasfasfasfasf"),
            ("rafal*afal*fal", "rafal"),
            ("AAAAAAAA****", "/AAAAAAAAAAAAAAAAA"),
            ("*.git/*", "/home/.git"),
            ("*home/*koc", "/koc/home/"),
            ("*home/", "/home"),
            ("*TTT", "/GGG"),
            ("AAA", "AAA"),
        ];

        for &(expression, directory) in matching.iter() {
            assert!(Common::regex_check(expression, directory), "`{}` should match `{}`", expression, directory);
        }
        for &(expression, directory) in rejected.iter() {
            assert!(!Common::regex_check(expression, directory), "`{}` should not match `{}`", expression, directory);
        }
    }
}

View file

@ -1,4 +1,3 @@
// TODO when using GUI all or most println!() should be used as variables passed by argument
use humansize::{file_size_opts as options, FileSize};
use std::collections::{BTreeMap, HashMap};
use std::fs;
@ -38,7 +37,7 @@ pub struct DuplicateFinder {
files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
allowed_extensions: Vec<String>, // jpg, jpeg, mp4
// excluded_items: Vec<String>, // TODO, support for e.g. */.git/*
excluded_items: Vec<String>, // TODO, support for e.g. */.git/*
excluded_directories: Vec<String>,
included_directories: Vec<String>,
min_file_size: u64,
@ -90,7 +89,7 @@ impl DuplicateFinder {
infos: Info::new(),
files_with_identical_size: Default::default(),
files_with_identical_hashes: Default::default(),
// excluded_items: vec![],
excluded_items: vec![],
excluded_directories: vec![],
included_directories: vec![],
min_file_size: 1024,
@ -117,9 +116,32 @@ impl DuplicateFinder {
self.min_file_size = min_size;
}
pub fn set_excluded_items(&mut self, _excluded_items: String) {
// TODO Still don't know how to exactly parse this
// Things like /.git/ should be by default hidden with help of this *.git*
/// Parses a comma-separated list of wildcard expressions and stores the valid ones
/// in `self.excluded_items`.
///
/// Double quotes are stripped from the input and each entry is trimmed. Empty entries
/// are skipped silently; entries without a `*` are skipped with a warning appended to
/// `self.infos.warnings`. An empty `excluded_items` leaves the current list untouched.
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
    if excluded_items.is_empty() {
        return;
    }

    excluded_items = excluded_items.replace("\"", "");

    let mut accepted: Vec<String> = Vec::new();
    for raw_entry in excluded_items.split(',') {
        let candidate = raw_entry.trim();
        if candidate.is_empty() {
            continue;
        }
        if candidate.contains('*') {
            accepted.push(candidate.to_string());
        } else {
            self.infos.warnings.push("Excluded Items Warning: Wildcard * is required in expression, ignoring ".to_string() + candidate);
        }
    }

    self.excluded_items = accepted;
}
pub fn set_allowed_extensions(&mut self, mut allowed_extensions: String) {
if allowed_extensions.is_empty() {
@ -320,6 +342,7 @@ impl DuplicateFinder {
let mut is_excluded_dir = false;
next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
for ed in &self.excluded_directories {
if next_folder == *ed {
is_excluded_dir = true;
@ -327,6 +350,16 @@ impl DuplicateFinder {
}
}
if !is_excluded_dir {
let mut found_expression: bool = false;
for expression in &self.excluded_items {
if Common::regex_check(expression, &next_folder) {
found_expression = true;
break;
}
}
if found_expression {
break;
}
folders_to_check.push(next_folder);
}
self.infos.number_of_checked_folders += 1;
@ -334,6 +367,7 @@ impl DuplicateFinder {
let mut have_valid_extension: bool;
let file_name_lowercase: String = entry_data.file_name().into_string().unwrap().to_lowercase();
// Checking allowed extensions
if !self.allowed_extensions.is_empty() {
have_valid_extension = false;
for i in &self.allowed_extensions {
@ -346,9 +380,23 @@ impl DuplicateFinder {
have_valid_extension = true;
}
// Checking files
if metadata.len() >= self.min_file_size && have_valid_extension {
let current_file_name = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap();
// Checking expressions
let mut found_expression: bool = false;
for expression in &self.excluded_items {
if Common::regex_check(expression, &current_file_name) {
found_expression = true;
break;
}
}
if found_expression {
break;
}
// Creating new file entry
let fe: FileEntry = FileEntry {
path: current_file_name.clone(),
size: metadata.len(),
@ -384,7 +432,7 @@ impl DuplicateFinder {
Common::print_time(start_time, SystemTime::now(), "check_files_size".to_string());
//println!("Duration of finding duplicates {:?}", end_time.duration_since(start_time).expect("a"));
}
// pub fn save_results_to_file(&self) {}
// pub fn save_results_to_file(&self) {} // TODO Saving results to files
/// Remove files which have unique size
fn remove_files_with_unique_size(&mut self) {
@ -541,9 +589,6 @@ impl DuplicateFinder {
Common::print_time(start_time, SystemTime::now(), "print_duplicated_entries".to_string());
}
/// Remove unused entries when included or excluded overlaps with each other or are duplicated
/// ```
// let df : DuplicateFinder = saf
/// ```
fn optimize_directories(&mut self) -> bool {
let start_time: SystemTime = SystemTime::now();