Added support for non recursive search

This commit is contained in:
Rafał Mikrut 2020-09-12 13:25:23 +02:00
parent 4b68330393
commit cff5ac93c4
7 changed files with 115 additions and 39 deletions

View File

@ -28,6 +28,7 @@ This is my first ever project in Rust so probably a lot of things are written in
- Maybe windows support, but this will need some refactoring in code
- Translation support
- Add support for fast searching based on checking only first ~1MB of file.
- Selecting different objects in
## Usage and requirements
Rustc 1.46 works fine (not sure about the minimal supported version)

View File

@ -96,6 +96,9 @@ fn main() {
if ArgumentsPair::has_command(&arguments, "-k") {
df.set_excluded_items(ArgumentsPair::get_argument(&arguments, "-k", false));
}
if ArgumentsPair::has_command(&arguments, "-o") {
df.set_recursive_search(false);
}
if ArgumentsPair::has_command(&arguments, "-l") {
let argument_name = ArgumentsPair::get_argument(&arguments, "-l", false).to_lowercase();
if argument_name == "size" {
@ -176,17 +179,18 @@ Usage of Czkawka:
czkawka --help
czkawka
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-delete = "aeo"] - search for duplicates files
--d <-i directory_to_search> [-e exclude_directories = ""] [-k excluded_items = ""] [-s min_size = 1024] [-x allowed_extension = ""] [-l type_of_search = "hash"] [-o] [-delete = "aeo"] - search for duplicates files
-i directory_to_search - list of directories which should will be searched like /home/rafal
-e exclude_directories - list of directories which will be excluded from search.
-k excluded_items - list of excluded items which contains * wildcard(may be slow)
-o non_recursive - this options prevents from recursive check of folders
-s min_size - minimum size of checked files in bytes, assigning bigger value may speed up searching.
-x allowed_extension - list of checked extension, e.g. "jpg,mp4" will allow to check "book.jpg" and "car.mp4" but not roman.png. There are also helpful macros which allow to easy use a typcal extension like IMAGE("jpg,kra,gif,png,bmp,tiff,webp,hdr,svg") or TEXT("txt,doc,docx,odt,rtf")
-l type_of_search - allows to use fastest which takes into account only size, and more accurate which check if file contnet is same(hashes).
-delete - delete found files, by default remove all except the most oldest one, it can take arguments: aen(All except newest one), aeo(All except oldest one), on(Only one newest), oo(Only one oldest)
Usage example:
czkawka --d -i "/home/rafal/,/home/szczekacz" -e "/home/rafal/Pulpit,/home/rafal/Obrazy" -s 25 -x "7z,rar,IMAGE" -l "size" -delete
czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash"
czkawka --d -i "/etc/,/mnt/Miecz" -s 1000 -x "VIDEO" -l "hash" -o
czkawka --d -i "/var/" -k "/var/l*b/,/var/lo*,*tmp"
czkawka --d -i "/etc/" -delete "aeo"

View File

@ -6,5 +6,5 @@ edition = "2018"
[dependencies]
humansize = "1"
blake3 = "0.3.6"
blake3 = "0.3"
#rayon = "1"

View File

@ -1,9 +1,12 @@
use std::fs;
use std::path::Path;
use std::time::SystemTime;
/// Stateless holder for common helper functions used across the other structs/functions.
///
/// The struct has no fields; its helpers are associated functions that are
/// called as `Common::function_name(...)`.
pub struct Common();
impl Common {
/// Prints how long the interval between the start and end points took, together with the function name
pub fn print_time(start_time: SystemTime, end_time: SystemTime, function_name: String) {
if false {
return;
@ -11,6 +14,43 @@ impl Common {
println!("Execution of function \"{}\" took {:?}", function_name, end_time.duration_since(start_time).expect("Time cannot go reverse."));
}
/// Deletes every path listed in `entries` and reports the ones that could not be removed.
///
/// Each entry is checked on disk: directories are removed together with their
/// content, anything else is removed as a single file. Entries are processed
/// in the order given and a failure never stops the remaining deletions.
///
/// Returns one warning per failed removal, in input order, formatted as
/// `"Failed to remove folder <entry>"` or `"Failed to remove file <entry>"`.
/// An empty vector means everything was deleted.
pub fn delete_multiple_entries(entries: &[String]) -> Vec<String> {
    entries
        .iter()
        .filter_map(|target| {
            // Pick the removal routine and the matching warning prefix in one step.
            let (outcome, prefix) = if Path::new(target).is_dir() {
                (fs::remove_dir_all(target), "Failed to remove folder ")
            } else {
                (fs::remove_file(target), "Failed to remove file ")
            };
            // The underlying I/O error is intentionally dropped; only the path is reported.
            outcome.err().map(|_| prefix.to_owned() + target.as_str())
        })
        .collect()
}
/// Deletes a single path and reports whether the removal failed.
///
/// A directory is removed together with its content; anything else is removed
/// as a single file.
///
/// Returns an empty string on success, otherwise
/// `"Failed to remove folder <entry>"` or `"Failed to remove file <entry>"`.
pub fn delete_one_entry(entry: &str) -> String {
    let target = Path::new(entry);
    // Choose the removal routine and its warning prefix together.
    let (outcome, prefix) = if target.is_dir() {
        (fs::remove_dir_all(target), "Failed to remove folder ")
    } else {
        (fs::remove_file(target), "Failed to remove file ")
    };
    match outcome {
        Ok(()) => String::new(),
        // The underlying I/O error is intentionally dropped; only the path is reported.
        Err(_) => prefix.to_owned() + entry,
    }
}
/// Function to check if directory match expression
pub fn regex_check(expression: &str, directory: &str) -> bool {
if !expression.contains('*') {

View File

@ -36,10 +36,11 @@ pub struct DuplicateFinder {
infos: Info,
files_with_identical_size: HashMap<u64, Vec<FileEntry>>,
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
allowed_extensions: Vec<String>, // jpg, jpeg, mp4
excluded_items: Vec<String>, // TODO, support for e.g. */.git/*
allowed_extensions: Vec<String>,
excluded_items: Vec<String>,
excluded_directories: Vec<String>,
included_directories: Vec<String>,
recursive_search : bool,
min_file_size: u64,
}
@ -92,6 +93,7 @@ impl DuplicateFinder {
excluded_items: vec![],
excluded_directories: vec![],
included_directories: vec![],
recursive_search: true,
min_file_size: 1024,
allowed_extensions: vec![],
}
@ -116,6 +118,9 @@ impl DuplicateFinder {
self.min_file_size = min_size;
}
/// Enables or disables recursive searching.
///
/// `recursive_search` is stored in the `recursive_search` field. When the
/// field is `false`, the directory scan skips the sub-folders it encounters
/// instead of descending into them.
pub fn set_recursive_search(&mut self, recursive_search: bool) {
    self.recursive_search = recursive_search;
}
pub fn set_excluded_items(&mut self, mut excluded_items: String) {
// let start_time: SystemTime = SystemTime::now();
@ -340,6 +345,10 @@ impl DuplicateFinder {
// continue; // Permissions denied
// }
if !self.recursive_search{
continue;
}
let mut is_excluded_dir = false;
next_folder = "".to_owned() + &current_folder + &entry_data.file_name().into_string().unwrap() + "/";
@ -509,6 +518,7 @@ impl DuplicateFinder {
/// Prints debugging information about the search settings and the collected statistics
fn debug_print(&self) {
println!("---------------DEBUG PRINT---------------");
println!("Recursive search - {}", self.recursive_search.to_string());
println!("Number of checked files - {}", self.infos.number_of_checked_files);
println!("Number of checked folders - {}", self.infos.number_of_checked_folders);
println!("Number of ignored files - {}", self.infos.number_of_ignored_files);
@ -594,6 +604,7 @@ impl DuplicateFinder {
let mut optimized_included: Vec<String> = Vec::<String>::new();
let mut optimized_excluded: Vec<String> = Vec::<String>::new();
// Remove duplicated entries like: "/", "/"
self.excluded_directories.sort();
@ -603,45 +614,47 @@ impl DuplicateFinder {
self.included_directories.dedup();
// Optimize for duplicated included directories - "/", "/home". "/home/Pulpit" to "/"
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;
for ed_help in &self.excluded_directories {
if ed_checked == ed_help {
// We checking same element
continue;
if self.recursive_search { // This is only point which can't be done when recursive search is disabled.
let mut is_inside: bool;
for ed_checked in &self.excluded_directories {
is_inside = false;
for ed_help in &self.excluded_directories {
if ed_checked == ed_help {
// We checking same element
continue;
}
if ed_checked.starts_with(ed_help) {
is_inside = true;
break;
}
}
if ed_checked.starts_with(ed_help) {
is_inside = true;
break;
if !is_inside {
optimized_excluded.push(ed_checked.to_string());
}
}
if !is_inside {
optimized_excluded.push(ed_checked.to_string());
}
}
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
for id_checked in &self.included_directories {
is_inside = false;
for id_help in &self.included_directories {
if id_checked == id_help {
// We checking same element
continue;
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
}
}
if id_checked.starts_with(id_help) {
is_inside = true;
break;
if !is_inside {
optimized_included.push(id_checked.to_string());
}
}
if !is_inside {
optimized_included.push(id_checked.to_string());
}
}
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
self.included_directories = optimized_included;
optimized_included = Vec::<String>::new();
self.excluded_directories = optimized_excluded;
optimized_excluded = Vec::<String>::new();
}
// Remove include directories which are inside any exclude directory
for id in &self.included_directories {

View File

@ -189,7 +189,7 @@ impl EmptyFolder {
Common::print_time(start_time, SystemTime::now(), "check_for_empty_folder".to_string());
}
/// Deletes earlier finded empty folders
/// Deletes earlier found empty folders
fn delete_empty_folders(&self) {
let start_time: SystemTime = SystemTime::now();
let mut errors: Vec<String> = Vec::new();

View File

@ -6,6 +6,7 @@ use czkawka_core::duplicate::{CheckingMethod, DeleteMethod};
use duplicate::DuplicateFinder;
use gtk::prelude::*;
use gtk::{Builder, TreeView, TreeViewColumn};
use std::collections::HashMap;
fn main() {
gtk::init().expect("Failed to initialize GTK.");
@ -18,9 +19,23 @@ fn main() {
let main_window: gtk::Window = builder.get_object("main_window").unwrap();
main_window.show_all();
// Notepad Buttons
// Buttons State
let mut hashmap_buttons : HashMap<&str,bool> = Default::default();
let mut buttons_state : HashMap<&str,HashMap<&str,bool>> = Default::default();
for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
hashmap_buttons.insert(i,false);
}
// Buttons
for i in ["buttons_search","buttons_stop","buttons_resume","buttons_pause","buttons_select","buttons_delete","buttons_save"].iter() {
buttons_state.insert(i,hashmap_buttons.clone());
}
// buttons_state.insert(hashmap_buttons.clone());
// GUI Notepad Buttons
// GUI Buttons
let buttons_search: gtk::Button = builder.get_object("buttons_search").unwrap();
let buttons_stop: gtk::Button = builder.get_object("buttons_stop").unwrap();
let buttons_resume: gtk::Button = builder.get_object("buttons_resume").unwrap();
@ -99,10 +114,13 @@ fn main() {
let mut df = DuplicateFinder::new();
df.set_include_directory("/home/rafal/Pulpit".to_owned());
df.set_exclude_directory("/rafa/".to_owned());
df.set_excluded_items("".to_owned());
df.set_allowed_extensions("".to_owned());
df.set_min_file_size(1000); // TODO Change to proper value
df.find_duplicates(&CheckingMethod::HASH, &DeleteMethod::None);
//let infos = df.get_infos();
info_entry.set_text("Found TODO duplicates files in TODO groups which took TODO GB/MB/KB/B");
buttons_delete.show();
}
"notebook_empty_folders_label" => {}
e => panic!("Not existent {}", e),