Commit 3bd24918ba (parent 4fdb1e3358)
Rafał Mikrut, 2023-03-28 23:05:58 +02:00
7 changed files with 487 additions and 291 deletions

Cargo.lock (generated): 622 changes, diff suppressed because it is too large

@@ -10,7 +10,7 @@ homepage = "https://github.com/qarmin/czkawka"
repository = "https://github.com/qarmin/czkawka"
[dependencies]
clap = { version = "4.1", features = ["derive"] }
clap = { version = "4.2", features = ["derive"] }
# For enum types
image_hasher = "1.1.2"


@@ -20,16 +20,16 @@ directories-next = "2.0.0"
# Needed by similar images
image_hasher = "1.1.2"
bk-tree = "0.4.0"
image = "0.24.5"
bk-tree = "0.5.0"
image = "0.24.6"
hamming = "0.1.3"
# Needed by same music
bitflags = "1.3.2"
bitflags = "2.0.2"
lofty = "0.11.0"
# Futures - needed by async progress sender
futures = "0.3.26"
futures = "0.3.27"
# Needed by broken files
zip = { version = "0.6.4", features = ["aes-crypto", "bzip2", "deflate", "time"], default-features = false }
@@ -54,8 +54,8 @@ serde_json = "1.0"
# Language
i18n-embed = { version = "0.13.8", features = ["fluent-system", "desktop-requester"] }
i18n-embed-fl = "0.6.5"
rust-embed = "6.6.0"
i18n-embed-fl = "0.6.6"
rust-embed = "6.6.1"
once_cell = "1.17.1"
# Raw image files
@@ -69,7 +69,7 @@ infer = "0.13.0"
num_cpus = "1.15.0"
# Heif/Heic
libheif-rs = { version = "0.18.0", optional = true }
libheif-rs = { version = "0.19.2", optional = true }
anyhow = { version = "1.0", optional = true }
state="0.5.3"


@@ -59,6 +59,7 @@ pub enum TypeOfFile {
}
bitflags! {
+#[derive(PartialEq, Copy, Clone)]
pub struct CheckedTypes : u32 {
const NONE = 0;
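The single line added in this hunk is the derive attribute. bitflags was bumped from 1.3.2 to 2.0.2 above, and in bitflags 2.x the generated flags type no longer gets traits such as PartialEq, Copy and Clone automatically, so they now have to be derived explicitly inside the macro. A minimal, self-contained sketch of that pattern follows; the ExampleTypes name and the IMAGE/VIDEO flags are hypothetical placeholders, not czkawka's real CheckedTypes values.

use bitflags::bitflags;

bitflags! {
    // bitflags 2.x: common traits must be derived explicitly on the flags type.
    #[derive(PartialEq, Copy, Clone)]
    pub struct ExampleTypes: u32 {
        const NONE = 0;
        const IMAGE = 1 << 0;
        const VIDEO = 1 << 1;
    }
}

fn main() {
    let checked = ExampleTypes::IMAGE | ExampleTypes::VIDEO;
    assert!(checked.contains(ExampleTypes::IMAGE));
    assert!(checked != ExampleTypes::NONE);
}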


@@ -81,11 +81,13 @@ impl Info {
pub struct DuplicateFinder {
text_messages: Messages,
information: Info,
-files_with_identical_names: BTreeMap<String, Vec<FileEntry>>, // File Size, File Entry
-files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>, // File Size, File Entry
-files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>, // File Size, next grouped by file size, next grouped by hash
-files_with_identical_names_referenced: BTreeMap<String, (FileEntry, Vec<FileEntry>)>, // File Size, File Entry
-files_with_identical_size_referenced: BTreeMap<u64, (FileEntry, Vec<FileEntry>)>, // File Size, File Entry
+files_with_identical_names: BTreeMap<String, Vec<FileEntry>>, // File Size, File Entry
+files_with_identical_size_names: BTreeMap<(u64, String), Vec<FileEntry>>, // File (Size, Name), File Entry
+files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>, // File Size, File Entry
+files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>, // File Size, next grouped by file size, next grouped by hash
+files_with_identical_names_referenced: BTreeMap<String, (FileEntry, Vec<FileEntry>)>, // File Size, File Entry
+files_with_identical_size_names_referenced: BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)>, // File (Size, Name), File Entry
+files_with_identical_size_referenced: BTreeMap<u64, (FileEntry, Vec<FileEntry>)>, // File Size, File Entry
files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>>, // File Size, next grouped by file size, next grouped by hash
directories: Directories,
allowed_extensions: Extensions,
@@ -116,8 +118,10 @@ impl DuplicateFinder {
information: Info::new(),
files_with_identical_names: Default::default(),
files_with_identical_size: Default::default(),
+files_with_identical_size_names: Default::default(),
files_with_identical_hashes: Default::default(),
files_with_identical_names_referenced: Default::default(),
+files_with_identical_size_names_referenced: Default::default(),
files_with_identical_size_referenced: Default::default(),
files_with_identical_hashes_referenced: Default::default(),
recursive_search: true,
@@ -148,7 +152,7 @@ impl DuplicateFinder {
match self.check_method {
CheckingMethod::Name => {
-self.stopped_search = !self.check_files_name(stop_receiver, progress_sender);
+self.stopped_search = !self.check_files_size_name(stop_receiver, progress_sender); // TODO restore this to name
if self.stopped_search {
return;
}
@@ -388,18 +392,102 @@
self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe));
}
}
+self.calculate_name_stats();
-if self.use_reference_folders {
-for (_fe, vector) in self.files_with_identical_names_referenced.values() {
-self.information.number_of_duplicated_files_by_name += vector.len();
-self.information.number_of_groups_by_name += 1;
-}
-} else {
-for vector in self.files_with_identical_names.values() {
-self.information.number_of_duplicated_files_by_name += vector.len() - 1;
-self.information.number_of_groups_by_name += 1;
-}
-}
Common::print_time(start_time, SystemTime::now(), "check_files_name");
true
}
DirTraversalResult::SuccessFolders { .. } => {
unreachable!()
}
DirTraversalResult::Stopped => false,
}
}
+fn calculate_name_stats(&mut self) {
+if self.use_reference_folders {
+for (_fe, vector) in self.files_with_identical_names_referenced.values() {
+self.information.number_of_duplicated_files_by_name += vector.len();
+self.information.number_of_groups_by_name += 1;
+}
+} else {
+for vector in self.files_with_identical_names.values() {
+self.information.number_of_duplicated_files_by_name += vector.len() - 1;
+self.information.number_of_groups_by_name += 1;
+}
+}
+}
+fn check_files_size_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
+let group_by_func = if self.case_sensitive_name_comparison {
+|fe: &FileEntry| (fe.size, fe.path.file_name().unwrap().to_string_lossy().to_string())
+} else {
+|fe: &FileEntry| (fe.size, fe.path.file_name().unwrap().to_string_lossy().to_lowercase())
+};
+let result = DirTraversalBuilder::new()
+.root_dirs(self.directories.included_directories.clone())
+.group_by(group_by_func)
+.stop_receiver(stop_receiver)
+.progress_sender(progress_sender)
+.checking_method(CheckingMethod::Name)
+.directories(self.directories.clone())
+.allowed_extensions(self.allowed_extensions.clone())
+.excluded_items(self.excluded_items.clone())
+.recursive_search(self.recursive_search)
+.minimal_file_size(self.minimal_file_size)
+.maximal_file_size(self.maximal_file_size)
+.build()
+.run();
+match result {
+DirTraversalResult::SuccessFiles {
+start_time,
+grouped_file_entries,
+warnings,
+} => {
+self.files_with_identical_size_names = grouped_file_entries;
+self.text_messages.warnings.extend(warnings);
+// Create new BTreeMap without single size entries(files have not duplicates)
+let mut new_map: BTreeMap<(u64, String), Vec<FileEntry>> = Default::default();
+for (name_size, vector) in &self.files_with_identical_size_names {
+if vector.len() > 1 {
+new_map.insert(name_size.clone(), vector.clone());
+}
+}
+self.files_with_identical_size_names = new_map;
+// Reference - only use in size, because later hash will be counted differently
+if self.use_reference_folders {
+let mut btree_map = Default::default();
+mem::swap(&mut self.files_with_identical_size_names, &mut btree_map);
+let reference_directories = self.directories.reference_directories.clone();
+let vec = btree_map
+.into_iter()
+.filter_map(|(_size, vec_file_entry)| {
+let mut files_from_referenced_folders = Vec::new();
+let mut normal_files = Vec::new();
+for file_entry in vec_file_entry {
+if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
+files_from_referenced_folders.push(file_entry);
+} else {
+normal_files.push(file_entry);
+}
+}
+if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
+None
+} else {
+Some((files_from_referenced_folders.pop().unwrap(), normal_files))
+}
+})
+.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
+for (fe, vec_fe) in vec {
+self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe));
+}
+}
+self.calculate_name_stats(); // TODO change this
+Common::print_time(start_time, SystemTime::now(), "check_files_name");
+true
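The new check_files_size_name() above groups duplicate candidates by a (file size, file name) key instead of by name alone, and then discards groups with a single entry. A standalone sketch of just that grouping and filtering step, assuming a simplified FileEntry with only path and size fields (the real czkawka struct carries more data):

use std::collections::BTreeMap;
use std::path::PathBuf;

// Simplified stand-in for czkawka's FileEntry; the real type has more fields.
struct FileEntry {
    path: PathBuf,
    size: u64,
}

// Group files by (size, lowercased file name), then keep only groups with
// more than one entry, mirroring the filtering applied to
// files_with_identical_size_names in the diff above.
fn group_by_size_and_name(files: Vec<FileEntry>) -> BTreeMap<(u64, String), Vec<FileEntry>> {
    let mut groups: BTreeMap<(u64, String), Vec<FileEntry>> = BTreeMap::new();
    for fe in files {
        let name = fe.path.file_name().map(|n| n.to_string_lossy().to_lowercase()).unwrap_or_default();
        groups.entry((fe.size, name)).or_default().push(fe);
    }
    groups.retain(|_, group| group.len() > 1);
    groups
}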


@@ -31,6 +31,7 @@ pub enum DeleteMethod {
}
bitflags! {
+#[derive(PartialEq, Copy, Clone, Debug)]
pub struct MusicSimilarity : u32 {
const NONE = 0;


@@ -10,29 +10,29 @@ homepage = "https://github.com/qarmin/czkawka"
repository = "https://github.com/qarmin/czkawka"
[dependencies]
gdk4 = "0.6.2"
glib = "0.17.2"
gdk4 = "0.6.3"
glib = "0.17.5"
humansize = "2.1.3"
chrono = "0.4.23"
chrono = "0.4.24"
# Used for sending stop signal across threads
crossbeam-channel = "0.5.7"
# To get information about progress
futures = "0.3.26"
futures = "0.3.27"
# For saving/loading config files to specific directories
directories-next = "2.0.0"
# For opening files
open = "3.2.0"
open = "4.0.1"
# To get image preview
image = "0.24.5"
image = "0.24.6"
# To be able to use custom select
regex = "1.7.1"
regex = "1.7.3"
# To get image_hasher types
image_hasher = "1.1.2"
@@ -45,15 +45,15 @@ fs_extra = "1.3.0"
# Language
i18n-embed = { version = "0.13.8", features = ["fluent-system", "desktop-requester"] }
i18n-embed-fl = "0.6.5"
rust-embed = "6.6.0"
i18n-embed-fl = "0.6.6"
rust-embed = "6.6.1"
once_cell = "1.17.1"
[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3.9", features = ["combaseapi", "objbase", "shobjidl_core", "windef", "winerror", "wtypesbase", "winuser"] }
[dependencies.gtk4]
version = "0.6.2"
version = "0.6.4"
default-features = false
features = ["v4_6"]