
Add some code

Rafał Mikrut 2023-03-29 18:41:55 +02:00
parent 3bd24918ba
commit 9728b43580
10 changed files with 244 additions and 42 deletions

Cargo.lock (generated)

@@ -1722,13 +1722,12 @@ checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "libheif-rs"
version = "0.19.2"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "749fcebc2069f334599304546cfa891c30be08cdf4f358ed984a2c71c5e0031f"
checksum = "37d09b0d2d69da084eeeda9534662bc6b6096fbce3f307149750c0e572ad0ccd"
dependencies = [
"enumn",
"four-cc",
"libc",
"libheif-sys",
]

@@ -584,6 +584,7 @@ fn parse_checking_method(src: &str) -> Result<CheckingMethod, &'static str> {
match src.to_ascii_lowercase().as_str() {
"name" => Ok(CheckingMethod::Name),
"size" => Ok(CheckingMethod::Size),
"size_name" => Ok(CheckingMethod::SizeName),
"hash" => Ok(CheckingMethod::Hash),
_ => Err("Couldn't parse the search method (allowed: NAME, SIZE_NAME, SIZE, HASH)"),
}

@@ -69,7 +69,7 @@ infer = "0.13.0"
num_cpus = "1.15.0"
# Heif/Heic
libheif-rs = { version = "0.19.2", optional = true }
libheif-rs = { version = "0.18.0", optional = true } # TODO update this, decode function is missing in this version
anyhow = { version = "1.0", optional = true }
state="0.5.3"

@@ -30,6 +30,7 @@ pub struct ProgressData {
pub enum CheckingMethod {
None,
Name,
SizeName,
Size,
Hash,
}

@@ -67,6 +67,8 @@ pub struct Info {
pub number_of_duplicated_files_by_hash: usize,
pub number_of_groups_by_name: usize,
pub number_of_duplicated_files_by_name: usize,
pub number_of_groups_by_size_name: usize,
pub number_of_duplicated_files_by_size_name: usize,
pub lost_space_by_size: u64,
pub lost_space_by_hash: u64,
}
@@ -152,7 +154,13 @@ impl DuplicateFinder {
match self.check_method {
CheckingMethod::Name => {
self.stopped_search = !self.check_files_size_name(stop_receiver, progress_sender); // TODO restore this to name
self.stopped_search = !self.check_files_name(stop_receiver, progress_sender); // TODO restore this to name
if self.stopped_search {
return;
}
}
CheckingMethod::SizeName => {
self.stopped_search = !self.check_files_size_name(stop_receiver, progress_sender);
if self.stopped_search {
return;
}
@@ -225,6 +233,11 @@ impl DuplicateFinder {
&self.files_with_identical_size
}
#[must_use]
pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<FileEntry>> {
&self.files_with_identical_size_names
}
#[must_use]
pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<FileEntry>>> {
&self.files_with_identical_hashes
@@ -323,6 +336,11 @@ impl DuplicateFinder {
&self.files_with_identical_size_referenced
}
#[must_use]
pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)> {
&self.files_with_identical_size_names_referenced
}
fn check_files_name(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
let group_by_func = if self.case_sensitive_name_comparison {
|fe: &FileEntry| fe.path.file_name().unwrap().to_string_lossy().to_string()
@@ -484,12 +502,13 @@ impl DuplicateFinder {
})
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
for (fe, vec_fe) in vec {
self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe));
self.files_with_identical_size_names_referenced
.insert((fe.size, fe.path.to_string_lossy().to_string()), (fe, vec_fe));
}
}
self.calculate_name_stats(); // TODO change this
self.calculate_size_name_stats();
Common::print_time(start_time, SystemTime::now(), "check_files_name");
Common::print_time(start_time, SystemTime::now(), "check_files_size_name");
true
}
DirTraversalResult::SuccessFolders { .. } => {
@@ -499,6 +518,22 @@ impl DuplicateFinder {
}
}
fn calculate_size_name_stats(&mut self) {
if self.use_reference_folders {
for ((size, _name), (_fe, vector)) in &self.files_with_identical_size_names_referenced {
self.information.number_of_duplicated_files_by_size_name += vector.len();
self.information.number_of_groups_by_size_name += 1;
self.information.lost_space_by_size += (vector.len() as u64) * size;
}
} else {
for ((size, _name), vector) in &self.files_with_identical_size_names {
self.information.number_of_duplicated_files_by_size_name += vector.len() - 1;
self.information.number_of_groups_by_size_name += 1;
self.information.lost_space_by_size += (vector.len() as u64 - 1) * size;
}
}
}
/// Reads each file's length and groups files into buckets (one bucket per length)
/// Buckets that contain only a single entry are removed
fn check_files_size(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
@@ -1036,6 +1071,11 @@ impl DuplicateFinder {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
}
}
CheckingMethod::SizeName => {
for vector in self.files_with_identical_size_names.values() {
let _tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
}
}
CheckingMethod::Hash => {
for vector_vectors in self.files_with_identical_hashes.values() {
for vector in vector_vectors.iter() {
@@ -1169,6 +1209,30 @@ impl SaveResults for DuplicateFinder {
write!(writer, "Not found any files with same names.").unwrap();
}
}
CheckingMethod::SizeName => {
if !self.files_with_identical_size_names.is_empty() {
writeln!(
writer,
"-------------------------------------------------Files with same size and names-------------------------------------------------"
)
.unwrap();
writeln!(
writer,
"Found {} files in {} groups with same size and name(may have different content)",
self.information.number_of_duplicated_files_by_size_name, self.information.number_of_groups_by_size_name,
)
.unwrap();
for ((size, name), vector) in self.files_with_identical_size_names.iter().rev() {
writeln!(writer, "Name - {}, {} - {} files ", name, format_size(*size, BINARY), vector.len()).unwrap();
for j in vector {
writeln!(writer, "{}", j.path.display()).unwrap();
}
writeln!(writer).unwrap();
}
} else {
write!(writer, "Not found any files with same size and names.").unwrap();
}
}
CheckingMethod::Size => {
if !self.files_with_identical_size.is_empty() {
writeln!(
@@ -1253,6 +1317,20 @@ impl PrintResults for DuplicateFinder {
println!();
}
}
CheckingMethod::SizeName => {
for i in &self.files_with_identical_size_names {
number_of_files += i.1.len() as u64;
number_of_groups += 1;
}
println!("Found {number_of_files} files in {number_of_groups} groups with same size and name(may have different content)",);
for ((size, name), vector) in &self.files_with_identical_size_names {
println!("Name - {}, {} - {} files ", name, format_size(*size, BINARY), vector.len());
for j in vector {
println!("{}", j.path.display());
}
println!();
}
}
CheckingMethod::Hash => {
for vector in self.files_with_identical_hashes.values() {
for j in vector {

@@ -28,6 +28,7 @@ duplicate_case_sensitive_name_tooltip =
Disabling such option will group names without checking if each letter is same size e.g. żoŁD <-> Żołd
duplicate_mode_size_name_combo_box = Size and Name
duplicate_mode_name_combo_box = Name
duplicate_mode_size_combo_box = Size
duplicate_mode_hash_combo_box = Hash
@@ -447,6 +448,7 @@ progress_scanning_music_tags_end = Comparing tags of {$file_checked}/{$all_files
progress_scanning_music_tags = Reading tags of {$file_checked}/{$all_files} music file
progress_scanning_empty_folders = Scanning {$folder_number} folder
progress_scanning_size = Scanning size of {$file_number} file
progress_scanning_size_name = Scanning name and size of {$file_number} file
progress_scanning_name = Scanning name of {$file_number} file
progress_analyzed_partial_hash = Analyzed partial hash of {$file_checked}/{$all_files} files
progress_analyzed_full_hash = Analyzed full hash of {$file_checked}/{$all_files} files

@@ -99,52 +99,49 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
match df.get_check_method() {
CheckingMethod::Name => {
duplicates_number = information.number_of_duplicated_files_by_name;
// duplicates_size = 0;
duplicates_size = 0;
duplicates_group = information.number_of_groups_by_name;
entry_info.set_text(
flg!(
"compute_found_duplicates_name",
generate_translation_hashmap(vec![("number_files", duplicates_number.to_string()), ("number_groups", duplicates_group.to_string())])
)
.as_str(),
);
}
CheckingMethod::Hash => {
duplicates_number = information.number_of_duplicated_files_by_hash;
duplicates_size = information.lost_space_by_hash;
duplicates_group = information.number_of_groups_by_hash;
entry_info.set_text(
flg!(
"compute_found_duplicates_hash_size",
generate_translation_hashmap(vec![
("number_files", duplicates_number.to_string()),
("number_groups", duplicates_group.to_string()),
("size", format_size(duplicates_size, BINARY))
])
)
.as_str(),
);
}
CheckingMethod::Size => {
duplicates_number = information.number_of_duplicated_files_by_size;
duplicates_size = information.lost_space_by_size;
duplicates_group = information.number_of_groups_by_size;
entry_info.set_text(
flg!(
"compute_found_duplicates_hash_size",
generate_translation_hashmap(vec![
("number_files", duplicates_number.to_string()),
("number_groups", duplicates_group.to_string()),
("size", format_size(duplicates_size, BINARY))
])
)
.as_str(),
);
}
CheckingMethod::SizeName => {
duplicates_number = information.number_of_duplicated_files_by_size_name;
duplicates_size = information.lost_space_by_size;
duplicates_group = information.number_of_groups_by_size_name;
}
CheckingMethod::None => {
panic!();
}
}
if duplicates_size == 0 {
entry_info.set_text(
flg!(
"compute_found_duplicates_name",
generate_translation_hashmap(vec![("number_files", duplicates_number.to_string()), ("number_groups", duplicates_group.to_string())])
)
.as_str(),
);
} else {
entry_info.set_text(
flg!(
"compute_found_duplicates_hash_size",
generate_translation_hashmap(vec![
("number_files", duplicates_number.to_string()),
("number_groups", duplicates_group.to_string()),
("size", format_size(duplicates_size, BINARY))
])
)
.as_str(),
);
}
// Create GUI
{
@@ -335,6 +332,65 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
}
}
}
CheckingMethod::SizeName => {
let btreemap = df.get_files_with_identical_size_names_referenced();
for (_size, (base_file_entry, vector)) in btreemap.iter().rev() {
// Sort
let vector = if vector.len() >= 2 {
let mut vector = vector.clone();
vector.sort_unstable_by_key(|e| {
let t = split_path(e.path.as_path());
(t.0, t.1)
});
vector
} else {
vector.clone()
};
// HEADER
let (directory, file) = split_path(&base_file_entry.path);
let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [
(ColumnsDuplicates::ActivatableSelectButton as u32, &false),
(ColumnsDuplicates::SelectionButton as u32, &false),
(ColumnsDuplicates::Size as u32, (&format_size(base_file_entry.size, BINARY))),
(ColumnsDuplicates::SizeAsBytes as u32, &base_file_entry.size),
(ColumnsDuplicates::Name as u32, &file),
(ColumnsDuplicates::Path as u32, &directory),
(
ColumnsDuplicates::Modification as u32,
&(NaiveDateTime::from_timestamp_opt(base_file_entry.modified_date as i64, 0).unwrap().to_string()),
),
(ColumnsDuplicates::ModificationAsSecs as u32, &(base_file_entry.modified_date)),
(ColumnsDuplicates::Color as u32, &(HEADER_ROW_COLOR.to_string())),
(ColumnsDuplicates::IsHeader as u32, &true),
(ColumnsDuplicates::TextColor as u32, &(TEXT_COLOR.to_string())),
];
// MEAT
list_store.set(&list_store.append(), &values);
for entry in vector {
let (directory, file) = split_path(&entry.path);
let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [
(ColumnsDuplicates::ActivatableSelectButton as u32, &true),
(ColumnsDuplicates::SelectionButton as u32, &false),
(ColumnsDuplicates::Size as u32, (&format_size(entry.size, BINARY))),
(ColumnsDuplicates::SizeAsBytes as u32, &entry.size),
(ColumnsDuplicates::Name as u32, &file),
(ColumnsDuplicates::Path as u32, &directory),
(
ColumnsDuplicates::Modification as u32,
&(NaiveDateTime::from_timestamp_opt(entry.modified_date as i64, 0).unwrap().to_string()),
),
(ColumnsDuplicates::ModificationAsSecs as u32, &(entry.modified_date)),
(ColumnsDuplicates::Color as u32, &(MAIN_ROW_COLOR.to_string())),
(ColumnsDuplicates::IsHeader as u32, &false),
(ColumnsDuplicates::TextColor as u32, &(TEXT_COLOR.to_string())),
];
list_store.set(&list_store.append(), &values);
}
}
}
CheckingMethod::None => {
panic!();
}
@@ -507,6 +563,58 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
}
}
}
CheckingMethod::SizeName => {
let btreemap = df.get_files_sorted_by_size_name();
for (_size, vector) in btreemap.iter().rev() {
// Sort
let vector = if vector.len() >= 2 {
let mut vector = vector.clone();
vector.sort_unstable_by_key(|e| {
let t = split_path(e.path.as_path());
(t.0, t.1)
});
vector
} else {
vector.clone()
};
let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [
(ColumnsDuplicates::ActivatableSelectButton as u32, &false),
(ColumnsDuplicates::SelectionButton as u32, &false),
(ColumnsDuplicates::Size as u32, (&String::new())),
(ColumnsDuplicates::SizeAsBytes as u32, &0),
(ColumnsDuplicates::Name as u32, (&String::new())),
(ColumnsDuplicates::Path as u32, (&String::new())),
(ColumnsDuplicates::Modification as u32, &String::new()), // No text in 3 column
(ColumnsDuplicates::ModificationAsSecs as u32, &(0)), // Not used here
(ColumnsDuplicates::Color as u32, &(HEADER_ROW_COLOR.to_string())),
(ColumnsDuplicates::IsHeader as u32, &true),
(ColumnsDuplicates::TextColor as u32, &(TEXT_COLOR.to_string())),
];
list_store.set(&list_store.append(), &values);
for entry in vector {
let (directory, file) = split_path(&entry.path);
let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [
(ColumnsDuplicates::ActivatableSelectButton as u32, &true),
(ColumnsDuplicates::SelectionButton as u32, &false),
(ColumnsDuplicates::Size as u32, (&format_size(entry.size, BINARY))),
(ColumnsDuplicates::SizeAsBytes as u32, &entry.size),
(ColumnsDuplicates::Name as u32, &file),
(ColumnsDuplicates::Path as u32, &directory),
(
ColumnsDuplicates::Modification as u32,
&(NaiveDateTime::from_timestamp_opt(entry.modified_date as i64, 0).unwrap().to_string()),
),
(ColumnsDuplicates::ModificationAsSecs as u32, &(entry.modified_date)),
(ColumnsDuplicates::Color as u32, &(MAIN_ROW_COLOR.to_string())),
(ColumnsDuplicates::IsHeader as u32, &false),
(ColumnsDuplicates::TextColor as u32, &(TEXT_COLOR.to_string())),
];
list_store.set(&list_store.append(), &values);
}
}
}
CheckingMethod::None => {
panic!();
}

@@ -109,6 +109,16 @@ pub fn connect_progress_window(
));
taskbar_state.borrow().set_progress_state(TBPF_INDETERMINATE);
}
common_dir_traversal::CheckingMethod::SizeName => {
label_stage.show();
grid_progress_stages.hide();
label_stage.set_text(&flg!(
"progress_scanning_size_name",
generate_translation_hashmap(vec![("file_number", item.entries_checked.to_string())])
));
taskbar_state.borrow().set_progress_state(TBPF_INDETERMINATE);
}
common_dir_traversal::CheckingMethod::Size => {
label_stage.show();
grid_progress_stages.hide();

@@ -549,9 +549,8 @@ impl GuiMainNotebook {
CheckingMethod::Hash => flg!("duplicate_mode_hash_combo_box"),
CheckingMethod::Size => flg!("duplicate_mode_size_combo_box"),
CheckingMethod::Name => flg!("duplicate_mode_name_combo_box"),
_ => {
panic!()
}
CheckingMethod::SizeName => flg!("duplicate_mode_size_name_combo_box"),
CheckingMethod::None => panic!(),
};
self.combo_box_duplicate_check_method.append_text(&text);
}

@@ -29,7 +29,7 @@ pub struct CheckMethodStruct {
pub check_method: CheckingMethod,
}
pub const DUPLICATES_CHECK_METHOD_COMBO_BOX: [CheckMethodStruct; 3] = [
pub const DUPLICATES_CHECK_METHOD_COMBO_BOX: [CheckMethodStruct; 4] = [
CheckMethodStruct {
eng_name: "Hash",
check_method: CheckingMethod::Hash,
@@ -42,6 +42,10 @@ pub const DUPLICATES_CHECK_METHOD_COMBO_BOX: [CheckMethodStruct; 3] = [
eng_name: "Name",
check_method: CheckingMethod::Name,
},
CheckMethodStruct {
eng_name: "Size and Name",
check_method: CheckingMethod::SizeName,
},
];
#[derive(Copy, Clone)]