Loading saving cache improvements (#1072)
* Loading cache * Loading * Loading x2 * Optimization * Cache common * Delete outdated cache * Common * Loading cache/save almost * Simplified a lot of cache concept * Fix regression
This commit is contained in:
parent
edfc8e7b5f
commit
e976d40eee
58
Cargo.lock
generated
58
Cargo.lock
generated
|
@ -129,7 +129,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -247,9 +247,9 @@ checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "byteorder"
|
name = "byteorder"
|
||||||
version = "1.4.3"
|
version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
|
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bzip2"
|
name = "bzip2"
|
||||||
|
@ -397,7 +397,7 @@ dependencies = [
|
||||||
"heck",
|
"heck",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -692,7 +692,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -724,7 +724,7 @@ checksum = "c2ad8cef1d801a4686bfd8919f0b30eac4c8e48968c437a6405ded4fb5272d2b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -990,7 +990,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1191,7 +1191,7 @@ dependencies = [
|
||||||
"proc-macro-error",
|
"proc-macro-error",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1460,7 +1460,7 @@ dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"strsim",
|
"strsim",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
"unic-langid",
|
"unic-langid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1474,7 +1474,7 @@ dependencies = [
|
||||||
"i18n-config",
|
"i18n-config",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1730,9 +1730,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libm"
|
name = "libm"
|
||||||
version = "0.2.7"
|
version = "0.2.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
|
checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "linked-hash-map"
|
name = "linked-hash-map"
|
||||||
|
@ -1793,7 +1793,7 @@ checksum = "764b60e1ddd07e5665a6a17636a95cd7d8f3b86c73503a69c32979d05f72f3cf"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2238,9 +2238,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.67"
|
version = "1.0.68"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
|
checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
@ -2442,7 +2442,7 @@ dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"rust-embed-utils",
|
"rust-embed-utils",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -2520,9 +2520,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustix"
|
name = "rustix"
|
||||||
version = "0.38.15"
|
version = "0.38.17"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d2f9da0cbd88f9f09e7814e388301c8414c51c62aa6ce1e4b5c551d49d96e531"
|
checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.4.0",
|
"bitflags 2.4.0",
|
||||||
"errno",
|
"errno",
|
||||||
|
@ -2603,7 +2603,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -2662,9 +2662,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sharded-slab"
|
name = "sharded-slab"
|
||||||
version = "0.1.6"
|
version = "0.1.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c1b21f559e07218024e7e9f90f96f601825397de0e25420135f7f952453fed0b"
|
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
@ -2955,9 +2955,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.37"
|
version = "2.0.38"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8"
|
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
|
@ -3022,7 +3022,7 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -3174,7 +3174,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -3416,7 +3416,7 @@ dependencies = [
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -3438,7 +3438,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.37",
|
"syn 2.0.38",
|
||||||
"wasm-bindgen-backend",
|
"wasm-bindgen-backend",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
@ -3629,9 +3629,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winnow"
|
name = "winnow"
|
||||||
version = "0.5.15"
|
version = "0.5.16"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7c2e3184b9c4e92ad5167ca73039d0c42476302ab603e2fec4487511f38ccefc"
|
checksum = "037711d82167854aff2018dfd193aa0fef5370f456732f0d5a0c59b0f1b4b907"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
|
@ -17,7 +17,7 @@ use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_di
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
|
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct FileEntry {
|
pub struct FileEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::fs::{DirEntry, File, Metadata};
|
use std::fs::{DirEntry, File, Metadata};
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::io::{BufReader, BufWriter};
|
use std::io::BufWriter;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
@ -18,11 +18,11 @@ use rayon::prelude::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::common::{
|
use crate::common::{
|
||||||
check_folder_children, create_crash_message, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
check_folder_children, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
||||||
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
|
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
|
||||||
};
|
};
|
||||||
|
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
||||||
use crate::common_messages::Messages;
|
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
|
|
||||||
|
@ -40,6 +40,17 @@ pub struct FileEntry {
|
||||||
pub type_of_file: TypeOfFile,
|
pub type_of_file: TypeOfFile,
|
||||||
pub error_string: String,
|
pub error_string: String,
|
||||||
}
|
}
|
||||||
|
impl ResultEntry for FileEntry {
|
||||||
|
fn get_path(&self) -> &Path {
|
||||||
|
&self.path
|
||||||
|
}
|
||||||
|
fn get_modified_date(&self) -> u64 {
|
||||||
|
self.modified_date
|
||||||
|
}
|
||||||
|
fn get_size(&self) -> u64 {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
|
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
|
||||||
pub enum TypeOfFile {
|
pub enum TypeOfFile {
|
||||||
|
@ -218,11 +229,8 @@ impl BrokenFiles {
|
||||||
}
|
}
|
||||||
|
|
||||||
let type_of_file = check_extension_availability(&file_name_lowercase);
|
let type_of_file = check_extension_availability(&file_name_lowercase);
|
||||||
if type_of_file == TypeOfFile::Unknown {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
if !check_extension_allowed(&type_of_file, &self.checked_types) {
|
if !check_if_file_extension_is_allowed(&type_of_file, &self.checked_types) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,26 +357,15 @@ impl BrokenFiles {
|
||||||
let files_to_check = mem::take(&mut self.files_to_check);
|
let files_to_check = mem::take(&mut self.files_to_check);
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
loaded_hash_map = match load_cache_from_file(&mut self.common_data.text_messages, self.common_data.delete_outdated_cache) {
|
let (messages, loaded_items) = load_cache_from_file_generalized_by_path::<FileEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
|
||||||
Some(t) => t,
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
None => Default::default(),
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
};
|
|
||||||
|
|
||||||
for (name, file_entry) in files_to_check {
|
for (name, file_entry) in files_to_check {
|
||||||
let checked_extension = check_extension_allowed(&file_entry.type_of_file, &self.checked_types); // Only broken
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
||||||
|
records_already_cached.insert(name.clone(), cached_file_entry.clone());
|
||||||
#[allow(clippy::if_same_then_else)]
|
|
||||||
if checked_extension && !loaded_hash_map.contains_key(&name) {
|
|
||||||
// If loaded data doesn't contains current info
|
|
||||||
non_cached_files_to_check.insert(name, file_entry.clone());
|
|
||||||
} else if checked_extension && file_entry.size != loaded_hash_map.get(&name).unwrap().size
|
|
||||||
|| file_entry.modified_date != loaded_hash_map.get(&name).unwrap().modified_date
|
|
||||||
{
|
|
||||||
// When size or modification date of image changed, then it is clear that is different image
|
|
||||||
non_cached_files_to_check.insert(name, file_entry);
|
|
||||||
} else {
|
} else {
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
non_cached_files_to_check.insert(name, file_entry);
|
||||||
records_already_cached.insert(name.clone(), loaded_hash_map.get(&name).unwrap().clone());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -440,7 +437,9 @@ impl BrokenFiles {
|
||||||
for (_name, file_entry) in loaded_hash_map {
|
for (_name, file_entry) in loaded_hash_map {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
save_cache_to_file(&all_results, &mut self.common_data.text_messages, self.common_data.save_also_as_json);
|
|
||||||
|
let messages = save_cache_to_file_generalized(&get_broken_files_cache_file(), &all_results, self.common_data.save_also_as_json, 0);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
}
|
}
|
||||||
debug!("save_to_cache - end");
|
debug!("save_to_cache - end");
|
||||||
}
|
}
|
||||||
|
@ -536,84 +535,6 @@ impl PrintResults for BrokenFiles {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_cache_to_file(old_hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
|
|
||||||
let mut hashmap: BTreeMap<String, FileEntry> = Default::default();
|
|
||||||
for (path, fe) in old_hashmap {
|
|
||||||
if fe.size > 1024 {
|
|
||||||
hashmap.insert(path.clone(), fe.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let hashmap = &hashmap;
|
|
||||||
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
|
|
||||||
{
|
|
||||||
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
|
||||||
if let Err(e) = bincode::serialize_into(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if save_also_as_json {
|
|
||||||
if let Some(file_handler_json) = file_handler_json {
|
|
||||||
let writer = BufWriter::new(file_handler_json);
|
|
||||||
if let Err(e) = serde_json::to_writer(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn load_cache_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), false, true, &mut text_messages.warnings) {
|
|
||||||
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry>;
|
|
||||||
if let Some(file_handler) = file_handler {
|
|
||||||
let reader = BufReader::new(file_handler);
|
|
||||||
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
|
||||||
hashmap_loaded_entries = match serde_json::from_reader(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if delete_outdated_cache {
|
|
||||||
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_cache_file() -> String {
|
|
||||||
"cache_broken_files.bin".to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
|
fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
|
||||||
if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
||||||
TypeOfFile::Image
|
TypeOfFile::Image
|
||||||
|
@ -628,7 +549,7 @@ fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_extension_allowed(type_of_file: &TypeOfFile, checked_types: &CheckedTypes) -> bool {
|
fn check_if_file_extension_is_allowed(type_of_file: &TypeOfFile, checked_types: &CheckedTypes) -> bool {
|
||||||
((*type_of_file == TypeOfFile::Image) && ((*checked_types & CheckedTypes::IMAGE) == CheckedTypes::IMAGE))
|
((*type_of_file == TypeOfFile::Image) && ((*checked_types & CheckedTypes::IMAGE) == CheckedTypes::IMAGE))
|
||||||
|| ((*type_of_file == TypeOfFile::PDF) && ((*checked_types & CheckedTypes::PDF) == CheckedTypes::PDF))
|
|| ((*type_of_file == TypeOfFile::PDF) && ((*checked_types & CheckedTypes::PDF) == CheckedTypes::PDF))
|
||||||
|| ((*type_of_file == TypeOfFile::ArchiveZip) && ((*checked_types & CheckedTypes::ARCHIVE) == CheckedTypes::ARCHIVE))
|
|| ((*type_of_file == TypeOfFile::ArchiveZip) && ((*checked_types & CheckedTypes::ARCHIVE) == CheckedTypes::ARCHIVE))
|
||||||
|
|
237
czkawka_core/src/common_cache.rs
Normal file
237
czkawka_core/src/common_cache.rs
Normal file
|
@ -0,0 +1,237 @@
|
||||||
|
use crate::common;
|
||||||
|
use crate::common_messages::Messages;
|
||||||
|
use crate::common_traits::ResultEntry;
|
||||||
|
use crate::duplicate::HashType;
|
||||||
|
use crate::similar_images::{convert_algorithm_to_string, convert_filters_to_string};
|
||||||
|
use image::imageops::FilterType;
|
||||||
|
use image_hasher::HashAlg;
|
||||||
|
use log::debug;
|
||||||
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
use std::io::{BufReader, BufWriter};
|
||||||
|
|
||||||
|
/// Returns the name of the binary cache file used by the broken files tool.
pub fn get_broken_files_cache_file() -> String {
    String::from("cache_broken_files_61.bin")
}
|
||||||
|
|
||||||
|
pub fn get_similar_images_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
|
||||||
|
format!(
|
||||||
|
"cache_similar_images_{}_{}_{}_61.bin",
|
||||||
|
hash_size,
|
||||||
|
convert_algorithm_to_string(hash_alg),
|
||||||
|
convert_filters_to_string(image_filter),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the name of the binary cache file used by the similar videos tool.
pub fn get_similar_videos_cache_file() -> String {
    String::from("cache_similar_videos_61.bin")
}
|
||||||
|
/// Returns the cache file name for the same-music tool.
///
/// Tag-based checking (`checking_tags == true`) and fingerprint-based checking use
/// separate cache files.
pub fn get_similar_music_cache_file(checking_tags: bool) -> &'static str {
    match checking_tags {
        true => "cache_same_music_tags_61.bin",
        false => "cache_same_music_fingerprints_61.bin",
    }
}
|
||||||
|
|
||||||
|
pub fn get_duplicate_cache_file(type_of_hash: &HashType, is_prehash: bool) -> String {
|
||||||
|
let prehash_str = if is_prehash { "_prehash" } else { "" };
|
||||||
|
format!("cache_duplicates_{type_of_hash:?}{prehash_str}_61.bin")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn save_cache_to_file_generalized<T>(cache_file_name: &str, hashmap: &BTreeMap<String, T>, save_also_as_json: bool, minimum_file_size: u64) -> Messages
|
||||||
|
where
|
||||||
|
T: Serialize + ResultEntry + Sized + Send + Sync,
|
||||||
|
{
|
||||||
|
debug!("Saving cache to file {} (or also json alternative) - {} results", cache_file_name, hashmap.len());
|
||||||
|
let mut text_messages = Messages::new();
|
||||||
|
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
|
||||||
|
common::open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings)
|
||||||
|
{
|
||||||
|
let hashmap_to_save = hashmap.values().filter(|t| t.get_size() >= minimum_file_size).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
{
|
||||||
|
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
||||||
|
if let Err(e) = bincode::serialize_into(writer, &hashmap_to_save) {
|
||||||
|
text_messages
|
||||||
|
.warnings
|
||||||
|
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
||||||
|
debug!("Failed to save cache to file {:?}", cache_file);
|
||||||
|
return text_messages;
|
||||||
|
}
|
||||||
|
debug!("Saved binary to file {:?}", cache_file);
|
||||||
|
}
|
||||||
|
if save_also_as_json {
|
||||||
|
if let Some(file_handler_json) = file_handler_json {
|
||||||
|
let writer = BufWriter::new(file_handler_json);
|
||||||
|
if let Err(e) = serde_json::to_writer(writer, &hashmap_to_save) {
|
||||||
|
text_messages
|
||||||
|
.warnings
|
||||||
|
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
|
||||||
|
debug!("Failed to save cache to file {:?}", cache_file_json);
|
||||||
|
return text_messages;
|
||||||
|
}
|
||||||
|
debug!("Saved json to file {:?}", cache_file_json);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
||||||
|
} else {
|
||||||
|
debug!("Failed to save cache to file {cache_file_name} because not exists");
|
||||||
|
}
|
||||||
|
text_messages
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_cache_from_file_generalized_by_path<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<BTreeMap<String, T>>)
|
||||||
|
where
|
||||||
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
||||||
|
{
|
||||||
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, used_files);
|
||||||
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
||||||
|
return (text_messages, None);
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!("Converting cache Vec<T> into BTreeMap<String, T>");
|
||||||
|
let map_loaded_entries: BTreeMap<String, T> = vec_loaded_entries
|
||||||
|
.into_iter()
|
||||||
|
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
|
||||||
|
.collect();
|
||||||
|
debug!("Converted cache Vec<T> into BTreeMap<String, T>");
|
||||||
|
|
||||||
|
(text_messages, Some(map_loaded_entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_cache_from_file_generalized_by_size<T>(
|
||||||
|
cache_file_name: &str,
|
||||||
|
delete_outdated_cache: bool,
|
||||||
|
cache_not_converted: &BTreeMap<u64, Vec<T>>,
|
||||||
|
) -> (Messages, Option<BTreeMap<u64, Vec<T>>>)
|
||||||
|
where
|
||||||
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
||||||
|
{
|
||||||
|
debug!("Converting cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
||||||
|
let mut used_files: BTreeMap<String, T> = Default::default();
|
||||||
|
for file_entry in cache_not_converted.values().flatten() {
|
||||||
|
used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone());
|
||||||
|
}
|
||||||
|
debug!("Converted cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
||||||
|
|
||||||
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files);
|
||||||
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
||||||
|
return (text_messages, None);
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!("Converting cache Vec<T> into BTreeMap<u64, Vec<T>>");
|
||||||
|
let mut map_loaded_entries: BTreeMap<u64, Vec<T>> = Default::default();
|
||||||
|
for file_entry in vec_loaded_entries {
|
||||||
|
map_loaded_entries.entry(file_entry.get_size()).or_default().push(file_entry);
|
||||||
|
}
|
||||||
|
debug!("Converted cache Vec<T> into BTreeMap<u64, Vec<T>>");
|
||||||
|
|
||||||
|
(text_messages, Some(map_loaded_entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn load_cache_from_file_generalized_by_path_from_size<T>(
|
||||||
|
cache_file_name: &str,
|
||||||
|
delete_outdated_cache: bool,
|
||||||
|
cache_not_converted: &BTreeMap<u64, Vec<T>>,
|
||||||
|
) -> (Messages, Option<BTreeMap<String, T>>)
|
||||||
|
where
|
||||||
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
||||||
|
{
|
||||||
|
debug!("Converting cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
||||||
|
let mut used_files: BTreeMap<String, T> = Default::default();
|
||||||
|
for file_entry in cache_not_converted.values().flatten() {
|
||||||
|
used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone());
|
||||||
|
}
|
||||||
|
debug!("Converted cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
||||||
|
|
||||||
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files);
|
||||||
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
||||||
|
return (text_messages, None);
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!("Converting cache Vec<T> into BTreeMap<String, T>");
|
||||||
|
let map_loaded_entries: BTreeMap<String, T> = vec_loaded_entries
|
||||||
|
.into_iter()
|
||||||
|
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
|
||||||
|
.collect();
|
||||||
|
debug!("Converted cache Vec<T> into BTreeMap<String, T>");
|
||||||
|
|
||||||
|
(text_messages, Some(map_loaded_entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<Vec<T>>)
|
||||||
|
where
|
||||||
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
||||||
|
{
|
||||||
|
debug!("Loading cache from file {} (or json alternative)", cache_file_name);
|
||||||
|
let mut text_messages = Messages::new();
|
||||||
|
|
||||||
|
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = common::open_cache_folder(cache_file_name, false, true, &mut text_messages.warnings) {
|
||||||
|
let mut vec_loaded_entries: Vec<T>;
|
||||||
|
if let Some(file_handler) = file_handler {
|
||||||
|
let reader = BufReader::new(file_handler);
|
||||||
|
|
||||||
|
vec_loaded_entries = match bincode::deserialize_from(reader) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
text_messages
|
||||||
|
.warnings
|
||||||
|
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
||||||
|
debug!("Failed to load cache from file {:?}", cache_file);
|
||||||
|
return (text_messages, None);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
||||||
|
vec_loaded_entries = match serde_json::from_reader(reader) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
text_messages
|
||||||
|
.warnings
|
||||||
|
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
|
||||||
|
debug!("Failed to load cache from file {:?}", cache_file);
|
||||||
|
return (text_messages, None);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't load cache data if destination file not exists
|
||||||
|
debug!("Starting to removing outdated cache entries");
|
||||||
|
let initial_number_of_entries = vec_loaded_entries.len();
|
||||||
|
vec_loaded_entries = vec_loaded_entries
|
||||||
|
.into_par_iter()
|
||||||
|
.filter(|file_entry| {
|
||||||
|
if delete_outdated_cache && !file_entry.get_path().exists() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_entry_path_str = file_entry.get_path().to_string_lossy().to_string();
|
||||||
|
if let Some(used_file) = used_files.get(&file_entry_path_str) {
|
||||||
|
if file_entry.get_size() != used_file.get_size() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if file_entry.get_modified_date() != used_file.get_modified_date() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
debug!(
|
||||||
|
"Completed removing outdated cache entries, removed {} out of all {} entries",
|
||||||
|
initial_number_of_entries - vec_loaded_entries.len(),
|
||||||
|
initial_number_of_entries
|
||||||
|
);
|
||||||
|
|
||||||
|
text_messages.messages.push(format!("Properly loaded {} cache entries.", vec_loaded_entries.len()));
|
||||||
|
|
||||||
|
debug!("Loaded cache from file {cache_file_name} (or json alternative) - {} results", vec_loaded_entries.len());
|
||||||
|
return (text_messages, Some(vec_loaded_entries));
|
||||||
|
}
|
||||||
|
debug!("Failed to load cache from file {cache_file_name} because not exists");
|
||||||
|
(text_messages, None)
|
||||||
|
}
|
|
@ -8,6 +8,7 @@ use std::time::UNIX_EPOCH;
|
||||||
use crossbeam_channel::Receiver;
|
use crossbeam_channel::Receiver;
|
||||||
use futures::channel::mpsc::UnboundedSender;
|
use futures::channel::mpsc::UnboundedSender;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
||||||
use crate::common_directory::Directories;
|
use crate::common_directory::Directories;
|
||||||
|
@ -44,7 +45,7 @@ pub enum ToolType {
|
||||||
None,
|
None,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Clone, Debug, Copy, Default)]
|
#[derive(PartialEq, Eq, Clone, Debug, Copy, Default, Deserialize, Serialize)]
|
||||||
pub enum CheckingMethod {
|
pub enum CheckingMethod {
|
||||||
#[default]
|
#[default]
|
||||||
None,
|
None,
|
||||||
|
@ -56,7 +57,7 @@ pub enum CheckingMethod {
|
||||||
AudioContent,
|
AudioContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
pub struct FileEntry {
|
pub struct FileEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
|
@ -69,19 +70,25 @@ impl ResultEntry for FileEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
fn get_modified_date(&self) -> u64 {
|
||||||
|
self.modified_date
|
||||||
|
}
|
||||||
|
fn get_size(&self) -> u64 {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Symlinks
|
// Symlinks
|
||||||
|
|
||||||
const MAX_NUMBER_OF_SYMLINK_JUMPS: i32 = 20;
|
const MAX_NUMBER_OF_SYMLINK_JUMPS: i32 = 20;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
|
||||||
pub struct SymlinkInfo {
|
pub struct SymlinkInfo {
|
||||||
pub destination_path: PathBuf,
|
pub destination_path: PathBuf,
|
||||||
pub type_of_error: ErrorType,
|
pub type_of_error: ErrorType,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Copy)]
|
#[derive(Clone, Debug, PartialEq, Eq, Copy, Deserialize, Serialize)]
|
||||||
pub enum ErrorType {
|
pub enum ErrorType {
|
||||||
InfiniteRecursion,
|
InfiniteRecursion,
|
||||||
NonExistentFile,
|
NonExistentFile,
|
||||||
|
|
|
@ -53,4 +53,11 @@ impl Messages {
|
||||||
self.warnings.extend(warnings);
|
self.warnings.extend(warnings);
|
||||||
self.errors.extend(errors);
|
self.errors.extend(errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Appends everything collected in another `Messages` value to this one.
///
/// The `messages`, `warnings` and `errors` of the argument are moved onto the end of the
/// matching collections of `self`, in that order; the argument is consumed.
pub fn extend_with_another_messages(&mut self, messages: Messages) {
    self.messages.extend(messages.messages);
    self.warnings.extend(messages.warnings);
    self.errors.extend(messages.errors);
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -103,6 +103,9 @@ pub trait CommonData {
|
||||||
fn get_text_messages(&self) -> &Messages {
|
fn get_text_messages(&self) -> &Messages {
|
||||||
&self.get_cd().text_messages
|
&self.get_cd().text_messages
|
||||||
}
|
}
|
||||||
|
/// Returns a mutable reference to the `text_messages` held by the common tool data
/// (obtained through `get_cd_mut`).
fn get_text_messages_mut(&mut self) -> &mut Messages {
    let common_data = self.get_cd_mut();
    &mut common_data.text_messages
}
|
||||||
|
|
||||||
fn set_save_also_as_json(&mut self, save_also_as_json: bool) {
|
fn set_save_also_as_json(&mut self, save_also_as_json: bool) {
|
||||||
self.get_cd_mut().save_also_as_json = save_also_as_json;
|
self.get_cd_mut().save_also_as_json = save_also_as_json;
|
||||||
|
|
|
@ -14,4 +14,6 @@ pub trait PrintResults {
|
||||||
|
|
||||||
pub trait ResultEntry {
|
pub trait ResultEntry {
|
||||||
fn get_path(&self) -> &Path;
|
fn get_path(&self) -> &Path;
|
||||||
|
fn get_modified_date(&self) -> u64;
|
||||||
|
fn get_size(&self) -> u64;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +1,12 @@
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
#[cfg(target_family = "unix")]
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::hash::Hasher;
|
use std::hash::Hasher;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::io::{self, BufReader, BufWriter, Error, ErrorKind};
|
use std::io::{self, BufWriter, Error, ErrorKind};
|
||||||
#[cfg(target_family = "unix")]
|
#[cfg(target_family = "unix")]
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
use std::sync::atomic::Ordering;
|
use std::sync::atomic::Ordering;
|
||||||
use std::{fs, mem};
|
use std::{fs, mem};
|
||||||
|
|
||||||
|
@ -18,13 +17,12 @@ use log::{debug, info};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use xxhash_rust::xxh3::Xxh3;
|
use xxhash_rust::xxh3::Xxh3;
|
||||||
|
|
||||||
use crate::common::{open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
||||||
|
use crate::common_cache::{get_duplicate_cache_file, load_cache_from_file_generalized_by_size, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||||
use crate::common_messages::Messages;
|
use crate::common_messages::Messages;
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
use crate::flc;
|
|
||||||
use crate::localizer_core::generate_translation_hashmap;
|
|
||||||
|
|
||||||
const TEMP_HARDLINK_FILE: &str = "rzeczek.rxrxrxl";
|
const TEMP_HARDLINK_FILE: &str = "rzeczek.rxrxrxl";
|
||||||
|
|
||||||
|
@ -480,7 +478,13 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
DirTraversalResult::Stopped => false,
|
DirTraversalResult::Stopped => false,
|
||||||
};
|
};
|
||||||
debug!("check_file_size - after calculating size stats/duplicates");
|
debug!(
|
||||||
|
"check_file_size - after calculating size stats/duplicates, found in {} groups, {} files with same size | referenced {} groups, {} files",
|
||||||
|
self.files_with_identical_size.len(),
|
||||||
|
self.files_with_identical_size.values().map(Vec::len).sum::<usize>(),
|
||||||
|
self.files_with_identical_size_referenced.len(),
|
||||||
|
self.files_with_identical_size_referenced.values().map(|(_fe, vec)| vec.len()).sum::<usize>()
|
||||||
|
);
|
||||||
res
|
res
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -535,34 +539,39 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
if self.use_prehash_cache {
|
if self.use_prehash_cache {
|
||||||
debug!("prehash_load_cache_at_start - using prehash cache start");
|
debug!("prehash_load_cache_at_start - using prehash cache start");
|
||||||
loaded_hash_map = match load_hashes_from_file(&mut self.common_data.text_messages, self.common_data.delete_outdated_cache, &self.hash_type, true) {
|
|
||||||
Some(t) => t,
|
|
||||||
None => Default::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut loaded_hash_map2: BTreeMap<String, FileEntry> = Default::default();
|
let (messages, loaded_items) = load_cache_from_file_generalized_by_size::<FileEntry>(
|
||||||
for vec_file_entry in loaded_hash_map.values() {
|
&get_duplicate_cache_file(&self.hash_type, true),
|
||||||
for file_entry in vec_file_entry {
|
self.get_delete_outdated_cache(),
|
||||||
loaded_hash_map2.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
|
&self.files_with_identical_size,
|
||||||
}
|
);
|
||||||
}
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
#[allow(clippy::if_same_then_else)]
|
debug!("prehash_load_cache_at_start - started diff between loaded and prechecked files");
|
||||||
for vec_file_entry in self.files_with_identical_size.values() {
|
for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
|
||||||
for file_entry in vec_file_entry {
|
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||||
let name = file_entry.path.to_string_lossy().to_string();
|
// TODO maybe hashset is not needed when using < 4 elements
|
||||||
if !loaded_hash_map2.contains_key(&name) {
|
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
||||||
// If loaded data doesn't contains current image info
|
for file_entry in vec_file_entry {
|
||||||
non_cached_files_to_check.entry(file_entry.size).or_default().push(file_entry.clone());
|
if cached_path_entries.contains(&file_entry.path) {
|
||||||
} else if file_entry.size != loaded_hash_map2.get(&name).unwrap().size || file_entry.modified_date != loaded_hash_map2.get(&name).unwrap().modified_date {
|
records_already_cached.entry(size).or_default().push(file_entry);
|
||||||
// When size or modification date of image changed, then it is clear that is different image
|
} else {
|
||||||
non_cached_files_to_check.entry(file_entry.size).or_default().push(file_entry.clone());
|
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||||
} else {
|
}
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
|
||||||
records_already_cached.entry(file_entry.size).or_default().push(file_entry.clone());
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
non_cached_files_to_check.entry(size).or_default().append(&mut vec_file_entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"prehash_load_cache_at_start - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached",
|
||||||
|
non_cached_files_to_check.values().map(Vec::len).sum::<usize>(),
|
||||||
|
format_size(non_cached_files_to_check.values().map(|v| v.iter().map(|e| e.size).sum::<u64>()).sum::<u64>(), BINARY),
|
||||||
|
records_already_cached.values().map(Vec::len).sum::<usize>(),
|
||||||
|
format_size(records_already_cached.values().map(|v| v.iter().map(|e| e.size).sum::<u64>()).sum::<u64>(), BINARY),
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
debug!("prehash_load_cache_at_start - not using prehash cache start");
|
debug!("prehash_load_cache_at_start - not using prehash cache start");
|
||||||
loaded_hash_map = Default::default();
|
loaded_hash_map = Default::default();
|
||||||
|
@ -596,13 +605,14 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
save_hashes_to_file(
|
let messages = save_cache_to_file_generalized(
|
||||||
|
&get_duplicate_cache_file(&self.hash_type, true),
|
||||||
&save_cache_to_hashmap,
|
&save_cache_to_hashmap,
|
||||||
&mut self.common_data.text_messages,
|
self.common_data.save_also_as_json,
|
||||||
&self.hash_type,
|
|
||||||
true,
|
|
||||||
self.minimal_prehash_cache_file_size,
|
self.minimal_prehash_cache_file_size,
|
||||||
);
|
);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
|
|
||||||
debug!("prehash_save_cache_at_exit - saving prehash cache end");
|
debug!("prehash_save_cache_at_exit - saving prehash cache end");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -691,35 +701,35 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
debug!("full_hashing_load_cache_at_start - using cache");
|
debug!("full_hashing_load_cache_at_start - using cache");
|
||||||
loaded_hash_map = match load_hashes_from_file(&mut self.common_data.text_messages, self.common_data.delete_outdated_cache, &self.hash_type, false) {
|
let (messages, loaded_items) =
|
||||||
Some(t) => t,
|
load_cache_from_file_generalized_by_size::<FileEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map);
|
||||||
None => Default::default(),
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
};
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
for (size, vec_file_entry) in pre_checked_map {
|
|
||||||
#[allow(clippy::collapsible_if)]
|
|
||||||
if !loaded_hash_map.contains_key(&size) {
|
|
||||||
// If loaded data doesn't contains current info
|
|
||||||
non_cached_files_to_check.insert(size, vec_file_entry);
|
|
||||||
} else {
|
|
||||||
let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap();
|
|
||||||
|
|
||||||
|
debug!("full_hashing_load_cache_at_start - started diff between loaded and prechecked files");
|
||||||
|
for (size, mut vec_file_entry) in pre_checked_map {
|
||||||
|
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||||
|
// TODO maybe hashset is not needed when using < 4 elements
|
||||||
|
let cached_path_entries = cached_vec_file_entry.iter().map(|e| &e.path).collect::<HashSet<_>>();
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
let mut found: bool = false;
|
if cached_path_entries.contains(&file_entry.path) {
|
||||||
for loaded_file_entry in loaded_vec_file_entry {
|
records_already_cached.entry(size).or_default().push(file_entry);
|
||||||
if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
|
} else {
|
||||||
records_already_cached.entry(file_entry.size).or_default().push(loaded_file_entry.clone());
|
non_cached_files_to_check.entry(size).or_default().push(file_entry);
|
||||||
found = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !found {
|
|
||||||
non_cached_files_to_check.entry(file_entry.size).or_default().push(file_entry);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
non_cached_files_to_check.entry(size).or_default().append(&mut vec_file_entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"full_hashing_load_cache_at_start - completed diff between loaded and prechecked files - {}({}) non cached, {}({}) already cached",
|
||||||
|
non_cached_files_to_check.len(),
|
||||||
|
format_size(non_cached_files_to_check.values().map(|v| v.iter().map(|e| e.size).sum::<u64>()).sum::<u64>(), BINARY),
|
||||||
|
records_already_cached.len(),
|
||||||
|
format_size(records_already_cached.values().map(|v| v.iter().map(|e| e.size).sum::<u64>()).sum::<u64>(), BINARY),
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
debug!("full_hashing_load_cache_at_start - not using cache");
|
debug!("full_hashing_load_cache_at_start - not using cache");
|
||||||
loaded_hash_map = Default::default();
|
loaded_hash_map = Default::default();
|
||||||
|
@ -771,7 +781,15 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
save_hashes_to_file(&all_results, &mut self.common_data.text_messages, &self.hash_type, false, self.minimal_cache_file_size);
|
|
||||||
|
let messages = save_cache_to_file_generalized(
|
||||||
|
&get_duplicate_cache_file(&self.hash_type, false),
|
||||||
|
&all_results,
|
||||||
|
self.common_data.save_also_as_json,
|
||||||
|
self.minimal_cache_file_size,
|
||||||
|
);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
|
|
||||||
debug!("full_hashing_save_cache_at_exit - end");
|
debug!("full_hashing_save_cache_at_exit - end");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1318,112 +1336,6 @@ pub fn make_hard_link(src: &Path, dst: &Path) -> io::Result<()> {
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType, is_prehash: bool, minimal_cache_file_size: u64) {
|
|
||||||
if let Some(((file_handler, cache_file), (_json_file, _json_name))) = open_cache_folder(&get_file_hash_name(type_of_hash, is_prehash), true, false, &mut text_messages.warnings)
|
|
||||||
{
|
|
||||||
let mut writer = BufWriter::new(file_handler.unwrap()); // Unwrap cannot fail
|
|
||||||
|
|
||||||
let mut how_much = 0;
|
|
||||||
for file_entry in hashmap.values() {
|
|
||||||
if file_entry.size >= minimal_cache_file_size {
|
|
||||||
let string: String = format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.hash);
|
|
||||||
|
|
||||||
if let Err(e) = writeln!(writer, "{string}") {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
how_much += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages
|
|
||||||
.messages
|
|
||||||
.push(flc!("core_saving_to_cache", generate_translation_hashmap(vec![("number", how_much.to_string())])));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, type_of_hash: &HashType, is_prehash: bool) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
|
|
||||||
if let Some(((file_handler, cache_file), (_json_file, _json_name))) =
|
|
||||||
open_cache_folder(&get_file_hash_name(type_of_hash, is_prehash), false, false, &mut text_messages.warnings)
|
|
||||||
{
|
|
||||||
// Unwrap could fail when failed to open cache file, but json would exists
|
|
||||||
let Some(file_handler) = file_handler else {
|
|
||||||
return Default::default();
|
|
||||||
};
|
|
||||||
let reader = BufReader::new(file_handler);
|
|
||||||
|
|
||||||
let mut hashmap_loaded_entries: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
|
||||||
|
|
||||||
// Read the file line by line using the lines() iterator from std::io::BufRead.
|
|
||||||
for (index, line) in reader.lines().enumerate() {
|
|
||||||
let line = match line {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let uuu = line.split("//").collect::<Vec<&str>>();
|
|
||||||
if uuu.len() != 4 {
|
|
||||||
text_messages.warnings.push(format!(
|
|
||||||
"Found invalid data(too much or too low amount of data) in line {} - ({}) in cache file {}",
|
|
||||||
index + 1,
|
|
||||||
line,
|
|
||||||
cache_file.display()
|
|
||||||
));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if !delete_outdated_cache || Path::new(uuu[0]).exists() {
|
|
||||||
let file_entry = FileEntry {
|
|
||||||
path: PathBuf::from(uuu[0]),
|
|
||||||
size: match uuu[1].parse::<u64>() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages.warnings.push(format!(
|
|
||||||
"Found invalid size value in line {} - ({}) in cache file {}, reason {}",
|
|
||||||
index + 1,
|
|
||||||
line,
|
|
||||||
cache_file.display(),
|
|
||||||
e
|
|
||||||
));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
modified_date: match uuu[2].parse::<u64>() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages.warnings.push(format!(
|
|
||||||
"Found invalid modified date value in line {} - ({}) in cache file {}, reason {}",
|
|
||||||
index + 1,
|
|
||||||
line,
|
|
||||||
cache_file.display(),
|
|
||||||
e
|
|
||||||
));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
hash: uuu[3].to_string(),
|
|
||||||
symlink_info: None,
|
|
||||||
};
|
|
||||||
hashmap_loaded_entries.entry(file_entry.size).or_default().push(file_entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(flc!(
|
|
||||||
"core_loading_from_cache",
|
|
||||||
generate_translation_hashmap(vec![("number", hashmap_loaded_entries.values().map(std::vec::Vec::len).sum::<usize>().to_string())])
|
|
||||||
));
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait MyHasher {
|
pub trait MyHasher {
|
||||||
fn update(&mut self, bytes: &[u8]);
|
fn update(&mut self, bytes: &[u8]);
|
||||||
fn finalize(&self) -> String;
|
fn finalize(&self) -> String;
|
||||||
|
@ -1453,11 +1365,6 @@ fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashT
|
||||||
Ok(hasher.finalize())
|
Ok(hasher.finalize())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_file_hash_name(type_of_hash: &HashType, is_prehash: bool) -> String {
|
|
||||||
let prehash_str = if is_prehash { "_prehash" } else { "" };
|
|
||||||
format!("cache_duplicates_{type_of_hash:?}{prehash_str}.txt")
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MyHasher for blake3::Hasher {
|
impl MyHasher for blake3::Hasher {
|
||||||
fn update(&mut self, bytes: &[u8]) {
|
fn update(&mut self, bytes: &[u8]) {
|
||||||
self.update(bytes);
|
self.update(bytes);
|
||||||
|
@ -1502,6 +1409,7 @@ mod tests {
|
||||||
use std::os::fs::MetadataExt;
|
use std::os::fs::MetadataExt;
|
||||||
#[cfg(target_family = "unix")]
|
#[cfg(target_family = "unix")]
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ pub mod temporary;
|
||||||
|
|
||||||
pub mod bad_extensions;
|
pub mod bad_extensions;
|
||||||
pub mod common;
|
pub mod common;
|
||||||
|
pub mod common_cache;
|
||||||
pub mod common_dir_traversal;
|
pub mod common_dir_traversal;
|
||||||
pub mod common_directory;
|
pub mod common_directory;
|
||||||
pub mod common_extensions;
|
pub mod common_extensions;
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
use std::collections::{BTreeMap, HashSet};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::io::{BufReader, BufWriter};
|
use std::io::BufWriter;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
@ -23,11 +23,9 @@ use symphonia::core::io::MediaSourceStream;
|
||||||
use symphonia::core::meta::MetadataOptions;
|
use symphonia::core::meta::MetadataOptions;
|
||||||
use symphonia::core::probe::Hint;
|
use symphonia::core::probe::Hint;
|
||||||
|
|
||||||
use crate::common::{
|
use crate::common::{create_crash_message, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS};
|
||||||
create_crash_message, filter_reference_folders_generic, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
use crate::common_cache::{get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
};
|
|
||||||
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||||
use crate::common_messages::Messages;
|
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
|
|
||||||
|
@ -71,6 +69,12 @@ impl ResultEntry for MusicEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
fn get_modified_date(&self) -> u64 {
|
||||||
|
self.modified_date
|
||||||
|
}
|
||||||
|
fn get_size(&self) -> u64 {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileEntry {
|
impl FileEntry {
|
||||||
|
@ -102,7 +106,7 @@ pub struct Info {
|
||||||
pub struct SameMusic {
|
pub struct SameMusic {
|
||||||
common_data: CommonToolData,
|
common_data: CommonToolData,
|
||||||
information: Info,
|
information: Info,
|
||||||
music_to_check: HashMap<String, MusicEntry>,
|
music_to_check: BTreeMap<String, MusicEntry>,
|
||||||
music_entries: Vec<MusicEntry>,
|
music_entries: Vec<MusicEntry>,
|
||||||
duplicated_music_entries: Vec<Vec<MusicEntry>>,
|
duplicated_music_entries: Vec<Vec<MusicEntry>>,
|
||||||
duplicated_music_entries_referenced: Vec<(MusicEntry, Vec<MusicEntry>)>,
|
duplicated_music_entries_referenced: Vec<(MusicEntry, Vec<MusicEntry>)>,
|
||||||
|
@ -221,32 +225,24 @@ impl SameMusic {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_cache(&mut self, checking_tags: bool) -> (HashMap<String, MusicEntry>, HashMap<String, MusicEntry>, HashMap<String, MusicEntry>) {
|
fn load_cache(&mut self, checking_tags: bool) -> (BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>) {
|
||||||
debug!("load_cache - start, using cache {}", self.common_data.use_cache);
|
debug!("load_cache - start, using cache {}", self.common_data.use_cache);
|
||||||
let loaded_hash_map;
|
let loaded_hash_map;
|
||||||
|
|
||||||
let mut records_already_cached: HashMap<String, MusicEntry> = Default::default();
|
let mut records_already_cached: BTreeMap<String, MusicEntry> = Default::default();
|
||||||
let mut non_cached_files_to_check: HashMap<String, MusicEntry> = Default::default();
|
let mut non_cached_files_to_check: BTreeMap<String, MusicEntry> = Default::default();
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
loaded_hash_map = match load_cache_from_file(&mut self.common_data.text_messages, self.common_data.delete_outdated_cache, checking_tags) {
|
let (messages, loaded_items) =
|
||||||
Some(t) => t,
|
load_cache_from_file_generalized_by_path::<MusicEntry>(get_similar_music_cache_file(checking_tags), self.get_delete_outdated_cache(), &self.music_to_check);
|
||||||
None => Default::default(),
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
};
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
for (name, file_entry) in &self.music_to_check {
|
for (name, file_entry) in mem::take(&mut self.music_to_check) {
|
||||||
if !loaded_hash_map.contains_key(name) {
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
||||||
// If loaded data doesn't contains current image info
|
records_already_cached.insert(name.clone(), cached_file_entry.clone());
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
} else {
|
||||||
let loaded_item = loaded_hash_map.get(name).unwrap();
|
non_cached_files_to_check.insert(name, file_entry);
|
||||||
if file_entry.size != loaded_item.size || file_entry.modified_date != loaded_item.modified_date {
|
|
||||||
// When size or modification date of image changed, then it is clear that is different image
|
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
|
||||||
records_already_cached.insert(name.clone(), loaded_item.clone());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -257,18 +253,20 @@ impl SameMusic {
|
||||||
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
|
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_cache(&mut self, vec_file_entry: Vec<MusicEntry>, loaded_hash_map: HashMap<String, MusicEntry>, checking_tags: bool) {
|
fn save_cache(&mut self, vec_file_entry: Vec<MusicEntry>, loaded_hash_map: BTreeMap<String, MusicEntry>, checking_tags: bool) {
|
||||||
debug!("save_cache - start, using cache {}", self.common_data.use_cache);
|
debug!("save_cache - start, using cache {}", self.common_data.use_cache);
|
||||||
if !self.common_data.use_cache {
|
if !self.common_data.use_cache {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Must save all results to file, old loaded from file with all currently counted results
|
// Must save all results to file, old loaded from file with all currently counted results
|
||||||
let mut all_results: HashMap<String, MusicEntry> = loaded_hash_map;
|
let mut all_results: BTreeMap<String, MusicEntry> = loaded_hash_map;
|
||||||
|
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
save_cache_to_file(&all_results, &mut self.common_data.text_messages, self.common_data.save_also_as_json, checking_tags);
|
|
||||||
|
let messages = save_cache_to_file_generalized(get_similar_music_cache_file(checking_tags), &all_results, self.common_data.save_also_as_json, 0);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
debug!("save_cache - end");
|
debug!("save_cache - end");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -745,74 +743,6 @@ impl SameMusic {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_cache_to_file(hashmap: &HashMap<String, MusicEntry>, text_messages: &mut Messages, save_also_as_json: bool, checking_tags: bool) {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
|
|
||||||
open_cache_folder(get_cache_file(checking_tags), true, save_also_as_json, &mut text_messages.warnings)
|
|
||||||
{
|
|
||||||
{
|
|
||||||
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
|
||||||
if let Err(e) = bincode::serialize_into(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if save_also_as_json {
|
|
||||||
if let Some(file_handler_json) = file_handler_json {
|
|
||||||
let writer = BufWriter::new(file_handler_json);
|
|
||||||
if let Err(e) = serde_json::to_writer(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn load_cache_from_file(text_messages: &mut Messages, delete_outdated_cache: bool, checking_tags: bool) -> Option<HashMap<String, MusicEntry>> {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(get_cache_file(checking_tags), false, true, &mut text_messages.warnings) {
|
|
||||||
let mut hashmap_loaded_entries: HashMap<String, MusicEntry>;
|
|
||||||
if let Some(file_handler) = file_handler {
|
|
||||||
let reader = BufReader::new(file_handler);
|
|
||||||
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
|
||||||
hashmap_loaded_entries = match serde_json::from_reader(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if delete_outdated_cache {
|
|
||||||
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
|
// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
|
||||||
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
|
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
|
@ -976,15 +906,6 @@ fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the cache file name for the selected checking mode.
///
/// Tag-based and fingerprint-based checks use different cache files, because loading
/// the cache just for finding duplicated tags would be really slow.
fn get_cache_file(checking_tags: bool) -> &'static str {
    const TAGS_CACHE: &str = "cache_same_music_tags.bin";
    const FINGERPRINTS_CACHE: &str = "cache_same_music_fingerprints.bin";

    if !checking_tags {
        return FINGERPRINTS_CACHE;
    }
    TAGS_CACHE
}
|
|
||||||
|
|
||||||
impl Default for SameMusic {
|
impl Default for SameMusic {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self::new()
|
Self::new()
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::fs::{DirEntry, File, Metadata};
|
use std::fs::{DirEntry, File, Metadata};
|
||||||
use std::io::{Write, *};
|
use std::io::{Write, *};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
@ -19,11 +19,11 @@ use serde::{Deserialize, Serialize};
|
||||||
#[cfg(feature = "heif")]
|
#[cfg(feature = "heif")]
|
||||||
use crate::common::get_dynamic_image_from_heic;
|
use crate::common::get_dynamic_image_from_heic;
|
||||||
use crate::common::{
|
use crate::common::{
|
||||||
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
|
check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS,
|
||||||
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
|
IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
|
||||||
};
|
};
|
||||||
|
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
||||||
use crate::common_messages::Messages;
|
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
|
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
|
||||||
use crate::flc;
|
use crate::flc;
|
||||||
|
@ -51,6 +51,12 @@ impl ResultEntry for FileEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
fn get_modified_date(&self) -> u64 {
|
||||||
|
self.modified_date
|
||||||
|
}
|
||||||
|
fn get_size(&self) -> u64 {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Used by CLI tool when we cannot use directly values
|
/// Used by CLI tool when we cannot use directly values
|
||||||
|
@ -89,7 +95,7 @@ pub struct SimilarImages {
|
||||||
image_hashes: HashMap<ImHash, Vec<FileEntry>>,
|
image_hashes: HashMap<ImHash, Vec<FileEntry>>,
|
||||||
// Hashmap with image hashes and Vector with names of files
|
// Hashmap with image hashes and Vector with names of files
|
||||||
similarity: u32,
|
similarity: u32,
|
||||||
images_to_check: HashMap<String, FileEntry>,
|
images_to_check: BTreeMap<String, FileEntry>,
|
||||||
hash_size: u8,
|
hash_size: u8,
|
||||||
hash_alg: HashAlg,
|
hash_alg: HashAlg,
|
||||||
image_filter: FilterType,
|
image_filter: FilterType,
|
||||||
|
@ -273,38 +279,27 @@ impl SimilarImages {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash_images_load_cache(&mut self) -> (HashMap<String, FileEntry>, HashMap<String, FileEntry>, HashMap<String, FileEntry>) {
|
fn hash_images_load_cache(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
|
||||||
debug!("hash_images_load_cache - start, use cache: {}", self.common_data.use_cache);
|
debug!("hash_images_load_cache - start, use cache: {}", self.common_data.use_cache);
|
||||||
let loaded_hash_map;
|
let loaded_hash_map;
|
||||||
|
|
||||||
let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
|
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
|
||||||
let mut non_cached_files_to_check: HashMap<String, FileEntry> = Default::default();
|
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
loaded_hash_map = match load_hashes_from_file(
|
let (messages, loaded_items) = load_cache_from_file_generalized_by_path::<FileEntry>(
|
||||||
&mut self.common_data.text_messages,
|
&get_similar_images_cache_file(&self.hash_size, &self.hash_alg, &self.image_filter),
|
||||||
self.common_data.delete_outdated_cache,
|
self.get_delete_outdated_cache(),
|
||||||
self.hash_size,
|
&self.images_to_check,
|
||||||
self.hash_alg,
|
);
|
||||||
self.image_filter,
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
) {
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
Some(t) => t,
|
|
||||||
None => Default::default(),
|
|
||||||
};
|
|
||||||
|
|
||||||
for (name, file_entry) in &self.images_to_check {
|
for (name, file_entry) in mem::take(&mut self.images_to_check) {
|
||||||
if !loaded_hash_map.contains_key(name) {
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
||||||
// If loaded data doesn't contains current image info
|
records_already_cached.insert(name.clone(), cached_file_entry.clone());
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
} else {
|
||||||
let loaded_item = loaded_hash_map.get(name).unwrap();
|
non_cached_files_to_check.insert(name, file_entry);
|
||||||
if file_entry.size != loaded_item.size || file_entry.modified_date != loaded_item.modified_date {
|
|
||||||
// When size or modification date of image changed, then it is clear that is different image
|
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
|
||||||
records_already_cached.insert(name.clone(), loaded_item.clone());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -373,22 +368,22 @@ impl SimilarImages {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_to_cache(&mut self, vec_file_entry: Vec<(FileEntry, ImHash)>, loaded_hash_map: HashMap<String, FileEntry>) {
|
fn save_to_cache(&mut self, vec_file_entry: Vec<(FileEntry, ImHash)>, loaded_hash_map: BTreeMap<String, FileEntry>) {
|
||||||
debug!("save_to_cache - start, using cache: {}", self.common_data.use_cache);
|
debug!("save_to_cache - start, using cache: {}", self.common_data.use_cache);
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
// Must save all results to file, old loaded from file with all currently counted results
|
// Must save all results to file, old loaded from file with all currently counted results
|
||||||
let mut all_results: HashMap<String, FileEntry> = loaded_hash_map;
|
let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
|
||||||
for (file_entry, _hash) in vec_file_entry {
|
for (file_entry, _hash) in vec_file_entry {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
save_hashes_to_file(
|
|
||||||
|
let messages = save_cache_to_file_generalized(
|
||||||
|
&get_similar_images_cache_file(&self.hash_size, &self.hash_alg, &self.image_filter),
|
||||||
&all_results,
|
&all_results,
|
||||||
&mut self.common_data.text_messages,
|
|
||||||
self.common_data.save_also_as_json,
|
self.common_data.save_also_as_json,
|
||||||
self.hash_size,
|
0,
|
||||||
self.hash_alg,
|
|
||||||
self.image_filter,
|
|
||||||
);
|
);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
}
|
}
|
||||||
debug!("save_to_cache - end");
|
debug!("save_to_cache - end");
|
||||||
}
|
}
|
||||||
|
@ -945,98 +940,6 @@ impl PrintResults for SimilarImages {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn save_hashes_to_file(
|
|
||||||
hashmap: &HashMap<String, FileEntry>,
|
|
||||||
text_messages: &mut Messages,
|
|
||||||
save_also_as_json: bool,
|
|
||||||
hash_size: u8,
|
|
||||||
hash_alg: HashAlg,
|
|
||||||
image_filter: FilterType,
|
|
||||||
) {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
|
|
||||||
open_cache_folder(&get_cache_file(&hash_size, &hash_alg, &image_filter), true, save_also_as_json, &mut text_messages.warnings)
|
|
||||||
{
|
|
||||||
{
|
|
||||||
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
|
||||||
if let Err(e) = bincode::serialize_into(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if save_also_as_json {
|
|
||||||
if let Some(file_handler_json) = file_handler_json {
|
|
||||||
let writer = BufWriter::new(file_handler_json);
|
|
||||||
if let Err(e) = serde_json::to_writer(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn load_hashes_from_file(
|
|
||||||
text_messages: &mut Messages,
|
|
||||||
delete_outdated_cache: bool,
|
|
||||||
hash_size: u8,
|
|
||||||
hash_alg: HashAlg,
|
|
||||||
image_filter: FilterType,
|
|
||||||
) -> Option<HashMap<String, FileEntry>> {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
|
|
||||||
open_cache_folder(&get_cache_file(&hash_size, &hash_alg, &image_filter), false, true, &mut text_messages.warnings)
|
|
||||||
{
|
|
||||||
let mut hashmap_loaded_entries: HashMap<String, FileEntry>;
|
|
||||||
if let Some(file_handler) = file_handler {
|
|
||||||
let reader = BufReader::new(file_handler);
|
|
||||||
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
|
||||||
hashmap_loaded_entries = match serde_json::from_reader(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if delete_outdated_cache {
|
|
||||||
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
|
|
||||||
format!(
|
|
||||||
"cache_similar_images_{}_{}_{}_50.bin",
|
|
||||||
hash_size,
|
|
||||||
convert_algorithm_to_string(hash_alg),
|
|
||||||
convert_filters_to_string(image_filter),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_string_from_similarity(similarity: &u32, hash_size: u8) -> String {
|
pub fn get_string_from_similarity(similarity: &u32, hash_size: u8) -> String {
|
||||||
let index_preset = match hash_size {
|
let index_preset = match hash_size {
|
||||||
8 => 0,
|
8 => 0,
|
||||||
|
@ -1085,7 +988,7 @@ pub fn return_similarity_from_similarity_preset(similarity_preset: &SimilarityPr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_filters_to_string(image_filter: &FilterType) -> String {
|
pub fn convert_filters_to_string(image_filter: &FilterType) -> String {
|
||||||
match image_filter {
|
match image_filter {
|
||||||
FilterType::Lanczos3 => "Lanczos3",
|
FilterType::Lanczos3 => "Lanczos3",
|
||||||
FilterType::Nearest => "Nearest",
|
FilterType::Nearest => "Nearest",
|
||||||
|
@ -1096,7 +999,7 @@ fn convert_filters_to_string(image_filter: &FilterType) -> String {
|
||||||
.to_string()
|
.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String {
|
pub fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String {
|
||||||
match hash_alg {
|
match hash_alg {
|
||||||
HashAlg::Mean => "Mean",
|
HashAlg::Mean => "Mean",
|
||||||
HashAlg::Gradient => "Gradient",
|
HashAlg::Gradient => "Gradient",
|
||||||
|
|
|
@ -15,9 +15,9 @@ use serde::{Deserialize, Serialize};
|
||||||
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
||||||
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
||||||
|
|
||||||
use crate::common::{check_folder_children, open_cache_folder, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
|
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
|
||||||
|
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
||||||
use crate::common_messages::Messages;
|
|
||||||
use crate::common_tool::{CommonData, CommonToolData};
|
use crate::common_tool::{CommonData, CommonToolData};
|
||||||
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
|
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
|
||||||
use crate::flc;
|
use crate::flc;
|
||||||
|
@ -38,6 +38,12 @@ impl ResultEntry for FileEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
fn get_modified_date(&self) -> u64 {
|
||||||
|
self.modified_date
|
||||||
|
}
|
||||||
|
fn get_size(&self) -> u64 {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Distance metric to use with the BK-tree.
|
/// Distance metric to use with the BK-tree.
|
||||||
|
@ -259,24 +265,16 @@ impl SimilarVideos {
|
||||||
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
loaded_hash_map = match load_hashes_from_file(&mut self.common_data.text_messages, self.common_data.delete_outdated_cache) {
|
let (messages, loaded_items) =
|
||||||
Some(t) => t,
|
load_cache_from_file_generalized_by_path::<FileEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
|
||||||
None => Default::default(),
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
};
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
for (name, file_entry) in &self.videos_to_check {
|
for (name, file_entry) in mem::take(&mut self.videos_to_check) {
|
||||||
if !loaded_hash_map.contains_key(name) {
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
||||||
// If loaded data doesn't contains current videos info
|
records_already_cached.insert(name.clone(), cached_file_entry.clone());
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
} else {
|
||||||
let loaded_item = loaded_hash_map.get(name).unwrap();
|
non_cached_files_to_check.insert(name, file_entry);
|
||||||
if file_entry.size != loaded_item.size || file_entry.modified_date != loaded_item.modified_date {
|
|
||||||
// When size or modification date of video changed, then it is clear that is different video
|
|
||||||
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
|
||||||
} else {
|
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
|
||||||
records_already_cached.insert(name.clone(), loaded_item.clone());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -375,7 +373,9 @@ impl SimilarVideos {
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
save_hashes_to_file(&all_results, &mut self.common_data.text_messages, self.common_data.save_also_as_json);
|
|
||||||
|
let messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &all_results, self.common_data.save_also_as_json, 0);
|
||||||
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
}
|
}
|
||||||
debug!("save_cache - end");
|
debug!("save_cache - end");
|
||||||
}
|
}
|
||||||
|
@ -512,76 +512,6 @@ impl PrintResults for SimilarVideos {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, save_also_as_json: bool) {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), true, save_also_as_json, &mut text_messages.warnings) {
|
|
||||||
{
|
|
||||||
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
|
||||||
if let Err(e) = bincode::serialize_into(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if save_also_as_json {
|
|
||||||
if let Some(file_handler_json) = file_handler_json {
|
|
||||||
let writer = BufWriter::new(file_handler_json);
|
|
||||||
if let Err(e) = serde_json::to_writer(writer, hashmap) {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Cannot write data to cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
|
|
||||||
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = open_cache_folder(&get_cache_file(), false, true, &mut text_messages.warnings) {
|
|
||||||
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry>;
|
|
||||||
if let Some(file_handler) = file_handler {
|
|
||||||
let reader = BufReader::new(file_handler);
|
|
||||||
hashmap_loaded_entries = match bincode::deserialize_from(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
|
||||||
hashmap_loaded_entries = match serde_json::from_reader(reader) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Failed to load data from cache file {}, reason {}", cache_file_json.display(), e));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if delete_outdated_cache {
|
|
||||||
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists());
|
|
||||||
}
|
|
||||||
|
|
||||||
text_messages.messages.push(format!("Properly loaded {} cache entries.", hashmap_loaded_entries.len()));
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the file name of the similar-videos cache.
fn get_cache_file() -> String {
    String::from("cache_similar_videos.bin")
}
|
|
||||||
|
|
||||||
pub fn check_if_ffmpeg_is_installed() -> bool {
|
pub fn check_if_ffmpeg_is_installed() -> bool {
|
||||||
let vid = "9999czekoczekoczekolada999.txt";
|
let vid = "9999czekoczekoczekolada999.txt";
|
||||||
if let Err(DetermineVideo {
|
if let Err(DetermineVideo {
|
||||||
|
|
|
@ -330,6 +330,7 @@ fn duplicate_search(
|
||||||
df.set_minimal_prehash_cache_file_size(minimal_prehash_cache_file_size);
|
df.set_minimal_prehash_cache_file_size(minimal_prehash_cache_file_size);
|
||||||
df.set_check_method(check_method);
|
df.set_check_method(check_method);
|
||||||
df.set_hash_type(hash_type);
|
df.set_hash_type(hash_type);
|
||||||
|
df.set_save_also_as_json(loaded_common_items.save_also_as_json);
|
||||||
df.set_ignore_hard_links(loaded_common_items.hide_hard_links);
|
df.set_ignore_hard_links(loaded_common_items.hide_hard_links);
|
||||||
df.set_use_cache(loaded_common_items.use_cache);
|
df.set_use_cache(loaded_common_items.use_cache);
|
||||||
df.set_use_prehash_cache(use_prehash_cache);
|
df.set_use_prehash_cache(use_prehash_cache);
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
|
|
||||||
|
use czkawka_core::common_cache::{
|
||||||
|
get_duplicate_cache_file, get_similar_images_cache_file, get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, load_cache_from_file_generalized_by_size,
|
||||||
|
save_cache_to_file_generalized,
|
||||||
|
};
|
||||||
use directories_next::ProjectDirs;
|
use directories_next::ProjectDirs;
|
||||||
use gtk4::prelude::*;
|
use gtk4::prelude::*;
|
||||||
use gtk4::{Label, ResponseType, Window};
|
use gtk4::{Label, ResponseType, Window};
|
||||||
|
@ -121,20 +125,25 @@ pub fn connect_settings(gui_data: &GuiData) {
|
||||||
let mut messages: Messages = Messages::new();
|
let mut messages: Messages = Messages::new();
|
||||||
for use_prehash in [true, false] {
|
for use_prehash in [true, false] {
|
||||||
for type_of_hash in &[HashType::Xxh3, HashType::Blake3, HashType::Crc32] {
|
for type_of_hash in &[HashType::Xxh3, HashType::Blake3, HashType::Crc32] {
|
||||||
if let Some(cache_entries) = czkawka_core::duplicate::load_hashes_from_file(&mut messages, true, type_of_hash, use_prehash) {
|
let (mut messages, loaded_items) = load_cache_from_file_generalized_by_size::<czkawka_core::common_dir_traversal::FileEntry>(
|
||||||
|
&get_duplicate_cache_file(type_of_hash, use_prehash),
|
||||||
|
true,
|
||||||
|
&Default::default(),
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some(cache_entries) = loaded_items {
|
||||||
let mut hashmap_to_save: BTreeMap<String, czkawka_core::common_dir_traversal::FileEntry> = Default::default();
|
let mut hashmap_to_save: BTreeMap<String, czkawka_core::common_dir_traversal::FileEntry> = Default::default();
|
||||||
for (_, vec_file_entry) in cache_entries {
|
for (_, vec_file_entry) in cache_entries {
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
hashmap_to_save.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
czkawka_core::duplicate::save_hashes_to_file(
|
|
||||||
&hashmap_to_save,
|
let minimal_cache_size = entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024);
|
||||||
&mut messages,
|
|
||||||
type_of_hash,
|
let save_messages =
|
||||||
use_prehash,
|
save_cache_to_file_generalized(&get_duplicate_cache_file(type_of_hash, use_prehash), &hashmap_to_save, false, minimal_cache_size);
|
||||||
entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024),
|
messages.extend_with_another_messages(save_messages);
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -169,8 +178,16 @@ pub fn connect_settings(gui_data: &GuiData) {
|
||||||
FilterType::Triangle,
|
FilterType::Triangle,
|
||||||
] {
|
] {
|
||||||
for hash_alg in &[HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] {
|
for hash_alg in &[HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] {
|
||||||
if let Some(cache_entries) = czkawka_core::similar_images::load_hashes_from_file(&mut messages, true, *hash_size, *hash_alg, *image_filter) {
|
let (mut messages, loaded_items) = load_cache_from_file_generalized_by_path::<czkawka_core::similar_images::FileEntry>(
|
||||||
czkawka_core::similar_images::save_hashes_to_file(&cache_entries, &mut messages, false, *hash_size, *hash_alg, *image_filter);
|
&get_similar_images_cache_file(hash_size, hash_alg, image_filter),
|
||||||
|
true,
|
||||||
|
&Default::default(),
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some(cache_entries) = loaded_items {
|
||||||
|
let save_messages =
|
||||||
|
save_cache_to_file_generalized(&get_similar_images_cache_file(hash_size, hash_alg, image_filter), &cache_entries, false, 0);
|
||||||
|
messages.extend_with_another_messages(save_messages);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -196,9 +213,12 @@ pub fn connect_settings(gui_data: &GuiData) {
|
||||||
|
|
||||||
dialog.connect_response(move |dialog, response_type| {
|
dialog.connect_response(move |dialog, response_type| {
|
||||||
if response_type == ResponseType::Ok {
|
if response_type == ResponseType::Ok {
|
||||||
let mut messages: Messages = Messages::new();
|
let (mut messages, loaded_items) =
|
||||||
if let Some(cache_entries) = czkawka_core::similar_videos::load_hashes_from_file(&mut messages, true) {
|
load_cache_from_file_generalized_by_path::<czkawka_core::similar_videos::FileEntry>(&get_similar_videos_cache_file(), true, &Default::default());
|
||||||
czkawka_core::similar_videos::save_hashes_to_file(&cache_entries, &mut messages, false);
|
|
||||||
|
if let Some(cache_entries) = loaded_items {
|
||||||
|
let save_messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &cache_entries, false, 0);
|
||||||
|
messages.extend_with_another_messages(save_messages);
|
||||||
}
|
}
|
||||||
|
|
||||||
messages.messages.push(flg!("cache_properly_cleared"));
|
messages.messages.push(flg!("cache_properly_cleared"));
|
||||||
|
|
Loading…
Reference in a new issue