2024-02-15 05:45:25 +13:00
|
|
|
use std::collections::BTreeMap;
|
|
|
|
use std::io::{BufReader, BufWriter};
|
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
use fun_time::fun_time;
|
2023-10-08 05:04:17 +13:00
|
|
|
use image::imageops::FilterType;
|
|
|
|
use image_hasher::HashAlg;
|
|
|
|
use log::debug;
|
|
|
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
|
|
|
use serde::{Deserialize, Serialize};
|
2024-02-15 05:45:25 +13:00
|
|
|
|
|
|
|
use crate::common;
|
|
|
|
use crate::common_messages::Messages;
|
|
|
|
use crate::common_traits::ResultEntry;
|
|
|
|
use crate::duplicate::HashType;
|
|
|
|
use crate::similar_images::{convert_algorithm_to_string, convert_filters_to_string};
|
2023-10-08 05:04:17 +13:00
|
|
|
|
2024-01-14 01:57:51 +13:00
|
|
|
// Version tag embedded in every cache file name below; bump it whenever the
// serialized entry layout changes so stale caches are simply not found
// instead of failing to deserialize.
const CACHE_VERSION: &str = "70";
|
|
|
|
|
2023-10-08 05:04:17 +13:00
|
|
|
pub fn get_broken_files_cache_file() -> String {
|
2024-01-14 01:57:51 +13:00
|
|
|
format!("cache_broken_files_{CACHE_VERSION}.bin")
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_similar_images_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
|
|
|
|
format!(
|
2024-01-14 01:57:51 +13:00
|
|
|
"cache_similar_images_{hash_size}_{}_{}_{CACHE_VERSION}.bin",
|
2023-10-08 05:04:17 +13:00
|
|
|
convert_algorithm_to_string(hash_alg),
|
|
|
|
convert_filters_to_string(image_filter),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_similar_videos_cache_file() -> String {
|
2024-01-14 01:57:51 +13:00
|
|
|
format!("cache_similar_videos_{CACHE_VERSION}.bin")
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
2024-01-14 01:57:51 +13:00
|
|
|
pub fn get_similar_music_cache_file(checking_tags: bool) -> String {
|
2023-10-08 05:04:17 +13:00
|
|
|
if checking_tags {
|
2024-01-14 01:57:51 +13:00
|
|
|
format!("cache_same_music_tags_{CACHE_VERSION}.bin")
|
2023-10-08 05:04:17 +13:00
|
|
|
} else {
|
2024-01-14 01:57:51 +13:00
|
|
|
format!("cache_same_music_fingerprints_{CACHE_VERSION}.bin")
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_duplicate_cache_file(type_of_hash: &HashType, is_prehash: bool) -> String {
|
|
|
|
let prehash_str = if is_prehash { "_prehash" } else { "" };
|
2024-01-14 01:57:51 +13:00
|
|
|
format!("cache_duplicates_{type_of_hash:?}{prehash_str}_{CACHE_VERSION}.bin")
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "save_cache_to_file_generalized", level = "debug")]
|
2023-10-08 05:04:17 +13:00
|
|
|
pub fn save_cache_to_file_generalized<T>(cache_file_name: &str, hashmap: &BTreeMap<String, T>, save_also_as_json: bool, minimum_file_size: u64) -> Messages
|
|
|
|
where
|
|
|
|
T: Serialize + ResultEntry + Sized + Send + Sync,
|
|
|
|
{
|
|
|
|
let mut text_messages = Messages::new();
|
|
|
|
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) =
|
|
|
|
common::open_cache_folder(cache_file_name, true, save_also_as_json, &mut text_messages.warnings)
|
|
|
|
{
|
|
|
|
let hashmap_to_save = hashmap.values().filter(|t| t.get_size() >= minimum_file_size).collect::<Vec<_>>();
|
|
|
|
|
|
|
|
{
|
|
|
|
let writer = BufWriter::new(file_handler.unwrap()); // Unwrap because cannot fail here
|
|
|
|
if let Err(e) = bincode::serialize_into(writer, &hashmap_to_save) {
|
2023-12-08 07:38:41 +13:00
|
|
|
text_messages.warnings.push(format!("Cannot write data to cache file {cache_file:?}, reason {e}"));
|
|
|
|
debug!("Failed to save cache to file {cache_file:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
return text_messages;
|
|
|
|
}
|
2023-12-08 07:38:41 +13:00
|
|
|
debug!("Saved binary to file {cache_file:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
|
|
|
if save_also_as_json {
|
|
|
|
if let Some(file_handler_json) = file_handler_json {
|
|
|
|
let writer = BufWriter::new(file_handler_json);
|
|
|
|
if let Err(e) = serde_json::to_writer(writer, &hashmap_to_save) {
|
2023-12-08 07:38:41 +13:00
|
|
|
text_messages.warnings.push(format!("Cannot write data to cache file {cache_file_json:?}, reason {e}"));
|
|
|
|
debug!("Failed to save cache to file {cache_file_json:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
return text_messages;
|
|
|
|
}
|
2023-12-08 07:38:41 +13:00
|
|
|
debug!("Saved json to file {cache_file_json:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
text_messages.messages.push(format!("Properly saved to file {} cache entries.", hashmap.len()));
|
2023-10-15 04:48:57 +13:00
|
|
|
debug!("Properly saved to file {} cache entries.", hashmap.len());
|
2023-10-08 05:04:17 +13:00
|
|
|
} else {
|
|
|
|
debug!("Failed to save cache to file {cache_file_name} because not exists");
|
|
|
|
}
|
|
|
|
text_messages
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache_from_file_generalized_by_path", level = "debug")]
|
2023-10-08 05:04:17 +13:00
|
|
|
pub fn load_cache_from_file_generalized_by_path<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<BTreeMap<String, T>>)
|
|
|
|
where
|
|
|
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
|
|
|
{
|
|
|
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, used_files);
|
|
|
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
|
|
|
return (text_messages, None);
|
|
|
|
};
|
|
|
|
|
|
|
|
debug!("Converting cache Vec<T> into BTreeMap<String, T>");
|
|
|
|
let map_loaded_entries: BTreeMap<String, T> = vec_loaded_entries
|
|
|
|
.into_iter()
|
|
|
|
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
|
|
|
|
.collect();
|
|
|
|
debug!("Converted cache Vec<T> into BTreeMap<String, T>");
|
|
|
|
|
|
|
|
(text_messages, Some(map_loaded_entries))
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache_from_file_generalized_by_size", level = "debug")]
|
2023-10-08 05:04:17 +13:00
|
|
|
pub fn load_cache_from_file_generalized_by_size<T>(
|
|
|
|
cache_file_name: &str,
|
|
|
|
delete_outdated_cache: bool,
|
|
|
|
cache_not_converted: &BTreeMap<u64, Vec<T>>,
|
|
|
|
) -> (Messages, Option<BTreeMap<u64, Vec<T>>>)
|
|
|
|
where
|
|
|
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
|
|
|
{
|
|
|
|
debug!("Converting cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
|
|
|
let mut used_files: BTreeMap<String, T> = Default::default();
|
|
|
|
for file_entry in cache_not_converted.values().flatten() {
|
|
|
|
used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone());
|
|
|
|
}
|
|
|
|
debug!("Converted cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
|
|
|
|
|
|
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files);
|
|
|
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
|
|
|
return (text_messages, None);
|
|
|
|
};
|
|
|
|
|
|
|
|
debug!("Converting cache Vec<T> into BTreeMap<u64, Vec<T>>");
|
|
|
|
let mut map_loaded_entries: BTreeMap<u64, Vec<T>> = Default::default();
|
|
|
|
for file_entry in vec_loaded_entries {
|
|
|
|
map_loaded_entries.entry(file_entry.get_size()).or_default().push(file_entry);
|
|
|
|
}
|
|
|
|
debug!("Converted cache Vec<T> into BTreeMap<u64, Vec<T>>");
|
|
|
|
|
|
|
|
(text_messages, Some(map_loaded_entries))
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache_from_file_generalized_by_path_from_size", level = "debug")]
|
2023-10-08 05:04:17 +13:00
|
|
|
pub fn load_cache_from_file_generalized_by_path_from_size<T>(
|
|
|
|
cache_file_name: &str,
|
|
|
|
delete_outdated_cache: bool,
|
|
|
|
cache_not_converted: &BTreeMap<u64, Vec<T>>,
|
|
|
|
) -> (Messages, Option<BTreeMap<String, T>>)
|
|
|
|
where
|
|
|
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
|
|
|
{
|
|
|
|
debug!("Converting cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
|
|
|
let mut used_files: BTreeMap<String, T> = Default::default();
|
|
|
|
for file_entry in cache_not_converted.values().flatten() {
|
|
|
|
used_files.insert(file_entry.get_path().to_string_lossy().into_owned(), file_entry.clone());
|
|
|
|
}
|
|
|
|
debug!("Converted cache BtreeMap<u64, Vec<T>> into BTreeMap<String, T>");
|
|
|
|
|
|
|
|
let (text_messages, vec_loaded_cache) = load_cache_from_file_generalized(cache_file_name, delete_outdated_cache, &used_files);
|
|
|
|
let Some(vec_loaded_entries) = vec_loaded_cache else {
|
|
|
|
return (text_messages, None);
|
|
|
|
};
|
|
|
|
|
|
|
|
debug!("Converting cache Vec<T> into BTreeMap<String, T>");
|
|
|
|
let map_loaded_entries: BTreeMap<String, T> = vec_loaded_entries
|
|
|
|
.into_iter()
|
|
|
|
.map(|file_entry| (file_entry.get_path().to_string_lossy().into_owned(), file_entry))
|
|
|
|
.collect();
|
|
|
|
debug!("Converted cache Vec<T> into BTreeMap<String, T>");
|
|
|
|
|
|
|
|
(text_messages, Some(map_loaded_entries))
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache_from_file_generalized", level = "debug")]
|
2023-10-08 05:04:17 +13:00
|
|
|
fn load_cache_from_file_generalized<T>(cache_file_name: &str, delete_outdated_cache: bool, used_files: &BTreeMap<String, T>) -> (Messages, Option<Vec<T>>)
|
|
|
|
where
|
|
|
|
for<'a> T: Deserialize<'a> + ResultEntry + Sized + Send + Sync + Clone,
|
|
|
|
{
|
|
|
|
let mut text_messages = Messages::new();
|
|
|
|
|
|
|
|
if let Some(((file_handler, cache_file), (file_handler_json, cache_file_json))) = common::open_cache_folder(cache_file_name, false, true, &mut text_messages.warnings) {
|
|
|
|
let mut vec_loaded_entries: Vec<T>;
|
|
|
|
if let Some(file_handler) = file_handler {
|
|
|
|
let reader = BufReader::new(file_handler);
|
|
|
|
|
|
|
|
vec_loaded_entries = match bincode::deserialize_from(reader) {
|
|
|
|
Ok(t) => t,
|
|
|
|
Err(e) => {
|
2023-12-08 07:38:41 +13:00
|
|
|
text_messages.warnings.push(format!("Failed to load data from cache file {cache_file:?}, reason {e}"));
|
|
|
|
debug!("Failed to load cache from file {cache_file:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
return (text_messages, None);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
} else {
|
|
|
|
let reader = BufReader::new(file_handler_json.unwrap()); // Unwrap cannot fail, because at least one file must be valid
|
|
|
|
vec_loaded_entries = match serde_json::from_reader(reader) {
|
|
|
|
Ok(t) => t,
|
|
|
|
Err(e) => {
|
2023-12-08 07:38:41 +13:00
|
|
|
text_messages.warnings.push(format!("Failed to load data from cache file {cache_file_json:?}, reason {e}"));
|
|
|
|
debug!("Failed to load cache from file {cache_file:?}");
|
2023-10-08 05:04:17 +13:00
|
|
|
return (text_messages, None);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
debug!(
|
|
|
|
"Starting removing outdated cache entries (removing non existent files from cache - {})",
|
|
|
|
delete_outdated_cache
|
|
|
|
);
|
2023-10-08 05:04:17 +13:00
|
|
|
let initial_number_of_entries = vec_loaded_entries.len();
|
|
|
|
vec_loaded_entries = vec_loaded_entries
|
|
|
|
.into_par_iter()
|
|
|
|
.filter(|file_entry| {
|
2023-12-17 11:21:09 +13:00
|
|
|
let path = file_entry.get_path();
|
2023-10-08 05:04:17 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
let file_entry_path_str = path.to_string_lossy().to_string();
|
2023-10-08 05:04:17 +13:00
|
|
|
if let Some(used_file) = used_files.get(&file_entry_path_str) {
|
|
|
|
if file_entry.get_size() != used_file.get_size() {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if file_entry.get_modified_date() != used_file.get_modified_date() {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
if delete_outdated_cache && !path.exists() {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-10-08 05:04:17 +13:00
|
|
|
true
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
debug!(
|
|
|
|
"Completed removing outdated cache entries, removed {} out of all {} entries",
|
|
|
|
initial_number_of_entries - vec_loaded_entries.len(),
|
|
|
|
initial_number_of_entries
|
|
|
|
);
|
|
|
|
|
|
|
|
text_messages.messages.push(format!("Properly loaded {} cache entries.", vec_loaded_entries.len()));
|
|
|
|
|
|
|
|
debug!("Loaded cache from file {cache_file_name} (or json alternative) - {} results", vec_loaded_entries.len());
|
|
|
|
return (text_messages, Some(vec_loaded_entries));
|
|
|
|
}
|
|
|
|
debug!("Failed to load cache from file {cache_file_name} because not exists");
|
|
|
|
(text_messages, None)
|
|
|
|
}
|