1
0
Fork 0
mirror of synced 2024-05-02 03:24:25 +12:00

Add prehash cache support (#477)

This commit is contained in:
Rafał Mikrut 2021-12-01 20:09:04 +01:00 committed by GitHub
parent 51271dcdf0
commit 1fd53b854b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 427 additions and 220 deletions

View file

@ -22,7 +22,7 @@ pub enum Commands {
minimal_file_size: u64, minimal_file_size: u64,
#[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")] #[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
maximal_file_size: u64, maximal_file_size: u64,
#[structopt(short = "c", long, parse(try_from_str = parse_minimal_file_size), default_value = "524288", help = "Minimum cached file size in bytes", long_help = "Minimum size of cached files in bytes, assigning bigger value may speed up will cause that lower amount of files will be cached, but loading of cache will be faster")] #[structopt(short = "c", long, parse(try_from_str = parse_minimal_file_size), default_value = "257144", help = "Minimum cached file size in bytes", long_help = "Minimum size of cached files in bytes, assigning bigger value may speed up will cause that lower amount of files will be cached, but loading of cache will be faster")]
minimal_cached_file_size: u64, minimal_cached_file_size: u64,
#[structopt(flatten)] #[structopt(flatten)]
allowed_extensions: AllowedExtensions, allowed_extensions: AllowedExtensions,

View file

@ -117,9 +117,6 @@ pub struct Info {
pub number_of_duplicated_files_by_name: usize, pub number_of_duplicated_files_by_name: usize,
pub lost_space_by_size: u64, pub lost_space_by_size: u64,
pub lost_space_by_hash: u64, pub lost_space_by_hash: u64,
pub bytes_read_when_hashing: u64,
pub number_of_removed_files: usize,
pub number_of_failed_to_remove_files: usize,
pub gained_space: u64, pub gained_space: u64,
} }
@ -149,7 +146,9 @@ pub struct DuplicateFinder {
dryrun: bool, dryrun: bool,
stopped_search: bool, stopped_search: bool,
use_cache: bool, use_cache: bool,
use_prehash_cache: bool,
minimal_cache_file_size: u64, minimal_cache_file_size: u64,
minimal_prehash_cache_file_size: u64,
delete_outdated_cache: bool, delete_outdated_cache: bool,
} }
@ -174,7 +173,9 @@ impl DuplicateFinder {
hash_type: HashType::Blake3, hash_type: HashType::Blake3,
dryrun: false, dryrun: false,
use_cache: true, use_cache: true,
minimal_cache_file_size: 2 * 1024 * 1024, // By default cache only >= 1MB files use_prehash_cache: true,
minimal_cache_file_size: 1024 * 1024 / 4, // By default cache only >= 256 KB files
minimal_prehash_cache_file_size: 0,
delete_outdated_cache: true, delete_outdated_cache: true,
} }
} }
@ -229,6 +230,10 @@ impl DuplicateFinder {
self.minimal_cache_file_size = minimal_cache_file_size; self.minimal_cache_file_size = minimal_cache_file_size;
} }
/// Sets the minimal file size used for the prehash cache.
///
/// The value is stored in `self.minimal_prehash_cache_file_size`; it only
/// updates the configuration and performs no other work.
pub fn set_minimal_prehash_cache_file_size(&mut self, minimal_prehash_cache_file_size: u64) {
self.minimal_prehash_cache_file_size = minimal_prehash_cache_file_size;
}
pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> { pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> {
&self.files_with_identical_names &self.files_with_identical_names
} }
@ -237,6 +242,10 @@ impl DuplicateFinder {
self.use_cache = use_cache; self.use_cache = use_cache;
} }
/// Enables or disables use of the prehash cache.
///
/// The flag is stored in `self.use_prehash_cache`; it only updates the
/// configuration and performs no other work.
pub fn set_use_prehash_cache(&mut self, use_prehash_cache: bool) {
self.use_prehash_cache = use_prehash_cache;
}
pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> { pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
&self.files_with_identical_size &self.files_with_identical_size
} }
@ -659,6 +668,8 @@ impl DuplicateFinder {
/// The slowest checking type, which must be applied after checking for size /// The slowest checking type, which must be applied after checking for size
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool { fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
assert_eq!(self.check_method, CheckingMethod::Hash);
let check_type = Arc::new(self.hash_type); let check_type = Arc::new(self.hash_type);
let start_time: SystemTime = SystemTime::now(); let start_time: SystemTime = SystemTime::now();
@ -699,57 +710,136 @@ impl DuplicateFinder {
//// PROGRESS THREAD END //// PROGRESS THREAD END
#[allow(clippy::type_complexity)] ///////////////////////////////////////////////////////////////////////////// PREHASHING START
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>, u64)> = self {
.files_with_identical_size let loaded_hash_map;
.par_iter() let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
.map(|(size, vec_file_entry)| { let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut bytes_read: u64 = 0;
let mut buffer = [0u8; 1024 * 2];
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); // Cache algorithm
for file_entry in vec_file_entry { // - Load data from cache
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { // - Convert from BT<u64,Vec<FileEntry>> to BT<String,FileEntry>
check_was_breaked.store(true, Ordering::Relaxed); // - Save to proper values
return None; if self.use_prehash_cache {
} loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type, true) {
match hash_calculation(&mut buffer, file_entry, &check_type, 0) { Some(t) => t,
Ok((hash_string, bytes)) => { None => Default::default(),
bytes_read += bytes; };
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone()); let mut loaded_hash_map2: BTreeMap<String, FileEntry> = Default::default();
} for vec_file_entry in loaded_hash_map.values() {
Err(s) => errors.push(s), for file_entry in vec_file_entry {
loaded_hash_map2.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
} }
} }
Some((*size, hashmap_with_hash, errors, bytes_read))
})
.while_some()
.collect();
// End thread which send info to gui #[allow(clippy::if_same_then_else)]
progress_thread_run.store(false, Ordering::Relaxed); for vec_file_entry in self.files_with_identical_size.values() {
progress_thread_handle.join().unwrap(); for file_entry in vec_file_entry {
let name = file_entry.path.to_string_lossy().to_string();
if !loaded_hash_map2.contains_key(&name) {
// If loaded data doesn't contains current image info
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
} else if file_entry.size != loaded_hash_map2.get(&name).unwrap().size || file_entry.modified_date != loaded_hash_map2.get(&name).unwrap().modified_date {
// When size or modification date of image changed, then it is clear that is different image
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
} else {
// Checking may be omitted when already there is entry with same size and modification date
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
records_already_cached.get_mut(&file_entry.size).unwrap().push(file_entry.clone());
}
}
}
} else {
loaded_hash_map = Default::default();
mem::swap(&mut self.files_with_identical_size, &mut non_cached_files_to_check);
}
// Check if user aborted search(only from GUI) #[allow(clippy::type_complexity)]
if check_was_breaked.load(Ordering::Relaxed) { let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
return false; .par_iter()
} .map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 2];
// Check results atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for (size, hash_map, mut errors, bytes_read) in pre_hash_results { for file_entry in vec_file_entry {
self.information.bytes_read_when_hashing += bytes_read; if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
self.text_messages.warnings.append(&mut errors); check_was_breaked.store(true, Ordering::Relaxed);
for (_hash, mut vec_file_entry) in hash_map { return None;
if vec_file_entry.len() > 1 { }
pre_checked_map.entry(size).or_insert_with(Vec::new); match hash_calculation(&mut buffer, file_entry, &check_type, 0) {
pre_checked_map.get_mut(&size).unwrap().append(&mut vec_file_entry); Ok(hash_string) => {
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone());
}
Err(s) => errors.push(s),
}
}
Some((*size, hashmap_with_hash, errors))
})
.while_some()
.collect();
// End thread which send info to gui
progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();
// Check if user aborted search(only from GUI)
if check_was_breaked.load(Ordering::Relaxed) {
return false;
}
// Add data from cache
for (size, vec_file_entry) in &records_already_cached {
pre_checked_map.entry(*size).or_insert_with(Vec::new);
pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone());
}
// Check results
for (size, hash_map, errors) in &pre_hash_results {
self.text_messages.warnings.append(&mut errors.clone());
for vec_file_entry in hash_map.values() {
if vec_file_entry.len() > 1 {
pre_checked_map.entry(*size).or_insert_with(Vec::new);
pre_checked_map.get_mut(size).unwrap().append(&mut vec_file_entry.clone());
}
} }
} }
if self.use_prehash_cache {
println!("non cached - {}", non_cached_files_to_check.values().map(|e| e.len()).sum::<usize>());
println!("cached - {}", records_already_cached.values().map(|e| e.len()).sum::<usize>());
// All results = records already cached + computed results
let mut save_cache_to_hashmap: BTreeMap<String, FileEntry> = Default::default();
for (size, vec_file_entry) in loaded_hash_map {
if size >= self.minimal_prehash_cache_file_size {
for file_entry in vec_file_entry {
save_cache_to_hashmap.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
}
}
}
for (size, hash_map, _errors) in &pre_hash_results {
if *size >= self.minimal_prehash_cache_file_size {
for vec_file_entry in hash_map.values() {
for file_entry in vec_file_entry {
save_cache_to_hashmap.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
}
}
}
}
save_hashes_to_file(&save_cache_to_hashmap, &mut self.text_messages, &self.hash_type, true, self.minimal_prehash_cache_file_size);
}
} }
///////////////////////////////////////////////////////////////////////////// PREHASHING END
Common::print_time(start_time, SystemTime::now(), "check_files_hash - prehash".to_string()); Common::print_time(start_time, SystemTime::now(), "check_files_hash - prehash".to_string());
let start_time: SystemTime = SystemTime::now(); let start_time: SystemTime = SystemTime::now();
@ -766,7 +856,7 @@ impl DuplicateFinder {
let progress_send = progress_sender.clone(); let progress_send = progress_sender.clone();
let progress_thread_run = progress_thread_run.clone(); let progress_thread_run = progress_thread_run.clone();
let atomic_file_counter = atomic_file_counter.clone(); let atomic_file_counter = atomic_file_counter.clone();
let files_to_check = pre_checked_map.iter().map(|e| e.1.len()).sum(); let files_to_check = pre_checked_map.iter().map(|(_size, vec_file_entry)| vec_file_entry.len()).sum();
let checking_method = self.check_method.clone(); let checking_method = self.check_method.clone();
progress_thread_handle = thread::spawn(move || loop { progress_thread_handle = thread::spawn(move || loop {
progress_send progress_send
@ -789,154 +879,151 @@ impl DuplicateFinder {
//// PROGRESS THREAD END //// PROGRESS THREAD END
#[allow(clippy::type_complexity)] ///////////////////////////////////////////////////////////////////////////// HASHING START
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>, u64)>; {
#[allow(clippy::type_complexity)]
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>;
match self.check_method { let loaded_hash_map;
CheckingMethod::Hash => {
let loaded_hash_map;
let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default(); let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default(); let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
if self.use_cache { if self.use_cache {
loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type, false) { loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.delete_outdated_cache, &self.hash_type, false) {
Some(t) => t, Some(t) => t,
None => Default::default(), None => Default::default(),
}; };
for (size, vec_file_entry) in pre_checked_map { for (size, vec_file_entry) in pre_checked_map {
#[allow(clippy::collapsible_if)] #[allow(clippy::collapsible_if)]
if !loaded_hash_map.contains_key(&size) { if !loaded_hash_map.contains_key(&size) {
// If loaded data doesn't contains current info // If loaded data doesn't contains current info
non_cached_files_to_check.insert(size, vec_file_entry); non_cached_files_to_check.insert(size, vec_file_entry);
} else { } else {
let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap(); let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap();
for file_entry in vec_file_entry {
let mut found: bool = false;
for loaded_file_entry in loaded_vec_file_entry {
if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone());
found = true;
break;
}
}
if !found {
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry);
}
}
}
}
} else {
loaded_hash_map = Default::default();
mem::swap(&mut pre_checked_map, &mut non_cached_files_to_check);
}
full_hash_results = non_cached_files_to_check
.par_iter()
.map(|(size, vec_file_entry)| {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
let mut errors: Vec<String> = Vec::new();
let mut buffer = [0u8; 1024 * 128];
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for file_entry in vec_file_entry {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_breaked.store(true, Ordering::Relaxed);
return None;
}
match hash_calculation(&mut buffer, file_entry, &check_type, u64::MAX) {
Ok(hash_string) => {
let mut file_entry = file_entry.clone();
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry);
}
Err(s) => errors.push(s),
}
}
Some((*size, hashmap_with_hash, errors))
})
.while_some()
.collect();
if self.use_cache {
'main: for (size, vec_file_entry) in records_already_cached {
// Check if size already exists, if exists we must to change it outside because cannot have mut and non mut reference to full_hash_results
for (full_size, full_hashmap, _errors) in &mut full_hash_results {
if size == *full_size {
for file_entry in vec_file_entry { for file_entry in vec_file_entry {
let mut found: bool = false; full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
for loaded_file_entry in loaded_vec_file_entry { full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone());
found = true;
break;
}
}
if !found {
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry);
}
} }
continue 'main;
} }
} }
} else { // Size doesn't exists add results to files
loaded_hash_map = Default::default(); let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
mem::swap(&mut pre_checked_map, &mut non_cached_files_to_check); for file_entry in vec_file_entry {
temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
}
full_hash_results.push((size, temp_hashmap, Vec::new()));
} }
full_hash_results = non_cached_files_to_check // Must save all results to file, old loaded from file with all currently counted results
.par_iter() let mut all_results: BTreeMap<String, FileEntry> = Default::default();
.map(|(size, vec_file_entry)| { for (_size, vec_file_entry) in loaded_hash_map {
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default(); for file_entry in vec_file_entry {
let mut errors: Vec<String> = Vec::new(); all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
let mut bytes_read: u64 = 0;
let mut buffer = [0u8; 1024 * 128];
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
for file_entry in vec_file_entry {
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
check_was_breaked.store(true, Ordering::Relaxed);
return None;
}
match hash_calculation(&mut buffer, file_entry, &check_type, u64::MAX) {
Ok((hash_string, bytes)) => {
bytes_read += bytes;
let mut file_entry = file_entry.clone();
file_entry.hash = hash_string.clone();
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry);
}
Err(s) => errors.push(s),
}
}
Some((*size, hashmap_with_hash, errors, bytes_read))
})
.while_some()
.collect();
if self.use_cache {
'main: for (size, vec_file_entry) in records_already_cached {
// Check if size already exists, if exists we must to change it outside because cannot have mut and non mut reference to full_hash_results
for (full_size, full_hashmap, _errors, _bytes_read) in &mut full_hash_results {
if size == *full_size {
for file_entry in vec_file_entry {
full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
}
continue 'main;
}
}
// Size doesn't exists add results to files
let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
for file_entry in vec_file_entry {
temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
}
full_hash_results.push((size, temp_hashmap, Vec::new(), 0));
} }
}
// Must save all results to file, old loaded from file with all currently counted results for (_size, hashmap, _errors) in &full_hash_results {
let mut all_results: BTreeMap<String, FileEntry> = Default::default(); for vec_file_entry in hashmap.values() {
for (_size, vec_file_entry) in loaded_hash_map {
for file_entry in vec_file_entry { for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
} }
} }
for (_size, hashmap, _errors, _bytes_read) in &full_hash_results { }
for vec_file_entry in hashmap.values() { save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, false, self.minimal_cache_file_size);
for file_entry in vec_file_entry { }
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
} // End thread which send info to gui
} progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();
// Check if user aborted search(only from GUI)
if check_was_breaked.load(Ordering::Relaxed) {
return false;
}
for (size, hash_map, mut errors) in full_hash_results {
self.text_messages.warnings.append(&mut errors);
for (_hash, vec_file_entry) in hash_map {
if vec_file_entry.len() > 1 {
self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new);
self.files_with_identical_hashes.get_mut(&size).unwrap().push(vec_file_entry);
} }
save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type, false, self.minimal_cache_file_size);
} }
} }
_ => panic!("What"),
}
// End thread which send info to gui /////////////////////////
progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap();
// Check if user aborted search(only from GUI) for (size, vector_vectors) in &self.files_with_identical_hashes {
if check_was_breaked.load(Ordering::Relaxed) { for vector in vector_vectors {
return false; self.information.number_of_duplicated_files_by_hash += vector.len() - 1;
} self.information.number_of_groups_by_hash += 1;
self.information.lost_space_by_hash += (vector.len() as u64 - 1) * size;
for (size, hash_map, mut errors, bytes_read) in full_hash_results {
self.information.bytes_read_when_hashing += bytes_read;
self.text_messages.warnings.append(&mut errors);
for (_hash, vec_file_entry) in hash_map {
if vec_file_entry.len() > 1 {
self.files_with_identical_hashes.entry(size).or_insert_with(Vec::new);
self.files_with_identical_hashes.get_mut(&size).unwrap().push(vec_file_entry);
} }
} }
} }
///////////////////////// ///////////////////////////////////////////////////////////////////////////// HASHING END
for (size, vector_vectors) in &self.files_with_identical_hashes {
for vector in vector_vectors {
self.information.number_of_duplicated_files_by_hash += vector.len() - 1;
self.information.number_of_groups_by_hash += 1;
self.information.lost_space_by_hash += (vector.len() as u64 - 1) * size;
}
}
Common::print_time(start_time, SystemTime::now(), "check_files_hash - full hash".to_string()); Common::print_time(start_time, SystemTime::now(), "check_files_hash - full hash".to_string());
@ -959,8 +1046,6 @@ impl DuplicateFinder {
for vector in self.files_with_identical_names.values() { for vector in self.files_with_identical_names.values() {
let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun); let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
self.information.gained_space += tuple.0; self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
} }
} }
CheckingMethod::Hash => { CheckingMethod::Hash => {
@ -968,8 +1053,6 @@ impl DuplicateFinder {
for vector in vector_vectors.iter() { for vector in vector_vectors.iter() {
let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun); let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
self.information.gained_space += tuple.0; self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
} }
} }
} }
@ -977,8 +1060,6 @@ impl DuplicateFinder {
for vector in self.files_with_identical_size.values() { for vector in self.files_with_identical_size.values() {
let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun); let tuple: (u64, usize, usize) = delete_files(vector, &self.delete_method, &mut self.text_messages, self.dryrun);
self.information.gained_space += tuple.0; self.information.gained_space += tuple.0;
self.information.number_of_removed_files += tuple.1;
self.information.number_of_failed_to_remove_files += tuple.2;
} }
} }
CheckingMethod::None => { CheckingMethod::None => {
@ -1031,13 +1112,6 @@ impl DebugPrint for DuplicateFinder {
self.information.gained_space.file_size(options::BINARY).unwrap(), self.information.gained_space.file_size(options::BINARY).unwrap(),
self.information.gained_space self.information.gained_space
); );
println!(
"Bytes read when hashing - {} ({} bytes)",
self.information.bytes_read_when_hashing.file_size(options::BINARY).unwrap(),
self.information.bytes_read_when_hashing
);
println!("Number of removed files - {}", self.information.number_of_removed_files);
println!("Number of failed to remove files - {}", self.information.number_of_failed_to_remove_files);
println!("### Other"); println!("### Other");
@ -1363,7 +1437,7 @@ pub trait MyHasher {
fn finalize(&self) -> String; fn finalize(&self) -> String;
} }
fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result<(String, u64), String> { fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result<String, String> {
let mut file_handler = match File::open(&file_entry.path) { let mut file_handler = match File::open(&file_entry.path) {
Ok(t) => t, Ok(t) => t,
Err(e) => return Err(format!("Unable to check hash of file {}, reason {}", file_entry.path.display(), e)), Err(e) => return Err(format!("Unable to check hash of file {}, reason {}", file_entry.path.display(), e)),
@ -1384,7 +1458,7 @@ fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashT
break; break;
} }
} }
Ok((hasher.finalize(), current_file_read_bytes)) Ok(hasher.finalize())
} }
fn get_file_hash_name(type_of_hash: &HashType, is_prehash: bool) -> String { fn get_file_hash_name(type_of_hash: &HashType, is_prehash: bool) -> String {
@ -1560,8 +1634,7 @@ mod tests {
file.write_all(b"aa")?; file.write_all(b"aa")?;
let e = FileEntry { path: src, ..Default::default() }; let e = FileEntry { path: src, ..Default::default() };
let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap(); let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap();
assert_eq!(2, r.1); assert!(!r.is_empty());
assert!(!r.0.is_empty());
Ok(()) Ok(())
} }

View file

@ -457,7 +457,7 @@ impl SimilarVideos {
hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone()); hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone());
vector_of_hashes.push(file_entry.vhash.clone()); vector_of_hashes.push(file_entry.vhash.clone());
} else { } else {
self.text_messages.errors.push(file_entry.error.clone()); self.text_messages.warnings.push(file_entry.error.clone());
} }
} }

View file

@ -92,7 +92,9 @@ pub fn connect_button_search(
let radio_button_hash_type_xxh3 = gui_data.main_notebook.radio_button_hash_type_xxh3.clone(); let radio_button_hash_type_xxh3 = gui_data.main_notebook.radio_button_hash_type_xxh3.clone();
let check_button_settings_hide_hard_links = gui_data.settings.check_button_settings_hide_hard_links.clone(); let check_button_settings_hide_hard_links = gui_data.settings.check_button_settings_hide_hard_links.clone();
let check_button_settings_use_cache = gui_data.settings.check_button_settings_use_cache.clone(); let check_button_settings_use_cache = gui_data.settings.check_button_settings_use_cache.clone();
let check_button_duplicates_use_prehash_cache = gui_data.settings.check_button_duplicates_use_prehash_cache.clone();
let entry_settings_cache_file_minimal_size = gui_data.settings.entry_settings_cache_file_minimal_size.clone(); let entry_settings_cache_file_minimal_size = gui_data.settings.entry_settings_cache_file_minimal_size.clone();
let entry_settings_prehash_cache_file_minimal_size = gui_data.settings.entry_settings_prehash_cache_file_minimal_size.clone();
let radio_button_similar_hash_size_4 = gui_data.main_notebook.radio_button_similar_hash_size_4.clone(); let radio_button_similar_hash_size_4 = gui_data.main_notebook.radio_button_similar_hash_size_4.clone();
let radio_button_similar_hash_size_8 = gui_data.main_notebook.radio_button_similar_hash_size_8.clone(); let radio_button_similar_hash_size_8 = gui_data.main_notebook.radio_button_similar_hash_size_8.clone();
let radio_button_similar_hash_size_16 = gui_data.main_notebook.radio_button_similar_hash_size_16.clone(); let radio_button_similar_hash_size_16 = gui_data.main_notebook.radio_button_similar_hash_size_16.clone();
@ -118,7 +120,7 @@ pub fn connect_button_search(
let allowed_extensions = entry_allowed_extensions.text().as_str().to_string(); let allowed_extensions = entry_allowed_extensions.text().as_str().to_string();
let hide_hard_links = check_button_settings_hide_hard_links.is_active(); let hide_hard_links = check_button_settings_hide_hard_links.is_active();
let use_cache = check_button_settings_use_cache.is_active(); let use_cache = check_button_settings_use_cache.is_active();
let minimal_cache_file_size = entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024); let minimal_cache_file_size = entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(1024 * 1024 / 4);
let show_dialog = Arc::new(AtomicBool::new(true)); let show_dialog = Arc::new(AtomicBool::new(true));
@ -170,6 +172,9 @@ pub fn connect_button_search(
panic!("No radio button is pressed"); panic!("No radio button is pressed");
} }
let use_prehash_cache = check_button_duplicates_use_prehash_cache.is_active();
let minimal_prehash_cache_file_size = entry_settings_prehash_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(0);
let delete_outdated_cache = check_button_settings_duplicates_delete_outdated_cache.is_active(); let delete_outdated_cache = check_button_settings_duplicates_delete_outdated_cache.is_active();
let futures_sender_duplicate_files = futures_sender_duplicate_files.clone(); let futures_sender_duplicate_files = futures_sender_duplicate_files.clone();
@ -184,10 +189,12 @@ pub fn connect_button_search(
df.set_minimal_file_size(minimal_file_size); df.set_minimal_file_size(minimal_file_size);
df.set_maximal_file_size(maximal_file_size); df.set_maximal_file_size(maximal_file_size);
df.set_minimal_cache_file_size(minimal_cache_file_size); df.set_minimal_cache_file_size(minimal_cache_file_size);
df.set_minimal_prehash_cache_file_size(minimal_prehash_cache_file_size);
df.set_check_method(check_method); df.set_check_method(check_method);
df.set_hash_type(hash_type); df.set_hash_type(hash_type);
df.set_ignore_hard_links(hide_hard_links); df.set_ignore_hard_links(hide_hard_links);
df.set_use_cache(use_cache); df.set_use_cache(use_cache);
df.set_use_prehash_cache(use_prehash_cache);
df.set_delete_outdated_cache(delete_outdated_cache); df.set_delete_outdated_cache(delete_outdated_cache);
df.find_duplicates(Some(&stop_receiver), Some(&futures_sender_duplicate_files)); df.find_duplicates(Some(&stop_receiver), Some(&futures_sender_duplicate_files));
let _ = glib_stop_sender.send(Message::Duplicates(df)); let _ = glib_stop_sender.send(Message::Duplicates(df));

View file

@ -10,10 +10,10 @@ use crate::notebook_enums::*;
pub fn connect_button_select(gui_data: &GuiData) { pub fn connect_button_select(gui_data: &GuiData) {
let mut hashmap: HashMap<NotebookMainEnum, Vec<PopoverTypes>> = Default::default(); let mut hashmap: HashMap<NotebookMainEnum, Vec<PopoverTypes>> = Default::default();
{ {
hashmap.insert(NotebookMainEnum::SimilarImages, vec![PopoverTypes::All, PopoverTypes::ImageSize, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]); hashmap.insert(NotebookMainEnum::SimilarImages, vec![PopoverTypes::All, PopoverTypes::Size, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]);
hashmap.insert(NotebookMainEnum::SimilarVideos, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]); hashmap.insert(NotebookMainEnum::SimilarVideos, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date, PopoverTypes::Size]);
hashmap.insert(NotebookMainEnum::Duplicate, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]); hashmap.insert(NotebookMainEnum::Duplicate, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]);
hashmap.insert(NotebookMainEnum::SameMusic, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date]); hashmap.insert(NotebookMainEnum::SameMusic, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom, PopoverTypes::Date, PopoverTypes::Size]);
hashmap.insert(NotebookMainEnum::EmptyFiles, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom]); hashmap.insert(NotebookMainEnum::EmptyFiles, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom]);
hashmap.insert(NotebookMainEnum::EmptyDirectories, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom]); hashmap.insert(NotebookMainEnum::EmptyDirectories, vec![PopoverTypes::All, PopoverTypes::Reverse, PopoverTypes::Custom]);
@ -63,7 +63,7 @@ fn show_required_popovers(popovers: &GuiPopovers, current_mode: &NotebookMainEnu
buttons_popover_unselect_all.hide(); buttons_popover_unselect_all.hide();
} }
if vec.contains(&PopoverTypes::ImageSize) { if vec.contains(&PopoverTypes::Size) {
buttons_popover_select_all_images_except_biggest.show(); buttons_popover_select_all_images_except_biggest.show();
buttons_popover_select_all_images_except_smallest.show(); buttons_popover_select_all_images_except_smallest.show();
separator_select_image_size.show(); separator_select_image_size.show();

View file

@ -345,7 +345,7 @@ fn popover_custom_select_unselect(popover: &gtk::Popover, window_main: &Window,
} }
} }
fn popover_all_except_biggest_smallest(popover: &gtk::Popover, tree_view: &gtk::TreeView, column_color: i32, column_size_as_bytes: i32, column_dimensions: i32, column_button_selection: u32, except_biggest: bool) { fn popover_all_except_biggest_smallest(popover: &gtk::Popover, tree_view: &gtk::TreeView, column_color: i32, column_size_as_bytes: i32, column_dimensions: Option<i32>, column_button_selection: u32, except_biggest: bool) {
let model = get_list_store(tree_view); let model = get_list_store(tree_view);
if let Some(iter) = model.iter_first() { if let Some(iter) = model.iter_first() {
@ -373,22 +373,38 @@ fn popover_all_except_biggest_smallest(popover: &gtk::Popover, tree_view: &gtk::
} }
tree_iter_array.push(iter.clone()); tree_iter_array.push(iter.clone());
let size_as_bytes = model.value(&iter, column_size_as_bytes).get::<u64>().unwrap(); let size_as_bytes = model.value(&iter, column_size_as_bytes).get::<u64>().unwrap();
let dimensions_string = model.value(&iter, column_dimensions).get::<String>().unwrap();
let dimensions = change_dimension_to_krotka(dimensions_string); // If a dimensions column exists, the rows are images: compare by pixel count first, then by file size
let number_of_pixels = dimensions.0 * dimensions.1; if let Some(column_dimensions) = column_dimensions {
let dimensions_string = model.value(&iter, column_dimensions).get::<String>().unwrap();
if except_biggest { let dimensions = change_dimension_to_krotka(dimensions_string);
if number_of_pixels > number_of_pixels_min_max || (number_of_pixels == number_of_pixels_min_max && size_as_bytes > size_as_bytes_min_max) { let number_of_pixels = dimensions.0 * dimensions.1;
number_of_pixels_min_max = number_of_pixels;
size_as_bytes_min_max = size_as_bytes; if except_biggest {
used_index = Some(current_index); if number_of_pixels > number_of_pixels_min_max || (number_of_pixels == number_of_pixels_min_max && size_as_bytes > size_as_bytes_min_max) {
number_of_pixels_min_max = number_of_pixels;
size_as_bytes_min_max = size_as_bytes;
used_index = Some(current_index);
}
} else {
if number_of_pixels < number_of_pixels_min_max || (number_of_pixels == number_of_pixels_min_max && size_as_bytes < size_as_bytes_min_max) {
number_of_pixels_min_max = number_of_pixels;
size_as_bytes_min_max = size_as_bytes;
used_index = Some(current_index);
}
} }
} else { } else {
if number_of_pixels < number_of_pixels_min_max || (number_of_pixels == number_of_pixels_min_max && size_as_bytes < size_as_bytes_min_max) { if except_biggest {
number_of_pixels_min_max = number_of_pixels; if size_as_bytes > size_as_bytes_min_max {
size_as_bytes_min_max = size_as_bytes; size_as_bytes_min_max = size_as_bytes;
used_index = Some(current_index); used_index = Some(current_index);
}
} else {
if size_as_bytes < size_as_bytes_min_max {
size_as_bytes_min_max = size_as_bytes;
used_index = Some(current_index);
}
} }
} }
@ -593,9 +609,9 @@ pub fn connect_popovers(gui_data: &GuiData) {
popover_all_except_biggest_smallest( popover_all_except_biggest_smallest(
&popover_select, &popover_select,
tree_view, tree_view,
nb_object.column_color.expect("AEB can't be used without headers"), nb_object.column_color.expect("AEBI can't be used without headers"),
nb_object.column_size_as_bytes.expect("AEB needs size as bytes column"), nb_object.column_size_as_bytes.expect("AEBI needs size as bytes column"),
nb_object.column_dimensions.expect("AEB needs dimensions column"), nb_object.column_dimensions,
nb_object.column_selection as u32, nb_object.column_selection as u32,
true, true,
); );
@ -613,9 +629,9 @@ pub fn connect_popovers(gui_data: &GuiData) {
popover_all_except_biggest_smallest( popover_all_except_biggest_smallest(
&popover_select, &popover_select,
tree_view, tree_view,
nb_object.column_color.expect("AES can't be used without headers"), nb_object.column_color.expect("AESI can't be used without headers"),
nb_object.column_size_as_bytes.expect("AES needs size as bytes column"), nb_object.column_size_as_bytes.expect("AESI needs size as bytes column"),
nb_object.column_dimensions.expect("AES needs dimensions column"), nb_object.column_dimensions,
nb_object.column_selection as u32, nb_object.column_selection as u32,
false, false,
); );

View file

@ -17,6 +17,8 @@ pub struct GuiSettings {
// Duplicates // Duplicates
pub check_button_settings_hide_hard_links: gtk::CheckButton, pub check_button_settings_hide_hard_links: gtk::CheckButton,
pub entry_settings_cache_file_minimal_size: gtk::Entry, pub entry_settings_cache_file_minimal_size: gtk::Entry,
pub entry_settings_prehash_cache_file_minimal_size: gtk::Entry,
pub check_button_duplicates_use_prehash_cache: gtk::CheckButton,
pub check_button_settings_show_preview_duplicates: gtk::CheckButton, pub check_button_settings_show_preview_duplicates: gtk::CheckButton,
pub check_button_settings_duplicates_delete_outdated_cache: gtk::CheckButton, pub check_button_settings_duplicates_delete_outdated_cache: gtk::CheckButton,
pub button_settings_duplicates_clear_cache: gtk::Button, pub button_settings_duplicates_clear_cache: gtk::Button,
@ -71,6 +73,8 @@ impl GuiSettings {
let check_button_settings_show_preview_duplicates: gtk::CheckButton = builder.object("check_button_settings_show_preview_duplicates").unwrap(); let check_button_settings_show_preview_duplicates: gtk::CheckButton = builder.object("check_button_settings_show_preview_duplicates").unwrap();
let check_button_settings_duplicates_delete_outdated_cache: gtk::CheckButton = builder.object("check_button_settings_duplicates_delete_outdated_cache").unwrap(); let check_button_settings_duplicates_delete_outdated_cache: gtk::CheckButton = builder.object("check_button_settings_duplicates_delete_outdated_cache").unwrap();
let button_settings_duplicates_clear_cache: gtk::Button = builder.object("button_settings_duplicates_clear_cache").unwrap(); let button_settings_duplicates_clear_cache: gtk::Button = builder.object("button_settings_duplicates_clear_cache").unwrap();
let check_button_duplicates_use_prehash_cache: gtk::CheckButton = builder.object("check_button_duplicates_use_prehash_cache").unwrap();
let entry_settings_prehash_cache_file_minimal_size: gtk::Entry = builder.object("entry_settings_prehash_cache_file_minimal_size").unwrap();
check_button_settings_hide_hard_links.set_tooltip_text(Some( check_button_settings_hide_hard_links.set_tooltip_text(Some(
"Hides all files except one, if are points to same data(are hardlinked).\n\nE.g. in case where on disk there is 7 files which are hardlinked to specific data and one different file with same data but different inode, then in duplicate finder will be visible only one unique file and one file from hardlinked ones.", "Hides all files except one, if are points to same data(are hardlinked).\n\nE.g. in case where on disk there is 7 files which are hardlinked to specific data and one different file with same data but different inode, then in duplicate finder will be visible only one unique file and one file from hardlinked ones.",
@ -80,7 +84,10 @@ impl GuiSettings {
)); ));
check_button_settings_show_preview_duplicates.set_tooltip_text(Some("Shows preview at right side, when selecting image file.")); check_button_settings_show_preview_duplicates.set_tooltip_text(Some("Shows preview at right side, when selecting image file."));
check_button_settings_duplicates_delete_outdated_cache.set_tooltip_text(Some("Allows to delete outdated cache results which points to non-existent files.\n\nWhen enabled, app make sure when loading records, that all points to valid files and ignore broken ones.\n\nDisabling this option, will help to scan files on external drives, so cache entries about them will not be purged in next scan.\n\nIn case of having hundred of thousands records in cache, it is suggested to enable this option, to speedup cache loading and saving at start and end of scan.")); check_button_settings_duplicates_delete_outdated_cache.set_tooltip_text(Some("Allows to delete outdated cache results which points to non-existent files.\n\nWhen enabled, app make sure when loading records, that all points to valid files and ignore broken ones.\n\nDisabling this option, will help to scan files on external drives, so cache entries about them will not be purged in next scan.\n\nIn case of having hundred of thousands records in cache, it is suggested to enable this option, to speedup cache loading and saving at start and end of scan."));
button_settings_duplicates_clear_cache.set_tooltip_text(Some("Manually clear cache from outdated entries.\nShould be used only if automatic clearing was disabled.")); button_settings_duplicates_clear_cache.set_tooltip_text(Some("Manually clear cache from outdated entries.\n\nShould be used only if automatic clearing was disabled."));
check_button_duplicates_use_prehash_cache.set_tooltip_text(Some(
"Enables caching of prehash(hash computed from small part of file) which allows to earlier throw out non duplicated results.\n\nIt is disabled by default because can cause in some situations slowdowns.\n\nIt is heavily recommended to use it when scanning hundred of thousands or million files, because it can speedup search multiple times.",
));
// Similar Images // Similar Images
let check_button_settings_show_preview_similar_images: gtk::CheckButton = builder.object("check_button_settings_show_preview_similar_images").unwrap(); let check_button_settings_show_preview_similar_images: gtk::CheckButton = builder.object("check_button_settings_show_preview_similar_images").unwrap();
@ -126,6 +133,8 @@ impl GuiSettings {
check_button_settings_use_trash, check_button_settings_use_trash,
check_button_settings_hide_hard_links, check_button_settings_hide_hard_links,
entry_settings_cache_file_minimal_size, entry_settings_cache_file_minimal_size,
entry_settings_prehash_cache_file_minimal_size,
check_button_duplicates_use_prehash_cache,
check_button_settings_show_preview_duplicates, check_button_settings_show_preview_duplicates,
check_button_settings_duplicates_delete_outdated_cache, check_button_settings_duplicates_delete_outdated_cache,
button_settings_duplicates_clear_cache, button_settings_duplicates_clear_cache,

View file

@ -33,7 +33,7 @@ pub const KEY_END: u32 = 110;
#[derive(Eq, PartialEq)] #[derive(Eq, PartialEq)]
pub enum PopoverTypes { pub enum PopoverTypes {
All, All,
ImageSize, Size,
Reverse, Reverse,
Custom, Custom,
Date, Date,

View file

@ -141,7 +141,7 @@ pub fn save_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
//// minimal cache file size //// minimal cache file size
data_to_save.push("--cache_minimal_file_size:".to_string()); data_to_save.push("--cache_minimal_file_size:".to_string());
let entry_settings_cache_file_minimal_size = settings.entry_settings_cache_file_minimal_size.clone(); let entry_settings_cache_file_minimal_size = settings.entry_settings_cache_file_minimal_size.clone();
data_to_save.push(entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(2 * 1024 * 1024).to_string()); data_to_save.push(entry_settings_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(1024 * 1024 / 4).to_string());
//// Duplicates, delete outdated cache entries //// Duplicates, delete outdated cache entries
data_to_save.push("--delete_outdated_entries_duplicates:".to_string()); data_to_save.push("--delete_outdated_entries_duplicates:".to_string());
@ -157,6 +157,16 @@ pub fn save_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
data_to_save.push("--delete_outdated_entries_similar_videos:".to_string()); data_to_save.push("--delete_outdated_entries_similar_videos:".to_string());
let check_button_settings_similar_videos_delete_outdated_cache = settings.check_button_settings_similar_videos_delete_outdated_cache.clone(); let check_button_settings_similar_videos_delete_outdated_cache = settings.check_button_settings_similar_videos_delete_outdated_cache.clone();
data_to_save.push(check_button_settings_similar_videos_delete_outdated_cache.is_active().to_string()); data_to_save.push(check_button_settings_similar_videos_delete_outdated_cache.is_active().to_string());
//// Use prehash cache system
data_to_save.push("--use_prehash_cache:".to_string());
let check_button_duplicates_use_prehash_cache = settings.check_button_duplicates_use_prehash_cache.clone();
data_to_save.push(check_button_duplicates_use_prehash_cache.is_active().to_string());
//// minimal prehash cache file size
data_to_save.push("--cache_prehash_minimal_file_size:".to_string());
let entry_settings_prehash_cache_file_minimal_size = settings.entry_settings_prehash_cache_file_minimal_size.clone();
data_to_save.push(entry_settings_prehash_cache_file_minimal_size.text().as_str().parse::<u64>().unwrap_or(0).to_string());
} }
// Creating/Opening config file // Creating/Opening config file
@ -213,6 +223,8 @@ enum TypeOfLoadedData {
DeleteCacheDuplicates, DeleteCacheDuplicates,
DeleteCacheSimilarImages, DeleteCacheSimilarImages,
DeleteCacheSimilarVideos, DeleteCacheSimilarVideos,
UsePrehashCache,
CachePrehashMinimalSize,
} }
pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNotebook, settings: &GuiSettings, text_view_errors: &TextView, scrolled_window_errors: &ScrolledWindow) { pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNotebook, settings: &GuiSettings, text_view_errors: &TextView, scrolled_window_errors: &ScrolledWindow) {
@ -264,6 +276,8 @@ pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
let mut delete_outdated_cache_dupliactes: bool = true; let mut delete_outdated_cache_dupliactes: bool = true;
let mut delete_outdated_cache_similar_images: bool = true; let mut delete_outdated_cache_similar_images: bool = true;
let mut delete_outdated_cache_similar_videos: bool = false; let mut delete_outdated_cache_similar_videos: bool = false;
let mut use_prehash_cache: bool = false;
let mut cache_prehash_minimal_size: u64 = 0;
let mut current_type = TypeOfLoadedData::None; let mut current_type = TypeOfLoadedData::None;
for (line_number, line) in loaded_data.replace("\r\n", "\n").split('\n').enumerate() { for (line_number, line) in loaded_data.replace("\r\n", "\n").split('\n').enumerate() {
@ -307,6 +321,10 @@ pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
current_type = TypeOfLoadedData::DeleteCacheSimilarVideos; current_type = TypeOfLoadedData::DeleteCacheSimilarVideos;
} else if line.starts_with("--delete_outdated_entries_similar_images") { } else if line.starts_with("--delete_outdated_entries_similar_images") {
current_type = TypeOfLoadedData::DeleteCacheSimilarImages; current_type = TypeOfLoadedData::DeleteCacheSimilarImages;
} else if line.starts_with("--use_prehash_cache") {
current_type = TypeOfLoadedData::UsePrehashCache;
} else if line.starts_with("--cache_prehash_minimal_file_size") {
current_type = TypeOfLoadedData::CachePrehashMinimalSize;
} else if line.starts_with("--") { } else if line.starts_with("--") {
current_type = TypeOfLoadedData::None; current_type = TypeOfLoadedData::None;
add_text_to_text_view( add_text_to_text_view(
@ -512,6 +530,29 @@ pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
); );
} }
} }
TypeOfLoadedData::UsePrehashCache => {
let line = line.to_lowercase();
if line == "1" || line == "true" {
use_prehash_cache = true;
} else if line == "0" || line == "false" {
use_prehash_cache = false;
} else {
add_text_to_text_view(
&text_view_errors,
format!("Found invalid data in line {} \"{}\" isn't proper value(0/1/true/false) when loading file {:?}", line_number, line, config_file).as_str(),
);
}
}
TypeOfLoadedData::CachePrehashMinimalSize => {
if let Ok(number) = line.parse::<u64>() {
cache_prehash_minimal_size = number;
} else {
add_text_to_text_view(
&text_view_errors,
format!("Found invalid data in line {} \"{}\" isn't proper value(u64) when loading file {:?}", line_number, line, config_file).as_str(),
);
}
}
} }
} }
} }
@ -566,8 +607,10 @@ pub fn load_configuration(manual_execution: bool, upper_notebook: &GuiUpperNoteb
} }
settings.check_button_settings_hide_hard_links.set_active(hide_hard_links); settings.check_button_settings_hide_hard_links.set_active(hide_hard_links);
settings.check_button_settings_use_cache.set_active(use_cache); settings.check_button_settings_use_cache.set_active(use_cache);
settings.check_button_duplicates_use_prehash_cache.set_active(use_prehash_cache);
settings.check_button_settings_use_trash.set_active(use_trash); settings.check_button_settings_use_trash.set_active(use_trash);
settings.entry_settings_cache_file_minimal_size.set_text(cache_minimal_size.to_string().as_str()); settings.entry_settings_cache_file_minimal_size.set_text(cache_minimal_size.to_string().as_str());
settings.entry_settings_prehash_cache_file_minimal_size.set_text(cache_prehash_minimal_size.to_string().as_str());
} else { } else {
settings.check_button_settings_load_at_start.set_active(false); settings.check_button_settings_load_at_start.set_active(false);
} }
@ -650,10 +693,12 @@ pub fn reset_configuration(manual_clearing: bool, upper_notebook: &GuiUpperNoteb
settings.check_button_settings_hide_hard_links.set_active(true); settings.check_button_settings_hide_hard_links.set_active(true);
settings.check_button_settings_use_cache.set_active(true); settings.check_button_settings_use_cache.set_active(true);
settings.check_button_settings_use_trash.set_active(false); settings.check_button_settings_use_trash.set_active(false);
settings.entry_settings_cache_file_minimal_size.set_text("524288"); settings.entry_settings_cache_file_minimal_size.set_text("257144");
settings.check_button_settings_similar_videos_delete_outdated_cache.set_active(false); settings.check_button_settings_similar_videos_delete_outdated_cache.set_active(false);
settings.check_button_settings_similar_images_delete_outdated_cache.set_active(true); settings.check_button_settings_similar_images_delete_outdated_cache.set_active(true);
settings.check_button_settings_duplicates_delete_outdated_cache.set_active(true); settings.check_button_settings_duplicates_delete_outdated_cache.set_active(true);
settings.check_button_duplicates_use_prehash_cache.set_active(false);
settings.entry_settings_prehash_cache_file_minimal_size.set_text("0");
} }
if manual_clearing { if manual_clearing {
add_text_to_text_view(&text_view_errors, "Current configuration was cleared."); add_text_to_text_view(&text_view_errors, "Current configuration was cleared.");

View file

@ -301,7 +301,7 @@ Author: Rafał Mikrut
<object class="GtkLabel"> <object class="GtkLabel">
<property name="visible">True</property> <property name="visible">True</property>
<property name="can-focus">False</property> <property name="can-focus">False</property>
<property name="label" translatable="yes">Minimal cached file size in bytes</property> <property name="label" translatable="yes">Minimal size of files in bytes saved to cache</property>
</object> </object>
<packing> <packing>
<property name="expand">True</property> <property name="expand">True</property>
@ -314,7 +314,7 @@ Author: Rafał Mikrut
<property name="visible">True</property> <property name="visible">True</property>
<property name="can-focus">True</property> <property name="can-focus">True</property>
<property name="max-length">15</property> <property name="max-length">15</property>
<property name="text" translatable="yes">524288</property> <property name="text" translatable="yes">257144</property>
<property name="caps-lock-warning">False</property> <property name="caps-lock-warning">False</property>
<property name="input-purpose">number</property> <property name="input-purpose">number</property>
</object> </object>
@ -332,6 +332,20 @@ Author: Rafał Mikrut
<property name="position">3</property> <property name="position">3</property>
</packing> </packing>
</child> </child>
<child>
<object class="GtkCheckButton" id="check_button_duplicates_use_prehash_cache">
<property name="label" translatable="yes">Use prehash cache</property>
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="receives-default">False</property>
<property name="draw-indicator">True</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">4</property>
</packing>
</child>
<child> <child>
<object class="GtkButton" id="button_settings_duplicates_clear_cache"> <object class="GtkButton" id="button_settings_duplicates_clear_cache">
<property name="label" translatable="yes">Remove outdated results from duplicates cache</property> <property name="label" translatable="yes">Remove outdated results from duplicates cache</property>
@ -343,7 +357,50 @@ Author: Rafał Mikrut
<property name="expand">False</property> <property name="expand">False</property>
<property name="fill">False</property> <property name="fill">False</property>
<property name="pack-type">end</property> <property name="pack-type">end</property>
<property name="position">4</property> <property name="position">5</property>
</packing>
</child>
<child>
<object class="GtkBox">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="margin-left">4</property>
<property name="margin-right">4</property>
<property name="margin-start">4</property>
<property name="margin-end">4</property>
<child>
<object class="GtkLabel">
<property name="visible">True</property>
<property name="can-focus">False</property>
<property name="label" translatable="yes">Minimal size of files in bytes saved to prehash cache</property>
</object>
<packing>
<property name="expand">True</property>
<property name="fill">True</property>
<property name="position">0</property>
</packing>
</child>
<child>
<object class="GtkEntry" id="entry_settings_prehash_cache_file_minimal_size">
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="max-length">15</property>
<property name="text" translatable="yes">1</property>
<property name="caps-lock-warning">False</property>
<property name="input-purpose">number</property>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">False</property>
<property name="pack-type">end</property>
<property name="position">1</property>
</packing>
</child>
</object>
<packing>
<property name="expand">False</property>
<property name="fill">True</property>
<property name="position">6</property>
</packing> </packing>
</child> </child>
</object> </object>