Add cache for duplicate (#205)
This commit is contained in:
parent
6e89bcb507
commit
7ec665ab7a
4 changed files with 311 additions and 74 deletions
|
@ -6,13 +6,11 @@
|
||||||
- Written in memory safe Rust
|
- Written in memory safe Rust
|
||||||
- Amazingly fast - due using more or less advanced algorithms and multithreading support
|
- Amazingly fast - due using more or less advanced algorithms and multithreading support
|
||||||
- Free, Open Source without ads
|
- Free, Open Source without ads
|
||||||
- Multiplatform - works on Linux, Windows and macOS
|
- Multiplatform - works on Linux, Windows and macOS
|
||||||
|
- Cache support - second and further scans should be a lot of faster than first
|
||||||
- CLI frontend, very fast to automate tasks
|
- CLI frontend, very fast to automate tasks
|
||||||
- GUI GTK frontend - uses modern GTK 3 and looks similar to FSlint
|
- GUI frontend - uses modern GTK 3 and looks similar to FSlint
|
||||||
- Light/Dark theme match the appearance of the system(Linux only)
|
|
||||||
- Saving results to a file - allows reading entries found by the tool easily
|
|
||||||
- Rich search option - allows setting absolute included and excluded directories, set of allowed file extensions or excluded items with * wildcard
|
- Rich search option - allows setting absolute included and excluded directories, set of allowed file extensions or excluded items with * wildcard
|
||||||
- Image previews to get quick view at the compared photos
|
|
||||||
- Multiple tools to use:
|
- Multiple tools to use:
|
||||||
- Duplicates - Finds duplicates basing on file name, size, hash, first 1 MB of hash
|
- Duplicates - Finds duplicates basing on file name, size, hash, first 1 MB of hash
|
||||||
- Empty Folders - Finds empty folders with the help of advanced algorithm
|
- Empty Folders - Finds empty folders with the help of advanced algorithm
|
||||||
|
@ -228,6 +226,7 @@ So still is a big room for improvements.
|
||||||
| Non stripped binaries | | X | |
|
| Non stripped binaries | | X | |
|
||||||
| Redundant whitespace | | X | |
|
| Redundant whitespace | | X | |
|
||||||
| Multiple languages(po) | | X | X |
|
| Multiple languages(po) | | X | X |
|
||||||
|
| Cache support | X | | X |
|
||||||
| Project Activity | High | Very Low | High |
|
| Project Activity | High | Very Low | High |
|
||||||
|
|
||||||
## Contributions
|
## Contributions
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use crossbeam_channel::Receiver;
|
use crossbeam_channel::Receiver;
|
||||||
use humansize::{file_size_opts as options, FileSize};
|
use humansize::{file_size_opts as options, FileSize};
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::fs::{File, Metadata};
|
use std::fs::{File, Metadata, OpenOptions};
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::path::PathBuf;
|
use std::path::{Path, PathBuf};
|
||||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
use std::{fs, thread};
|
use std::{fs, thread};
|
||||||
|
|
||||||
|
@ -13,14 +13,17 @@ use crate::common_extensions::Extensions;
|
||||||
use crate::common_items::ExcludedItems;
|
use crate::common_items::ExcludedItems;
|
||||||
use crate::common_messages::Messages;
|
use crate::common_messages::Messages;
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
|
use directories_next::ProjectDirs;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::io::BufWriter;
|
use std::io::{BufReader, BufWriter};
|
||||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread::sleep;
|
use std::thread::sleep;
|
||||||
|
|
||||||
const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB
|
const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB
|
||||||
|
|
||||||
|
const CACHE_FILE_NAME: &str = "cache_duplicates.txt";
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ProgressData {
|
pub struct ProgressData {
|
||||||
pub checking_method: CheckingMethod,
|
pub checking_method: CheckingMethod,
|
||||||
|
@ -39,7 +42,7 @@ pub enum CheckingMethod {
|
||||||
HashMB,
|
HashMB,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
#[derive(PartialEq, Eq, Clone, Debug, Copy)]
|
||||||
pub enum HashType {
|
pub enum HashType {
|
||||||
Blake3,
|
Blake3,
|
||||||
}
|
}
|
||||||
|
@ -58,6 +61,7 @@ pub struct FileEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub modified_date: u64,
|
pub modified_date: u64,
|
||||||
|
pub hash: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Info struck with helpful information's about results
|
/// Info struck with helpful information's about results
|
||||||
|
@ -349,6 +353,7 @@ impl DuplicateFinder {
|
||||||
continue 'dir;
|
continue 'dir;
|
||||||
} // Permissions Denied
|
} // Permissions Denied
|
||||||
},
|
},
|
||||||
|
hash: "".to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Adding files to BTreeMap
|
// Adding files to BTreeMap
|
||||||
|
@ -520,6 +525,7 @@ impl DuplicateFinder {
|
||||||
continue 'dir;
|
continue 'dir;
|
||||||
} // Permissions Denied
|
} // Permissions Denied
|
||||||
},
|
},
|
||||||
|
hash: "".to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Adding files to BTreeMap
|
// Adding files to BTreeMap
|
||||||
|
@ -631,8 +637,8 @@ impl DuplicateFinder {
|
||||||
hasher.update(&buffer[..n]);
|
hasher.update(&buffer[..n]);
|
||||||
|
|
||||||
let hash_string: String = hasher.finalize().to_hex().to_string();
|
let hash_string: String = hasher.finalize().to_hex().to_string();
|
||||||
hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
|
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
|
||||||
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned());
|
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone());
|
||||||
}
|
}
|
||||||
Some((*size, hashmap_with_hash, errors, bytes_read))
|
Some((*size, hashmap_with_hash, errors, bytes_read))
|
||||||
})
|
})
|
||||||
|
@ -700,60 +706,191 @@ impl DuplicateFinder {
|
||||||
//// PROGRESS THREAD END
|
//// PROGRESS THREAD END
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
let full_hash_results: Vec<(u64, HashMap<String, Vec<FileEntry>>, Vec<String>, u64)> = pre_checked_map
|
let mut full_hash_results: Vec<(u64, HashMap<String, Vec<FileEntry>>, Vec<String>, u64)>;
|
||||||
.par_iter()
|
|
||||||
.map(|(size, vec_file_entry)| {
|
|
||||||
let mut hashmap_with_hash: HashMap<String, Vec<FileEntry>> = Default::default();
|
|
||||||
let mut errors: Vec<String> = Vec::new();
|
|
||||||
let mut file_handler: File;
|
|
||||||
let mut bytes_read: u64 = 0;
|
|
||||||
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
|
|
||||||
'fe: for file_entry in vec_file_entry {
|
|
||||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
|
||||||
check_was_breaked.store(true, Ordering::Relaxed);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
file_handler = match File::open(&file_entry.path) {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(_) => {
|
|
||||||
errors.push(format!("Unable to check hash of file {}", file_entry.path.display()));
|
|
||||||
continue 'fe;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut hasher: blake3::Hasher = blake3::Hasher::new();
|
match self.check_method {
|
||||||
let mut buffer = [0u8; 1024 * 32];
|
CheckingMethod::HashMB => {
|
||||||
let mut current_file_read_bytes: u64 = 0;
|
full_hash_results = pre_checked_map
|
||||||
|
.par_iter()
|
||||||
loop {
|
.map(|(size, vec_file_entry)| {
|
||||||
let n = match file_handler.read(&mut buffer) {
|
let mut hashmap_with_hash: HashMap<String, Vec<FileEntry>> = Default::default();
|
||||||
Ok(t) => t,
|
let mut errors: Vec<String> = Vec::new();
|
||||||
Err(_) => {
|
let mut file_handler: File;
|
||||||
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
|
let mut bytes_read: u64 = 0;
|
||||||
continue 'fe;
|
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
|
||||||
|
'fe: for file_entry in vec_file_entry {
|
||||||
|
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||||
|
check_was_breaked.store(true, Ordering::Relaxed);
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
};
|
file_handler = match File::open(&file_entry.path) {
|
||||||
if n == 0 {
|
Ok(t) => t,
|
||||||
break;
|
Err(_) => {
|
||||||
|
errors.push(format!("Unable to check hash of file {}", file_entry.path.display()));
|
||||||
|
continue 'fe;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut hasher: blake3::Hasher = blake3::Hasher::new();
|
||||||
|
let mut buffer = [0u8; 1024 * 128];
|
||||||
|
let mut current_file_read_bytes: u64 = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let n = match file_handler.read(&mut buffer) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
|
||||||
|
continue 'fe;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
current_file_read_bytes += n as u64;
|
||||||
|
bytes_read += n as u64;
|
||||||
|
hasher.update(&buffer[..n]);
|
||||||
|
|
||||||
|
if current_file_read_bytes >= HASH_MB_LIMIT_BYTES {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let hash_string: String = hasher.finalize().to_hex().to_string();
|
||||||
|
hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
|
||||||
|
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned());
|
||||||
}
|
}
|
||||||
|
Some((*size, hashmap_with_hash, errors, bytes_read))
|
||||||
|
})
|
||||||
|
.while_some()
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
CheckingMethod::Hash => {
|
||||||
|
let loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, &self.hash_type) {
|
||||||
|
Some(t) => t,
|
||||||
|
None => Default::default(),
|
||||||
|
};
|
||||||
|
|
||||||
current_file_read_bytes += n as u64;
|
let mut records_already_cached: HashMap<u64, Vec<FileEntry>> = Default::default();
|
||||||
bytes_read += n as u64;
|
let mut non_cached_files_to_check: HashMap<u64, Vec<FileEntry>> = Default::default();
|
||||||
hasher.update(&buffer[..n]);
|
for (size, vec_file_entry) in pre_checked_map {
|
||||||
|
#[allow(clippy::collapsible_if)]
|
||||||
|
if !loaded_hash_map.contains_key(&size) {
|
||||||
|
// If loaded data doesn't contains current info
|
||||||
|
non_cached_files_to_check.insert(size, vec_file_entry);
|
||||||
|
} else {
|
||||||
|
let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap();
|
||||||
|
|
||||||
if self.check_method == CheckingMethod::HashMB && current_file_read_bytes >= HASH_MB_LIMIT_BYTES {
|
for file_entry in vec_file_entry {
|
||||||
break;
|
let mut found: bool = false;
|
||||||
|
for loaded_file_entry in loaded_vec_file_entry {
|
||||||
|
if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date {
|
||||||
|
records_already_cached.entry(file_entry.size).or_insert_with(Vec::new);
|
||||||
|
records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone());
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new);
|
||||||
|
non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let hash_string: String = hasher.finalize().to_hex().to_string();
|
|
||||||
hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new);
|
|
||||||
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned());
|
|
||||||
}
|
}
|
||||||
Some((*size, hashmap_with_hash, errors, bytes_read))
|
|
||||||
})
|
full_hash_results = non_cached_files_to_check
|
||||||
.while_some()
|
.par_iter()
|
||||||
.collect();
|
.map(|(size, vec_file_entry)| {
|
||||||
|
let mut hashmap_with_hash: HashMap<String, Vec<FileEntry>> = Default::default();
|
||||||
|
let mut errors: Vec<String> = Vec::new();
|
||||||
|
let mut file_handler: File;
|
||||||
|
let mut bytes_read: u64 = 0;
|
||||||
|
atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed);
|
||||||
|
'fe: for file_entry in vec_file_entry {
|
||||||
|
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||||
|
check_was_breaked.store(true, Ordering::Relaxed);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
file_handler = match File::open(&file_entry.path) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
errors.push(format!("Unable to check hash of file {}", file_entry.path.display()));
|
||||||
|
continue 'fe;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut hasher: blake3::Hasher = blake3::Hasher::new();
|
||||||
|
let mut buffer = [0u8; 1024 * 128];
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let n = match file_handler.read(&mut buffer) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display()));
|
||||||
|
continue 'fe;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if n == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes_read += n as u64;
|
||||||
|
hasher.update(&buffer[..n]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let hash_string: String = hasher.finalize().to_hex().to_string();
|
||||||
|
let mut file_entry = file_entry.clone();
|
||||||
|
file_entry.hash = hash_string.clone();
|
||||||
|
hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new);
|
||||||
|
hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry);
|
||||||
|
}
|
||||||
|
Some((*size, hashmap_with_hash, errors, bytes_read))
|
||||||
|
})
|
||||||
|
.while_some()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Size, Vec
|
||||||
|
|
||||||
|
'main: for (size, vec_file_entry) in records_already_cached {
|
||||||
|
// Check if size already exists, if exists we must to change it outside because cannot have mut and non mut reference to full_hash_results
|
||||||
|
for (full_size, full_hashmap, _errors, _bytes_read) in &mut full_hash_results {
|
||||||
|
if size == *full_size {
|
||||||
|
for file_entry in vec_file_entry {
|
||||||
|
full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
|
||||||
|
full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
|
||||||
|
}
|
||||||
|
continue 'main;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Size doesn't exists add results to files
|
||||||
|
let mut temp_hashmap: HashMap<String, Vec<FileEntry>> = Default::default();
|
||||||
|
for file_entry in vec_file_entry {
|
||||||
|
temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
|
||||||
|
temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
|
||||||
|
}
|
||||||
|
full_hash_results.push((size, temp_hashmap, Vec::new(), 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must save all results to file, old loaded from file with all currently counted results
|
||||||
|
let mut all_results: HashMap<String, FileEntry> = Default::default();
|
||||||
|
for (_size, vec_file_entry) in loaded_hash_map {
|
||||||
|
for file_entry in vec_file_entry {
|
||||||
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (_size, hashmap, _errors, _bytes_read) in &full_hash_results {
|
||||||
|
for vec_file_entry in hashmap.values() {
|
||||||
|
for file_entry in vec_file_entry {
|
||||||
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type);
|
||||||
|
}
|
||||||
|
_ => panic!("What"),
|
||||||
|
}
|
||||||
|
|
||||||
// End thread which send info to gui
|
// End thread which send info to gui
|
||||||
progress_thread_run.store(false, Ordering::Relaxed);
|
progress_thread_run.store(false, Ordering::Relaxed);
|
||||||
|
@ -1169,3 +1306,104 @@ fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, warnings: &m
|
||||||
};
|
};
|
||||||
(gained_space, removed_files, failed_to_remove_files)
|
(gained_space, removed_files, failed_to_remove_files)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn save_hashes_to_file(hashmap: &HashMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType) {
|
||||||
|
println!("Trying to save {} files", hashmap.len());
|
||||||
|
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
||||||
|
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
|
||||||
|
if cache_dir.exists() {
|
||||||
|
if !cache_dir.is_dir() {
|
||||||
|
text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else if fs::create_dir_all(&cache_dir).is_err() {
|
||||||
|
text_messages.messages.push(format!("Cannot create config dir {}", cache_dir.display()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
|
||||||
|
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
text_messages.messages.push(format!("Cannot create or open cache file {}", cache_file.display()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let mut writer = BufWriter::new(file_handler);
|
||||||
|
|
||||||
|
for file_entry in hashmap.values() {
|
||||||
|
// Only cache bigger than 5MB files
|
||||||
|
if file_entry.size > 5 * 1024 * 1024 {
|
||||||
|
let string: String = format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.hash);
|
||||||
|
|
||||||
|
if writeln!(writer, "{}", string).is_err() {
|
||||||
|
text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_hashes_from_file(text_messages: &mut Messages, type_of_hash: &HashType) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
|
||||||
|
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
||||||
|
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
|
||||||
|
let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
|
||||||
|
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
// text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let reader = BufReader::new(file_handler);
|
||||||
|
|
||||||
|
let mut hashmap_loaded_entries: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||||
|
|
||||||
|
// Read the file line by line using the lines() iterator from std::io::BufRead.
|
||||||
|
for (index, line) in reader.lines().enumerate() {
|
||||||
|
let line = match line {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
text_messages.warnings.push(format!("Failed to load line number {} from cache file {}", index + 1, cache_file.display()));
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let uuu = line.split("//").collect::<Vec<&str>>();
|
||||||
|
if uuu.len() != 4 {
|
||||||
|
text_messages
|
||||||
|
.warnings
|
||||||
|
.push(format!("Found invalid data(too much or too low amount of data) in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Don't load cache data if destination file not exists
|
||||||
|
if Path::new(uuu[0]).exists() {
|
||||||
|
let file_entry = FileEntry {
|
||||||
|
path: PathBuf::from(uuu[0]),
|
||||||
|
size: match uuu[1].parse::<u64>() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
text_messages.warnings.push(format!("Found invalid size value in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
modified_date: match uuu[2].parse::<u64>() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(_) => {
|
||||||
|
text_messages.warnings.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
hash: uuu[3].to_string(),
|
||||||
|
};
|
||||||
|
hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new);
|
||||||
|
hashmap_loaded_entries.get_mut(&file_entry.size).unwrap().push(file_entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Some(hashmap_loaded_entries);
|
||||||
|
}
|
||||||
|
|
||||||
|
text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
|
@ -326,20 +326,20 @@ impl SimilarImages {
|
||||||
None => Default::default(),
|
None => Default::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut hashes_already_counted: HashMap<String, FileEntry> = Default::default();
|
let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
|
||||||
let mut hashes_to_check: HashMap<String, FileEntry> = Default::default();
|
let mut non_cached_files_to_check: HashMap<String, FileEntry> = Default::default();
|
||||||
for (name, file_entry) in &self.images_to_check {
|
for (name, file_entry) in &self.images_to_check {
|
||||||
#[allow(clippy::collapsible_if)]
|
#[allow(clippy::collapsible_if)]
|
||||||
if !loaded_hash_map.contains_key(name) {
|
if !loaded_hash_map.contains_key(name) {
|
||||||
// If loaded data doesn't contains current image info
|
// If loaded data doesn't contains current image info
|
||||||
hashes_to_check.insert(name.clone(), file_entry.clone());
|
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
||||||
} else {
|
} else {
|
||||||
if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
|
if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
|
||||||
// When size or modification date of image changed, then it is clear that is different image
|
// When size or modification date of image changed, then it is clear that is different image
|
||||||
hashes_to_check.insert(name.clone(), file_entry.clone());
|
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
|
||||||
} else {
|
} else {
|
||||||
// Checking may be omitted when already there is entry with same size and modification date
|
// Checking may be omitted when already there is entry with same size and modification date
|
||||||
hashes_already_counted.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
|
records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -358,7 +358,7 @@ impl SimilarImages {
|
||||||
let mut progress_send = progress_sender.clone();
|
let mut progress_send = progress_sender.clone();
|
||||||
let progress_thread_run = progress_thread_run.clone();
|
let progress_thread_run = progress_thread_run.clone();
|
||||||
let atomic_file_counter = atomic_file_counter.clone();
|
let atomic_file_counter = atomic_file_counter.clone();
|
||||||
let images_to_check = hashes_to_check.len();
|
let images_to_check = non_cached_files_to_check.len();
|
||||||
progress_thread_handle = thread::spawn(move || loop {
|
progress_thread_handle = thread::spawn(move || loop {
|
||||||
progress_send
|
progress_send
|
||||||
.try_send(ProgressData {
|
.try_send(ProgressData {
|
||||||
|
@ -377,7 +377,7 @@ impl SimilarImages {
|
||||||
progress_thread_handle = thread::spawn(|| {});
|
progress_thread_handle = thread::spawn(|| {});
|
||||||
}
|
}
|
||||||
//// PROGRESS THREAD END
|
//// PROGRESS THREAD END
|
||||||
let mut vec_file_entry: Vec<(FileEntry, Node)> = hashes_to_check
|
let mut vec_file_entry: Vec<(FileEntry, Node)> = non_cached_files_to_check
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(|file_entry| {
|
.map(|file_entry| {
|
||||||
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
@ -416,7 +416,7 @@ impl SimilarImages {
|
||||||
let hash_map_modification = SystemTime::now();
|
let hash_map_modification = SystemTime::now();
|
||||||
|
|
||||||
// Just connect loaded results with already calculated hashes
|
// Just connect loaded results with already calculated hashes
|
||||||
for (_name, file_entry) in hashes_already_counted {
|
for (_name, file_entry) in records_already_cached {
|
||||||
vec_file_entry.push((file_entry.clone(), file_entry.hash));
|
vec_file_entry.push((file_entry.clone(), file_entry.hash));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -457,15 +457,15 @@ impl SimilarImages {
|
||||||
// Maybe also add here progress report
|
// Maybe also add here progress report
|
||||||
|
|
||||||
let mut new_vector: Vec<Vec<FileEntry>> = Vec::new();
|
let mut new_vector: Vec<Vec<FileEntry>> = Vec::new();
|
||||||
let mut hashes_to_check = self.image_hashes.clone();
|
let mut non_cached_files_to_check = self.image_hashes.clone();
|
||||||
for (hash, vec_file_entry) in &self.image_hashes {
|
for (hash, vec_file_entry) in &self.image_hashes {
|
||||||
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if !hashes_to_check.contains_key(hash) {
|
if !non_cached_files_to_check.contains_key(hash) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
hashes_to_check.remove(hash);
|
non_cached_files_to_check.remove(hash);
|
||||||
|
|
||||||
let vector_with_found_similar_hashes = self.bktree.find(hash, similarity).collect::<Vec<_>>();
|
let vector_with_found_similar_hashes = self.bktree.find(hash, similarity).collect::<Vec<_>>();
|
||||||
if vector_with_found_similar_hashes.len() == 1 && vec_file_entry.len() == 1 {
|
if vector_with_found_similar_hashes.len() == 1 && vec_file_entry.len() == 1 {
|
||||||
|
@ -493,7 +493,7 @@ impl SimilarImages {
|
||||||
panic!("I'm not sure if same hash can have distance > 0");
|
panic!("I'm not sure if same hash can have distance > 0");
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(vec_file_entry) = hashes_to_check.get(*similar_hash) {
|
if let Some(vec_file_entry) = non_cached_files_to_check.get(*similar_hash) {
|
||||||
vector_of_similar_images.append(
|
vector_of_similar_images.append(
|
||||||
&mut (vec_file_entry
|
&mut (vec_file_entry
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -515,7 +515,7 @@ impl SimilarImages {
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>()),
|
.collect::<Vec<_>>()),
|
||||||
);
|
);
|
||||||
hashes_to_check.remove(*similar_hash);
|
non_cached_files_to_check.remove(*similar_hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if vector_of_similar_images.len() > 1 {
|
if vector_of_similar_images.len() > 1 {
|
||||||
|
|
|
@ -137,17 +137,17 @@ Only some image extensions are supported, because I rely on image crate. Also so
|
||||||
## Config/Cache files
|
## Config/Cache files
|
||||||
For now Czkawka store only 2 files on disk:
|
For now Czkawka store only 2 files on disk:
|
||||||
- `czkawka_gui_config.txt` - stores configuration of GUI which may be loaded at startup
|
- `czkawka_gui_config.txt` - stores configuration of GUI which may be loaded at startup
|
||||||
- `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - DO NOT TRY TO EDIT THIS FILE MANUALLY! - editing this file may cause app crashes.
|
- `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - editing this file may cause app crashes.
|
||||||
- `cache_broken_files.txt` - stores cache data of broken files
|
- `cache_broken_files.txt` - stores cache data of broken files
|
||||||
|
- `cache_duplicates_Blake3.txt` - stores cache data of duplicated files, to not get too big performance hit when saving/loading file, only already fully hashed files bigger than 5MB are stored. Similar files with replaced `Blake3` to e.g. `SHA256` may be shown, when support for new hashes will be introduced in Czkawka.
|
||||||
|
|
||||||
|
Config files are located in this path
|
||||||
First file is located in this path
|
|
||||||
|
|
||||||
Linux - `/home/username/.config/czkawka`
|
Linux - `/home/username/.config/czkawka`
|
||||||
Mac - `/Users/username/Library/Application Support/pl.Qarmin.Czkawka`
|
Mac - `/Users/username/Library/Application Support/pl.Qarmin.Czkawka`
|
||||||
Windows - `C:\Users\Username\AppData\Roaming\Qarmin\Czkawka\config`
|
Windows - `C:\Users\Username\AppData\Roaming\Qarmin\Czkawka\config`
|
||||||
|
|
||||||
Second with cache here:
|
Cache should be here:
|
||||||
|
|
||||||
Linux - `/home/username/.cache/czkawka`
|
Linux - `/home/username/.cache/czkawka`
|
||||||
Mac - `/Users/Username/Library/Caches/pl.Qarmin.Czkawka`
|
Mac - `/Users/Username/Library/Caches/pl.Qarmin.Czkawka`
|
||||||
|
|
Loading…
Reference in a new issue