Adds support for multiple hash types and sizes in similar images, resize filters, and also a test to check performance (#447)
This commit is contained in:
parent
742139379c
commit
20c89f44f0
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -461,6 +461,7 @@ name = "czkawka_cli"
|
|||
version = "3.2.0"
|
||||
dependencies = [
|
||||
"czkawka_core",
|
||||
"img_hash",
|
||||
"structopt",
|
||||
]
|
||||
|
||||
|
|
|
@ -10,4 +10,7 @@ repository = "https://github.com/qarmin/czkawka"
|
|||
|
||||
[dependencies]
|
||||
czkawka_core = { path = "../czkawka_core" }
|
||||
structopt = "0.3.22"
|
||||
structopt = "0.3.25"
|
||||
|
||||
# For enum types
|
||||
img_hash = "3.2.0"
|
|
@ -1,6 +1,7 @@
|
|||
use czkawka_core::duplicate::{CheckingMethod, DeleteMethod, HashType};
|
||||
use czkawka_core::same_music::MusicSimilarity;
|
||||
use czkawka_core::similar_images::Similarity;
|
||||
use czkawka_core::similar_images::SimilarityPreset;
|
||||
use img_hash::{FilterType, HashAlg};
|
||||
use std::path::PathBuf;
|
||||
use structopt::StructOpt;
|
||||
|
||||
|
@ -27,8 +28,7 @@ pub enum Commands {
|
|||
search_method: CheckingMethod,
|
||||
#[structopt(short = "D", long, default_value = "NONE", parse(try_from_str = parse_delete_method), help = "Delete method (AEN, AEO, ON, OO, HARD)", long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files")]
|
||||
delete_method: DeleteMethod,
|
||||
#[structopt(short, long, default_value = "BLAKE3", parse(try_from_str = parse_hash_type),
|
||||
help="Hash type (BLAKE3, CRC32, XXH3)")]
|
||||
#[structopt(short = "ht", long, default_value = "BLAKE3", parse(try_from_str = parse_hash_type), help="Hash type (BLAKE3, CRC32, XXH3)")]
|
||||
hash_type: HashType,
|
||||
#[structopt(flatten)]
|
||||
file_to_save: FileToSave,
|
||||
|
@ -114,13 +114,19 @@ pub enum Commands {
|
|||
#[structopt(short = "i", long, parse(try_from_str = parse_maximal_file_size), default_value = "18446744073709551615", help = "Maximum size in bytes", long_help = "Maximum size of checked files in bytes, assigning lower value may speed up searching")]
|
||||
maximal_file_size: u64,
|
||||
#[structopt(short, long, default_value = "High", parse(try_from_str = parse_similar_images_similarity), help = "Similairty level (Minimal, VerySmall, Small, Medium, High, VeryHigh)", long_help = "Methods to choose similarity level of images which will be considered as duplicated.")]
|
||||
similarity: Similarity,
|
||||
similarity_preset: SimilarityPreset,
|
||||
#[structopt(flatten)]
|
||||
excluded_items: ExcludedItems,
|
||||
#[structopt(flatten)]
|
||||
file_to_save: FileToSave,
|
||||
#[structopt(flatten)]
|
||||
not_recursive: NotRecursive,
|
||||
#[structopt(short = "g", long, default_value = "Gradient", parse(try_from_str = parse_similar_hash_algorithm), help="Hash algorithm (allowed: Mean, Gradient, Blockhash, VertGradient, DoubleGradient)")]
|
||||
hash_alg: HashAlg,
|
||||
#[structopt(short = "f", long, default_value = "Lanczos3", parse(try_from_str = parse_similar_image_filter), help="Hash algorithm (allowed: Lanczos3, Nearest, Triangle, Faussian, Catmullrom)")]
|
||||
image_filter: FilterType,
|
||||
#[structopt(short = "c", long, default_value = "8", parse(try_from_str = parse_image_hash_size), help="Hash size (allowed: 4, 8, 16, 32)")]
|
||||
hash_size: u8,
|
||||
},
|
||||
#[structopt(name = "zeroed", about = "Finds zeroed files", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka zeroed -d /home/rafal -e /home/rafal/Pulpit -f results.txt")]
|
||||
ZeroedFiles {
|
||||
|
@ -198,6 +204,11 @@ pub enum Commands {
|
|||
#[structopt(flatten)]
|
||||
not_recursive: NotRecursive,
|
||||
},
|
||||
#[structopt(name = "tester", about = "Contains various test", help_message = HELP_MESSAGE, after_help = "EXAMPLE:\n czkawka tests -i")]
|
||||
Tester {
|
||||
#[structopt(short = "i", long = "test_image", help = "Test speed of hashing provided test.jpg image with different filters and methods.")]
|
||||
test_image: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
|
@ -294,16 +305,15 @@ fn parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str> {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO For now it looks different
|
||||
fn parse_similar_images_similarity(src: &str) -> Result<Similarity, &'static str> {
|
||||
match src.to_ascii_lowercase().replace('_', "").as_str() {
|
||||
"minimal" => Ok(Similarity::Similar(12)),
|
||||
"verysmall" => Ok(Similarity::Similar(8)),
|
||||
"small" => Ok(Similarity::Similar(5)),
|
||||
"medium" => Ok(Similarity::Similar(3)),
|
||||
"high" => Ok(Similarity::Similar(1)),
|
||||
"veryhigh" => Ok(Similarity::Similar(0)),
|
||||
_ => Err("Couldn't parse the delete method (allowed: verysmall, small, medium, high, veryhigh)"),
|
||||
fn parse_similar_images_similarity(src: &str) -> Result<SimilarityPreset, &'static str> {
|
||||
match src.to_lowercase().replace('_', "").as_str() {
|
||||
"minimal" => Ok(SimilarityPreset::Minimal),
|
||||
"verysmall" => Ok(SimilarityPreset::VerySmall),
|
||||
"small" => Ok(SimilarityPreset::Small),
|
||||
"medium" => Ok(SimilarityPreset::Medium),
|
||||
"high" => Ok(SimilarityPreset::High),
|
||||
"veryhigh" => Ok(SimilarityPreset::VeryHigh),
|
||||
_ => Err("Couldn't parse the image similarity preset (allowed: Minimal, VerySmall, Small, Medium, High, VeryHigh)"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -327,6 +337,43 @@ fn parse_maximal_file_size(src: &str) -> Result<u64, String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_similar_image_filter(src: &str) -> Result<FilterType, String> {
|
||||
let filter_type;
|
||||
filter_type = match src.to_lowercase().as_str() {
|
||||
"lanczos3" => FilterType::Lanczos3,
|
||||
"nearest" => FilterType::Nearest,
|
||||
"triangle" => FilterType::Triangle,
|
||||
"faussian" => FilterType::Gaussian,
|
||||
"catmullrom" => FilterType::CatmullRom,
|
||||
_ => return Err("Couldn't parse the image resize filter (allowed: Lanczos3, Nearest, Triangle, Faussian, Catmullrom)".to_string()),
|
||||
};
|
||||
Ok(filter_type)
|
||||
}
|
||||
fn parse_similar_hash_algorithm(src: &str) -> Result<HashAlg, String> {
|
||||
let algorithm;
|
||||
algorithm = match src.to_lowercase().as_str() {
|
||||
"mean" => HashAlg::Mean,
|
||||
"gradient" => HashAlg::Gradient,
|
||||
"blockhash" => HashAlg::Blockhash,
|
||||
"vertgradient" => HashAlg::VertGradient,
|
||||
"doublegradient" => HashAlg::DoubleGradient,
|
||||
_ => return Err("Couldn't parse the hash algorithm (allowed: Mean, Gradient, Blockhash, VertGradient, DoubleGradient)".to_string()),
|
||||
};
|
||||
Ok(algorithm)
|
||||
}
|
||||
|
||||
/// Parses the image hash size given on the command line.
///
/// Only the exact texts `4`, `8`, `16` and `32` are accepted; surrounding
/// whitespace, signs or leading zeros are rejected.
///
/// # Errors
/// Returns a message listing the allowed sizes for any other input.
fn parse_image_hash_size(src: &str) -> Result<u8, String> {
    // Accepted spellings paired with the value each one yields.
    const ALLOWED: [(&str, u8); 4] = [("4", 4), ("8", 8), ("16", 16), ("32", 32)];

    let wanted = src.to_lowercase();
    for (text, size) in ALLOWED.iter() {
        if wanted == *text {
            return Ok(*size);
        }
    }
    Err(String::from("Couldn't parse the image hash size (allowed: 4, 8, 16, 32)"))
}
|
||||
|
||||
fn parse_music_duplicate_type(src: &str) -> Result<MusicSimilarity, String> {
|
||||
if src.is_empty() {
|
||||
return Ok(MusicSimilarity::NONE);
|
||||
|
|
|
@ -5,6 +5,7 @@ use commands::Commands;
|
|||
#[allow(unused_imports)] // It is used in release for print_results().
|
||||
use czkawka_core::common_traits::*;
|
||||
|
||||
use czkawka_core::similar_images::test_image_conversion_speed;
|
||||
use czkawka_core::{
|
||||
big_file::{self, BigFile},
|
||||
broken_files::{self, BrokenFiles},
|
||||
|
@ -14,7 +15,7 @@ use czkawka_core::{
|
|||
invalid_symlinks,
|
||||
invalid_symlinks::InvalidSymlinks,
|
||||
same_music::SameMusic,
|
||||
similar_images::SimilarImages,
|
||||
similar_images::{return_similarity_from_similarity_preset, SimilarImages},
|
||||
temporary::{self, Temporary},
|
||||
zeroed::{self, ZeroedFiles},
|
||||
};
|
||||
|
@ -208,8 +209,11 @@ fn main() {
|
|||
file_to_save,
|
||||
minimal_file_size,
|
||||
maximal_file_size,
|
||||
similarity,
|
||||
similarity_preset,
|
||||
not_recursive,
|
||||
hash_alg,
|
||||
image_filter,
|
||||
hash_size,
|
||||
} => {
|
||||
let mut sf = SimilarImages::new();
|
||||
|
||||
|
@ -219,7 +223,11 @@ fn main() {
|
|||
sf.set_minimal_file_size(minimal_file_size);
|
||||
sf.set_maximal_file_size(maximal_file_size);
|
||||
sf.set_recursive_search(!not_recursive.not_recursive);
|
||||
sf.set_similarity(similarity);
|
||||
sf.set_image_filter(image_filter);
|
||||
sf.set_hash_alg(hash_alg);
|
||||
sf.set_hash_size(hash_size);
|
||||
|
||||
sf.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
|
||||
|
||||
sf.find_similar_images(None, None);
|
||||
|
||||
|
@ -377,5 +385,12 @@ fn main() {
|
|||
br.print_results();
|
||||
br.get_text_messages().print_messages();
|
||||
}
|
||||
Commands::Tester { test_image } => {
|
||||
if test_image {
|
||||
test_image_conversion_speed();
|
||||
} else {
|
||||
println!("At least one test should be choosen!");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,22 +24,22 @@ image = "0.23.14"
|
|||
hamming = "0.1.3"
|
||||
|
||||
# Needed by same music
|
||||
bitflags = "1.2.1"
|
||||
bitflags = "1.3.2"
|
||||
audiotags = "0.2.7182"
|
||||
|
||||
# Futures - needed by async progress sender
|
||||
futures = "0.3.15"
|
||||
futures = "0.3.17"
|
||||
|
||||
# Needed by broken files
|
||||
zip = "0.5.13"
|
||||
rodio = { version = "0.14.0", optional = true }
|
||||
|
||||
# Hashes
|
||||
blake3 = "1.0.0"
|
||||
blake3 = "1.2.0"
|
||||
crc32fast = "1.2.1"
|
||||
xxhash-rust = { version = "0.8.2", features = ["xxh3"] }
|
||||
|
||||
tempfile = "3.2"
|
||||
tempfile = "3.2.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
|
|
@ -8,7 +8,7 @@ use crossbeam_channel::Receiver;
|
|||
use directories_next::ProjectDirs;
|
||||
use humansize::{file_size_opts as options, FileSize};
|
||||
use image::GenericImageView;
|
||||
use img_hash::HasherConfig;
|
||||
use img_hash::{FilterType, HashAlg, HasherConfig};
|
||||
use rayon::prelude::*;
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::fs::OpenOptions;
|
||||
|
@ -22,11 +22,6 @@ use std::thread::sleep;
|
|||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
use std::{fs, mem, thread};
|
||||
|
||||
/// Type to store for each entry in the similarity BK-tree.
|
||||
type Node = [u8; 8];
|
||||
|
||||
const CACHE_FILE_NAME: &str = "cache_similar_image.txt";
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ProgressData {
|
||||
pub current_stage: u8,
|
||||
|
@ -49,19 +44,31 @@ pub struct FileEntry {
|
|||
pub size: u64,
|
||||
pub dimensions: String,
|
||||
pub modified_date: u64,
|
||||
pub hash: Node,
|
||||
pub hash: Vec<u8>,
|
||||
pub similarity: Similarity,
|
||||
}
|
||||
|
||||
// Used by the CLI tool, which takes a named preset and converts it to a `Similarity` for the chosen hash size
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum SimilarityPreset {
|
||||
VeryHigh,
|
||||
High,
|
||||
Medium,
|
||||
Small,
|
||||
VerySmall,
|
||||
Minimal,
|
||||
None,
|
||||
}
|
||||
|
||||
/// Distance metric to use with the BK-tree.
|
||||
struct Hamming;
|
||||
|
||||
impl bk_tree::Metric<Node> for Hamming {
|
||||
fn distance(&self, a: &Node, b: &Node) -> u32 {
|
||||
impl bk_tree::Metric<Vec<u8>> for Hamming {
|
||||
fn distance(&self, a: &Vec<u8>, b: &Vec<u8>) -> u32 {
|
||||
hamming::distance_fast(a, b).unwrap() as u32
|
||||
}
|
||||
|
||||
fn threshold_distance(&self, a: &Node, b: &Node, _threshold: u32) -> Option<u32> {
|
||||
fn threshold_distance(&self, a: &Vec<u8>, b: &Vec<u8>, _threshold: u32) -> Option<u32> {
|
||||
Some(self.distance(a, b))
|
||||
}
|
||||
}
|
||||
|
@ -72,15 +79,18 @@ pub struct SimilarImages {
|
|||
text_messages: Messages,
|
||||
directories: Directories,
|
||||
excluded_items: ExcludedItems,
|
||||
bktree: BKTree<Node, Hamming>,
|
||||
bktree: BKTree<Vec<u8>, Hamming>,
|
||||
similar_vectors: Vec<Vec<FileEntry>>,
|
||||
recursive_search: bool,
|
||||
minimal_file_size: u64,
|
||||
maximal_file_size: u64,
|
||||
image_hashes: BTreeMap<Node, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
|
||||
image_hashes: BTreeMap<Vec<u8>, Vec<FileEntry>>, // Hashmap with image hashes and Vector with names of files
|
||||
stopped_search: bool,
|
||||
similarity: Similarity,
|
||||
images_to_check: BTreeMap<String, FileEntry>,
|
||||
hash_size: u8,
|
||||
hash_alg: HashAlg,
|
||||
image_filter: FilterType,
|
||||
use_cache: bool,
|
||||
}
|
||||
|
||||
|
@ -115,10 +125,30 @@ impl SimilarImages {
|
|||
stopped_search: false,
|
||||
similarity: Similarity::Similar(1),
|
||||
images_to_check: Default::default(),
|
||||
hash_size: 8,
|
||||
hash_alg: HashAlg::Gradient,
|
||||
image_filter: FilterType::Lanczos3,
|
||||
use_cache: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_hash_size(&mut self, hash_size: u8) {
|
||||
self.hash_size = match hash_size {
|
||||
4 | 8 | 16 | 32 | 64 => hash_size,
|
||||
e => {
|
||||
panic!("Invalid value of hash size {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores the given hash algorithm in `self.hash_alg`.
pub fn set_hash_alg(&mut self, hash_alg: HashAlg) {
|
||||
self.hash_alg = hash_alg;
|
||||
}
|
||||
|
||||
/// Stores the given resize filter in `self.image_filter`.
pub fn set_image_filter(&mut self, image_filter: FilterType) {
|
||||
self.image_filter = image_filter;
|
||||
}
|
||||
|
||||
/// Returns the current value of the `stopped_search` flag.
pub fn get_stopped_search(&self) -> bool {
|
||||
self.stopped_search
|
||||
}
|
||||
|
@ -313,7 +343,7 @@ impl SimilarImages {
|
|||
} // Permissions Denied
|
||||
},
|
||||
|
||||
hash: [0; 8],
|
||||
hash: Vec::new(),
|
||||
similarity: Similarity::None,
|
||||
};
|
||||
|
||||
|
@ -345,7 +375,8 @@ impl SimilarImages {
|
|||
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
||||
|
||||
if self.use_cache {
|
||||
loaded_hash_map = match load_hashes_from_file(&mut self.text_messages) {
|
||||
// TODO Change cache size
|
||||
loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter) {
|
||||
Some(t) => t,
|
||||
None => Default::default(),
|
||||
};
|
||||
|
@ -401,7 +432,7 @@ impl SimilarImages {
|
|||
progress_thread_handle = thread::spawn(|| {});
|
||||
}
|
||||
//// PROGRESS THREAD END
|
||||
let mut vec_file_entry: Vec<(FileEntry, Node)> = non_cached_files_to_check
|
||||
let mut vec_file_entry: Vec<(FileEntry, Vec<u8>)> = non_cached_files_to_check
|
||||
.par_iter()
|
||||
.map(|file_entry| {
|
||||
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
||||
|
@ -418,23 +449,24 @@ impl SimilarImages {
|
|||
let dimensions = image.dimensions();
|
||||
|
||||
file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1);
|
||||
let hasher = HasherConfig::with_bytes_type::<Node>().to_hasher();
|
||||
|
||||
let hasher_config = HasherConfig::new().hash_size(self.hash_size as u32, self.hash_size as u32).hash_alg(self.hash_alg).resize_filter(self.image_filter);
|
||||
let hasher = hasher_config.to_hasher();
|
||||
|
||||
let hash = hasher.hash_image(&image);
|
||||
let mut buf = [0u8; 8];
|
||||
buf.copy_from_slice(hash.as_bytes());
|
||||
let buf: Vec<u8> = hash.as_bytes().to_vec();
|
||||
if buf.iter().all(|e| *e == 0) {
|
||||
// A little broken image
|
||||
return Some(None);
|
||||
}
|
||||
file_entry.hash = buf;
|
||||
file_entry.hash = buf.clone();
|
||||
|
||||
Some(Some((file_entry, buf)))
|
||||
})
|
||||
.while_some()
|
||||
.filter(|file_entry| file_entry.is_some())
|
||||
.map(|file_entry| file_entry.unwrap())
|
||||
.collect::<Vec<(FileEntry, Node)>>();
|
||||
.collect::<Vec<(FileEntry, Vec<u8>)>>();
|
||||
|
||||
// End thread which send info to gui
|
||||
progress_thread_run.store(false, Ordering::Relaxed);
|
||||
|
@ -449,8 +481,8 @@ impl SimilarImages {
|
|||
}
|
||||
|
||||
for (file_entry, buf) in &vec_file_entry {
|
||||
self.bktree.add(*buf);
|
||||
self.image_hashes.entry(*buf).or_insert_with(Vec::<FileEntry>::new);
|
||||
self.bktree.add(buf.clone());
|
||||
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
|
||||
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
|
||||
}
|
||||
|
||||
|
@ -460,7 +492,7 @@ impl SimilarImages {
|
|||
for (file_entry, _hash) in vec_file_entry {
|
||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||
}
|
||||
save_hashes_to_file(&all_results, &mut self.text_messages);
|
||||
save_hashes_to_file(&all_results, &mut self.text_messages, self.hash_size, self.hash_alg, self.image_filter);
|
||||
}
|
||||
|
||||
Common::print_time(hash_map_modification, SystemTime::now(), "sort_images - saving data to files".to_string());
|
||||
|
@ -474,11 +506,11 @@ impl SimilarImages {
|
|||
// TODO
|
||||
// Maybe also add here progress report
|
||||
|
||||
let mut collected_similar_images: BTreeMap<Node, Vec<FileEntry>> = Default::default();
|
||||
let mut collected_similar_images: BTreeMap<Vec<u8>, Vec<FileEntry>> = Default::default();
|
||||
|
||||
let mut available_hashes = self.image_hashes.clone();
|
||||
let mut this_time_check_hashes;
|
||||
let mut master_of_group: BTreeSet<Node> = Default::default(); // Lista wszystkich głównych hashy, które odpowiadają za porównywanie
|
||||
let mut master_of_group: BTreeSet<Vec<u8>> = Default::default(); // Lista wszystkich głównych hashy, które odpowiadają za porównywanie
|
||||
|
||||
for current_similarity in 0..=MAX_SIMILARITY {
|
||||
this_time_check_hashes = available_hashes.clone();
|
||||
|
@ -506,8 +538,8 @@ impl SimilarImages {
|
|||
|
||||
// Jeśli jeszcze nie dodał, to dodaje teraz grupę główną do już obrobionych
|
||||
if !master_of_group.contains(hash) {
|
||||
master_of_group.insert(*hash);
|
||||
collected_similar_images.insert(*hash, Vec::new());
|
||||
master_of_group.insert(hash.clone());
|
||||
collected_similar_images.insert(hash.clone(), Vec::new());
|
||||
|
||||
let mut things: Vec<FileEntry> = vec_file_entry
|
||||
.iter()
|
||||
|
@ -516,7 +548,7 @@ impl SimilarImages {
|
|||
size: fe.size,
|
||||
dimensions: fe.dimensions.clone(),
|
||||
modified_date: fe.modified_date,
|
||||
hash: fe.hash,
|
||||
hash: fe.hash.clone(),
|
||||
similarity: Similarity::Similar(0),
|
||||
})
|
||||
.collect();
|
||||
|
@ -535,7 +567,7 @@ impl SimilarImages {
|
|||
size: fe.size,
|
||||
dimensions: fe.dimensions.clone(),
|
||||
modified_date: fe.modified_date,
|
||||
hash: [0; 8],
|
||||
hash: Vec::new(),
|
||||
similarity: Similarity::Similar(current_similarity),
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
@ -629,7 +661,7 @@ impl SaveResults for SimilarImages {
|
|||
file_entry.path.display(),
|
||||
file_entry.dimensions,
|
||||
file_entry.size.file_size(options::BINARY).unwrap(),
|
||||
get_string_from_similarity(&file_entry.similarity)
|
||||
get_string_from_similarity(&file_entry.similarity, self.hash_size)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
@ -656,7 +688,7 @@ impl PrintResults for SimilarImages {
|
|||
file_entry.path.display(),
|
||||
file_entry.dimensions,
|
||||
file_entry.size.file_size(options::BINARY).unwrap(),
|
||||
get_string_from_similarity(&file_entry.similarity)
|
||||
get_string_from_similarity(&file_entry.similarity, self.hash_size)
|
||||
);
|
||||
}
|
||||
println!();
|
||||
|
@ -665,7 +697,7 @@ impl PrintResults for SimilarImages {
|
|||
}
|
||||
}
|
||||
|
||||
fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages) {
|
||||
fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) {
|
||||
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
||||
// Lin: /home/username/.cache/czkawka
|
||||
// Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
|
||||
|
@ -681,7 +713,7 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
|
|||
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
|
||||
return;
|
||||
}
|
||||
let cache_file = cache_dir.join(CACHE_FILE_NAME);
|
||||
let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
|
||||
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
|
@ -692,12 +724,13 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
|
|||
let mut writer = BufWriter::new(file_handler);
|
||||
|
||||
for file_entry in hashmap.values() {
|
||||
let mut string: String = "".to_string();
|
||||
let mut string: String = String::with_capacity(100);
|
||||
|
||||
string += format!("{}//{}//{}//{}//", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
|
||||
|
||||
for i in 0..file_entry.hash.len() - 1 {
|
||||
string += format!("{}//", file_entry.hash[i]).as_str();
|
||||
string.push_str(file_entry.hash[i].to_string().as_str());
|
||||
string.push_str("//");
|
||||
}
|
||||
string += file_entry.hash[file_entry.hash.len() - 1].to_string().as_str();
|
||||
|
||||
|
@ -708,10 +741,10 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
|
|||
}
|
||||
}
|
||||
}
|
||||
fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String, FileEntry>> {
|
||||
fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg: HashAlg, image_filter: FilterType) -> Option<BTreeMap<String, FileEntry>> {
|
||||
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
||||
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
|
||||
let cache_file = cache_dir.join(CACHE_FILE_NAME);
|
||||
let cache_file = cache_dir.join(get_cache_file(&hash_size, &hash_alg, &image_filter));
|
||||
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
|
||||
Ok(t) => t,
|
||||
Err(_inspected) => {
|
||||
|
@ -740,9 +773,9 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
|||
}
|
||||
// Don't load cache data if destination file not exists
|
||||
if Path::new(uuu[0]).exists() {
|
||||
let mut hash: Node = [0u8; 8];
|
||||
for i in 0..hash.len() {
|
||||
hash[i] = match uuu[4 + i].parse::<u8>() {
|
||||
let mut hash: Vec<u8> = Vec::new();
|
||||
for i in 0..hash_size {
|
||||
hash.push(match uuu[4 + i as usize].parse::<u8>() {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
text_messages
|
||||
|
@ -750,7 +783,7 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
|||
.push(format!("Found invalid hash value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
|
@ -805,27 +838,189 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
|||
text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
|
||||
None
|
||||
}
|
||||
pub fn get_string_from_similarity(similarity: &Similarity) -> String {
|
||||
|
||||
fn get_cache_file(hash_size: &u8, hash_alg: &HashAlg, image_filter: &FilterType) -> String {
|
||||
format!("cache_similar_images_{}_{}_{}.txt", hash_size, convert_algorithm_to_string(hash_alg), convert_filters_to_string(image_filter))
|
||||
}
|
||||
|
||||
// TODO check for better values
|
||||
pub fn get_string_from_similarity(similarity: &Similarity, hash_size: u8) -> String {
|
||||
match similarity {
|
||||
Similarity::None => {
|
||||
panic!()
|
||||
}
|
||||
Similarity::Similar(k) => {
|
||||
if *k < 1 {
|
||||
format!("Very High {}", *k)
|
||||
} else if *k < 2 {
|
||||
format!("High {}", *k)
|
||||
} else if *k < 4 {
|
||||
format!("Medium {}", *k)
|
||||
} else if *k < 6 {
|
||||
format!("Small {}", *k)
|
||||
} else if *k < 9 {
|
||||
format!("Very Small {}", *k)
|
||||
} else if *k < 13 {
|
||||
format!("Minimal {}", *k)
|
||||
} else {
|
||||
panic!()
|
||||
Similarity::Similar(h) => match hash_size {
|
||||
4 => {
|
||||
if *h == 0 {
|
||||
format!("Very High {}", *h)
|
||||
} else if *h <= 1 {
|
||||
format!("High {}", *h)
|
||||
} else if *h <= 2 {
|
||||
format!("Medium {}", *h)
|
||||
} else if *h <= 3 {
|
||||
format!("Small {}", *h)
|
||||
} else if *h <= 4 {
|
||||
format!("Very Small {}", *h)
|
||||
} else if *h <= 5 {
|
||||
format!("Minimal {}", *h)
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
8 => {
|
||||
if *h == 0 {
|
||||
format!("Very High {}", *h)
|
||||
} else if *h <= 1 {
|
||||
format!("High {}", *h)
|
||||
} else if *h <= 3 {
|
||||
format!("Medium {}", *h)
|
||||
} else if *h <= 5 {
|
||||
format!("Small {}", *h)
|
||||
} else if *h <= 8 {
|
||||
format!("Very Small {}", *h)
|
||||
} else if *h <= 12 {
|
||||
format!("Minimal {}", *h)
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
16 => {
|
||||
if *h <= 2 {
|
||||
format!("Very High {}", *h)
|
||||
} else if *h <= 7 {
|
||||
format!("High {}", *h)
|
||||
} else if *h <= 11 {
|
||||
format!("Medium {}", *h)
|
||||
} else if *h <= 17 {
|
||||
format!("Small {}", *h)
|
||||
} else if *h <= 23 {
|
||||
format!("Very Small {}", *h)
|
||||
} else if *h <= 44 {
|
||||
format!("Minimal {}", *h)
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
32 => {
|
||||
if *h <= 10 {
|
||||
format!("Very High {}", *h)
|
||||
} else if *h <= 30 {
|
||||
format!("High {}", *h)
|
||||
} else if *h <= 50 {
|
||||
format!("Medium {}", *h)
|
||||
} else if *h <= 90 {
|
||||
format!("Small {}", *h)
|
||||
} else if *h <= 120 {
|
||||
format!("Very Small {}", *h)
|
||||
} else if *h <= 180 {
|
||||
format!("Minimal {}", *h)
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Not supported hash size");
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn return_similarity_from_similarity_preset(similarity_preset: &SimilarityPreset, hash_size: u8) -> Similarity {
|
||||
match hash_size {
|
||||
4 => match similarity_preset {
|
||||
SimilarityPreset::VeryHigh => Similarity::Similar(0),
|
||||
SimilarityPreset::High => Similarity::Similar(1),
|
||||
SimilarityPreset::Medium => Similarity::Similar(2),
|
||||
SimilarityPreset::Small => Similarity::Similar(3),
|
||||
SimilarityPreset::VerySmall => Similarity::Similar(4),
|
||||
SimilarityPreset::Minimal => Similarity::Similar(4),
|
||||
SimilarityPreset::None => panic!(""),
|
||||
},
|
||||
8 => match similarity_preset {
|
||||
SimilarityPreset::VeryHigh => Similarity::Similar(0),
|
||||
SimilarityPreset::High => Similarity::Similar(1),
|
||||
SimilarityPreset::Medium => Similarity::Similar(3),
|
||||
SimilarityPreset::Small => Similarity::Similar(5),
|
||||
SimilarityPreset::VerySmall => Similarity::Similar(8),
|
||||
SimilarityPreset::Minimal => Similarity::Similar(12),
|
||||
SimilarityPreset::None => panic!(""),
|
||||
},
|
||||
16 => match similarity_preset {
|
||||
SimilarityPreset::VeryHigh => Similarity::Similar(2),
|
||||
SimilarityPreset::High => Similarity::Similar(7),
|
||||
SimilarityPreset::Medium => Similarity::Similar(11),
|
||||
SimilarityPreset::Small => Similarity::Similar(17),
|
||||
SimilarityPreset::VerySmall => Similarity::Similar(23),
|
||||
SimilarityPreset::Minimal => Similarity::Similar(44),
|
||||
SimilarityPreset::None => panic!(""),
|
||||
},
|
||||
32 => match similarity_preset {
|
||||
SimilarityPreset::VeryHigh => Similarity::Similar(10),
|
||||
SimilarityPreset::High => Similarity::Similar(30),
|
||||
SimilarityPreset::Medium => Similarity::Similar(50),
|
||||
SimilarityPreset::Small => Similarity::Similar(90),
|
||||
SimilarityPreset::VerySmall => Similarity::Similar(120),
|
||||
SimilarityPreset::Minimal => Similarity::Similar(180),
|
||||
SimilarityPreset::None => panic!(""),
|
||||
},
|
||||
_ => panic!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_filters_to_string(image_filter: &FilterType) -> String {
|
||||
match image_filter {
|
||||
FilterType::Lanczos3 => "Lanczos3",
|
||||
FilterType::Nearest => "Nearest",
|
||||
FilterType::Triangle => "Triangle",
|
||||
FilterType::Gaussian => "Gaussian",
|
||||
FilterType::CatmullRom => "CatmullRom",
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn convert_algorithm_to_string(hash_alg: &HashAlg) -> String {
|
||||
match hash_alg {
|
||||
HashAlg::Mean => "Mean",
|
||||
HashAlg::Gradient => "Gradient",
|
||||
HashAlg::Blockhash => "Blockhash",
|
||||
HashAlg::VertGradient => "VertGradient",
|
||||
HashAlg::DoubleGradient => "DoubleGradient",
|
||||
HashAlg::__Nonexhaustive => panic!(),
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub fn test_image_conversion_speed() {
|
||||
let file_name: &str = "test.jpg";
|
||||
let file_path = Path::new(file_name);
|
||||
match image::open(file_path) {
|
||||
Ok(img_open) => {
|
||||
for alg in [HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] {
|
||||
for filter in [FilterType::Lanczos3, FilterType::CatmullRom, FilterType::Gaussian, FilterType::Nearest, FilterType::Triangle] {
|
||||
for size in [2, 4, 8, 16, 32, 64] {
|
||||
let hasher_config = HasherConfig::new().hash_alg(alg).resize_filter(filter).hash_size(size, size);
|
||||
|
||||
let start = SystemTime::now();
|
||||
|
||||
let hasher = hasher_config.to_hasher();
|
||||
let _hash = hasher.hash_image(&img_open);
|
||||
|
||||
let end = SystemTime::now();
|
||||
|
||||
println!("{:?} us {:?} {:?} {}x{}", end.duration_since(start).unwrap().as_micros(), alg, filter, size, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!(
|
||||
"Failed to open test file {}, reason {}",
|
||||
match file_path.canonicalize() {
|
||||
Ok(t) => t.to_string_lossy().to_string(),
|
||||
Err(_inspected) => file_name.to_string(),
|
||||
},
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,8 +10,8 @@ repository = "https://github.com/qarmin/czkawka"
|
|||
|
||||
[dependencies]
|
||||
czkawka_core = { path = "../czkawka_core" }
|
||||
gdk = "0.14.0"
|
||||
glib = "0.14.2"
|
||||
gdk = "0.14.3"
|
||||
glib = "0.14.8"
|
||||
|
||||
humansize = "1.1.1"
|
||||
chrono = "0.4.19"
|
||||
|
@ -20,13 +20,13 @@ chrono = "0.4.19"
|
|||
crossbeam-channel = "0.5.1"
|
||||
|
||||
# To get information about progress
|
||||
futures = "0.3.15"
|
||||
futures = "0.3.17"
|
||||
|
||||
# For saving/loading config files to specific directories
|
||||
directories-next = "2.0.0"
|
||||
|
||||
# For opening files
|
||||
open = "2.0.0"
|
||||
open = "2.0.1"
|
||||
|
||||
# To get image preview
|
||||
image = "0.23.14"
|
||||
|
@ -38,7 +38,7 @@ trash = "1.3.0"
|
|||
winapi = { version = "0.3.9", features = ["combaseapi", "objbase", "shobjidl_core", "windef", "winerror", "wtypesbase", "winuser"] }
|
||||
|
||||
[dependencies.gtk]
|
||||
version = "0.14.0"
|
||||
version = "0.14.3"
|
||||
default-features = false # just in case
|
||||
features = ["v3_22"]
|
||||
|
||||
|
|
|
@ -523,7 +523,7 @@ pub fn connect_compute_results(gui_data: &GuiData, glib_stop_receiver: Receiver<
|
|||
let values: [(u32, &dyn ToValue); 12] = [
|
||||
(0, &true),
|
||||
(1, &false),
|
||||
(2, &(similar_images::get_string_from_similarity(&file_entry.similarity).to_string())),
|
||||
(2, &(similar_images::get_string_from_similarity(&file_entry.similarity, 8).to_string())), // TODO use proper hash value
|
||||
(3, &file_entry.size.file_size(options::BINARY).unwrap()),
|
||||
(4, &file_entry.size),
|
||||
(5, &file_entry.dimensions),
|
||||
|
|
|
@ -679,13 +679,18 @@ fn show_preview(tree_view: &TreeView, text_view_errors: &TextView, check_button_
|
|||
}
|
||||
}
|
||||
let img = img.resize(new_size.0, new_size.1, FilterType::Triangle);
|
||||
let file_dir = cache_dir.join(format!("cached_file.{}", extension.to_string_lossy()));
|
||||
let file_dir = cache_dir.join(format!("cached_file.{}", extension.to_string_lossy().to_lowercase()));
|
||||
if let Err(e) = img.save(&file_dir) {
|
||||
add_text_to_text_view(text_view_errors, format!("Failed to save temporary image file to {}, reason {}", file_dir.display(), e).as_str());
|
||||
let _ = fs::remove_file(&file_dir);
|
||||
break 'dir;
|
||||
}
|
||||
let string_dir = file_dir.to_string_lossy().to_string();
|
||||
image_preview_similar_images.set_from_file(string_dir);
|
||||
if let Err(e) = fs::remove_file(&file_dir) {
|
||||
add_text_to_text_view(text_view_errors, format!("Failed to delete temporary image file to {}, reason {}", file_dir.display(), e).as_str());
|
||||
break 'dir;
|
||||
}
|
||||
created_image = true;
|
||||
}
|
||||
break 'dir;
|
||||
|
|
Loading…
Reference in a new issue