Save to cache info about broken, too short etc. images and videos (#471)
This commit is contained in:
parent
f14101139d
commit
f2333b9df2
|
@ -451,7 +451,17 @@ impl SimilarImages {
|
||||||
|
|
||||||
let image = match image::open(file_entry.path.clone()) {
|
let image = match image::open(file_entry.path.clone()) {
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(_inspected) => return Some(None), // Something is wrong with image
|
// Err(_inspected) => return Some(None), // Something is wrong with image,
|
||||||
|
// For broken images an empty (all-zero) hash is used, because otherwise the files would be rescanned on every run (the missing cache entry is responsible for that)
|
||||||
|
// This may (very rarely) cause problems, e.g. when a file was not available due to a lack of permissions but is available now
|
||||||
|
Err(_inspected) => {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
for _i in 0..(self.hash_size * self.hash_size / 8) {
|
||||||
|
buf.push(0);
|
||||||
|
}
|
||||||
|
file_entry.hash = buf.clone();
|
||||||
|
return Some(Some((file_entry, buf)));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let dimensions = image.dimensions();
|
let dimensions = image.dimensions();
|
||||||
|
|
||||||
|
@ -463,16 +473,6 @@ impl SimilarImages {
|
||||||
let hash = hasher.hash_image(&image);
|
let hash = hasher.hash_image(&image);
|
||||||
let buf: Vec<u8> = hash.as_bytes().to_vec();
|
let buf: Vec<u8> = hash.as_bytes().to_vec();
|
||||||
|
|
||||||
// Hashes consisting entirely of 0 or 255 usually mean that the algorithm failed to decode the image, e.g. because it contains a lot of alpha channel
|
|
||||||
{
|
|
||||||
if buf.iter().all(|e| *e == 0) {
|
|
||||||
return Some(None);
|
|
||||||
}
|
|
||||||
if buf.iter().all(|e| *e == 255) {
|
|
||||||
return Some(None);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
file_entry.hash = buf.clone();
|
file_entry.hash = buf.clone();
|
||||||
|
|
||||||
Some(Some((file_entry, buf)))
|
Some(Some((file_entry, buf)))
|
||||||
|
@ -495,9 +495,12 @@ impl SimilarImages {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (file_entry, buf) in &vec_file_entry {
|
for (file_entry, buf) in &vec_file_entry {
|
||||||
self.bktree.add(buf.clone());
|
// Only use non-broken hashes for comparison (hashes consisting entirely of 0 or 255 mean that the algorithm failed to decode the image, e.g. because it contains a lot of alpha channel)
|
||||||
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
|
if !(buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) {
|
||||||
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
|
self.bktree.add(buf.clone());
|
||||||
|
self.image_hashes.entry(buf.clone()).or_insert_with(Vec::<FileEntry>::new);
|
||||||
|
self.image_hashes.get_mut(buf).unwrap().push(file_entry.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.use_cache {
|
if self.use_cache {
|
||||||
|
@ -753,9 +756,9 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
|
||||||
|
|
||||||
string += format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
|
string += format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.dimensions, file_entry.modified_date).as_str();
|
||||||
|
|
||||||
for i in 0..file_entry.hash.len() - 1 {
|
for hash in &file_entry.hash {
|
||||||
string.push_str("//");
|
string.push_str("//");
|
||||||
string.push_str(file_entry.hash[i].to_string().as_str());
|
string.push_str(hash.to_string().as_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Err(e) = writeln!(writer, "{}", string) {
|
if let Err(e) = writeln!(writer, "{}", string) {
|
||||||
|
@ -800,8 +803,8 @@ fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg:
|
||||||
index + 1,
|
index + 1,
|
||||||
line,
|
line,
|
||||||
cache_file.display(),
|
cache_file.display(),
|
||||||
uuu.len(),
|
number_of_results + 4,
|
||||||
number_of_results + 4
|
uuu.len()
|
||||||
));
|
));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -820,22 +823,6 @@ fn load_hashes_from_file(text_messages: &mut Messages, hash_size: u8, hash_alg:
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
{
|
|
||||||
let mut have_at_least: u8 = 0;
|
|
||||||
for i in hash.iter() {
|
|
||||||
if *i == 0 {
|
|
||||||
have_at_least += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if have_at_least == hash.len() as u8 {
|
|
||||||
println!("ERROR START - {}", line);
|
|
||||||
println!("have_at_least == hash.len() as u8");
|
|
||||||
println!("ERROR END hash.len() - {} == have_at_least - {}", hash.len(), have_at_least);
|
|
||||||
continue; // Just skip this entry; it is very unlikely that anything has this hash, but if it does, simply ignore it
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
hashmap_loaded_entries.insert(
|
hashmap_loaded_entries.insert(
|
||||||
uuu[0].to_string(),
|
uuu[0].to_string(),
|
||||||
FileEntry {
|
FileEntry {
|
||||||
|
|
|
@ -27,6 +27,8 @@ use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
|
||||||
|
|
||||||
pub const MAX_TOLERANCE: i32 = 20;
|
pub const MAX_TOLERANCE: i32 = 20;
|
||||||
|
|
||||||
|
const HASH_SIZE: usize = 19;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ProgressData {
|
pub struct ProgressData {
|
||||||
pub current_stage: u8,
|
pub current_stage: u8,
|
||||||
|
@ -41,6 +43,7 @@ pub struct FileEntry {
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub modified_date: u64,
|
pub modified_date: u64,
|
||||||
pub vhash: VideoHash,
|
pub vhash: VideoHash,
|
||||||
|
pub error: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Distance metric to use with the BK-tree.
|
/// Distance metric to use with the BK-tree.
|
||||||
|
@ -321,6 +324,7 @@ impl SimilarVideos {
|
||||||
} // Permissions Denied
|
} // Permissions Denied
|
||||||
},
|
},
|
||||||
vhash: Default::default(),
|
vhash: Default::default(),
|
||||||
|
error: "".to_string(),
|
||||||
};
|
};
|
||||||
|
|
||||||
self.videos_to_check.insert(current_file_name.to_string_lossy().to_string(), fe);
|
self.videos_to_check.insert(current_file_name.to_string_lossy().to_string(), fe);
|
||||||
|
@ -400,7 +404,7 @@ impl SimilarVideos {
|
||||||
progress_thread_handle = thread::spawn(|| {});
|
progress_thread_handle = thread::spawn(|| {});
|
||||||
}
|
}
|
||||||
//// PROGRESS THREAD END
|
//// PROGRESS THREAD END
|
||||||
let old_vec_file_entry: Vec<std::result::Result<FileEntry, String>> = non_cached_files_to_check
|
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(|file_entry| {
|
.map(|file_entry| {
|
||||||
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
atomic_file_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
@ -412,30 +416,25 @@ impl SimilarVideos {
|
||||||
|
|
||||||
let vhash = match VideoHash::from_path(&file_entry.path) {
|
let vhash = match VideoHash::from_path(&file_entry.path) {
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(e) => return Some(Err(format!("Failed to hash file, {}", e))),
|
Err(e) => {
|
||||||
|
return {
|
||||||
|
file_entry.error = format!("Failed to hash file, {}", e);
|
||||||
|
Some(file_entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
file_entry.vhash = vhash;
|
file_entry.vhash = vhash;
|
||||||
|
|
||||||
Some(Ok(file_entry))
|
Some(file_entry)
|
||||||
})
|
})
|
||||||
.while_some()
|
.while_some()
|
||||||
.collect::<Vec<std::result::Result<FileEntry, String>>>();
|
.collect::<Vec<FileEntry>>();
|
||||||
|
|
||||||
// End thread which send info to gui
|
// End thread which send info to gui
|
||||||
progress_thread_run.store(false, Ordering::Relaxed);
|
progress_thread_run.store(false, Ordering::Relaxed);
|
||||||
progress_thread_handle.join().unwrap();
|
progress_thread_handle.join().unwrap();
|
||||||
|
|
||||||
let mut vec_file_entry = Vec::new();
|
|
||||||
for result in old_vec_file_entry {
|
|
||||||
match result {
|
|
||||||
Ok(t) => vec_file_entry.push(t),
|
|
||||||
Err(e) => {
|
|
||||||
self.text_messages.errors.push(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Common::print_time(hash_map_modification, SystemTime::now(), "sort_videos - reading data from files in parallel".to_string());
|
Common::print_time(hash_map_modification, SystemTime::now(), "sort_videos - reading data from files in parallel".to_string());
|
||||||
let hash_map_modification = SystemTime::now();
|
let hash_map_modification = SystemTime::now();
|
||||||
|
|
||||||
|
@ -446,9 +445,14 @@ impl SimilarVideos {
|
||||||
|
|
||||||
let mut hashmap_with_file_entries: HashMap<String, FileEntry> = Default::default();
|
let mut hashmap_with_file_entries: HashMap<String, FileEntry> = Default::default();
|
||||||
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
|
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
|
||||||
for i in &vec_file_entry {
|
for file_entry in &vec_file_entry {
|
||||||
hashmap_with_file_entries.insert(i.vhash.src_path().to_string_lossy().to_string(), i.clone());
|
// Only entries with an empty `error` are stored and hashed further; for the others the saved error message is reported
|
||||||
vector_of_hashes.push(i.vhash.clone());
|
if file_entry.error.is_empty() {
|
||||||
|
hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone());
|
||||||
|
vector_of_hashes.push(file_entry.vhash.clone());
|
||||||
|
} else {
|
||||||
|
self.text_messages.errors.push(file_entry.error.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.use_cache {
|
if self.use_cache {
|
||||||
|
@ -614,7 +618,16 @@ fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages: &mu
|
||||||
for file_entry in hashmap.values() {
|
for file_entry in hashmap.values() {
|
||||||
let mut string: String = String::with_capacity(256);
|
let mut string: String = String::with_capacity(256);
|
||||||
|
|
||||||
string += format!("{}//{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.vhash.num_frames(), file_entry.vhash.duration()).as_str();
|
string += format!(
|
||||||
|
"{}//{}//{}//{}//{}//{}",
|
||||||
|
file_entry.path.display(),
|
||||||
|
file_entry.size,
|
||||||
|
file_entry.modified_date,
|
||||||
|
file_entry.vhash.num_frames(),
|
||||||
|
file_entry.vhash.duration(),
|
||||||
|
file_entry.error
|
||||||
|
)
|
||||||
|
.as_str();
|
||||||
|
|
||||||
for i in file_entry.vhash.hash() {
|
for i in file_entry.vhash.hash() {
|
||||||
string.push_str("//");
|
string.push_str("//");
|
||||||
|
@ -655,15 +668,14 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let uuu = line.split("//").collect::<Vec<&str>>();
|
let uuu = line.split("//").collect::<Vec<&str>>();
|
||||||
let hash_size = 19;
|
|
||||||
// Hash size + other things
|
// Hash size + other things
|
||||||
if uuu.len() != (hash_size + 5) {
|
if uuu.len() != (HASH_SIZE + 6) {
|
||||||
text_messages.warnings.push(format!(
|
text_messages.warnings.push(format!(
|
||||||
"Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
|
"Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
|
||||||
index + 1,
|
index + 1,
|
||||||
line,
|
line,
|
||||||
cache_file.display(),
|
cache_file.display(),
|
||||||
hash_size + 5,
|
HASH_SIZE + 6,
|
||||||
uuu.len(),
|
uuu.len(),
|
||||||
));
|
));
|
||||||
continue;
|
continue;
|
||||||
|
@ -671,8 +683,8 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
||||||
// Don't load cache data if destination file not exists
|
// Don't load cache data if destination file not exists
|
||||||
if Path::new(uuu[0]).exists() {
|
if Path::new(uuu[0]).exists() {
|
||||||
let mut hash: [u64; 19] = [0; 19];
|
let mut hash: [u64; 19] = [0; 19];
|
||||||
for i in 0..hash_size {
|
for i in 0..HASH_SIZE {
|
||||||
hash[i] = match uuu[5 + i as usize].parse::<u64>() {
|
hash[i] = match uuu[6 + i as usize].parse::<u64>() {
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
text_messages
|
text_messages
|
||||||
|
@ -706,6 +718,7 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<BTreeMap<String
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
vhash: VideoHash::with_start_data(uuu[4].parse::<u32>().unwrap_or(0), uuu[0], hash, uuu[3].parse::<u32>().unwrap_or(10)),
|
vhash: VideoHash::with_start_data(uuu[4].parse::<u32>().unwrap_or(0), uuu[0], hash, uuu[3].parse::<u32>().unwrap_or(10)),
|
||||||
|
error: uuu[5].to_string(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue