
Unifying finding items

Rafał Mikrut 2023-12-16 17:49:37 +01:00
parent da1797cb55
commit f528e77aef
10 changed files with 216 additions and 306 deletions

View file

@ -1,10 +1,9 @@
use std::collections::BTreeMap;
use std::fs::{DirEntry, File};
use std::collections::{BTreeMap, HashSet};
use std::fs::File;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::{fs, mem, panic};
use crossbeam_channel::{Receiver, Sender};
@ -18,23 +17,23 @@ use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use crate::common::{
check_folder_children, check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
};
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct FileEntry {
pub struct BrokenEntry {
pub path: PathBuf,
pub modified_date: u64,
pub size: u64,
pub type_of_file: TypeOfFile,
pub error_string: String,
}
impl ResultEntry for FileEntry {
impl ResultEntry for BrokenEntry {
fn get_path(&self) -> &Path {
&self.path
}
@ -46,6 +45,19 @@ impl ResultEntry for FileEntry {
}
}
impl FileEntry {
fn into_broken_entry(self) -> BrokenEntry {
BrokenEntry {
size: self.size,
path: self.path,
modified_date: self.modified_date,
type_of_file: TypeOfFile::Unknown,
error_string: String::new(),
}
}
}
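The same three-part recipe (a plain entry struct, a ResultEntry impl, and a FileEntry conversion) is repeated for every tool touched by this commit. A minimal sketch for a hypothetical tool follows; FooEntry and into_foo_entry are illustrative names, and the ResultEntry methods other than get_path are assumed from context:

#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct FooEntry {
    pub path: PathBuf,
    pub modified_date: u64,
    pub size: u64,
}

impl ResultEntry for FooEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}

impl FileEntry {
    fn into_foo_entry(self) -> FooEntry {
        // Copy the fields shared with the generic FileEntry;
        // tool-specific fields would get defaults here.
        FooEntry {
            size: self.size,
            path: self.path,
            modified_date: self.modified_date,
        }
    }
}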
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
pub enum TypeOfFile {
Unknown = -1,
@ -55,6 +67,8 @@ pub enum TypeOfFile {
PDF,
}
const MAX_BROKEN_FILES_STAGE: u8 = 1;
bitflags! {
#[derive(PartialEq, Copy, Clone)]
pub struct CheckedTypes : u32 {
@ -75,8 +89,8 @@ pub struct Info {
pub struct BrokenFiles {
common_data: CommonToolData,
information: Info,
files_to_check: BTreeMap<String, FileEntry>,
broken_files: Vec<FileEntry>,
files_to_check: BTreeMap<String, BrokenEntry>,
broken_files: Vec<BrokenEntry>,
checked_types: CheckedTypes,
}
@ -108,108 +122,59 @@ impl BrokenFiles {
#[fun_time(message = "check_files", level = "debug")]
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();
let zip_extensions = ZIP_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
let audio_extensions = AUDIO_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
let pdf_extensions = PDF_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
let images_extensions = IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type);
debug!("check_files - starting to collect files");
while !folders_to_check.is_empty() {
if check_if_stop_received(stop_receiver) {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
return false;
let mut extensions = Vec::new();
let vec_extensions = [
(CheckedTypes::PDF, PDF_FILES_EXTENSIONS),
(CheckedTypes::AUDIO, AUDIO_FILES_EXTENSIONS),
(CheckedTypes::ARCHIVE, ZIP_FILES_EXTENSIONS),
(CheckedTypes::IMAGE, IMAGE_RS_BROKEN_FILES_EXTENSIONS),
];
for (checked_type, extensions_to_add) in &vec_extensions {
if self.checked_types.contains(*checked_type) {
extensions.extend_from_slice(extensions_to_add);
}
}
let segments: Vec<_> = folders_to_check
.into_par_iter()
.map(|current_folder| {
let mut dir_result = vec![];
let mut warnings = vec![];
let mut fe_result = vec![];
self.common_data.allowed_extensions.set_and_validate_extensions(&extensions);
if !self.common_data.allowed_extensions.set_any_extensions() {
return true;
}
let Some(read_dir) = common_read_dir(&current_folder, &mut warnings) else {
return (dir_result, warnings, fe_result);
};
let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
.max_stage(MAX_BROKEN_FILES_STAGE)
.build()
.run();
// Check every sub folder/file/link etc.
for entry in read_dir {
let Ok(entry_data) = entry else {
continue;
};
let Ok(file_type) = entry_data.file_type() else {
continue;
};
if file_type.is_dir() {
check_folder_children(
&mut dir_result,
&mut warnings,
&entry_data,
self.common_data.recursive_search,
&self.common_data.directories,
&self.common_data.excluded_items,
);
} else if file_type.is_file() {
if let Some(file_entry) = self.get_file_entry(&atomic_counter, &entry_data, &mut warnings) {
fe_result.push((file_entry.path.to_string_lossy().to_string(), file_entry));
}
}
}
(dir_result, warnings, fe_result)
})
.collect();
debug!("check_files - collected files");
let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
folders_to_check = Vec::with_capacity(required_size);
// Process collected data
for (segment, warnings, fe_result) in segments {
folders_to_check.extend(segment);
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.broken_files = grouped_file_entries
.into_values()
.flatten()
.map(|fe| {
let mut broken_entry = fe.into_broken_entry();
broken_entry.type_of_file = check_extension_availability(broken_entry.get_path(), &images_extensions, &zip_extensions, &audio_extensions, &pdf_extensions);
broken_entry
})
.collect();
self.common_data.text_messages.warnings.extend(warnings);
for (name, fe) in fe_result {
self.files_to_check.insert(name, fe);
}
debug!("check_files - Found {} image files.", self.broken_files.len());
true
}
}
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
true
DirTraversalResult::Stopped => false,
}
}
fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<FileEntry> {
atomic_counter.fetch_add(1, Ordering::Relaxed);
if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) {
return None;
}
let file_name_lowercase = entry_data.file_name().to_string_lossy().to_lowercase();
let type_of_file = check_extension_availability(&file_name_lowercase);
if !check_if_file_extension_is_allowed(&type_of_file, &self.checked_types) {
return None;
}
let current_file_name = entry_data.path();
if self.common_data.excluded_items.is_excluded(&current_file_name) {
return None;
}
let Ok(metadata) = entry_data.metadata() else {
return None;
};
let fe: FileEntry = FileEntry {
modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
path: current_file_name,
size: metadata.len(),
type_of_file,
error_string: String::new(),
};
Some(fe)
}
fn check_broken_image(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
fn check_broken_image(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
let mut file_entry_clone = file_entry.clone();
panic::catch_unwind(|| {
@ -230,7 +195,7 @@ impl BrokenFiles {
Some(file_entry_clone)
})
}
fn check_broken_zip(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
fn check_broken_zip(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
match File::open(&file_entry.path) {
Ok(file) => {
if let Err(e) = zip::ZipArchive::new(file) {
@ -241,7 +206,7 @@ impl BrokenFiles {
Err(_inspected) => None,
}
}
fn check_broken_audio(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
fn check_broken_audio(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
match File::open(&file_entry.path) {
Ok(file) => {
let mut file_entry_clone = file_entry.clone();
@ -262,7 +227,7 @@ impl BrokenFiles {
Err(_inspected) => None,
}
}
fn check_broken_pdf(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
fn check_broken_pdf(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
let parser_options = ParseOptions::tolerant(); // Only report files with serious parse errors as broken
let mut file_entry_clone = file_entry.clone();
@ -300,15 +265,16 @@ impl BrokenFiles {
}
#[fun_time(message = "load_cache", level = "debug")]
fn load_cache(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
fn load_cache(&mut self) -> (BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>) {
let loaded_hash_map;
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
let mut records_already_cached: BTreeMap<String, BrokenEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, BrokenEntry> = Default::default();
let files_to_check = mem::take(&mut self.files_to_check);
if self.common_data.use_cache {
let (messages, loaded_items) = load_cache_from_file_generalized_by_path::<FileEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
let (messages, loaded_items) =
load_cache_from_file_generalized_by_path::<BrokenEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
self.get_text_messages_mut().extend_with_another_messages(messages);
loaded_hash_map = loaded_items.unwrap_or_default();
@ -334,7 +300,7 @@ impl BrokenFiles {
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
debug!("look_for_broken_files - started finding for broken files");
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
let mut vec_file_entry: Vec<BrokenEntry> = non_cached_files_to_check
.into_par_iter()
.map(|(_, file_entry)| {
atomic_counter.fetch_add(1, Ordering::Relaxed);
@ -354,7 +320,7 @@ impl BrokenFiles {
.while_some()
.filter(Option::is_some)
.map(Option::unwrap)
.collect::<Vec<FileEntry>>();
.collect::<Vec<BrokenEntry>>();
debug!("look_for_broken_files - ended finding for broken files");
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
@ -377,10 +343,10 @@ impl BrokenFiles {
true
}
#[fun_time(message = "save_to_cache", level = "debug")]
fn save_to_cache(&mut self, vec_file_entry: &[FileEntry], loaded_hash_map: BTreeMap<String, FileEntry>) {
fn save_to_cache(&mut self, vec_file_entry: &[BrokenEntry], loaded_hash_map: BTreeMap<String, BrokenEntry>) {
if self.common_data.use_cache {
// Must save all results to file: results previously loaded from the cache file are merged with all newly computed ones
let mut all_results: BTreeMap<String, FileEntry> = Default::default();
let mut all_results: BTreeMap<String, BrokenEntry> = Default::default();
for file_entry in vec_file_entry.iter().cloned() {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
@ -415,7 +381,7 @@ impl BrokenFiles {
}
impl BrokenFiles {
pub const fn get_broken_files(&self) -> &Vec<FileEntry> {
pub const fn get_broken_files(&self) -> &Vec<BrokenEntry> {
&self.broken_files
}
@ -469,27 +435,36 @@ impl PrintResults for BrokenFiles {
}
}
fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
fn check_extension_availability(
full_name: &Path,
images_extensions: &HashSet<&&'static str>,
zip_extensions: &HashSet<&&'static str>,
audio_extensions: &HashSet<&&'static str>,
pdf_extensions: &HashSet<&&'static str>,
) -> TypeOfFile {
let Some(extension) = full_name.extension() else {
debug_assert!(false, "Missing extension");
return TypeOfFile::Unknown;
};
let Some(extension_str) = extension.to_str() else {
return TypeOfFile::Unknown;
};
if images_extensions.contains(&extension_str) {
TypeOfFile::Image
} else if ZIP_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
} else if zip_extensions.contains(&extension_str) {
TypeOfFile::ArchiveZip
} else if AUDIO_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
} else if audio_extensions.contains(&extension_str) {
TypeOfFile::Audio
} else if PDF_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
} else if pdf_extensions.contains(&extension_str) {
TypeOfFile::PDF
} else {
debug_assert!(false, "File with unknown extension");
TypeOfFile::Unknown
}
}
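Classification now keys on Path::extension against prebuilt HashSets instead of running ends_with over the whole lowercased file name. Note that extension() preserves the original case, so the lookup above only matches lowercase extensions. A sketch of a case-insensitive variant; the to_ascii_lowercase step is an illustrative addition, not part of this commit:

use std::collections::HashSet;
use std::path::Path;

// Case-insensitive variant of the lookup used above (the lowercasing is an
// assumption for illustration; the commit compares the extension as-is).
fn is_extension_in_set(path: &Path, set: &HashSet<&'static str>) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .map(|ext| set.contains(ext.to_ascii_lowercase().as_str()))
        .unwrap_or(false)
}

fn main() {
    let images: HashSet<&'static str> = ["jpg", "png"].into_iter().collect();
    assert!(is_extension_in_set(Path::new("photo.JPG"), &images));
    assert!(!is_extension_in_set(Path::new("notes.txt"), &images));
}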
fn check_if_file_extension_is_allowed(type_of_file: &TypeOfFile, checked_types: &CheckedTypes) -> bool {
((*type_of_file == TypeOfFile::Image) && ((*checked_types & CheckedTypes::IMAGE) == CheckedTypes::IMAGE))
|| ((*type_of_file == TypeOfFile::PDF) && ((*checked_types & CheckedTypes::PDF) == CheckedTypes::PDF))
|| ((*type_of_file == TypeOfFile::ArchiveZip) && ((*checked_types & CheckedTypes::ARCHIVE) == CheckedTypes::ARCHIVE))
|| ((*type_of_file == TypeOfFile::Audio) && ((*checked_types & CheckedTypes::AUDIO) == CheckedTypes::AUDIO))
}
fn unpack_pdf_error(e: PdfError) -> PdfError {
if let Try {
file: _,
@ -505,7 +480,7 @@ fn unpack_pdf_error(e: PdfError) -> PdfError {
}
}
fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError {
fn validate_pdf_error(file_entry: &mut BrokenEntry, e: PdfError) -> PdfError {
let mut error_string = e.to_string();
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
if error_string.starts_with("Try at") {

View file

@ -123,30 +123,27 @@ pub fn set_number_of_threads(thread_number: usize) {
}
pub const RAW_IMAGE_EXTENSIONS: &[&str] = &[
".mrw", ".arw", ".srf", ".sr2", ".mef", ".orf", ".srw", ".erf", ".kdc", ".kdc", ".dcs", ".rw2", ".raf", ".dcr", ".dng", ".pef", ".crw", ".iiq", ".3fr", ".nrw", ".nef", ".mos",
".cr2", ".ari",
];
pub const IMAGE_RS_EXTENSIONS: &[&str] = &[
".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".webp", ".gif", ".ico", ".exr", ".qoi",
"mrw", "arw", "srf", "sr2", "mef", "orf", "srw", "erf", "kdc", "kdc", "dcs", "rw2", "raf", "dcr", "dng", "pef", "crw", "iiq", "3fr", "nrw", "nef", "mos", "cr2", "ari",
];
pub const IMAGE_RS_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "bmp", "tiff", "tif", "tga", "ff", "jif", "jfi", "webp", "gif", "ico", "exr", "qoi"];
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &[".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".bmp", ".webp", ".exr", ".qoi"];
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "bmp", "webp", "exr", "qoi"];
pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[
".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".gif", ".bmp", ".ico", ".jfif", ".jpe", ".pnz", ".dib", ".webp", ".exr",
"jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "gif", "bmp", "ico", "jfif", "jpe", "pnz", "dib", "webp", "exr",
];
pub const HEIC_EXTENSIONS: &[&str] = &[".heif", ".heifs", ".heic", ".heics", ".avci", ".avcs", ".avifs"];
pub const HEIC_EXTENSIONS: &[&str] = &["heif", "heifs", "heic", "heics", "avci", "avcs", "avifs"];
pub const ZIP_FILES_EXTENSIONS: &[&str] = &[".zip", ".jar"];
pub const ZIP_FILES_EXTENSIONS: &[&str] = &["zip", "jar"];
pub const PDF_FILES_EXTENSIONS: &[&str] = &[".pdf"];
pub const PDF_FILES_EXTENSIONS: &[&str] = &["pdf"];
pub const AUDIO_FILES_EXTENSIONS: &[&str] = &[
".mp3", ".flac", ".wav", ".ogg", ".m4a", ".aac", ".aiff", ".pcm", ".aif", ".aiff", ".aifc", ".m3a", ".mp2", ".mp4a", ".mp2a", ".mpga", ".wave", ".weba", ".wma", ".oga",
"mp3", "flac", "wav", "ogg", "m4a", "aac", "aiff", "pcm", "aif", "aiff", "aifc", "m3a", "mp2", "mp4a", "mp2a", "mpga", "wave", "weba", "wma", "oga",
];
pub const VIDEO_FILES_EXTENSIONS: &[&str] = &[
".mp4", ".mpv", ".flv", ".mp4a", ".webm", ".mpg", ".mp2", ".mpeg", ".m4p", ".m4v", ".avi", ".wmv", ".qt", ".mov", ".swf", ".mkv",
"mp4", "mpv", "flv", "mp4a", "webm", "mpg", "mp2", "mpeg", "m4p", "m4v", "avi", "wmv", "qt", "mov", "swf", "mkv",
];
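The leading dots disappear from all of these tables because matching moved from str::ends_with on the lowercased file name to Path::extension, which yields the extension without its dot:

use std::path::Path;

fn main() {
    // `extension()` returns the text after the final dot, dot excluded,
    // so the tables now store "mp3" where they used to store ".mp3".
    let path = Path::new("song.mp3");
    assert_eq!(path.extension().and_then(|e| e.to_str()), Some("mp3"));

    // The old matching style needed the dotted form:
    assert!("song.mp3".ends_with(".mp3"));
}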
pub const LOOP_DURATION: u32 = 20; //ms

View file

@ -684,18 +684,3 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
}
}
}
pub fn get_lowercase_name(entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<String> {
let name = match entry_data.file_name().into_string() {
Ok(t) => t,
Err(_inspected) => {
warnings.push(flc!(
"core_file_not_utf8_name",
generate_translation_hashmap(vec![("name", entry_data.path().to_string_lossy().to_string())])
));
return None;
}
}
.to_lowercase();
Some(name)
}

View file

@ -79,14 +79,32 @@ impl Extensions {
}
}
pub fn using_custom_extensions(&self) -> bool {
pub fn set_any_extensions(&self) -> bool {
!self.file_extensions_hashset.is_empty()
}
pub fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) {
fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) {
for extension in file_extensions {
let extension_without_dot = extension.trim_start_matches('.');
self.file_extensions_hashset.insert(extension_without_dot.to_string());
}
}
// E.g. when searching for similar videos, the user can provide extensions like "mp4,flv"; if the user provides "mp4,jpg",
// only "mp4" is kept, because "jpg" is not a valid extension for videos
fn union_allowed_extensions(&mut self, file_extensions: &[&str]) {
    let mut new_extensions = HashSet::new();
    for extension in file_extensions {
        let extension_without_dot = extension.trim_start_matches('.');
        new_extensions.insert(extension_without_dot.to_string());
    }
    // Apply the intersection: keep only the user extensions that are also valid for this tool
    self.file_extensions_hashset.retain(|e| new_extensions.contains(e));
}
pub fn set_and_validate_extensions(&mut self, file_extensions: &[&str]) {
if self.file_extensions_hashset.is_empty() {
self.extend_allowed_extensions(file_extensions);
} else {
self.union_allowed_extensions(file_extensions);
}
}
}
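A behavior sketch of the new entry point, written as a test: with no user-provided extensions the tool defaults are adopted wholesale, otherwise the user's list is intersected with them. This assumes Extensions implements Default and lives in the same module, so the now-private helpers are reachable:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn validates_user_extensions_against_tool_defaults() {
        // Case 1: user provided nothing -> the defaults are adopted.
        let mut allowed = Extensions::default();
        allowed.set_and_validate_extensions(&["mp4", "flv"]);
        assert!(allowed.set_any_extensions());

        // Case 2: user asked for "mp4,jpg" -> only "mp4" survives,
        // because "jpg" is not among the video defaults.
        let mut user = Extensions::default();
        user.extend_allowed_extensions(&["mp4", "jpg"]);
        user.set_and_validate_extensions(&["mp4", "flv"]);
        assert!(user.set_any_extensions());
    }
}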

View file

@ -177,13 +177,9 @@ impl SameMusic {
#[fun_time(message = "check_files", level = "debug")]
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
if !self.common_data.allowed_extensions.using_custom_extensions() {
self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS);
} else {
self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS);
if !self.common_data.allowed_extensions.using_custom_extensions() {
return true;
}
self.common_data.allowed_extensions.set_and_validate_extensions(AUDIO_FILES_EXTENSIONS);
if !self.common_data.allowed_extensions.set_any_extensions() {
return true;
}
let max_stage = match self.check_type {

View file

@ -104,11 +104,11 @@ pub struct SimilarImages {
bktree: BKTree<ImHash, Hamming>,
similar_vectors: Vec<Vec<ImagesEntry>>,
similar_referenced_vectors: Vec<(ImagesEntry, Vec<ImagesEntry>)>,
image_hashes: HashMap<ImHash, Vec<ImagesEntry>>,
// Hashmap with image hashes and vectors of file entries
image_hashes: HashMap<ImHash, Vec<ImagesEntry>>,
similarity: u32,
images_to_check: BTreeMap<String, ImagesEntry>,
pub hash_size: u8, // TODO: remove pub; this is needed by the new GUI, because there is no way to check what exactly was selected
hash_size: u8,
hash_alg: HashAlg,
image_filter: FilterType,
exclude_images_with_same_size: bool,
@ -160,18 +160,17 @@ impl SimilarImages {
#[fun_time(message = "check_for_similar_images", level = "debug")]
fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
if !self.common_data.allowed_extensions.using_custom_extensions() {
self.common_data.allowed_extensions.extend_allowed_extensions(IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS);
self.common_data.allowed_extensions.extend_allowed_extensions(RAW_IMAGE_EXTENSIONS);
#[cfg(feature = "heif")]
self.common_data.allowed_extensions.extend_allowed_extensions(HEIC_EXTENSIONS);
if cfg!(feature = "heif") {
self.common_data
.allowed_extensions
.set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat());
} else {
self.common_data
.allowed_extensions
.extend_allowed_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat());
if !self.common_data.allowed_extensions.using_custom_extensions() {
return true;
}
.set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS].concat());
}
if !self.common_data.allowed_extensions.set_any_extensions() {
return true;
}
let result = DirTraversalBuilder::new()
@ -244,7 +243,7 @@ impl SimilarImages {
// - Join already computed hashes with hashes read from the cache file
// - Join all hashes and save them to file
// #[fun_time(message = "hash_images", level = "debug")]
#[fun_time(message = "hash_images", level = "debug")]
fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.hash_images_load_cache();

View file

@ -1,5 +1,4 @@
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fs::DirEntry;
use std::io::Write;
use std::mem;
use std::path::{Path, PathBuf};
@ -9,16 +8,15 @@ use crossbeam_channel::{Receiver, Sender};
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
use fun_time::fun_time;
use humansize::{format_size, BINARY};
use log::debug;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
use crate::common::{
check_folder_children, check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS,
};
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
@ -27,7 +25,7 @@ use crate::localizer_core::generate_translation_hashmap;
pub const MAX_TOLERANCE: i32 = 20;
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FileEntry {
pub struct VideosEntry {
pub path: PathBuf,
pub size: u64,
pub modified_date: u64,
@ -35,7 +33,7 @@ pub struct FileEntry {
pub error: String,
}
impl ResultEntry for FileEntry {
impl ResultEntry for VideosEntry {
fn get_path(&self) -> &Path {
&self.path
}
@ -47,6 +45,19 @@ impl ResultEntry for FileEntry {
}
}
impl FileEntry {
fn into_videos_entry(self) -> VideosEntry {
VideosEntry {
size: self.size,
path: self.path,
modified_date: self.modified_date,
vhash: Default::default(),
error: String::new(),
}
}
}
struct Hamming;
impl bk_tree::Metric<Vec<u8>> for Hamming {
@ -61,13 +72,15 @@ impl bk_tree::Metric<Vec<u8>> for Hamming {
}
}
const MAX_VIDEOS_STAGE: u8 = 1;
pub struct SimilarVideos {
common_data: CommonToolData,
information: Info,
similar_vectors: Vec<Vec<FileEntry>>,
similar_referenced_vectors: Vec<(FileEntry, Vec<FileEntry>)>,
videos_hashes: BTreeMap<Vec<u8>, Vec<FileEntry>>,
videos_to_check: BTreeMap<String, FileEntry>,
similar_vectors: Vec<Vec<VideosEntry>>,
similar_referenced_vectors: Vec<(VideosEntry, Vec<VideosEntry>)>,
videos_hashes: BTreeMap<Vec<u8>, Vec<VideosEntry>>,
videos_to_check: BTreeMap<String, VideosEntry>,
tolerance: i32,
exclude_videos_with_same_size: bool,
}
@ -128,122 +141,47 @@ impl SimilarVideos {
self.debug_print();
}
#[fun_time(message = "check_for_similar_videos", level = "debug")]
// #[fun_time(message = "check_for_similar_videos", level = "debug")]
fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();
if !self.common_data.allowed_extensions.using_custom_extensions() {
self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS);
} else {
self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS);
if !self.common_data.allowed_extensions.using_custom_extensions() {
return true;
}
self.common_data.allowed_extensions.set_and_validate_extensions(VIDEO_FILES_EXTENSIONS);
if !self.common_data.allowed_extensions.set_any_extensions() {
return true;
}
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type);
let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
.max_stage(MAX_VIDEOS_STAGE)
.build()
.run();
while !folders_to_check.is_empty() {
if check_if_stop_received(stop_receiver) {
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
return false;
}
let segments: Vec<_> = folders_to_check
.into_par_iter()
.map(|current_folder| {
let mut dir_result = vec![];
let mut warnings = vec![];
let mut fe_result = vec![];
let Some(read_dir) = common_read_dir(&current_folder, &mut warnings) else {
return (dir_result, warnings, fe_result);
};
// Check every sub folder/file/link etc.
for entry in read_dir {
let Ok(entry_data) = entry else {
continue;
};
let Ok(file_type) = entry_data.file_type() else {
continue;
};
if file_type.is_dir() {
check_folder_children(
&mut dir_result,
&mut warnings,
&entry_data,
self.common_data.recursive_search,
&self.common_data.directories,
&self.common_data.excluded_items,
);
} else if file_type.is_file() {
atomic_counter.fetch_add(1, Ordering::Relaxed);
self.add_video_file_entry(&entry_data, &mut fe_result, &mut warnings);
}
}
(dir_result, warnings, fe_result)
})
.collect();
let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
folders_to_check = Vec::with_capacity(required_size);
// Process collected data
for (segment, warnings, fe_result) in segments {
folders_to_check.extend(segment);
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.videos_to_check = grouped_file_entries
.into_values()
.flatten()
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
.collect();
self.common_data.text_messages.warnings.extend(warnings);
for (name, fe) in fe_result {
self.videos_to_check.insert(name, fe);
}
debug!("check_files - Found {} video files.", self.videos_to_check.len());
true
}
}
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
true
}
fn add_video_file_entry(&self, entry_data: &DirEntry, fe_result: &mut Vec<(String, FileEntry)>, warnings: &mut Vec<String>) {
if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) {
return;
}
let current_file_name = entry_data.path();
if self.common_data.excluded_items.is_excluded(&current_file_name) {
return;
}
let current_file_name_str = current_file_name.to_string_lossy().to_string();
let Ok(metadata) = entry_data.metadata() else {
return;
};
// Checking files
if (self.common_data.minimal_file_size..=self.common_data.maximal_file_size).contains(&metadata.len()) {
let fe: FileEntry = FileEntry {
size: metadata.len(),
modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
path: current_file_name,
vhash: Default::default(),
error: String::new(),
};
fe_result.push((current_file_name_str, fe));
DirTraversalResult::Stopped => false,
}
}
#[fun_time(message = "load_cache_at_start", level = "debug")]
fn load_cache_at_start(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
fn load_cache_at_start(&mut self) -> (BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>) {
let loaded_hash_map;
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
let mut records_already_cached: BTreeMap<String, VideosEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, VideosEntry> = Default::default();
if self.common_data.use_cache {
let (messages, loaded_items) =
load_cache_from_file_generalized_by_path::<FileEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
load_cache_from_file_generalized_by_path::<VideosEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
self.get_text_messages_mut().extend_with_another_messages(messages);
loaded_hash_map = loaded_items.unwrap_or_default();
@ -268,7 +206,7 @@ impl SimilarVideos {
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
let mut vec_file_entry: Vec<VideosEntry> = non_cached_files_to_check
.par_iter()
.map(|file_entry| {
atomic_counter.fetch_add(1, Ordering::Relaxed);
@ -293,14 +231,14 @@ impl SimilarVideos {
Some(file_entry)
})
.while_some()
.collect::<Vec<FileEntry>>();
.collect::<Vec<VideosEntry>>();
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
// Just connect loaded results with already calculated hashes
vec_file_entry.extend(records_already_cached.into_values());
let mut hashmap_with_file_entries: HashMap<String, FileEntry> = Default::default();
let mut hashmap_with_file_entries: HashMap<String, VideosEntry> = Default::default();
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
for file_entry in &vec_file_entry {
// 0 means that the video was not hashed correctly, e.g. the file may be broken or unreadable
@ -342,10 +280,10 @@ impl SimilarVideos {
}
#[fun_time(message = "save_cache", level = "debug")]
fn save_cache(&mut self, vec_file_entry: Vec<FileEntry>, loaded_hash_map: BTreeMap<String, FileEntry>) {
fn save_cache(&mut self, vec_file_entry: Vec<VideosEntry>, loaded_hash_map: BTreeMap<String, VideosEntry>) {
if self.common_data.use_cache {
// Must save all results to file: results previously loaded from the cache file are merged with all newly computed ones
let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
let mut all_results: BTreeMap<String, VideosEntry> = loaded_hash_map;
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
@ -356,11 +294,11 @@ impl SimilarVideos {
}
#[fun_time(message = "match_groups_of_videos", level = "debug")]
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, FileEntry>) {
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, VideosEntry>) {
let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64));
let mut collected_similar_videos: Vec<Vec<FileEntry>> = Default::default();
let mut collected_similar_videos: Vec<Vec<VideosEntry>> = Default::default();
for i in match_group {
let mut temp_vector: Vec<FileEntry> = Vec::new();
let mut temp_vector: Vec<VideosEntry> = Vec::new();
let mut bt_size: BTreeSet<u64> = Default::default();
for j in i.duplicates() {
let file_entry = hashmap_with_file_entries.get(&j.to_string_lossy().to_string()).unwrap();
@ -397,7 +335,7 @@ impl SimilarVideos {
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
}
})
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
.collect::<Vec<(VideosEntry, Vec<VideosEntry>)>>();
}
}
@ -493,7 +431,7 @@ impl SimilarVideos {
self.tolerance = tolerance;
}
pub const fn get_similar_videos(&self) -> &Vec<Vec<FileEntry>> {
pub const fn get_similar_videos(&self) -> &Vec<Vec<VideosEntry>> {
&self.similar_vectors
}
@ -501,7 +439,7 @@ impl SimilarVideos {
&self.information
}
pub fn get_similar_videos_referenced(&self) -> &Vec<(FileEntry, Vec<FileEntry>)> {
pub fn get_similar_videos_referenced(&self) -> &Vec<(VideosEntry, Vec<VideosEntry>)> {
&self.similar_referenced_vectors
}

View file

@ -12,7 +12,7 @@ use rayon::prelude::*;
use serde::Serialize;
use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
use crate::common_dir_traversal::{common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::*;
@ -142,16 +142,18 @@ impl Temporary {
pub fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<FileEntry> {
atomic_counter.fetch_add(1, Ordering::Relaxed);
let file_name_lowercase = get_lowercase_name(entry_data, warnings)?;
if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) {
return None;
}
let current_file_name = entry_data.path();
if self.common_data.excluded_items.is_excluded(&current_file_name) {
return None;
}
let file_name = entry_data.file_name();
let file_name_ascii_lowercase = file_name.to_ascii_lowercase();
let file_name_lowercase = file_name_ascii_lowercase.to_string_lossy();
if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) {
return None;
}
let Ok(metadata) = entry_data.metadata() else {
return None;
};
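This replaces the removed get_lowercase_name helper, which returned None (with a warning) for any non-UTF-8 file name and thereby skipped the file entirely. Lowercasing the OsStr first and converting lossily afterwards lets such names degrade to U+FFFD replacement characters instead of being dropped. A minimal sketch of the two behaviors; the helper names are illustrative:

use std::ffi::OsStr;

// New approach: never fails; ASCII-only lowercasing is enough for
// extension matching, and non-UTF-8 bytes become U+FFFD.
fn lowercase_name(name: &OsStr) -> String {
    name.to_ascii_lowercase().to_string_lossy().to_string()
}

// Old approach (simplified): bails out on non-UTF-8 names.
fn lowercase_name_old(name: &OsStr) -> Option<String> {
    name.to_str().map(str::to_lowercase)
}

fn main() {
    assert_eq!(lowercase_name(OsStr::new("FOO.TMP")), "foo.tmp");
    assert_eq!(lowercase_name_old(OsStr::new("FOO.TMP")), Some("foo.tmp".into()));
}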

View file

@ -214,7 +214,7 @@ pub fn connect_settings(gui_data: &GuiData) {
dialog.connect_response(move |dialog, response_type| {
if response_type == ResponseType::Ok {
let (mut messages, loaded_items) =
load_cache_from_file_generalized_by_path::<czkawka_core::similar_videos::FileEntry>(&get_similar_videos_cache_file(), true, &Default::default());
load_cache_from_file_generalized_by_path::<czkawka_core::similar_videos::VideosEntry>(&get_similar_videos_cache_file(), true, &Default::default());
if let Some(cache_entries) = loaded_items {
let save_messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &cache_entries, false, 0);

View file

@ -78,7 +78,7 @@ fn scan_similar_images(a: Weak<MainWindow>, progress_sender: Sender<ProgressData
vec_fe.sort_unstable_by_key(|e| e.similarity);
}
let hash_size = finder.hash_size;
let hash_size = custom_settings.similar_images_sub_hash_size;
a.upgrade_in_event_loop(move |app| {
write_similar_images_results(&app, vector, messages, hash_size);