Unifying finding items
This commit is contained in:
parent
da1797cb55
commit
f528e77aef
10 changed files with 216 additions and 306 deletions
|
@ -1,10 +1,9 @@
|
||||||
use std::collections::BTreeMap;
|
use std::collections::{BTreeMap, HashSet};
|
||||||
use std::fs::{DirEntry, File};
|
use std::fs::File;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
|
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::Ordering;
|
||||||
use std::sync::Arc;
|
|
||||||
use std::{fs, mem, panic};
|
use std::{fs, mem, panic};
|
||||||
|
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
|
@ -18,23 +17,23 @@ use rayon::prelude::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::common::{
|
use crate::common::{
|
||||||
check_folder_children, check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
||||||
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
|
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
|
||||||
};
|
};
|
||||||
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||||
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
|
|
||||||
#[derive(Clone, Serialize, Deserialize, Debug)]
|
#[derive(Clone, Serialize, Deserialize, Debug)]
|
||||||
pub struct FileEntry {
|
pub struct BrokenEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub modified_date: u64,
|
pub modified_date: u64,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub type_of_file: TypeOfFile,
|
pub type_of_file: TypeOfFile,
|
||||||
pub error_string: String,
|
pub error_string: String,
|
||||||
}
|
}
|
||||||
impl ResultEntry for FileEntry {
|
impl ResultEntry for BrokenEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
@ -46,6 +45,19 @@ impl ResultEntry for FileEntry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FileEntry {
|
||||||
|
fn into_broken_entry(self) -> BrokenEntry {
|
||||||
|
BrokenEntry {
|
||||||
|
size: self.size,
|
||||||
|
path: self.path,
|
||||||
|
modified_date: self.modified_date,
|
||||||
|
|
||||||
|
type_of_file: TypeOfFile::Unknown,
|
||||||
|
error_string: String::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
|
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
|
||||||
pub enum TypeOfFile {
|
pub enum TypeOfFile {
|
||||||
Unknown = -1,
|
Unknown = -1,
|
||||||
|
@ -55,6 +67,8 @@ pub enum TypeOfFile {
|
||||||
PDF,
|
PDF,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const MAX_BROKEN_FILES_STAGE: u8 = 1;
|
||||||
|
|
||||||
bitflags! {
|
bitflags! {
|
||||||
#[derive(PartialEq, Copy, Clone)]
|
#[derive(PartialEq, Copy, Clone)]
|
||||||
pub struct CheckedTypes : u32 {
|
pub struct CheckedTypes : u32 {
|
||||||
|
@ -75,8 +89,8 @@ pub struct Info {
|
||||||
pub struct BrokenFiles {
|
pub struct BrokenFiles {
|
||||||
common_data: CommonToolData,
|
common_data: CommonToolData,
|
||||||
information: Info,
|
information: Info,
|
||||||
files_to_check: BTreeMap<String, FileEntry>,
|
files_to_check: BTreeMap<String, BrokenEntry>,
|
||||||
broken_files: Vec<FileEntry>,
|
broken_files: Vec<BrokenEntry>,
|
||||||
checked_types: CheckedTypes,
|
checked_types: CheckedTypes,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -108,108 +122,59 @@ impl BrokenFiles {
|
||||||
|
|
||||||
#[fun_time(message = "check_files", level = "debug")]
|
#[fun_time(message = "check_files", level = "debug")]
|
||||||
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||||
let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();
|
let zip_extensions = ZIP_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
|
||||||
|
let audio_extensions = AUDIO_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
|
||||||
|
let pdf_extensions = PDF_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
|
||||||
|
let images_extensions = IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
|
||||||
|
|
||||||
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
|
let mut extensions = Vec::new();
|
||||||
prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type);
|
let vec_extensions = [
|
||||||
|
(CheckedTypes::PDF, PDF_FILES_EXTENSIONS),
|
||||||
debug!("check_files - starting to collect files");
|
(CheckedTypes::AUDIO, AUDIO_FILES_EXTENSIONS),
|
||||||
while !folders_to_check.is_empty() {
|
(CheckedTypes::ARCHIVE, ZIP_FILES_EXTENSIONS),
|
||||||
if check_if_stop_received(stop_receiver) {
|
(CheckedTypes::IMAGE, IMAGE_RS_BROKEN_FILES_EXTENSIONS),
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
];
|
||||||
return false;
|
for (checked_type, extensions_to_add) in &vec_extensions {
|
||||||
}
|
if self.checked_types.contains(*checked_type) {
|
||||||
|
extensions.extend_from_slice(extensions_to_add);
|
||||||
let segments: Vec<_> = folders_to_check
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|current_folder| {
|
|
||||||
let mut dir_result = vec![];
|
|
||||||
let mut warnings = vec![];
|
|
||||||
let mut fe_result = vec![];
|
|
||||||
|
|
||||||
let Some(read_dir) = common_read_dir(&current_folder, &mut warnings) else {
|
|
||||||
return (dir_result, warnings, fe_result);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check every sub folder/file/link etc.
|
|
||||||
for entry in read_dir {
|
|
||||||
let Ok(entry_data) = entry else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
let Ok(file_type) = entry_data.file_type() else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
if file_type.is_dir() {
|
|
||||||
check_folder_children(
|
|
||||||
&mut dir_result,
|
|
||||||
&mut warnings,
|
|
||||||
&entry_data,
|
|
||||||
self.common_data.recursive_search,
|
|
||||||
&self.common_data.directories,
|
|
||||||
&self.common_data.excluded_items,
|
|
||||||
);
|
|
||||||
} else if file_type.is_file() {
|
|
||||||
if let Some(file_entry) = self.get_file_entry(&atomic_counter, &entry_data, &mut warnings) {
|
|
||||||
fe_result.push((file_entry.path.to_string_lossy().to_string(), file_entry));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.common_data.allowed_extensions.set_and_validate_extensions(&extensions);
|
||||||
|
if !self.common_data.allowed_extensions.set_any_extensions() {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
(dir_result, warnings, fe_result)
|
|
||||||
|
let result = DirTraversalBuilder::new()
|
||||||
|
.group_by(|_fe| ())
|
||||||
|
.stop_receiver(stop_receiver)
|
||||||
|
.progress_sender(progress_sender)
|
||||||
|
.common_data(&self.common_data)
|
||||||
|
.max_stage(MAX_BROKEN_FILES_STAGE)
|
||||||
|
.build()
|
||||||
|
.run();
|
||||||
|
|
||||||
|
match result {
|
||||||
|
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
||||||
|
self.broken_files = grouped_file_entries
|
||||||
|
.into_values()
|
||||||
|
.flatten()
|
||||||
|
.map(|fe| {
|
||||||
|
let mut broken_entry = fe.into_broken_entry();
|
||||||
|
broken_entry.type_of_file = check_extension_availability(broken_entry.get_path(), &images_extensions, &zip_extensions, &audio_extensions, &pdf_extensions);
|
||||||
|
broken_entry
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
debug!("check_files - collected files");
|
|
||||||
|
|
||||||
let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
|
|
||||||
folders_to_check = Vec::with_capacity(required_size);
|
|
||||||
|
|
||||||
// Process collected data
|
|
||||||
for (segment, warnings, fe_result) in segments {
|
|
||||||
folders_to_check.extend(segment);
|
|
||||||
self.common_data.text_messages.warnings.extend(warnings);
|
self.common_data.text_messages.warnings.extend(warnings);
|
||||||
for (name, fe) in fe_result {
|
debug!("check_files - Found {} image files.", self.broken_files.len());
|
||||||
self.files_to_check.insert(name, fe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<FileEntry> {
|
DirTraversalResult::Stopped => false,
|
||||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
}
|
||||||
if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) {
|
|
||||||
return None;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let file_name_lowercase = entry_data.file_name().to_string_lossy().to_lowercase();
|
fn check_broken_image(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
||||||
let type_of_file = check_extension_availability(&file_name_lowercase);
|
|
||||||
|
|
||||||
if !check_if_file_extension_is_allowed(&type_of_file, &self.checked_types) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let current_file_name = entry_data.path();
|
|
||||||
if self.common_data.excluded_items.is_excluded(&current_file_name) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Ok(metadata) = entry_data.metadata() else {
|
|
||||||
return None;
|
|
||||||
};
|
|
||||||
|
|
||||||
let fe: FileEntry = FileEntry {
|
|
||||||
modified_date: get_modified_time(&metadata, warnings, &current_file_name, false),
|
|
||||||
path: current_file_name,
|
|
||||||
size: metadata.len(),
|
|
||||||
type_of_file,
|
|
||||||
error_string: String::new(),
|
|
||||||
};
|
|
||||||
Some(fe)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn check_broken_image(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
|
|
||||||
let mut file_entry_clone = file_entry.clone();
|
let mut file_entry_clone = file_entry.clone();
|
||||||
|
|
||||||
panic::catch_unwind(|| {
|
panic::catch_unwind(|| {
|
||||||
|
@ -230,7 +195,7 @@ impl BrokenFiles {
|
||||||
Some(file_entry_clone)
|
Some(file_entry_clone)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
fn check_broken_zip(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
|
fn check_broken_zip(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
||||||
match File::open(&file_entry.path) {
|
match File::open(&file_entry.path) {
|
||||||
Ok(file) => {
|
Ok(file) => {
|
||||||
if let Err(e) = zip::ZipArchive::new(file) {
|
if let Err(e) = zip::ZipArchive::new(file) {
|
||||||
|
@ -241,7 +206,7 @@ impl BrokenFiles {
|
||||||
Err(_inspected) => None,
|
Err(_inspected) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn check_broken_audio(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
|
fn check_broken_audio(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
||||||
match File::open(&file_entry.path) {
|
match File::open(&file_entry.path) {
|
||||||
Ok(file) => {
|
Ok(file) => {
|
||||||
let mut file_entry_clone = file_entry.clone();
|
let mut file_entry_clone = file_entry.clone();
|
||||||
|
@ -262,7 +227,7 @@ impl BrokenFiles {
|
||||||
Err(_inspected) => None,
|
Err(_inspected) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn check_broken_pdf(&self, mut file_entry: FileEntry) -> Option<FileEntry> {
|
fn check_broken_pdf(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
||||||
let parser_options = ParseOptions::tolerant(); // Only show as broken files with really big bugs
|
let parser_options = ParseOptions::tolerant(); // Only show as broken files with really big bugs
|
||||||
|
|
||||||
let mut file_entry_clone = file_entry.clone();
|
let mut file_entry_clone = file_entry.clone();
|
||||||
|
@ -300,15 +265,16 @@ impl BrokenFiles {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[fun_time(message = "load_cache", level = "debug")]
|
#[fun_time(message = "load_cache", level = "debug")]
|
||||||
fn load_cache(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
|
fn load_cache(&mut self) -> (BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>) {
|
||||||
let loaded_hash_map;
|
let loaded_hash_map;
|
||||||
|
|
||||||
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
|
let mut records_already_cached: BTreeMap<String, BrokenEntry> = Default::default();
|
||||||
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
let mut non_cached_files_to_check: BTreeMap<String, BrokenEntry> = Default::default();
|
||||||
let files_to_check = mem::take(&mut self.files_to_check);
|
let files_to_check = mem::take(&mut self.files_to_check);
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
let (messages, loaded_items) = load_cache_from_file_generalized_by_path::<FileEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
|
let (messages, loaded_items) =
|
||||||
|
load_cache_from_file_generalized_by_path::<BrokenEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
|
||||||
self.get_text_messages_mut().extend_with_another_messages(messages);
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
loaded_hash_map = loaded_items.unwrap_or_default();
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
|
@ -334,7 +300,7 @@ impl BrokenFiles {
|
||||||
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
||||||
|
|
||||||
debug!("look_for_broken_files - started finding for broken files");
|
debug!("look_for_broken_files - started finding for broken files");
|
||||||
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
|
let mut vec_file_entry: Vec<BrokenEntry> = non_cached_files_to_check
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(|(_, file_entry)| {
|
.map(|(_, file_entry)| {
|
||||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
@ -354,7 +320,7 @@ impl BrokenFiles {
|
||||||
.while_some()
|
.while_some()
|
||||||
.filter(Option::is_some)
|
.filter(Option::is_some)
|
||||||
.map(Option::unwrap)
|
.map(Option::unwrap)
|
||||||
.collect::<Vec<FileEntry>>();
|
.collect::<Vec<BrokenEntry>>();
|
||||||
debug!("look_for_broken_files - ended finding for broken files");
|
debug!("look_for_broken_files - ended finding for broken files");
|
||||||
|
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
||||||
|
@ -377,10 +343,10 @@ impl BrokenFiles {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
#[fun_time(message = "save_to_cache", level = "debug")]
|
#[fun_time(message = "save_to_cache", level = "debug")]
|
||||||
fn save_to_cache(&mut self, vec_file_entry: &[FileEntry], loaded_hash_map: BTreeMap<String, FileEntry>) {
|
fn save_to_cache(&mut self, vec_file_entry: &[BrokenEntry], loaded_hash_map: BTreeMap<String, BrokenEntry>) {
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
// Must save all results to file, old loaded from file with all currently counted results
|
// Must save all results to file, old loaded from file with all currently counted results
|
||||||
let mut all_results: BTreeMap<String, FileEntry> = Default::default();
|
let mut all_results: BTreeMap<String, BrokenEntry> = Default::default();
|
||||||
|
|
||||||
for file_entry in vec_file_entry.iter().cloned() {
|
for file_entry in vec_file_entry.iter().cloned() {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
|
@ -415,7 +381,7 @@ impl BrokenFiles {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BrokenFiles {
|
impl BrokenFiles {
|
||||||
pub const fn get_broken_files(&self) -> &Vec<FileEntry> {
|
pub const fn get_broken_files(&self) -> &Vec<BrokenEntry> {
|
||||||
&self.broken_files
|
&self.broken_files
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -469,27 +435,36 @@ impl PrintResults for BrokenFiles {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile {
|
fn check_extension_availability(
|
||||||
if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
full_name: &Path,
|
||||||
|
images_extensions: &HashSet<&&'static str>,
|
||||||
|
zip_extensions: &HashSet<&&'static str>,
|
||||||
|
audio_extensions: &HashSet<&&'static str>,
|
||||||
|
pdf_extensions: &HashSet<&&'static str>,
|
||||||
|
) -> TypeOfFile {
|
||||||
|
let Some(extension) = full_name.extension() else {
|
||||||
|
debug_assert!(false, "Missing extension");
|
||||||
|
return TypeOfFile::Unknown;
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some(extension_str) = extension.to_str() else {
|
||||||
|
return TypeOfFile::Unknown;
|
||||||
|
};
|
||||||
|
|
||||||
|
if images_extensions.contains(&extension_str) {
|
||||||
TypeOfFile::Image
|
TypeOfFile::Image
|
||||||
} else if ZIP_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
} else if zip_extensions.contains(&extension_str) {
|
||||||
TypeOfFile::ArchiveZip
|
TypeOfFile::ArchiveZip
|
||||||
} else if AUDIO_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
} else if audio_extensions.contains(&extension_str) {
|
||||||
TypeOfFile::Audio
|
TypeOfFile::Audio
|
||||||
} else if PDF_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) {
|
} else if pdf_extensions.contains(&extension_str) {
|
||||||
TypeOfFile::PDF
|
TypeOfFile::PDF
|
||||||
} else {
|
} else {
|
||||||
|
debug_assert!(false, "File with unknown extension");
|
||||||
TypeOfFile::Unknown
|
TypeOfFile::Unknown
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn check_if_file_extension_is_allowed(type_of_file: &TypeOfFile, checked_types: &CheckedTypes) -> bool {
|
|
||||||
((*type_of_file == TypeOfFile::Image) && ((*checked_types & CheckedTypes::IMAGE) == CheckedTypes::IMAGE))
|
|
||||||
|| ((*type_of_file == TypeOfFile::PDF) && ((*checked_types & CheckedTypes::PDF) == CheckedTypes::PDF))
|
|
||||||
|| ((*type_of_file == TypeOfFile::ArchiveZip) && ((*checked_types & CheckedTypes::ARCHIVE) == CheckedTypes::ARCHIVE))
|
|
||||||
|| ((*type_of_file == TypeOfFile::Audio) && ((*checked_types & CheckedTypes::AUDIO) == CheckedTypes::AUDIO))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn unpack_pdf_error(e: PdfError) -> PdfError {
|
fn unpack_pdf_error(e: PdfError) -> PdfError {
|
||||||
if let Try {
|
if let Try {
|
||||||
file: _,
|
file: _,
|
||||||
|
@ -505,7 +480,7 @@ fn unpack_pdf_error(e: PdfError) -> PdfError {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError {
|
fn validate_pdf_error(file_entry: &mut BrokenEntry, e: PdfError) -> PdfError {
|
||||||
let mut error_string = e.to_string();
|
let mut error_string = e.to_string();
|
||||||
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
|
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
|
||||||
if error_string.starts_with("Try at") {
|
if error_string.starts_with("Try at") {
|
||||||
|
|
|
@ -123,30 +123,27 @@ pub fn set_number_of_threads(thread_number: usize) {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const RAW_IMAGE_EXTENSIONS: &[&str] = &[
|
pub const RAW_IMAGE_EXTENSIONS: &[&str] = &[
|
||||||
".mrw", ".arw", ".srf", ".sr2", ".mef", ".orf", ".srw", ".erf", ".kdc", ".kdc", ".dcs", ".rw2", ".raf", ".dcr", ".dng", ".pef", ".crw", ".iiq", ".3fr", ".nrw", ".nef", ".mos",
|
"mrw", "arw", "srf", "sr2", "mef", "orf", "srw", "erf", "kdc", "kdc", "dcs", "rw2", "raf", "dcr", "dng", "pef", "crw", "iiq", "3fr", "nrw", "nef", "mos", "cr2", "ari",
|
||||||
".cr2", ".ari",
|
|
||||||
];
|
|
||||||
pub const IMAGE_RS_EXTENSIONS: &[&str] = &[
|
|
||||||
".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".webp", ".gif", ".ico", ".exr", ".qoi",
|
|
||||||
];
|
];
|
||||||
|
pub const IMAGE_RS_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "bmp", "tiff", "tif", "tga", "ff", "jif", "jfi", "webp", "gif", "ico", "exr", "qoi"];
|
||||||
|
|
||||||
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &[".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".bmp", ".webp", ".exr", ".qoi"];
|
pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "bmp", "webp", "exr", "qoi"];
|
||||||
|
|
||||||
pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[
|
pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[
|
||||||
".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".gif", ".bmp", ".ico", ".jfif", ".jpe", ".pnz", ".dib", ".webp", ".exr",
|
"jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "gif", "bmp", "ico", "jfif", "jpe", "pnz", "dib", "webp", "exr",
|
||||||
];
|
];
|
||||||
pub const HEIC_EXTENSIONS: &[&str] = &[".heif", ".heifs", ".heic", ".heics", ".avci", ".avcs", ".avifs"];
|
pub const HEIC_EXTENSIONS: &[&str] = &["heif", "heifs", "heic", "heics", "avci", "avcs", "avifs"];
|
||||||
|
|
||||||
pub const ZIP_FILES_EXTENSIONS: &[&str] = &[".zip", ".jar"];
|
pub const ZIP_FILES_EXTENSIONS: &[&str] = &["zip", "jar"];
|
||||||
|
|
||||||
pub const PDF_FILES_EXTENSIONS: &[&str] = &[".pdf"];
|
pub const PDF_FILES_EXTENSIONS: &[&str] = &["pdf"];
|
||||||
|
|
||||||
pub const AUDIO_FILES_EXTENSIONS: &[&str] = &[
|
pub const AUDIO_FILES_EXTENSIONS: &[&str] = &[
|
||||||
".mp3", ".flac", ".wav", ".ogg", ".m4a", ".aac", ".aiff", ".pcm", ".aif", ".aiff", ".aifc", ".m3a", ".mp2", ".mp4a", ".mp2a", ".mpga", ".wave", ".weba", ".wma", ".oga",
|
"mp3", "flac", "wav", "ogg", "m4a", "aac", "aiff", "pcm", "aif", "aiff", "aifc", "m3a", "mp2", "mp4a", "mp2a", "mpga", "wave", "weba", "wma", "oga",
|
||||||
];
|
];
|
||||||
|
|
||||||
pub const VIDEO_FILES_EXTENSIONS: &[&str] = &[
|
pub const VIDEO_FILES_EXTENSIONS: &[&str] = &[
|
||||||
".mp4", ".mpv", ".flv", ".mp4a", ".webm", ".mpg", ".mp2", ".mpeg", ".m4p", ".m4v", ".avi", ".wmv", ".qt", ".mov", ".swf", ".mkv",
|
"mp4", "mpv", "flv", "mp4a", "webm", "mpg", "mp2", "mpeg", "m4p", "m4v", "avi", "wmv", "qt", "mov", "swf", "mkv",
|
||||||
];
|
];
|
||||||
|
|
||||||
pub const LOOP_DURATION: u32 = 20; //ms
|
pub const LOOP_DURATION: u32 = 20; //ms
|
||||||
|
|
|
@ -684,18 +684,3 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_lowercase_name(entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<String> {
|
|
||||||
let name = match entry_data.file_name().into_string() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(_inspected) => {
|
|
||||||
warnings.push(flc!(
|
|
||||||
"core_file_not_utf8_name",
|
|
||||||
generate_translation_hashmap(vec![("name", entry_data.path().to_string_lossy().to_string())])
|
|
||||||
));
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
.to_lowercase();
|
|
||||||
Some(name)
|
|
||||||
}
|
|
||||||
|
|
|
@ -79,14 +79,32 @@ impl Extensions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn using_custom_extensions(&self) -> bool {
|
pub fn set_any_extensions(&self) -> bool {
|
||||||
!self.file_extensions_hashset.is_empty()
|
!self.file_extensions_hashset.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) {
|
fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) {
|
||||||
for extension in file_extensions {
|
for extension in file_extensions {
|
||||||
let extension_without_dot = extension.trim_start_matches('.');
|
let extension_without_dot = extension.trim_start_matches('.');
|
||||||
self.file_extensions_hashset.insert(extension_without_dot.to_string());
|
self.file_extensions_hashset.insert(extension_without_dot.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// E.g. when using similar videos, user can provide extensions like "mp4,flv", but if user provide "mp4,jpg" then
|
||||||
|
// it will be only "mp4" because "jpg" is not valid extension for videos
|
||||||
|
fn union_allowed_extensions(&mut self, file_extensions: &[&str]) {
|
||||||
|
let mut new_extensions = HashSet::new();
|
||||||
|
for extension in file_extensions {
|
||||||
|
let extension_without_dot = extension.trim_start_matches('.');
|
||||||
|
new_extensions.insert(extension_without_dot.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_and_validate_extensions(&mut self, file_extensions: &[&str]) {
|
||||||
|
if self.file_extensions_hashset.is_empty() {
|
||||||
|
self.extend_allowed_extensions(file_extensions);
|
||||||
|
} else {
|
||||||
|
self.union_allowed_extensions(file_extensions);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -177,14 +177,10 @@ impl SameMusic {
|
||||||
|
|
||||||
#[fun_time(message = "check_files", level = "debug")]
|
#[fun_time(message = "check_files", level = "debug")]
|
||||||
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
self.common_data.allowed_extensions.set_and_validate_extensions(AUDIO_FILES_EXTENSIONS);
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS);
|
if !self.common_data.allowed_extensions.set_any_extensions() {
|
||||||
} else {
|
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS);
|
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
let max_stage = match self.check_type {
|
let max_stage = match self.check_type {
|
||||||
CheckingMethod::AudioTags => MAX_STAGE_TAGS,
|
CheckingMethod::AudioTags => MAX_STAGE_TAGS,
|
||||||
|
|
|
@ -104,11 +104,11 @@ pub struct SimilarImages {
|
||||||
bktree: BKTree<ImHash, Hamming>,
|
bktree: BKTree<ImHash, Hamming>,
|
||||||
similar_vectors: Vec<Vec<ImagesEntry>>,
|
similar_vectors: Vec<Vec<ImagesEntry>>,
|
||||||
similar_referenced_vectors: Vec<(ImagesEntry, Vec<ImagesEntry>)>,
|
similar_referenced_vectors: Vec<(ImagesEntry, Vec<ImagesEntry>)>,
|
||||||
image_hashes: HashMap<ImHash, Vec<ImagesEntry>>,
|
|
||||||
// Hashmap with image hashes and Vector with names of files
|
// Hashmap with image hashes and Vector with names of files
|
||||||
|
image_hashes: HashMap<ImHash, Vec<ImagesEntry>>,
|
||||||
similarity: u32,
|
similarity: u32,
|
||||||
images_to_check: BTreeMap<String, ImagesEntry>,
|
images_to_check: BTreeMap<String, ImagesEntry>,
|
||||||
pub hash_size: u8, // TODO to remove pub, this is needeed by new gui, because there is no way to check what exactly was seelected
|
hash_size: u8,
|
||||||
hash_alg: HashAlg,
|
hash_alg: HashAlg,
|
||||||
image_filter: FilterType,
|
image_filter: FilterType,
|
||||||
exclude_images_with_same_size: bool,
|
exclude_images_with_same_size: bool,
|
||||||
|
@ -160,18 +160,17 @@ impl SimilarImages {
|
||||||
|
|
||||||
#[fun_time(message = "check_for_similar_images", level = "debug")]
|
#[fun_time(message = "check_for_similar_images", level = "debug")]
|
||||||
fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
if cfg!(feature = "heif") {
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS);
|
self.common_data
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(RAW_IMAGE_EXTENSIONS);
|
.allowed_extensions
|
||||||
#[cfg(feature = "heif")]
|
.set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat());
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(HEIC_EXTENSIONS);
|
|
||||||
} else {
|
} else {
|
||||||
self.common_data
|
self.common_data
|
||||||
.allowed_extensions
|
.allowed_extensions
|
||||||
.extend_allowed_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat());
|
.set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS].concat());
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
if !self.common_data.allowed_extensions.set_any_extensions() {
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
let result = DirTraversalBuilder::new()
|
let result = DirTraversalBuilder::new()
|
||||||
|
@ -244,7 +243,7 @@ impl SimilarImages {
|
||||||
// - Join already read hashes with hashes which were read from file
|
// - Join already read hashes with hashes which were read from file
|
||||||
// - Join all hashes and save it to file
|
// - Join all hashes and save it to file
|
||||||
|
|
||||||
// #[fun_time(message = "hash_images", level = "debug")]
|
#[fun_time(message = "hash_images", level = "debug")]
|
||||||
fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
fn hash_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||||
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.hash_images_load_cache();
|
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.hash_images_load_cache();
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
||||||
use std::fs::DirEntry;
|
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
@ -9,16 +8,15 @@ use crossbeam_channel::{Receiver, Sender};
|
||||||
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
|
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
|
||||||
use fun_time::fun_time;
|
use fun_time::fun_time;
|
||||||
use humansize::{format_size, BINARY};
|
use humansize::{format_size, BINARY};
|
||||||
|
use log::debug;
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
||||||
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
||||||
|
|
||||||
use crate::common::{
|
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
|
||||||
check_folder_children, check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS,
|
|
||||||
};
|
|
||||||
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||||
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||||
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
||||||
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
|
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
|
||||||
use crate::flc;
|
use crate::flc;
|
||||||
|
@ -27,7 +25,7 @@ use crate::localizer_core::generate_translation_hashmap;
|
||||||
pub const MAX_TOLERANCE: i32 = 20;
|
pub const MAX_TOLERANCE: i32 = 20;
|
||||||
|
|
||||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct FileEntry {
|
pub struct VideosEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
pub modified_date: u64,
|
pub modified_date: u64,
|
||||||
|
@ -35,7 +33,7 @@ pub struct FileEntry {
|
||||||
pub error: String,
|
pub error: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ResultEntry for FileEntry {
|
impl ResultEntry for VideosEntry {
|
||||||
fn get_path(&self) -> &Path {
|
fn get_path(&self) -> &Path {
|
||||||
&self.path
|
&self.path
|
||||||
}
|
}
|
||||||
|
@ -47,6 +45,19 @@ impl ResultEntry for FileEntry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl FileEntry {
|
||||||
|
fn into_videos_entry(self) -> VideosEntry {
|
||||||
|
VideosEntry {
|
||||||
|
size: self.size,
|
||||||
|
path: self.path,
|
||||||
|
modified_date: self.modified_date,
|
||||||
|
|
||||||
|
vhash: Default::default(),
|
||||||
|
error: String::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct Hamming;
|
struct Hamming;
|
||||||
|
|
||||||
impl bk_tree::Metric<Vec<u8>> for Hamming {
|
impl bk_tree::Metric<Vec<u8>> for Hamming {
|
||||||
|
@ -61,13 +72,15 @@ impl bk_tree::Metric<Vec<u8>> for Hamming {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const MAX_VIDEOS_STAGE: u8 = 1;
|
||||||
|
|
||||||
pub struct SimilarVideos {
|
pub struct SimilarVideos {
|
||||||
common_data: CommonToolData,
|
common_data: CommonToolData,
|
||||||
information: Info,
|
information: Info,
|
||||||
similar_vectors: Vec<Vec<FileEntry>>,
|
similar_vectors: Vec<Vec<VideosEntry>>,
|
||||||
similar_referenced_vectors: Vec<(FileEntry, Vec<FileEntry>)>,
|
similar_referenced_vectors: Vec<(VideosEntry, Vec<VideosEntry>)>,
|
||||||
videos_hashes: BTreeMap<Vec<u8>, Vec<FileEntry>>,
|
videos_hashes: BTreeMap<Vec<u8>, Vec<VideosEntry>>,
|
||||||
videos_to_check: BTreeMap<String, FileEntry>,
|
videos_to_check: BTreeMap<String, VideosEntry>,
|
||||||
tolerance: i32,
|
tolerance: i32,
|
||||||
exclude_videos_with_same_size: bool,
|
exclude_videos_with_same_size: bool,
|
||||||
}
|
}
|
||||||
|
@ -128,122 +141,47 @@ impl SimilarVideos {
|
||||||
self.debug_print();
|
self.debug_print();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[fun_time(message = "check_for_similar_videos", level = "debug")]
|
// #[fun_time(message = "check_for_similar_videos", level = "debug")]
|
||||||
fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||||
let mut folders_to_check: Vec<PathBuf> = self.common_data.directories.included_directories.clone();
|
self.common_data.allowed_extensions.set_and_validate_extensions(VIDEO_FILES_EXTENSIONS);
|
||||||
|
if !self.common_data.allowed_extensions.set_any_extensions() {
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS);
|
|
||||||
} else {
|
|
||||||
self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS);
|
|
||||||
if !self.common_data.allowed_extensions.using_custom_extensions() {
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
|
let result = DirTraversalBuilder::new()
|
||||||
prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type);
|
.group_by(|_fe| ())
|
||||||
|
.stop_receiver(stop_receiver)
|
||||||
|
.progress_sender(progress_sender)
|
||||||
|
.common_data(&self.common_data)
|
||||||
|
.max_stage(MAX_VIDEOS_STAGE)
|
||||||
|
.build()
|
||||||
|
.run();
|
||||||
|
|
||||||
while !folders_to_check.is_empty() {
|
match result {
|
||||||
if check_if_stop_received(stop_receiver) {
|
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
self.videos_to_check = grouped_file_entries
|
||||||
return false;
|
.into_values()
|
||||||
}
|
.flatten()
|
||||||
|
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
|
||||||
let segments: Vec<_> = folders_to_check
|
|
||||||
.into_par_iter()
|
|
||||||
.map(|current_folder| {
|
|
||||||
let mut dir_result = vec![];
|
|
||||||
let mut warnings = vec![];
|
|
||||||
let mut fe_result = vec![];
|
|
||||||
|
|
||||||
let Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else {
|
|
||||||
return (dir_result, warnings, fe_result);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check every sub folder/file/link etc.
|
|
||||||
for entry in read_dir {
|
|
||||||
let Ok(entry_data) = entry else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
let Ok(file_type) = entry_data.file_type() else {
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
|
|
||||||
if file_type.is_dir() {
|
|
||||||
check_folder_children(
|
|
||||||
&mut dir_result,
|
|
||||||
&mut warnings,
|
|
||||||
&entry_data,
|
|
||||||
self.common_data.recursive_search,
|
|
||||||
&self.common_data.directories,
|
|
||||||
&self.common_data.excluded_items,
|
|
||||||
);
|
|
||||||
} else if file_type.is_file() {
|
|
||||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
||||||
self.add_video_file_entry(&entry_data, &mut fe_result, &mut warnings);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
(dir_result, warnings, fe_result)
|
|
||||||
})
|
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::<usize>();
|
|
||||||
folders_to_check = Vec::with_capacity(required_size);
|
|
||||||
|
|
||||||
// Process collected data
|
|
||||||
for (segment, warnings, fe_result) in segments {
|
|
||||||
folders_to_check.extend(segment);
|
|
||||||
self.common_data.text_messages.warnings.extend(warnings);
|
self.common_data.text_messages.warnings.extend(warnings);
|
||||||
for (name, fe) in fe_result {
|
debug!("check_files - Found {} video files.", self.videos_to_check.len());
|
||||||
self.videos_to_check.insert(name, fe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
||||||
|
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_video_file_entry(&self, entry_data: &DirEntry, fe_result: &mut Vec<(String, FileEntry)>, warnings: &mut Vec<String>) {
|
DirTraversalResult::Stopped => false,
|
||||||
if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let current_file_name = entry_data.path();
|
|
||||||
if self.common_data.excluded_items.is_excluded(¤t_file_name) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let current_file_name_str = current_file_name.to_string_lossy().to_string();
|
|
||||||
|
|
||||||
let Ok(metadata) = entry_data.metadata() else {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Checking files
|
|
||||||
if (self.common_data.minimal_file_size..=self.common_data.maximal_file_size).contains(&metadata.len()) {
|
|
||||||
let fe: FileEntry = FileEntry {
|
|
||||||
size: metadata.len(),
|
|
||||||
modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false),
|
|
||||||
path: current_file_name,
|
|
||||||
vhash: Default::default(),
|
|
||||||
error: String::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
fe_result.push((current_file_name_str, fe));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[fun_time(message = "load_cache_at_start", level = "debug")]
|
#[fun_time(message = "load_cache_at_start", level = "debug")]
|
||||||
fn load_cache_at_start(&mut self) -> (BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>, BTreeMap<String, FileEntry>) {
|
fn load_cache_at_start(&mut self) -> (BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>) {
|
||||||
let loaded_hash_map;
|
let loaded_hash_map;
|
||||||
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
|
let mut records_already_cached: BTreeMap<String, VideosEntry> = Default::default();
|
||||||
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
|
let mut non_cached_files_to_check: BTreeMap<String, VideosEntry> = Default::default();
|
||||||
|
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
let (messages, loaded_items) =
|
let (messages, loaded_items) =
|
||||||
load_cache_from_file_generalized_by_path::<FileEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
|
load_cache_from_file_generalized_by_path::<VideosEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
|
||||||
self.get_text_messages_mut().extend_with_another_messages(messages);
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||||
loaded_hash_map = loaded_items.unwrap_or_default();
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||||
|
|
||||||
|
@ -268,7 +206,7 @@ impl SimilarVideos {
|
||||||
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
||||||
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
||||||
|
|
||||||
let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
|
let mut vec_file_entry: Vec<VideosEntry> = non_cached_files_to_check
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(|file_entry| {
|
.map(|file_entry| {
|
||||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
@ -293,14 +231,14 @@ impl SimilarVideos {
|
||||||
Some(file_entry)
|
Some(file_entry)
|
||||||
})
|
})
|
||||||
.while_some()
|
.while_some()
|
||||||
.collect::<Vec<FileEntry>>();
|
.collect::<Vec<VideosEntry>>();
|
||||||
|
|
||||||
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
||||||
|
|
||||||
// Just connect loaded results with already calculated hashes
|
// Just connect loaded results with already calculated hashes
|
||||||
vec_file_entry.extend(records_already_cached.into_values());
|
vec_file_entry.extend(records_already_cached.into_values());
|
||||||
|
|
||||||
let mut hashmap_with_file_entries: HashMap<String, FileEntry> = Default::default();
|
let mut hashmap_with_file_entries: HashMap<String, VideosEntry> = Default::default();
|
||||||
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
|
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
|
||||||
for file_entry in &vec_file_entry {
|
for file_entry in &vec_file_entry {
|
||||||
// 0 means that images was not hashed correctly, e.g. could be improperly
|
// 0 means that images was not hashed correctly, e.g. could be improperly
|
||||||
|
@ -342,10 +280,10 @@ impl SimilarVideos {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[fun_time(message = "save_cache", level = "debug")]
|
#[fun_time(message = "save_cache", level = "debug")]
|
||||||
fn save_cache(&mut self, vec_file_entry: Vec<FileEntry>, loaded_hash_map: BTreeMap<String, FileEntry>) {
|
fn save_cache(&mut self, vec_file_entry: Vec<VideosEntry>, loaded_hash_map: BTreeMap<String, VideosEntry>) {
|
||||||
if self.common_data.use_cache {
|
if self.common_data.use_cache {
|
||||||
// Must save all results to file, old loaded from file with all currently counted results
|
// Must save all results to file, old loaded from file with all currently counted results
|
||||||
let mut all_results: BTreeMap<String, FileEntry> = loaded_hash_map;
|
let mut all_results: BTreeMap<String, VideosEntry> = loaded_hash_map;
|
||||||
for file_entry in vec_file_entry {
|
for file_entry in vec_file_entry {
|
||||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||||
}
|
}
|
||||||
|
@ -356,11 +294,11 @@ impl SimilarVideos {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[fun_time(message = "match_groups_of_videos", level = "debug")]
|
#[fun_time(message = "match_groups_of_videos", level = "debug")]
|
||||||
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, FileEntry>) {
|
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, VideosEntry>) {
|
||||||
let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64));
|
let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64));
|
||||||
let mut collected_similar_videos: Vec<Vec<FileEntry>> = Default::default();
|
let mut collected_similar_videos: Vec<Vec<VideosEntry>> = Default::default();
|
||||||
for i in match_group {
|
for i in match_group {
|
||||||
let mut temp_vector: Vec<FileEntry> = Vec::new();
|
let mut temp_vector: Vec<VideosEntry> = Vec::new();
|
||||||
let mut bt_size: BTreeSet<u64> = Default::default();
|
let mut bt_size: BTreeSet<u64> = Default::default();
|
||||||
for j in i.duplicates() {
|
for j in i.duplicates() {
|
||||||
let file_entry = hashmap_with_file_entries.get(&j.to_string_lossy().to_string()).unwrap();
|
let file_entry = hashmap_with_file_entries.get(&j.to_string_lossy().to_string()).unwrap();
|
||||||
|
@ -397,7 +335,7 @@ impl SimilarVideos {
|
||||||
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
.collect::<Vec<(VideosEntry, Vec<VideosEntry>)>>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -493,7 +431,7 @@ impl SimilarVideos {
|
||||||
self.tolerance = tolerance;
|
self.tolerance = tolerance;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const fn get_similar_videos(&self) -> &Vec<Vec<FileEntry>> {
|
pub const fn get_similar_videos(&self) -> &Vec<Vec<VideosEntry>> {
|
||||||
&self.similar_vectors
|
&self.similar_vectors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -501,7 +439,7 @@ impl SimilarVideos {
|
||||||
&self.information
|
&self.information
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_similar_videos_referenced(&self) -> &Vec<(FileEntry, Vec<FileEntry>)> {
|
pub fn get_similar_videos_referenced(&self) -> &Vec<(VideosEntry, Vec<VideosEntry>)> {
|
||||||
&self.similar_referenced_vectors
|
&self.similar_referenced_vectors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ use rayon::prelude::*;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
||||||
use crate::common_dir_traversal::{common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType};
|
||||||
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
||||||
use crate::common_traits::*;
|
use crate::common_traits::*;
|
||||||
|
|
||||||
|
@ -142,16 +142,18 @@ impl Temporary {
|
||||||
pub fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<FileEntry> {
|
pub fn get_file_entry(&self, atomic_counter: &Arc<AtomicUsize>, entry_data: &DirEntry, warnings: &mut Vec<String>) -> Option<FileEntry> {
|
||||||
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
||||||
|
|
||||||
let file_name_lowercase = get_lowercase_name(entry_data, warnings)?;
|
|
||||||
|
|
||||||
if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
let current_file_name = entry_data.path();
|
let current_file_name = entry_data.path();
|
||||||
if self.common_data.excluded_items.is_excluded(¤t_file_name) {
|
if self.common_data.excluded_items.is_excluded(¤t_file_name) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let file_name = entry_data.file_name();
|
||||||
|
let file_name_ascii_lowercase = file_name.to_ascii_lowercase();
|
||||||
|
let file_name_lowercase = file_name_ascii_lowercase.to_string_lossy();
|
||||||
|
if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
let Ok(metadata) = entry_data.metadata() else {
|
let Ok(metadata) = entry_data.metadata() else {
|
||||||
return None;
|
return None;
|
||||||
};
|
};
|
||||||
|
|
|
@ -214,7 +214,7 @@ pub fn connect_settings(gui_data: &GuiData) {
|
||||||
dialog.connect_response(move |dialog, response_type| {
|
dialog.connect_response(move |dialog, response_type| {
|
||||||
if response_type == ResponseType::Ok {
|
if response_type == ResponseType::Ok {
|
||||||
let (mut messages, loaded_items) =
|
let (mut messages, loaded_items) =
|
||||||
load_cache_from_file_generalized_by_path::<czkawka_core::similar_videos::FileEntry>(&get_similar_videos_cache_file(), true, &Default::default());
|
load_cache_from_file_generalized_by_path::<czkawka_core::similar_videos::VideosEntry>(&get_similar_videos_cache_file(), true, &Default::default());
|
||||||
|
|
||||||
if let Some(cache_entries) = loaded_items {
|
if let Some(cache_entries) = loaded_items {
|
||||||
let save_messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &cache_entries, false, 0);
|
let save_messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &cache_entries, false, 0);
|
||||||
|
|
|
@ -78,7 +78,7 @@ fn scan_similar_images(a: Weak<MainWindow>, progress_sender: Sender<ProgressData
|
||||||
vec_fe.sort_unstable_by_key(|e| e.similarity);
|
vec_fe.sort_unstable_by_key(|e| e.similarity);
|
||||||
}
|
}
|
||||||
|
|
||||||
let hash_size = finder.hash_size;
|
let hash_size = custom_settings.similar_images_sub_hash_size;
|
||||||
|
|
||||||
a.upgrade_in_event_loop(move |app| {
|
a.upgrade_in_event_loop(move |app| {
|
||||||
write_similar_images_results(&app, vector, messages, hash_size);
|
write_similar_images_results(&app, vector, messages, hash_size);
|
||||||
|
|
Loading…
Reference in a new issue