2023-12-17 11:21:09 +13:00
|
|
|
use std::collections::{BTreeMap, HashSet};
|
|
|
|
use std::fs::File;
|
2021-01-13 08:06:12 +13:00
|
|
|
use std::io::prelude::*;
|
2021-01-14 04:03:05 +13:00
|
|
|
use std::path::{Path, PathBuf};
|
2023-12-17 11:21:09 +13:00
|
|
|
use std::sync::atomic::Ordering;
|
2023-05-03 08:37:12 +12:00
|
|
|
use std::{fs, mem, panic};
|
2021-01-13 08:06:12 +13:00
|
|
|
|
2023-12-04 00:06:42 +13:00
|
|
|
use crossbeam_channel::{Receiver, Sender};
|
2023-10-11 07:54:41 +13:00
|
|
|
use fun_time::fun_time;
|
|
|
|
use log::debug;
|
2023-05-03 08:37:12 +12:00
|
|
|
use pdf::file::FileOptions;
|
2023-02-19 22:21:14 +13:00
|
|
|
use pdf::object::ParseOptions;
|
2022-05-17 04:23:07 +12:00
|
|
|
use pdf::PdfError;
|
|
|
|
use pdf::PdfError::Try;
|
2021-11-28 08:49:20 +13:00
|
|
|
use rayon::prelude::*;
|
2022-01-06 10:47:27 +13:00
|
|
|
use serde::{Deserialize, Serialize};
|
2021-11-28 08:49:20 +13:00
|
|
|
|
2023-05-03 08:37:12 +12:00
|
|
|
use crate::common::{
|
2023-12-17 11:21:09 +13:00
|
|
|
check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS,
|
2023-07-10 18:36:03 +12:00
|
|
|
IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS,
|
2023-05-03 08:37:12 +12:00
|
|
|
};
|
2023-10-08 05:04:17 +13:00
|
|
|
use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
2023-12-17 11:21:09 +13:00
|
|
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
2023-10-11 07:54:41 +13:00
|
|
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
2021-01-13 08:06:12 +13:00
|
|
|
use crate::common_traits::*;
|
|
|
|
|
2023-12-04 00:06:42 +13:00
|
|
|
/// A single file record produced by the broken-files scan.
///
/// Serialized to/from the on-disk cache, so field names are part of the
/// cache format.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct BrokenEntry {
    // Full path to the checked file.
    pub path: PathBuf,
    // Last-modification timestamp as u64 — presumably seconds since the
    // Unix epoch, as used for cache invalidation; TODO confirm against
    // the traversal code that fills it.
    pub modified_date: u64,
    // File size in bytes.
    pub size: u64,
    // Category deciding which validity checker (image/zip/audio/pdf) runs.
    pub type_of_file: TypeOfFile,
    // Empty when the file is valid; otherwise describes why it is broken.
    pub error_string: String,
}
|
2023-12-17 11:21:09 +13:00
|
|
|
/// Accessors required by the generic result/cache machinery.
impl ResultEntry for BrokenEntry {
    fn get_path(&self) -> &Path {
        &self.path
    }
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }
    fn get_size(&self) -> u64 {
        self.size
    }
}
|
2021-01-13 08:06:12 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
impl FileEntry {
|
|
|
|
fn into_broken_entry(self) -> BrokenEntry {
|
|
|
|
BrokenEntry {
|
|
|
|
size: self.size,
|
|
|
|
path: self.path,
|
|
|
|
modified_date: self.modified_date,
|
|
|
|
|
|
|
|
type_of_file: TypeOfFile::Unknown,
|
|
|
|
error_string: String::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-11 04:58:53 +12:00
|
|
|
/// Category of a scanned file, selecting which checker validates it.
///
/// Serialized into the cache, so variant names/order matter for
/// compatibility with previously written cache files.
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
pub enum TypeOfFile {
    // Sentinel for entries whose type could not be determined (e.g. cache
    // written by a different czkawka version).
    Unknown = -1,
    Image = 0,
    ArchiveZip,
    Audio,
    PDF,
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
// Number of progress stages reported while scanning for broken files.
const MAX_BROKEN_FILES_STAGE: u8 = 1;
|
|
|
|
|
2022-06-11 04:58:53 +12:00
|
|
|
bitflags! {
    /// Bitmask selecting which file categories are validated during a scan.
    #[derive(PartialEq, Copy, Clone)]
    pub struct CheckedTypes : u32 {
        const NONE = 0;

        const PDF = 0b1;
        const AUDIO = 0b10;
        const IMAGE = 0b100;
        const ARCHIVE = 0b1000;
    }
}
|
|
|
|
|
2021-01-13 08:06:12 +13:00
|
|
|
/// Summary statistics of the last broken-files scan.
#[derive(Default)]
pub struct Info {
    // How many files ended up with a non-empty error string.
    pub number_of_broken_files: usize,
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2021-01-13 08:06:12 +13:00
|
|
|
/// Tool that finds files which cannot be opened/decoded (images, zip
/// archives, audio files, PDFs).
pub struct BrokenFiles {
    // State shared by all czkawka tools (directories, cache flags, messages…).
    common_data: CommonToolData,
    // Result statistics from the last scan.
    information: Info,
    // Candidate files keyed by their lossy-string path, filled by check_files.
    files_to_check: BTreeMap<String, BrokenEntry>,
    // Final list of files confirmed broken.
    broken_files: Vec<BrokenEntry>,
    // Which categories the scan should validate.
    checked_types: CheckedTypes,
}
|
|
|
|
|
|
|
|
impl BrokenFiles {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Self {
|
2023-10-05 19:06:47 +13:00
|
|
|
common_data: CommonToolData::new(ToolType::BrokenFiles),
|
|
|
|
information: Info::default(),
|
2021-01-14 04:03:05 +13:00
|
|
|
files_to_check: Default::default(),
|
|
|
|
broken_files: Default::default(),
|
2022-06-11 04:58:53 +12:00
|
|
|
checked_types: CheckedTypes::PDF | CheckedTypes::AUDIO | CheckedTypes::IMAGE | CheckedTypes::ARCHIVE,
|
2021-01-13 08:06:12 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "find_broken_files", level = "info")]
|
2023-12-04 00:06:42 +13:00
|
|
|
pub fn find_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
|
2024-02-15 05:45:25 +13:00
|
|
|
self.prepare_items();
|
2021-01-13 08:06:12 +13:00
|
|
|
if !self.check_files(stop_receiver, progress_sender) {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.stopped_search = true;
|
2021-01-13 08:06:12 +13:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if !self.look_for_broken_files(stop_receiver, progress_sender) {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.stopped_search = true;
|
2021-01-13 08:06:12 +13:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
self.delete_files();
|
|
|
|
self.debug_print();
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "check_files", level = "debug")]
/// Walks the configured directories and fills `files_to_check` with every
/// file whose extension matches one of the enabled categories.
///
/// Returns `false` only when the traversal was stopped by the user.
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
    // Per-category lookup sets used later to classify each found file.
    let zip_extensions = ZIP_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
    let audio_extensions = AUDIO_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
    let pdf_extensions = PDF_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();
    let images_extensions = IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().collect::<HashSet<_>>();

    // Collect the allowed extensions for every category the user enabled.
    let mut extensions = Vec::new();
    let vec_extensions = [
        (CheckedTypes::PDF, PDF_FILES_EXTENSIONS),
        (CheckedTypes::AUDIO, AUDIO_FILES_EXTENSIONS),
        (CheckedTypes::ARCHIVE, ZIP_FILES_EXTENSIONS),
        (CheckedTypes::IMAGE, IMAGE_RS_BROKEN_FILES_EXTENSIONS),
    ];
    for (checked_type, extensions_to_add) in &vec_extensions {
        if self.checked_types.contains(*checked_type) {
            extensions.extend_from_slice(extensions_to_add);
        }
    }

    self.common_data.extensions.set_and_validate_allowed_extensions(&extensions);
    // Nothing selected (or nothing valid) means there is nothing to scan;
    // that is a successful, empty result — not a failure.
    if !self.common_data.extensions.set_any_extensions() {
        return true;
    }

    // No grouping is needed here, so every file goes into one () group.
    let result = DirTraversalBuilder::new()
        .group_by(|_fe| ())
        .stop_receiver(stop_receiver)
        .progress_sender(progress_sender)
        .common_data(&self.common_data)
        .max_stage(MAX_BROKEN_FILES_STAGE)
        .build()
        .run();

    match result {
        DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
            // Convert each traversal entry into a BrokenEntry, classify it by
            // extension, and key the map by the lossy path string.
            self.files_to_check = grouped_file_entries
                .into_values()
                .flatten()
                .map(|fe| {
                    let mut broken_entry = fe.into_broken_entry();
                    broken_entry.type_of_file = check_extension_availability(broken_entry.get_path(), &images_extensions, &zip_extensions, &audio_extensions, &pdf_extensions);
                    (broken_entry.path.to_string_lossy().to_string(), broken_entry)
                })
                .collect();
            self.common_data.text_messages.warnings.extend(warnings);
            debug!("check_files - Found {} files to check.", self.files_to_check.len());
            true
        }
        DirTraversalResult::Stopped => false,
    }
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
fn check_broken_image(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
2023-05-03 08:37:12 +12:00
|
|
|
let mut file_entry_clone = file_entry.clone();
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
panic::catch_unwind(|| {
|
2023-05-03 08:37:12 +12:00
|
|
|
if let Err(e) = image::open(&file_entry.path) {
|
|
|
|
let error_string = e.to_string();
|
|
|
|
// This error is a problem with image library, remove check when https://github.com/image-rs/jpeg-decoder/issues/130 will be fixed
|
|
|
|
if error_string.contains("spectral selection is not allowed in non-progressive scan") {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
file_entry.error_string = error_string;
|
|
|
|
}
|
|
|
|
Some(file_entry)
|
2023-12-17 11:21:09 +13:00
|
|
|
})
|
|
|
|
.unwrap_or_else(|_| {
|
2023-05-03 08:37:12 +12:00
|
|
|
let message = create_crash_message("Image-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/Serial-ATA/lofty-rs");
|
|
|
|
println!("{message}");
|
|
|
|
file_entry_clone.error_string = message;
|
|
|
|
Some(file_entry_clone)
|
2023-12-17 11:21:09 +13:00
|
|
|
})
|
2023-05-03 08:37:12 +12:00
|
|
|
}
|
2023-12-17 11:21:09 +13:00
|
|
|
fn check_broken_zip(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
2023-05-03 08:37:12 +12:00
|
|
|
match File::open(&file_entry.path) {
|
|
|
|
Ok(file) => {
|
|
|
|
if let Err(e) = zip::ZipArchive::new(file) {
|
|
|
|
file_entry.error_string = e.to_string();
|
|
|
|
}
|
|
|
|
Some(file_entry)
|
|
|
|
}
|
|
|
|
Err(_inspected) => None,
|
|
|
|
}
|
|
|
|
}
|
2023-12-17 11:21:09 +13:00
|
|
|
fn check_broken_audio(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
|
2023-05-03 08:37:12 +12:00
|
|
|
match File::open(&file_entry.path) {
|
|
|
|
Ok(file) => {
|
|
|
|
let mut file_entry_clone = file_entry.clone();
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
panic::catch_unwind(|| {
|
2023-05-03 08:37:12 +12:00
|
|
|
if let Err(e) = audio_checker::parse_audio_file(file) {
|
|
|
|
file_entry.error_string = e.to_string();
|
|
|
|
}
|
|
|
|
Some(file_entry)
|
2023-12-17 11:21:09 +13:00
|
|
|
})
|
|
|
|
.unwrap_or_else(|_| {
|
2023-05-03 08:37:12 +12:00
|
|
|
let message = create_crash_message("Symphonia", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdeljanov/Symphonia");
|
|
|
|
println!("{message}");
|
|
|
|
file_entry_clone.error_string = message;
|
|
|
|
Some(file_entry_clone)
|
2023-12-17 11:21:09 +13:00
|
|
|
})
|
2023-05-03 08:37:12 +12:00
|
|
|
}
|
|
|
|
Err(_inspected) => None,
|
|
|
|
}
|
|
|
|
}
|
2023-12-17 11:21:09 +13:00
|
|
|
/// Validates a PDF by opening it and rendering-parsing every page.
///
/// Returns `None` when the entry should be skipped entirely (I/O error or a
/// password-protected file); otherwise the entry, with `error_string` set
/// when parsing failed.
fn check_broken_pdf(&self, mut file_entry: BrokenEntry) -> Option<BrokenEntry> {
    let parser_options = ParseOptions::tolerant(); // Only show as broken files with really big bugs

    let mut file_entry_clone = file_entry.clone();
    // pdf-rs may panic on malformed input; convert a panic into a "crashed"
    // result instead of killing the worker thread.
    panic::catch_unwind(|| {
        match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
            Ok(file) => {
                for idx in 0..file.num_pages() {
                    if let Err(e) = file.get_page(idx) {
                        let err = validate_pdf_error(&mut file_entry, e);
                        // Encrypted documents are not "broken" — skip them.
                        if let PdfError::InvalidPassword = err {
                            return None;
                        }
                        // One failing page is enough to mark the file broken.
                        break;
                    }
                }
            }
            Err(e) => {
                // I/O failures are not the file's fault — skip the entry.
                if let PdfError::Io { .. } = e {
                    return None;
                }
                let err = validate_pdf_error(&mut file_entry, e);
                if let PdfError::InvalidPassword = err {
                    return None;
                }
            }
        }
        Some(file_entry)
    })
    .unwrap_or_else(|_| {
        let message = create_crash_message("PDF-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdf-rs/pdf");
        println!("{message}");
        file_entry_clone.error_string = message;
        Some(file_entry_clone)
    })
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache", level = "debug")]
/// Splits `files_to_check` against the on-disk cache.
///
/// Returns `(loaded_hash_map, records_already_cached, non_cached_files_to_check)`:
/// everything read from the cache file, the subset of current candidates
/// that were found in it, and the candidates that still need checking.
/// Consumes `self.files_to_check` (taken via `mem::take`).
fn load_cache(&mut self) -> (BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>, BTreeMap<String, BrokenEntry>) {
    let loaded_hash_map;
    let mut records_already_cached: BTreeMap<String, BrokenEntry> = Default::default();
    let mut non_cached_files_to_check: BTreeMap<String, BrokenEntry> = Default::default();
    let files_to_check = mem::take(&mut self.files_to_check);

    if self.common_data.use_cache {
        let (messages, loaded_items) =
            load_cache_from_file_generalized_by_path::<BrokenEntry>(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check);
        self.get_text_messages_mut().extend_with_another_messages(messages);
        loaded_hash_map = loaded_items.unwrap_or_default();

        // Partition the candidates: cached entries reuse the cached record,
        // the rest must be checked from scratch.
        for (name, file_entry) in files_to_check {
            if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
                records_already_cached.insert(name, cached_file_entry.clone());
            } else {
                non_cached_files_to_check.insert(name, file_entry);
            }
        }
    } else {
        // Cache disabled: everything is checked from scratch.
        loaded_hash_map = Default::default();
        non_cached_files_to_check = files_to_check;
    }
    (loaded_hash_map, records_already_cached, non_cached_files_to_check)
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "look_for_broken_files", level = "debug")]
|
2023-12-04 00:06:42 +13:00
|
|
|
fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
2023-10-05 19:06:47 +13:00
|
|
|
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache();
|
2021-01-14 04:03:05 +13:00
|
|
|
|
2023-05-08 06:54:05 +12:00
|
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
|
2023-10-05 19:06:47 +13:00
|
|
|
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
2023-05-03 08:37:12 +12:00
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
debug!("look_for_broken_files - started finding for broken files");
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut vec_file_entry: Vec<BrokenEntry> = non_cached_files_to_check
|
2021-12-30 01:43:38 +13:00
|
|
|
.into_par_iter()
|
2023-05-03 08:37:12 +12:00
|
|
|
.map(|(_, file_entry)| {
|
|
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
2023-10-15 04:48:57 +13:00
|
|
|
if check_if_stop_received(stop_receiver) {
|
2021-01-13 08:06:12 +13:00
|
|
|
return None;
|
|
|
|
}
|
|
|
|
|
2021-01-15 23:04:52 +13:00
|
|
|
match file_entry.type_of_file {
|
2023-05-03 08:37:12 +12:00
|
|
|
TypeOfFile::Image => Some(self.check_broken_image(file_entry)),
|
|
|
|
TypeOfFile::ArchiveZip => Some(self.check_broken_zip(file_entry)),
|
|
|
|
TypeOfFile::Audio => Some(self.check_broken_audio(file_entry)),
|
|
|
|
TypeOfFile::PDF => Some(self.check_broken_pdf(file_entry)),
|
2021-01-14 04:03:05 +13:00
|
|
|
// This means that cache read invalid value because maybe cache comes from different czkawka version
|
|
|
|
TypeOfFile::Unknown => Some(None),
|
2021-01-13 08:06:12 +13:00
|
|
|
}
|
|
|
|
})
|
|
|
|
.while_some()
|
2023-01-29 06:54:02 +13:00
|
|
|
.filter(Option::is_some)
|
|
|
|
.map(Option::unwrap)
|
2023-12-17 11:21:09 +13:00
|
|
|
.collect::<Vec<BrokenEntry>>();
|
2023-10-11 07:54:41 +13:00
|
|
|
debug!("look_for_broken_files - ended finding for broken files");
|
2021-01-13 08:06:12 +13:00
|
|
|
|
2023-05-03 08:37:12 +12:00
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
2021-01-13 08:06:12 +13:00
|
|
|
|
2021-01-14 04:03:05 +13:00
|
|
|
// Just connect loaded results with already calculated
|
2023-05-08 06:54:05 +12:00
|
|
|
vec_file_entry.extend(records_already_cached.into_values());
|
2021-01-14 04:03:05 +13:00
|
|
|
|
2023-10-05 19:06:47 +13:00
|
|
|
self.save_to_cache(&vec_file_entry, loaded_hash_map);
|
2021-01-14 04:03:05 +13:00
|
|
|
|
2022-05-10 05:40:35 +12:00
|
|
|
self.broken_files = vec_file_entry
|
|
|
|
.into_par_iter()
|
|
|
|
.filter_map(|f| if f.error_string.is_empty() { None } else { Some(f) })
|
|
|
|
.collect();
|
|
|
|
|
2021-01-14 04:03:05 +13:00
|
|
|
self.information.number_of_broken_files = self.broken_files.len();
|
2023-10-11 07:54:41 +13:00
|
|
|
debug!("Found {} broken files.", self.information.number_of_broken_files);
|
2021-01-16 00:41:45 +13:00
|
|
|
// Clean unused data
|
2021-01-14 04:03:05 +13:00
|
|
|
self.files_to_check = Default::default();
|
2021-01-13 08:06:12 +13:00
|
|
|
|
|
|
|
true
|
|
|
|
}
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "save_to_cache", level = "debug")]
|
2023-12-17 11:21:09 +13:00
|
|
|
fn save_to_cache(&mut self, vec_file_entry: &[BrokenEntry], loaded_hash_map: BTreeMap<String, BrokenEntry>) {
|
2023-10-05 19:06:47 +13:00
|
|
|
if self.common_data.use_cache {
|
|
|
|
// Must save all results to file, old loaded from file with all currently counted results
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut all_results: BTreeMap<String, BrokenEntry> = Default::default();
|
2023-10-05 19:06:47 +13:00
|
|
|
|
|
|
|
for file_entry in vec_file_entry.iter().cloned() {
|
|
|
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
|
|
|
}
|
|
|
|
for (_name, file_entry) in loaded_hash_map {
|
|
|
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
|
|
|
}
|
2023-10-08 05:04:17 +13:00
|
|
|
|
|
|
|
let messages = save_cache_to_file_generalized(&get_broken_files_cache_file(), &all_results, self.common_data.save_also_as_json, 0);
|
|
|
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
2023-10-05 19:06:47 +13:00
|
|
|
}
|
|
|
|
}
|
2023-05-03 08:37:12 +12:00
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "delete_files", level = "debug")]
|
2021-01-13 08:06:12 +13:00
|
|
|
fn delete_files(&mut self) {
|
2023-10-11 07:54:41 +13:00
|
|
|
match self.common_data.delete_method {
|
2021-01-13 08:06:12 +13:00
|
|
|
DeleteMethod::Delete => {
|
2023-01-29 06:54:02 +13:00
|
|
|
for file_entry in &self.broken_files {
|
2021-01-13 08:06:12 +13:00
|
|
|
if fs::remove_file(&file_entry.path).is_err() {
|
2023-12-08 07:38:41 +13:00
|
|
|
self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string());
|
2021-01-13 08:06:12 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DeleteMethod::None => {
|
|
|
|
//Just do nothing
|
|
|
|
}
|
2023-10-11 07:54:41 +13:00
|
|
|
_ => {
|
|
|
|
unreachable!()
|
|
|
|
}
|
2021-01-13 08:06:12 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
/// Public accessors and configuration setters.
impl BrokenFiles {
    /// Returns every file detected as broken in the last scan.
    pub const fn get_broken_files(&self) -> &Vec<BrokenEntry> {
        &self.broken_files
    }

    /// Selects which file categories the next scan should validate.
    pub fn set_checked_types(&mut self, checked_types: CheckedTypes) {
        self.checked_types = checked_types;
    }

    /// Returns summary statistics from the last scan.
    pub const fn get_information(&self) -> &Info {
        &self.information
    }
}
|
2021-01-13 08:06:12 +13:00
|
|
|
/// `Default` just delegates to `new()`, which enables all categories.
impl Default for BrokenFiles {
    fn default() -> Self {
        Self::new()
    }
}
|
|
|
|
|
|
|
|
impl DebugPrint for BrokenFiles {
    /// Dumps the common tool state — debug builds only.
    fn debug_print(&self) {
        if cfg!(debug_assertions) {
            self.debug_print_common();
        }
    }
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
/// Text and JSON result reporting.
impl PrintResults for BrokenFiles {
    fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
        // Header describing the scan configuration.
        writeln!(
            writer,
            "Results of searching {:?} with excluded directories {:?} and excluded items {:?}",
            self.common_data.directories.included_directories,
            self.common_data.directories.excluded_directories,
            self.common_data.excluded_items.get_excluded_items()
        )?;

        if !self.broken_files.is_empty() {
            writeln!(writer, "Found {} broken files.", self.information.number_of_broken_files)?;
            // One line per broken file: path and the recorded error.
            for file_entry in &self.broken_files {
                writeln!(writer, "{:?} - {}", file_entry.path, file_entry.error_string)?;
            }
        } else {
            write!(writer, "Not found any broken files.")?;
        }

        Ok(())
    }

    fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
        self.save_results_to_file_as_json_internal(file_name, &self.broken_files, pretty_print)
    }
}
|
2021-01-14 04:03:05 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
fn check_extension_availability(
|
|
|
|
full_name: &Path,
|
|
|
|
images_extensions: &HashSet<&&'static str>,
|
|
|
|
zip_extensions: &HashSet<&&'static str>,
|
|
|
|
audio_extensions: &HashSet<&&'static str>,
|
|
|
|
pdf_extensions: &HashSet<&&'static str>,
|
|
|
|
) -> TypeOfFile {
|
|
|
|
let Some(extension) = full_name.extension() else {
|
|
|
|
debug_assert!(false, "Missing extension");
|
|
|
|
return TypeOfFile::Unknown;
|
|
|
|
};
|
|
|
|
|
|
|
|
let Some(extension_str) = extension.to_str() else {
|
|
|
|
debug_assert!(false, "Extension not really fully str");
|
|
|
|
return TypeOfFile::Unknown;
|
|
|
|
};
|
2024-01-15 02:38:55 +13:00
|
|
|
let extension_lowercase = extension_str.to_ascii_lowercase();
|
2023-12-17 11:21:09 +13:00
|
|
|
|
2024-01-15 02:38:55 +13:00
|
|
|
if images_extensions.contains(&extension_lowercase.as_str()) {
|
2021-01-14 04:03:05 +13:00
|
|
|
TypeOfFile::Image
|
2024-01-15 02:38:55 +13:00
|
|
|
} else if zip_extensions.contains(&extension_lowercase.as_str()) {
|
2021-03-28 01:14:02 +13:00
|
|
|
TypeOfFile::ArchiveZip
|
2024-01-15 02:38:55 +13:00
|
|
|
} else if audio_extensions.contains(&extension_lowercase.as_str()) {
|
2022-05-17 04:23:07 +12:00
|
|
|
TypeOfFile::Audio
|
2024-01-15 02:38:55 +13:00
|
|
|
} else if pdf_extensions.contains(&extension_lowercase.as_str()) {
|
2022-05-17 04:23:07 +12:00
|
|
|
TypeOfFile::PDF
|
2021-01-14 04:03:05 +13:00
|
|
|
} else {
|
2024-01-15 02:38:55 +13:00
|
|
|
eprintln!("File with unknown extension: {full_name:?} - {extension_lowercase}");
|
2023-12-17 11:21:09 +13:00
|
|
|
debug_assert!(false, "File with unknown extension");
|
2021-01-14 04:03:05 +13:00
|
|
|
TypeOfFile::Unknown
|
|
|
|
}
|
|
|
|
}
|
2022-07-25 06:48:02 +12:00
|
|
|
|
2022-05-17 04:23:07 +12:00
|
|
|
fn unpack_pdf_error(e: PdfError) -> PdfError {
|
|
|
|
if let Try {
|
|
|
|
file: _,
|
|
|
|
line: _,
|
|
|
|
column: _,
|
2022-12-30 05:25:01 +13:00
|
|
|
context: _,
|
2022-05-17 04:23:07 +12:00
|
|
|
source,
|
|
|
|
} = e
|
|
|
|
{
|
|
|
|
unpack_pdf_error(*source)
|
|
|
|
} else {
|
|
|
|
e
|
|
|
|
}
|
|
|
|
}
|
2023-06-10 08:11:47 +12:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
fn validate_pdf_error(file_entry: &mut BrokenEntry, e: PdfError) -> PdfError {
|
2023-06-10 08:11:47 +12:00
|
|
|
let mut error_string = e.to_string();
|
|
|
|
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
|
|
|
|
if error_string.starts_with("Try at") {
|
|
|
|
if let Some(start_index) = error_string.find("/pdf-") {
|
|
|
|
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
file_entry.error_string = error_string;
|
|
|
|
unpack_pdf_error(e)
|
|
|
|
}
|
2023-10-05 19:06:47 +13:00
|
|
|
|
|
|
|
/// Gives the shared tool framework access to the common tool state.
impl CommonData for BrokenFiles {
    fn get_cd(&self) -> &CommonToolData {
        &self.common_data
    }
    fn get_cd_mut(&mut self) -> &mut CommonToolData {
        &mut self.common_data
    }
}
|