1
0
Fork 0
mirror of synced 2024-05-11 16:02:44 +12:00

Add cache for broken files (#204)

This commit is contained in:
Rafał Mikrut 2021-01-13 16:03:05 +01:00 committed by GitHub
parent eeaaea20cc
commit 6e89bcb507
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 211 additions and 40 deletions

View file

@ -207,7 +207,7 @@ pub struct AllowedExtensions {
short = "x", short = "x",
long, long,
help = "Allowed file extension(s)", help = "Allowed file extension(s)",
long_help = "List of checked files with provided extension(s). There are also helpful macros which allow to easy use a typical extensions like:\nIMAGE(\"jpg,kra,gif,png,bmp,tiff,webp,hdr,svg\"),\nTEXT(\"txt,doc,docx,odt,rtf\"),\nVIDEO(\"mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp\") or\nMUSIC(\"mp3,flac,ogg,tta,wma,webm\")\n " long_help = "List of checked files with provided extension(s). There are also helpful macros which allow to easy use a typical extensions like:\nIMAGE(\"jpg,kra,gif,png,bmp,tiff,hdr,svg\"),\nTEXT(\"txt,doc,docx,odt,rtf\"),\nVIDEO(\"mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp\") or\nMUSIC(\"mp3,flac,ogg,tta,wma,webm\")\n "
)] )]
pub allowed_extensions: Vec<String>, pub allowed_extensions: Vec<String>,
} }

View file

@ -1,6 +1,6 @@
use std::fs::{File, Metadata}; use std::fs::{File, Metadata, OpenOptions};
use std::io::prelude::*; use std::io::prelude::*;
use std::path::PathBuf; use std::path::{Path, PathBuf};
use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::time::{Duration, SystemTime, UNIX_EPOCH};
use std::{fs, thread}; use std::{fs, thread};
@ -11,12 +11,16 @@ use crate::common_items::ExcludedItems;
use crate::common_messages::Messages; use crate::common_messages::Messages;
use crate::common_traits::*; use crate::common_traits::*;
use crossbeam_channel::Receiver; use crossbeam_channel::Receiver;
use directories_next::ProjectDirs;
use rayon::prelude::*; use rayon::prelude::*;
use std::io::BufWriter; use std::collections::HashMap;
use std::io::{BufReader, BufWriter};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc; use std::sync::Arc;
use std::thread::sleep; use std::thread::sleep;
const CACHE_FILE_NAME: &str = "cache_broken_files.txt";
#[derive(Debug)] #[derive(Debug)]
pub struct ProgressData { pub struct ProgressData {
pub current_stage: u8, pub current_stage: u8,
@ -35,13 +39,15 @@ pub enum DeleteMethod {
pub struct FileEntry { pub struct FileEntry {
pub path: PathBuf, pub path: PathBuf,
pub modified_date: u64, pub modified_date: u64,
pub size: u64,
pub type_of_file: TypeOfFile, pub type_of_file: TypeOfFile,
pub error_string: String, pub error_string: String,
} }
#[derive(Clone, PartialEq, Eq)] #[derive(Copy, Clone, PartialEq, Eq)]
pub enum TypeOfFile { pub enum TypeOfFile {
Image, Unknown = -1,
Image = 0,
} }
/// Info struck with helpful information's about results /// Info struck with helpful information's about results
@ -61,7 +67,7 @@ impl Info {
pub struct BrokenFiles { pub struct BrokenFiles {
text_messages: Messages, text_messages: Messages,
information: Info, information: Info,
files_to_check: Vec<FileEntry>, files_to_check: HashMap<String, FileEntry>,
broken_files: Vec<FileEntry>, broken_files: Vec<FileEntry>,
directories: Directories, directories: Directories,
allowed_extensions: Extensions, allowed_extensions: Extensions,
@ -80,10 +86,10 @@ impl BrokenFiles {
allowed_extensions: Extensions::new(), allowed_extensions: Extensions::new(),
directories: Directories::new(), directories: Directories::new(),
excluded_items: ExcludedItems::new(), excluded_items: ExcludedItems::new(),
files_to_check: vec![], files_to_check: Default::default(),
delete_method: DeleteMethod::None, delete_method: DeleteMethod::None,
stopped_search: false, stopped_search: false,
broken_files: vec![], broken_files: Default::default(),
} }
} }
@ -232,13 +238,8 @@ impl BrokenFiles {
} }
.to_lowercase(); .to_lowercase();
let type_of_file; let type_of_file = check_extension_avaibility(&file_name_lowercase);
if type_of_file == TypeOfFile::Unknown {
// Checking allowed image extensions
let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "webp", "tiff", "pnm", "tga", "ff", "gif"];
if allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(format!(".{}", e).as_str())) {
type_of_file = TypeOfFile::Image;
} else {
continue 'dir; continue 'dir;
} }
@ -273,12 +274,13 @@ impl BrokenFiles {
continue; continue;
} // Permissions Denied } // Permissions Denied
}, },
size: metadata.len(),
type_of_file, type_of_file,
error_string: "".to_string(), error_string: "".to_string(),
}; };
// Adding files to Vector // Adding files to Vector
self.files_to_check.push(fe); self.files_to_check.insert(fe.path.to_string_lossy().to_string(), fe);
} }
} }
} }
@ -292,6 +294,29 @@ impl BrokenFiles {
fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::Sender<ProgressData>>) -> bool { fn look_for_broken_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::Sender<ProgressData>>) -> bool {
let system_time = SystemTime::now(); let system_time = SystemTime::now();
let loaded_hash_map = match load_cache_from_file(&mut self.text_messages) {
Some(t) => t,
None => Default::default(),
};
let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: HashMap<String, FileEntry> = Default::default();
for (name, file_entry) in &self.files_to_check {
#[allow(clippy::collapsible_if)]
if !loaded_hash_map.contains_key(name) {
// If loaded data doesn't contains current image info
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
} else {
if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
// When size or modification date of image changed, then it is clear that is different image
non_cached_files_to_check.insert(name.clone(), file_entry.clone());
} else {
// Checking may be omitted when already there is entry with same size and modification date
records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
}
}
}
let check_was_breaked = AtomicBool::new(false); // Used for breaking from GUI and ending check thread let check_was_breaked = AtomicBool::new(false); // Used for breaking from GUI and ending check thread
//// PROGRESS THREAD START //// PROGRESS THREAD START
@ -304,7 +329,7 @@ impl BrokenFiles {
let mut progress_send = progress_sender.clone(); let mut progress_send = progress_sender.clone();
let progress_thread_run = progress_thread_run.clone(); let progress_thread_run = progress_thread_run.clone();
let atomic_file_counter = atomic_file_counter.clone(); let atomic_file_counter = atomic_file_counter.clone();
let files_to_check = self.files_to_check.len(); let files_to_check = non_cached_files_to_check.len();
progress_thread_handle = thread::spawn(move || loop { progress_thread_handle = thread::spawn(move || loop {
progress_send progress_send
.try_send(ProgressData { .try_send(ProgressData {
@ -323,23 +348,34 @@ impl BrokenFiles {
progress_thread_handle = thread::spawn(|| {}); progress_thread_handle = thread::spawn(|| {});
} }
//// PROGRESS THREAD END //// PROGRESS THREAD END
self.broken_files = self let mut vec_file_entry: Vec<FileEntry> = non_cached_files_to_check
.files_to_check
.par_iter() .par_iter()
.map(|file_entry| { .map(|file_entry| {
atomic_file_counter.fetch_add(1, Ordering::Relaxed); atomic_file_counter.fetch_add(1, Ordering::Relaxed);
if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
// This will not break check_was_breaked.store(true, Ordering::Relaxed);
return None; return None;
} }
match image::open(&file_entry.path) { match file_entry.1.type_of_file {
Ok(_) => Some(None), TypeOfFile::Image => {
Err(t) => { match image::open(&file_entry.1.path) {
let mut file_entry = file_entry.clone(); Ok(_) => Some(None),
file_entry.error_string = t.to_string(); Err(t) => {
Some(Some(file_entry)) let error_string = t.to_string();
} // Something is wrong with image // This error is a problem with image library, remove check when https://github.com/image-rs/jpeg-decoder/issues/130 will be fixed
if !error_string.contains("spectral selection is not allowed in non-progressive scan") {
let mut file_entry = file_entry.1.clone();
file_entry.error_string = error_string;
Some(Some(file_entry))
} else {
Some(None)
}
} // Something is wrong with image
}
}
// This means that cache read invalid value because maybe cache comes from different czkawka version
TypeOfFile::Unknown => Some(None),
} }
}) })
.while_some() .while_some()
@ -351,16 +387,35 @@ impl BrokenFiles {
progress_thread_run.store(false, Ordering::Relaxed); progress_thread_run.store(false, Ordering::Relaxed);
progress_thread_handle.join().unwrap(); progress_thread_handle.join().unwrap();
self.information.number_of_broken_files = self.broken_files.len(); // Break if stop was clicked
// Check if user aborted search(only from GUI)
if check_was_breaked.load(Ordering::Relaxed) { if check_was_breaked.load(Ordering::Relaxed) {
return false; return false;
} }
// Just connect loaded results with already calculated
for (_name, file_entry) in records_already_cached {
vec_file_entry.push(file_entry.clone());
}
self.broken_files = vec_file_entry.iter().filter_map(|f| if f.error_string.is_empty() { None } else { Some(f.clone()) }).collect();
// Must save all results to file, old loaded from file with all currently counted results
let mut all_results: HashMap<String, FileEntry> = self.files_to_check.clone();
for file_entry in vec_file_entry {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
for (_name, file_entry) in loaded_hash_map {
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
}
save_cache_to_file(&all_results, &mut self.text_messages);
self.information.number_of_broken_files = self.broken_files.len();
Common::print_time(system_time, SystemTime::now(), "sort_images - reading data from files in parallel".to_string()); Common::print_time(system_time, SystemTime::now(), "sort_images - reading data from files in parallel".to_string());
// Clean data // Clean data
self.files_to_check = vec![]; self.files_to_check = Default::default();
true true
} }
@ -370,7 +425,7 @@ impl BrokenFiles {
match self.delete_method { match self.delete_method {
DeleteMethod::Delete => { DeleteMethod::Delete => {
for file_entry in &self.files_to_check { for file_entry in self.broken_files.iter() {
if fs::remove_file(&file_entry.path).is_err() { if fs::remove_file(&file_entry.path).is_err() {
self.text_messages.warnings.push(file_entry.path.display().to_string()); self.text_messages.warnings.push(file_entry.path.display().to_string());
} }
@ -472,3 +527,118 @@ impl PrintResults for BrokenFiles {
Common::print_time(start_time, SystemTime::now(), "print_entries".to_string()); Common::print_time(start_time, SystemTime::now(), "print_entries".to_string());
} }
} }
/// Persists the broken-files cache to `CACHE_FILE_NAME` inside the per-user cache directory.
///
/// The cache directory is created when it does not exist and the cache file is truncated
/// before writing. Every entry whose `size` is greater than 1024 is written as one line:
/// `path//size//modified_date//error_string`.
///
/// Nothing is returned; every failure is reported by pushing a text onto
/// `text_messages.messages` and abandoning the save. If no project directory can be
/// determined, the function does nothing.
fn save_cache_to_file(hashmap_file_entry: &HashMap<String, FileEntry>, text_messages: &mut Messages) {
    // NOTE(review): unconditional stdout diagnostic. It reports the number of entries passed in,
    // not the number actually written (entries of 1024 bytes or less are skipped below).
    println!("Allowed to save {} entries", hashmap_file_entry.len());
    if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
        // Lin: /home/username/.cache/czkawka
        // Win: C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache
        // Mac: /Users/Username/Library/Caches/pl.Qarmin.Czkawka
        let cache_dir = PathBuf::from(proj_dirs.cache_dir());
        if cache_dir.exists() {
            if !cache_dir.is_dir() {
                text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
                return;
            }
        } else if fs::create_dir_all(&cache_dir).is_err() {
            text_messages.messages.push(format!("Cannot create config dir {}", cache_dir.display()));
            return;
        }

        let cache_file = cache_dir.join(CACHE_FILE_NAME);
        let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
            Ok(t) => t,
            Err(_) => {
                text_messages.messages.push(format!("Cannot create or open cache file {}", cache_file.display()));
                return;
            }
        };
        let mut writer = BufWriter::new(file_handler);

        for file_entry in hashmap_file_entry.values() {
            // Only save to cache files which have more than 1KB
            if file_entry.size > 1024 {
                let string: String = format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.error_string);
                if writeln!(writer, "{}", string).is_err() {
                    text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
                    return;
                }
            }
        }

        // Flush explicitly: dropping a BufWriter also flushes, but it discards any error,
        // which would leave a truncated cache file behind without any report.
        if writer.flush().is_err() {
            text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
        }
    }
}
/// Reads the broken-files cache from `CACHE_FILE_NAME` in the per-user cache directory.
///
/// Every line is expected to hold four `//`-separated fields:
/// `path//size//modified_date//error_string`. Lines with a different field count, or with a
/// size / modification date that does not parse as `u64`, are skipped with a warning.
/// Lines whose path no longer exists on disk are skipped without a warning.
///
/// Returns `None` when the project directory cannot be determined, when the cache file
/// cannot be opened, or when reading a line fails; otherwise returns the loaded entries
/// keyed by the path text stored in the cache.
fn load_cache_from_file(text_messages: &mut Messages) -> Option<HashMap<String, FileEntry>> {
    let proj_dirs = match ProjectDirs::from("pl", "Qarmin", "Czkawka") {
        Some(dirs) => dirs,
        None => {
            text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
            return None;
        }
    };

    let cache_file = PathBuf::from(proj_dirs.cache_dir()).join(CACHE_FILE_NAME);

    // Failing to open the cache file is deliberately not reported to the user.
    let file_handler = OpenOptions::new().read(true).open(&cache_file).ok()?;

    let mut hashmap_loaded_entries: HashMap<String, FileEntry> = Default::default();

    for (index, read_result) in BufReader::new(file_handler).lines().enumerate() {
        // Line numbers in warnings are 1-based.
        let line_number = index + 1;

        let line = match read_result {
            Ok(text) => text,
            Err(_) => {
                text_messages.warnings.push(format!("Failed to load line number {} from cache file {}", line_number, cache_file.display()));
                return None;
            }
        };

        let fields: Vec<&str> = line.split("//").collect();
        if fields.len() != 4 {
            text_messages.warnings.push(format!("Found invalid data in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
            continue;
        }
        let (path_text, size_text, modified_text, error_text) = (fields[0], fields[1], fields[2], fields[3]);

        // Don't load cache data if destination file not exists
        if !Path::new(path_text).exists() {
            continue;
        }

        let size = match size_text.parse::<u64>() {
            Ok(value) => value,
            Err(_) => {
                text_messages.warnings.push(format!("Found invalid size value in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
                continue;
            }
        };

        let modified_date = match modified_text.parse::<u64>() {
            Ok(value) => value,
            Err(_) => {
                text_messages.warnings.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}", line_number, line, cache_file.display()));
                continue;
            }
        };

        hashmap_loaded_entries.insert(
            path_text.to_string(),
            FileEntry {
                path: PathBuf::from(path_text),
                size,
                modified_date,
                // The file type is not stored in the cache; it is derived again from the path.
                type_of_file: check_extension_avaibility(&path_text.to_lowercase()),
                error_string: error_text.to_string(),
            },
        );
    }

    Some(hashmap_loaded_entries)
}
/// Classifies a file by the extension of its (already lowercased) name.
///
/// Returns `TypeOfFile::Image` when the text after the last `.` is one of the supported
/// image extensions, and `TypeOfFile::Unknown` otherwise (including names without a dot).
/// The comparison is case-sensitive, so the caller must pass a lowercased name.
fn check_extension_avaibility(file_name_lowercase: &str) -> TypeOfFile {
    // Everything after the last dot is the extension; a name without a dot has none.
    let extension = match file_name_lowercase.rfind('.') {
        Some(dot_index) => &file_name_lowercase[dot_index + 1..],
        None => return TypeOfFile::Unknown,
    };

    // Checking allowed image extensions
    match extension {
        "jpg" | "jpeg" | "png" | "bmp" | "ico" | "tiff" | "pnm" | "tga" | "ff" | "gif" => TypeOfFile::Image,
        _ => TypeOfFile::Unknown,
    }
}

View file

@ -18,7 +18,7 @@ impl Extensions {
if allowed_extensions.is_empty() { if allowed_extensions.is_empty() {
return; return;
} }
allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,webp,hdr,svg"); allowed_extensions = allowed_extensions.replace("IMAGE", "jpg,kra,gif,png,bmp,tiff,hdr,svg");
allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp"); allowed_extensions = allowed_extensions.replace("VIDEO", "mp4,flv,mkv,webm,vob,ogv,gifv,avi,mov,wmv,mpg,m4v,m4p,mpeg,3gp");
allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm"); allowed_extensions = allowed_extensions.replace("MUSIC", "mp3,flac,ogg,tta,wma,webm");
allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf"); allowed_extensions = allowed_extensions.replace("TEXT", "txt,doc,docx,odt,rtf");

View file

@ -265,7 +265,7 @@ impl SimilarImages {
.to_lowercase(); .to_lowercase();
// Checking allowed image extensions // Checking allowed image extensions
let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "webp", "tiff", "pnm", "tga", "ff", "gif"]; let allowed_image_extensions = ["jpg", "jpeg", "png", "bmp", "ico", "tiff", "pnm", "tga", "ff", "gif"];
if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(format!(".{}", e).as_str())) { if !allowed_image_extensions.iter().any(|e| file_name_lowercase.ends_with(format!(".{}", e).as_str())) {
continue 'dir; continue 'dir;
} }
@ -693,7 +693,7 @@ fn load_hashes_from_file(text_messages: &mut Messages) -> Option<HashMap<String,
let file_handler = match OpenOptions::new().read(true).open(&cache_file) { let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
Ok(t) => t, Ok(t) => t,
Err(_) => { Err(_) => {
text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
return None; return None;
} }
}; };

View file

@ -100,7 +100,7 @@ Then, for each selected tag by which we want to search for duplicates, we perfor
### Similar Images ### Similar Images
It is a tool for finding similar images that differ e.g. in watermark, size etc. It is a tool for finding similar images that differ e.g. in watermark, size etc.
The tool first collects images with specific extensions that can be checked - `["jpg", "png", "bmp", "ico", "webp", "tiff"]`. The tool first collects images with specific extensions that can be checked - `["jpg", "png", "bmp", "ico", "tiff"]`.
Next cached data are loaded from file to prevent hashing twice same file. Next cached data are loaded from file to prevent hashing twice same file.
Automatically cache which points to non existing data is deleted. Automatically cache which points to non existing data is deleted.
@ -138,6 +138,7 @@ Only some image extensions are supported, because I rely on image crate. Also so
For now Czkawka store only 2 files on disk: For now Czkawka store only 2 files on disk:
- `czkawka_gui_config.txt` - stores configuration of GUI which may be loaded at startup - `czkawka_gui_config.txt` - stores configuration of GUI which may be loaded at startup
- `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - DO NOT TRY TO EDIT THIS FILE MANUALLY! - editing this file may cause app crashes. - `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - DO NOT TRY TO EDIT THIS FILE MANUALLY! - editing this file may cause app crashes.
- `cache_broken_files.txt` - stores cache data of broken files
First file is located in this path First file is located in this path
@ -151,7 +152,7 @@ Second with cache here:
Linux - `/home/username/.cache/czkawka` Linux - `/home/username/.cache/czkawka`
Mac - `/Users/Username/Library/Caches/pl.Qarmin.Czkawka` Mac - `/Users/Username/Library/Caches/pl.Qarmin.Czkawka`
Windows - `C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache` Windows - `C:\Users\Username\AppData\Local\Qarmin\Czkawka\cache`
## GUI GTK ## GUI GTK
<img src="https://user-images.githubusercontent.com/41945903/103002387-14d1b800-452f-11eb-967e-9d5905dd6db5.png" width="800" /> <img src="https://user-images.githubusercontent.com/41945903/103002387-14d1b800-452f-11eb-967e-9d5905dd6db5.png" width="800" />
@ -181,7 +182,7 @@ There are several buttons which do different actions:
- Add (directories) - adds directories to include or exclude - Add (directories) - adds directories to include or exclude
- Remove (directories) - remove directories to search or to exclude - Remove (directories) - remove directories to search or to exclude
- Manual Add (directories) - allows to write by hand directories(may be used to write non visible in file manager directories) - Manual Add (directories) - allows to write by hand directories(may be used to write non visible in file manager directories)
- Save current configuration - saves current GUI configuration to configuration file - Save current configuration - saves current GUI configuration to configuration file
- Load configuration - loads configuration of file and override current GUI config - Load configuration - loads configuration of file and override current GUI config
- Reset configuration - reset current GUI configuration to default - Reset configuration - reset current GUI configuration to default
@ -212,4 +213,4 @@ By default all tools only write about results to console, but it is possible wit
- **Manually adding multiple directories** - **Manually adding multiple directories**
You can manually edit config file `czkawka_gui_config.txt` and add required directories. After that load configuration. You can manually edit config file `czkawka_gui_config.txt` and add required directories. After that load configuration.
- **Slow checking of little number similar images** - **Slow checking of little number similar images**
If you checked before a big amount of images(several tens of thousands) and them still exists on disk, then information's about it are loaded from cache and save to it, even if you have check now only a few images. You can rename cache file `cache_similar_image.txt`(to be able to use it again) or delete it - cache will regenerate but with lower amount of entries it should load and save a lot of faster. If you checked before a big amount of images(several tens of thousands) and them still exists on disk, then information's about it are loaded from cache and save to it, even if you have check now only a few images. You can rename cache file `cache_similar_image.txt`(to be able to use it again) or delete it - cache will regenerate but with lower amount of entries it should load and save a lot of faster.