From 7ec665ab7a8b72d3e49a3edc55c6a651b6e0a1b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mikrut?= <41945903+qarmin@users.noreply.github.com>
Date: Thu, 14 Jan 2021 12:17:15 +0100
Subject: [PATCH] Add cache for duplicate (#205)

---
 README.md                          |   9 +-
 czkawka_core/src/duplicate.rs      | 342 ++++++++++++++++++++++++-----
 czkawka_core/src/similar_images.rs |  26 +--
 instructions/Instruction.md        |   8 +-
 4 files changed, 311 insertions(+), 74 deletions(-)

diff --git a/README.md b/README.md
index 9b4f794..8e13d5f 100644
--- a/README.md
+++ b/README.md
@@ -6,13 +6,11 @@
 - Written in memory safe Rust
 - Amazingly fast - due using more or less advanced algorithms and multithreading support
 - Free, Open Source without ads
-- Multiplatform - works on Linux, Windows and macOS
+- Multiplatform - works on Linux, Windows and macOS
+- Cache support - second and further scans should be a lot faster than the first one
 - CLI frontend, very fast to automate tasks
-- GUI GTK frontend - uses modern GTK 3 and looks similar to FSlint
-- Light/Dark theme match the appearance of the system(Linux only)
-- Saving results to a file - allows reading entries found by the tool easily
+- GUI frontend - uses modern GTK 3 and looks similar to FSlint
 - Rich search option - allows setting absolute included and excluded directories, set of allowed file extensions or excluded items with * wildcard
-- Image previews to get quick view at the compared photos
 - Multiple tools to use:
   - Duplicates - Finds duplicates basing on file name, size, hash, first 1 MB of hash
   - Empty Folders - Finds empty folders with the help of advanced algorithm
@@ -228,6 +226,7 @@ So still is a big room for improvements.
 | Non stripped binaries   |      | X        |      |
 | Redundant whitespace    |      | X        |      |
 | Multiple languages(po)  |      | X        | X    |
+| Cache support           | X    |          | X    |
 | Project Activity        | High | Very Low | High |
 
 ## Contributions
diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs
index eea4314..9479ab7 100644
--- a/czkawka_core/src/duplicate.rs
+++ b/czkawka_core/src/duplicate.rs
@@ -1,9 +1,9 @@
 use crossbeam_channel::Receiver;
 use humansize::{file_size_opts as options, FileSize};
 use std::collections::{BTreeMap, HashMap};
-use std::fs::{File, Metadata};
+use std::fs::{File, Metadata, OpenOptions};
 use std::io::prelude::*;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
 use std::{fs, thread};
 
@@ -13,14 +13,17 @@ use crate::common_extensions::Extensions;
 use crate::common_items::ExcludedItems;
 use crate::common_messages::Messages;
 use crate::common_traits::*;
+use directories_next::ProjectDirs;
 use rayon::prelude::*;
-use std::io::BufWriter;
+use std::io::{BufReader, BufWriter};
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::thread::sleep;
 
 const HASH_MB_LIMIT_BYTES: u64 = 1024 * 1024; // 1MB
 
+const CACHE_FILE_NAME: &str = "cache_duplicates.txt";
+
 #[derive(Debug)]
 pub struct ProgressData {
     pub checking_method: CheckingMethod,
@@ -39,7 +42,7 @@ pub enum CheckingMethod {
     HashMB,
 }
 
-#[derive(PartialEq, Eq, Clone, Debug)]
+#[derive(PartialEq, Eq, Clone, Debug, Copy)]
 pub enum HashType {
     Blake3,
 }
@@ -58,6 +61,7 @@ pub struct FileEntry {
     pub path: PathBuf,
     pub size: u64,
     pub modified_date: u64,
+    pub hash: String,
 }
 
 /// Info struck with helpful information's about results
@@ -349,6 +353,7 @@ impl DuplicateFinder {
                             continue 'dir;
                         } // Permissions Denied
                     },
+                    hash: "".to_string(),
                 };
 
                 // Adding files to BTreeMap
@@ -520,6 +525,7 @@ impl DuplicateFinder {
continue 'dir; } // Permissions Denied }, + hash: "".to_string(), }; // Adding files to BTreeMap @@ -631,8 +637,8 @@ impl DuplicateFinder { hasher.update(&buffer[..n]); let hash_string: String = hasher.finalize().to_hex().to_string(); - hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new); - hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned()); + hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new); + hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.clone()); } Some((*size, hashmap_with_hash, errors, bytes_read)) }) @@ -700,60 +706,191 @@ impl DuplicateFinder { //// PROGRESS THREAD END #[allow(clippy::type_complexity)] - let full_hash_results: Vec<(u64, HashMap>, Vec, u64)> = pre_checked_map - .par_iter() - .map(|(size, vec_file_entry)| { - let mut hashmap_with_hash: HashMap> = Default::default(); - let mut errors: Vec = Vec::new(); - let mut file_handler: File; - let mut bytes_read: u64 = 0; - atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); - 'fe: for file_entry in vec_file_entry { - if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { - check_was_breaked.store(true, Ordering::Relaxed); - return None; - } - file_handler = match File::open(&file_entry.path) { - Ok(t) => t, - Err(_) => { - errors.push(format!("Unable to check hash of file {}", file_entry.path.display())); - continue 'fe; - } - }; + let mut full_hash_results: Vec<(u64, HashMap>, Vec, u64)>; - let mut hasher: blake3::Hasher = blake3::Hasher::new(); - let mut buffer = [0u8; 1024 * 32]; - let mut current_file_read_bytes: u64 = 0; - - loop { - let n = match file_handler.read(&mut buffer) { - Ok(t) => t, - Err(_) => { - errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); - continue 'fe; + match self.check_method { + CheckingMethod::HashMB => { + full_hash_results = pre_checked_map + .par_iter() + .map(|(size, vec_file_entry)| { + let mut hashmap_with_hash: HashMap> = Default::default(); + let mut errors: Vec = Vec::new(); + let mut file_handler: File; + let mut bytes_read: u64 = 0; + atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); + 'fe: for file_entry in vec_file_entry { + if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { + check_was_breaked.store(true, Ordering::Relaxed); + return None; } - }; - if n == 0 { - break; + file_handler = match File::open(&file_entry.path) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Unable to check hash of file {}", file_entry.path.display())); + continue 'fe; + } + }; + + let mut hasher: blake3::Hasher = blake3::Hasher::new(); + let mut buffer = [0u8; 1024 * 128]; + let mut current_file_read_bytes: u64 = 0; + + loop { + let n = match file_handler.read(&mut buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + continue 'fe; + } + }; + if n == 0 { + break; + } + + current_file_read_bytes += n as u64; + bytes_read += n as u64; + hasher.update(&buffer[..n]); + + if current_file_read_bytes >= HASH_MB_LIMIT_BYTES { + break; + } + } + + let hash_string: String = hasher.finalize().to_hex().to_string(); + hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new); + hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned()); } + Some((*size, hashmap_with_hash, errors, bytes_read)) + }) + .while_some() + .collect(); + } + CheckingMethod::Hash => { + 
let loaded_hash_map = match load_hashes_from_file(&mut self.text_messages, &self.hash_type) { + Some(t) => t, + None => Default::default(), + }; - current_file_read_bytes += n as u64; - bytes_read += n as u64; - hasher.update(&buffer[..n]); + let mut records_already_cached: HashMap> = Default::default(); + let mut non_cached_files_to_check: HashMap> = Default::default(); + for (size, vec_file_entry) in pre_checked_map { + #[allow(clippy::collapsible_if)] + if !loaded_hash_map.contains_key(&size) { + // If loaded data doesn't contains current info + non_cached_files_to_check.insert(size, vec_file_entry); + } else { + let loaded_vec_file_entry = loaded_hash_map.get(&size).unwrap(); - if self.check_method == CheckingMethod::HashMB && current_file_read_bytes >= HASH_MB_LIMIT_BYTES { - break; + for file_entry in vec_file_entry { + let mut found: bool = false; + for loaded_file_entry in loaded_vec_file_entry { + if file_entry.path == loaded_file_entry.path && file_entry.modified_date == loaded_file_entry.modified_date { + records_already_cached.entry(file_entry.size).or_insert_with(Vec::new); + records_already_cached.get_mut(&file_entry.size).unwrap().push(loaded_file_entry.clone()); + found = true; + break; + } + } + + if !found { + non_cached_files_to_check.entry(file_entry.size).or_insert_with(Vec::new); + non_cached_files_to_check.get_mut(&file_entry.size).unwrap().push(file_entry); + } } } - - let hash_string: String = hasher.finalize().to_hex().to_string(); - hashmap_with_hash.entry(hash_string.to_string()).or_insert_with(Vec::new); - hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry.to_owned()); } - Some((*size, hashmap_with_hash, errors, bytes_read)) - }) - .while_some() - .collect(); + + full_hash_results = non_cached_files_to_check + .par_iter() + .map(|(size, vec_file_entry)| { + let mut hashmap_with_hash: HashMap> = Default::default(); + let mut errors: Vec = Vec::new(); + let mut file_handler: File; + let mut bytes_read: u64 = 0; + atomic_file_counter.fetch_add(vec_file_entry.len(), Ordering::Relaxed); + 'fe: for file_entry in vec_file_entry { + if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() { + check_was_breaked.store(true, Ordering::Relaxed); + return None; + } + file_handler = match File::open(&file_entry.path) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Unable to check hash of file {}", file_entry.path.display())); + continue 'fe; + } + }; + + let mut hasher: blake3::Hasher = blake3::Hasher::new(); + let mut buffer = [0u8; 1024 * 128]; + + loop { + let n = match file_handler.read(&mut buffer) { + Ok(t) => t, + Err(_) => { + errors.push(format!("Error happened when checking hash of file {}", file_entry.path.display())); + continue 'fe; + } + }; + if n == 0 { + break; + } + + bytes_read += n as u64; + hasher.update(&buffer[..n]); + } + + let hash_string: String = hasher.finalize().to_hex().to_string(); + let mut file_entry = file_entry.clone(); + file_entry.hash = hash_string.clone(); + hashmap_with_hash.entry(hash_string.clone()).or_insert_with(Vec::new); + hashmap_with_hash.get_mut(hash_string.as_str()).unwrap().push(file_entry); + } + Some((*size, hashmap_with_hash, errors, bytes_read)) + }) + .while_some() + .collect(); + + // Size, Vec + + 'main: for (size, vec_file_entry) in records_already_cached { + // Check if size already exists, if exists we must to change it outside because cannot have mut and non mut reference to full_hash_results + for (full_size, full_hashmap, _errors, _bytes_read) in &mut full_hash_results { 
+                        if size == *full_size {
+                            for file_entry in vec_file_entry {
+                                full_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
+                                full_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
+                            }
+                            continue 'main;
+                        }
+                    }
+                    // Size doesn't exists add results to files
+                    let mut temp_hashmap: HashMap<String, Vec<FileEntry>> = Default::default();
+                    for file_entry in vec_file_entry {
+                        temp_hashmap.entry(file_entry.hash.clone()).or_insert_with(Vec::new);
+                        temp_hashmap.get_mut(&file_entry.hash).unwrap().push(file_entry);
+                    }
+                    full_hash_results.push((size, temp_hashmap, Vec::new(), 0));
+                }
+
+                // Must save all results to file, old loaded from file with all currently counted results
+                let mut all_results: HashMap<String, FileEntry> = Default::default();
+                for (_size, vec_file_entry) in loaded_hash_map {
+                    for file_entry in vec_file_entry {
+                        all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
+                    }
+                }
+                for (_size, hashmap, _errors, _bytes_read) in &full_hash_results {
+                    for vec_file_entry in hashmap.values() {
+                        for file_entry in vec_file_entry {
+                            all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry.clone());
+                        }
+                    }
+                }
+                save_hashes_to_file(&all_results, &mut self.text_messages, &self.hash_type);
+            }
+            _ => panic!("What"),
+        }
 
         // End thread which send info to gui
         progress_thread_run.store(false, Ordering::Relaxed);
@@ -1169,3 +1306,104 @@ fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, warnings: &m
     };
     (gained_space, removed_files, failed_to_remove_files)
 }
+
+fn save_hashes_to_file(hashmap: &HashMap<String, FileEntry>, text_messages: &mut Messages, type_of_hash: &HashType) {
+    println!("Trying to save {} files", hashmap.len());
+    if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
+        let cache_dir = PathBuf::from(proj_dirs.cache_dir());
+        if cache_dir.exists() {
+            if !cache_dir.is_dir() {
+                text_messages.messages.push(format!("Config dir {} is a file!", cache_dir.display()));
+                return;
+            }
+        } else if fs::create_dir_all(&cache_dir).is_err() {
+            text_messages.messages.push(format!("Cannot create config dir {}", cache_dir.display()));
+            return;
+        }
+        let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
+        let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
+            Ok(t) => t,
+            Err(_) => {
+                text_messages.messages.push(format!("Cannot create or open cache file {}", cache_file.display()));
+                return;
+            }
+        };
+        let mut writer = BufWriter::new(file_handler);
+
+        for file_entry in hashmap.values() {
+            // Only cache bigger than 5MB files
+            if file_entry.size > 5 * 1024 * 1024 {
+                let string: String = format!("{}//{}//{}//{}", file_entry.path.display(), file_entry.size, file_entry.modified_date, file_entry.hash);
+
+                if writeln!(writer, "{}", string).is_err() {
+                    text_messages.messages.push(format!("Failed to save some data to cache file {}", cache_file.display()));
+                    return;
+                };
+            }
+        }
+    }
+}
+
+fn load_hashes_from_file(text_messages: &mut Messages, type_of_hash: &HashType) -> Option<BTreeMap<u64, Vec<FileEntry>>> {
+    if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
+        let cache_dir = PathBuf::from(proj_dirs.cache_dir());
+        let cache_file = cache_dir.join(CACHE_FILE_NAME.replace(".", format!("_{:?}.", type_of_hash).as_str()));
+        let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
+            Ok(t) => t,
+            Err(_) => {
+                // text_messages.messages.push(format!("Cannot find or open cache file {}", cache_file.display())); // This shouldn't be write to output
+                return None;
+            }
+        };
+
+        let reader = BufReader::new(file_handler);
+
+        let mut hashmap_loaded_entries: BTreeMap<u64, Vec<FileEntry>> = Default::default();
+
+        // Read the file line by line using the lines() iterator from std::io::BufRead.
+        for (index, line) in reader.lines().enumerate() {
+            let line = match line {
+                Ok(t) => t,
+                Err(_) => {
+                    text_messages.warnings.push(format!("Failed to load line number {} from cache file {}", index + 1, cache_file.display()));
+                    return None;
+                }
+            };
+            let uuu = line.split("//").collect::<Vec<&str>>();
+            if uuu.len() != 4 {
+                text_messages
+                    .warnings
+                    .push(format!("Found invalid data(too much or too low amount of data) in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
+                continue;
+            }
+            // Don't load cache data if destination file not exists
+            if Path::new(uuu[0]).exists() {
+                let file_entry = FileEntry {
+                    path: PathBuf::from(uuu[0]),
+                    size: match uuu[1].parse::<u64>() {
+                        Ok(t) => t,
+                        Err(_) => {
+                            text_messages.warnings.push(format!("Found invalid size value in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
+                            continue;
+                        }
+                    },
+                    modified_date: match uuu[2].parse::<u64>() {
+                        Ok(t) => t,
+                        Err(_) => {
+                            text_messages.warnings.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}", index + 1, line, cache_file.display()));
+                            continue;
+                        }
+                    },
+                    hash: uuu[3].to_string(),
+                };
+                hashmap_loaded_entries.entry(file_entry.size).or_insert_with(Vec::new);
+                hashmap_loaded_entries.get_mut(&file_entry.size).unwrap().push(file_entry);
+            }
+        }
+
+        return Some(hashmap_loaded_entries);
+    }
+
+    text_messages.messages.push("Cannot find or open system config dir to save cache file".to_string());
+    None
+}
diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs
index 4d1f8e3..0c2f8ea 100644
--- a/czkawka_core/src/similar_images.rs
+++ b/czkawka_core/src/similar_images.rs
@@ -326,20 +326,20 @@ impl SimilarImages {
             None => Default::default(),
         };
 
-        let mut hashes_already_counted: HashMap<String, FileEntry> = Default::default();
-        let mut hashes_to_check: HashMap<String, FileEntry> = Default::default();
+        let mut records_already_cached: HashMap<String, FileEntry> = Default::default();
+        let mut non_cached_files_to_check: HashMap<String, FileEntry> = Default::default();
         for (name, file_entry) in &self.images_to_check {
             #[allow(clippy::collapsible_if)]
             if !loaded_hash_map.contains_key(name) {
                 // If loaded data doesn't contains current image info
-                hashes_to_check.insert(name.clone(), file_entry.clone());
+                non_cached_files_to_check.insert(name.clone(), file_entry.clone());
             } else {
                 if file_entry.size != loaded_hash_map.get(name).unwrap().size || file_entry.modified_date != loaded_hash_map.get(name).unwrap().modified_date {
                     // When size or modification date of image changed, then it is clear that is different image
-                    hashes_to_check.insert(name.clone(), file_entry.clone());
+                    non_cached_files_to_check.insert(name.clone(), file_entry.clone());
                 } else {
                     // Checking may be omitted when already there is entry with same size and modification date
-                    hashes_already_counted.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
+                    records_already_cached.insert(name.clone(), loaded_hash_map.get(name).unwrap().clone());
                 }
             }
         }
@@ -358,7 +358,7 @@ impl SimilarImages {
             let mut progress_send = progress_sender.clone();
             let progress_thread_run = progress_thread_run.clone();
             let atomic_file_counter = atomic_file_counter.clone();
-            let images_to_check = hashes_to_check.len();
+            let images_to_check = non_cached_files_to_check.len();
             progress_thread_handle = thread::spawn(move ||
loop {
                 progress_send
                     .try_send(ProgressData {
@@ -377,7 +377,7 @@ impl SimilarImages {
             progress_thread_handle = thread::spawn(|| {});
         }
         //// PROGRESS THREAD END
-        let mut vec_file_entry: Vec<(FileEntry, Node)> = hashes_to_check
+        let mut vec_file_entry: Vec<(FileEntry, Node)> = non_cached_files_to_check
             .par_iter()
             .map(|file_entry| {
                 atomic_file_counter.fetch_add(1, Ordering::Relaxed);
@@ -416,7 +416,7 @@ impl SimilarImages {
         let hash_map_modification = SystemTime::now();
 
         // Just connect loaded results with already calculated hashes
-        for (_name, file_entry) in hashes_already_counted {
+        for (_name, file_entry) in records_already_cached {
             vec_file_entry.push((file_entry.clone(), file_entry.hash));
         }
 
@@ -457,15 +457,15 @@ impl SimilarImages {
         // Maybe also add here progress report
 
         let mut new_vector: Vec<Vec<FileEntry>> = Vec::new();
-        let mut hashes_to_check = self.image_hashes.clone();
+        let mut non_cached_files_to_check = self.image_hashes.clone();
         for (hash, vec_file_entry) in &self.image_hashes {
             if stop_receiver.is_some() && stop_receiver.unwrap().try_recv().is_ok() {
                 return false;
             }
-            if !hashes_to_check.contains_key(hash) {
+            if !non_cached_files_to_check.contains_key(hash) {
                 continue;
             }
-            hashes_to_check.remove(hash);
+            non_cached_files_to_check.remove(hash);
 
             let vector_with_found_similar_hashes = self.bktree.find(hash, similarity).collect::<Vec<_>>();
             if vector_with_found_similar_hashes.len() == 1 && vec_file_entry.len() == 1 {
@@ -493,7 +493,7 @@ impl SimilarImages {
                 panic!("I'm not sure if same hash can have distance > 0");
             }
 
-            if let Some(vec_file_entry) = hashes_to_check.get(*similar_hash) {
+            if let Some(vec_file_entry) = non_cached_files_to_check.get(*similar_hash) {
                 vector_of_similar_images.append(
                     &mut (vec_file_entry
                         .iter()
@@ -515,7 +515,7 @@ impl SimilarImages {
                         })
                         .collect::<Vec<_>>()),
                 );
-                hashes_to_check.remove(*similar_hash);
+                non_cached_files_to_check.remove(*similar_hash);
             }
         }
         if vector_of_similar_images.len() > 1 {
diff --git a/instructions/Instruction.md b/instructions/Instruction.md
index a924f74..903cacc 100644
--- a/instructions/Instruction.md
+++ b/instructions/Instruction.md
@@ -137,17 +137,17 @@ Only some image extensions are supported, because I rely on image crate. Also so
 
 ## Config/Cache files
 For now Czkawka store only 2 files on disk:
 - `czkawka_gui_config.txt` - stores configuration of GUI which may be loaded at startup
-- `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - DO NOT TRY TO EDIT THIS FILE MANUALLY! - editing this file may cause app crashes.
+- `cache_similar_image.txt` - stores cache data and hashes which may be used later without needing to compute image hash again - editing this file may cause app crashes.
 - `cache_broken_files.txt` - stores cache data of broken files
+- `cache_duplicates_Blake3.txt` - stores cache data of duplicated files; to avoid a big performance hit when saving/loading this file, only files bigger than 5 MB that were already fully hashed are stored. Similar files with `Blake3` replaced by e.g. `SHA256` may appear when support for new hashes is introduced in Czkawka.
 
-
-First file is located in this path
+Config files are located in this path
 Linux - `/home/username/.config/czkawka`
 Mac - `/Users/username/Library/Application Support/pl.Qarmin.Czkawka`
 Windows - `C:\Users\Username\AppData\Roaming\Qarmin\Czkawka\config`
 
-Second with cache here:
+Cache should be here:
 Linux - `/home/username/.cache/czkawka`
 Mac - `/Users/Username/Library/Caches/pl.Qarmin.Czkawka`
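
For readers who want to inspect or post-process the duplicate cache introduced by this patch, the following is a minimal standalone sketch (not part of the patch) of parsing one line of `cache_duplicates_Blake3.txt`. It follows the `path//size//modified_date//hash` layout that `save_hashes_to_file` writes above; the `CachedEntry` struct, the `parse_cache_line` helper and the example line are illustrative names invented here, not Czkawka APIs.

    use std::path::PathBuf;

    #[derive(Debug)]
    struct CachedEntry {
        path: PathBuf,
        size: u64,
        modified_date: u64,
        hash: String,
    }

    // Parse one `path//size//modified_date//hash` cache line; returns None for malformed
    // lines, which the patch's load_hashes_from_file warns about and skips.
    fn parse_cache_line(line: &str) -> Option<CachedEntry> {
        let parts: Vec<&str> = line.split("//").collect();
        if parts.len() != 4 {
            return None;
        }
        Some(CachedEntry {
            path: PathBuf::from(parts[0]),
            size: parts[1].parse().ok()?,
            modified_date: parts[2].parse().ok()?,
            hash: parts[3].to_string(),
        })
    }

    fn main() {
        // Hypothetical entry; the patch only writes entries for files bigger than 5 MB.
        let line = "/home/user/video.mkv//734003200//1610622000//abcdef0123456789";
        println!("{:?}", parse_cache_line(line));
    }

Note that, exactly as in the patch's own parser, a path that itself contains the `//` separator would split into extra fields and be treated as a malformed line.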