Remove hashes from common results
This commit is contained in:
parent
f0f6572550
commit
7e49bc6d33
|
@ -64,7 +64,6 @@ pub struct FileEntry {
|
|||
pub path: PathBuf,
|
||||
pub size: u64,
|
||||
pub modified_date: u64,
|
||||
pub hash: String,
|
||||
}
|
||||
|
||||
impl ResultEntry for FileEntry {
|
||||
|
@ -468,7 +467,6 @@ fn process_file_in_file_mode(
|
|||
size: metadata.len(),
|
||||
modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false),
|
||||
path: current_file_name,
|
||||
hash: String::new(),
|
||||
};
|
||||
|
||||
fe_result.push(fe);
|
||||
|
@ -543,7 +541,6 @@ fn process_symlink_in_symlink_mode(
|
|||
size: metadata.len(),
|
||||
modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false),
|
||||
path: current_file_name,
|
||||
hash: String::new(),
|
||||
};
|
||||
|
||||
fe_result.push(fe);
|
||||
|
|
|
@ -6,7 +6,7 @@ use std::io::prelude::*;
|
|||
use std::io::{self, Error, ErrorKind};
|
||||
#[cfg(target_family = "unix")]
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::{fs, mem};
|
||||
|
||||
|
@ -15,6 +15,7 @@ use fun_time::fun_time;
|
|||
use humansize::{format_size, BINARY};
|
||||
use log::debug;
|
||||
use rayon::prelude::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use xxhash_rust::xxh3::Xxh3;
|
||||
|
||||
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads};
|
||||
|
@ -44,6 +45,36 @@ impl HashType {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize, Debug, Default)]
|
||||
pub struct DuplicateEntry {
|
||||
pub path: PathBuf,
|
||||
pub modified_date: u64,
|
||||
pub size: u64,
|
||||
pub hash: String,
|
||||
}
|
||||
impl ResultEntry for DuplicateEntry {
|
||||
fn get_path(&self) -> &Path {
|
||||
&self.path
|
||||
}
|
||||
fn get_modified_date(&self) -> u64 {
|
||||
self.modified_date
|
||||
}
|
||||
fn get_size(&self) -> u64 {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
impl FileEntry {
|
||||
fn into_duplicate_entry(self) -> DuplicateEntry {
|
||||
DuplicateEntry {
|
||||
size: self.size,
|
||||
path: self.path,
|
||||
modified_date: self.modified_date,
|
||||
hash: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Info {
|
||||
pub number_of_groups_by_size: usize,
|
||||
|
@ -62,21 +93,21 @@ pub struct DuplicateFinder {
|
|||
common_data: CommonToolData,
|
||||
information: Info,
|
||||
// File Size, File Entry
|
||||
files_with_identical_names: BTreeMap<String, Vec<FileEntry>>,
|
||||
files_with_identical_names: BTreeMap<String, Vec<DuplicateEntry>>,
|
||||
// File (Size, Name), File Entry
|
||||
files_with_identical_size_names: BTreeMap<(u64, String), Vec<FileEntry>>,
|
||||
files_with_identical_size_names: BTreeMap<(u64, String), Vec<DuplicateEntry>>,
|
||||
// File Size, File Entry
|
||||
files_with_identical_size: BTreeMap<u64, Vec<FileEntry>>,
|
||||
files_with_identical_size: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
// File Size, next grouped by file size, next grouped by hash
|
||||
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<FileEntry>>>,
|
||||
files_with_identical_hashes: BTreeMap<u64, Vec<Vec<DuplicateEntry>>>,
|
||||
// File Size, File Entry
|
||||
files_with_identical_names_referenced: BTreeMap<String, (FileEntry, Vec<FileEntry>)>,
|
||||
files_with_identical_names_referenced: BTreeMap<String, (DuplicateEntry, Vec<DuplicateEntry>)>,
|
||||
// File (Size, Name), File Entry
|
||||
files_with_identical_size_names_referenced: BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)>,
|
||||
files_with_identical_size_names_referenced: BTreeMap<(u64, String), (DuplicateEntry, Vec<DuplicateEntry>)>,
|
||||
// File Size, File Entry
|
||||
files_with_identical_size_referenced: BTreeMap<u64, (FileEntry, Vec<FileEntry>)>,
|
||||
files_with_identical_size_referenced: BTreeMap<u64, (DuplicateEntry, Vec<DuplicateEntry>)>,
|
||||
// File Size, next grouped by file size, next grouped by hash
|
||||
files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>>,
|
||||
files_with_identical_hashes_referenced: BTreeMap<u64, Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>,
|
||||
check_method: CheckingMethod,
|
||||
hash_type: HashType,
|
||||
ignore_hard_links: bool,
|
||||
|
@ -171,7 +202,16 @@ impl DuplicateFinder {
|
|||
self.common_data.text_messages.warnings.extend(warnings);
|
||||
|
||||
// Create new BTreeMap without single size entries(files have not duplicates)
|
||||
self.files_with_identical_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
|
||||
self.files_with_identical_names = grouped_file_entries
|
||||
.into_iter()
|
||||
.filter_map(|(name, vector)| {
|
||||
if vector.len() > 1 {
|
||||
Some((name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Reference - only use in size, because later hash will be counted differently
|
||||
if self.common_data.use_reference_folders {
|
||||
|
@ -188,7 +228,7 @@ impl DuplicateFinder {
|
|||
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
||||
.collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
|
||||
for (fe, vec_fe) in vec {
|
||||
self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe));
|
||||
}
|
||||
|
@ -236,7 +276,16 @@ impl DuplicateFinder {
|
|||
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
||||
self.common_data.text_messages.warnings.extend(warnings);
|
||||
|
||||
self.files_with_identical_size_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect();
|
||||
self.files_with_identical_size_names = grouped_file_entries
|
||||
.into_iter()
|
||||
.filter_map(|(size_name, vector)| {
|
||||
if vector.len() > 1 {
|
||||
Some((size_name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Reference - only use in size, because later hash will be counted differently
|
||||
if self.common_data.use_reference_folders {
|
||||
|
@ -253,7 +302,7 @@ impl DuplicateFinder {
|
|||
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
||||
.collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
|
||||
for (fe, vec_fe) in vec {
|
||||
self.files_with_identical_size_names_referenced
|
||||
.insert((fe.size, fe.path.to_string_lossy().to_string()), (fe, vec_fe));
|
||||
|
@ -313,7 +362,8 @@ impl DuplicateFinder {
|
|||
let vector = if self.ignore_hard_links { filter_hard_links(&vec) } else { vec };
|
||||
|
||||
if vector.len() > 1 {
|
||||
self.files_with_identical_size.insert(size, vector);
|
||||
self.files_with_identical_size
|
||||
.insert(size, vector.into_iter().map(FileEntry::into_duplicate_entry).collect());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -367,7 +417,7 @@ impl DuplicateFinder {
|
|||
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
||||
.collect::<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>();
|
||||
for (fe, vec_fe) in vec {
|
||||
self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe));
|
||||
}
|
||||
|
@ -375,17 +425,17 @@ impl DuplicateFinder {
|
|||
}
|
||||
|
||||
#[fun_time(message = "prehash_load_cache_at_start", level = "debug")]
|
||||
fn prehash_load_cache_at_start(&mut self) -> (BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>) {
|
||||
fn prehash_load_cache_at_start(&mut self) -> (BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>) {
|
||||
// Cache algorithm
|
||||
// - Load data from cache
|
||||
// - Convert from BT<u64,Vec<FileEntry>> to BT<String,FileEntry>
|
||||
// - Convert from BT<u64,Vec<DuplicateEntry>> to BT<String,DuplicateEntry>
|
||||
// - Save to proper values
|
||||
let loaded_hash_map;
|
||||
let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||
let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||
let mut records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
|
||||
let mut non_cached_files_to_check: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
|
||||
|
||||
if self.use_prehash_cache {
|
||||
let (messages, loaded_items) = load_cache_from_file_generalized_by_size::<FileEntry>(
|
||||
let (messages, loaded_items) = load_cache_from_file_generalized_by_size::<DuplicateEntry>(
|
||||
&get_duplicate_cache_file(&self.hash_type, true),
|
||||
self.get_delete_outdated_cache(),
|
||||
&self.files_with_identical_size,
|
||||
|
@ -397,7 +447,7 @@ impl DuplicateFinder {
|
|||
for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) {
|
||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||
// TODO maybe hashmap is not needed when using < 4 elements
|
||||
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||
let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new();
|
||||
for file_entry in cached_vec_file_entry {
|
||||
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||
}
|
||||
|
@ -428,10 +478,14 @@ impl DuplicateFinder {
|
|||
}
|
||||
|
||||
#[fun_time(message = "prehash_save_cache_at_exit", level = "debug")]
|
||||
fn prehash_save_cache_at_exit(&mut self, loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>, pre_hash_results: &Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>) {
|
||||
fn prehash_save_cache_at_exit(
|
||||
&mut self,
|
||||
loaded_hash_map: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
pre_hash_results: &Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)>,
|
||||
) {
|
||||
if self.use_prehash_cache {
|
||||
// All results = records already cached + computed results
|
||||
let mut save_cache_to_hashmap: BTreeMap<String, FileEntry> = Default::default();
|
||||
let mut save_cache_to_hashmap: BTreeMap<String, DuplicateEntry> = Default::default();
|
||||
|
||||
for (size, vec_file_entry) in loaded_hash_map {
|
||||
if size >= self.minimal_prehash_cache_file_size {
|
||||
|
@ -466,7 +520,7 @@ impl DuplicateFinder {
|
|||
&mut self,
|
||||
stop_receiver: Option<&Receiver<()>>,
|
||||
progress_sender: Option<&Sender<ProgressData>>,
|
||||
pre_checked_map: &mut BTreeMap<u64, Vec<FileEntry>>,
|
||||
pre_checked_map: &mut BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
) -> Option<()> {
|
||||
let check_type = self.hash_type;
|
||||
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
||||
|
@ -489,10 +543,10 @@ impl DuplicateFinder {
|
|||
|
||||
debug!("Starting calculating prehash");
|
||||
#[allow(clippy::type_complexity)]
|
||||
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||
let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||
.into_par_iter()
|
||||
.map(|(size, vec_file_entry)| {
|
||||
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
||||
let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
let mut buffer = [0u8; 1024 * 32];
|
||||
|
||||
|
@ -552,16 +606,16 @@ impl DuplicateFinder {
|
|||
#[fun_time(message = "full_hashing_load_cache_at_start", level = "debug")]
|
||||
fn full_hashing_load_cache_at_start(
|
||||
&mut self,
|
||||
mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>>,
|
||||
) -> (BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>) {
|
||||
mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
) -> (BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>) {
|
||||
let loaded_hash_map;
|
||||
let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||
let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||
let mut records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
|
||||
let mut non_cached_files_to_check: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
|
||||
|
||||
if self.common_data.use_cache {
|
||||
debug!("full_hashing_load_cache_at_start - using cache");
|
||||
let (messages, loaded_items) =
|
||||
load_cache_from_file_generalized_by_size::<FileEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map);
|
||||
load_cache_from_file_generalized_by_size::<DuplicateEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map);
|
||||
self.get_text_messages_mut().extend_with_another_messages(messages);
|
||||
loaded_hash_map = loaded_items.unwrap_or_default();
|
||||
|
||||
|
@ -569,7 +623,7 @@ impl DuplicateFinder {
|
|||
for (size, mut vec_file_entry) in pre_checked_map {
|
||||
if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) {
|
||||
// TODO maybe hashmap is not needed when using < 4 elements
|
||||
let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new();
|
||||
let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new();
|
||||
for file_entry in cached_vec_file_entry {
|
||||
cached_path_entries.insert(&file_entry.path, file_entry.clone());
|
||||
}
|
||||
|
@ -603,9 +657,9 @@ impl DuplicateFinder {
|
|||
#[fun_time(message = "full_hashing_save_cache_at_exit", level = "debug")]
|
||||
fn full_hashing_save_cache_at_exit(
|
||||
&mut self,
|
||||
records_already_cached: BTreeMap<u64, Vec<FileEntry>>,
|
||||
full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>,
|
||||
loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>,
|
||||
records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)>,
|
||||
loaded_hash_map: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
) {
|
||||
if !self.common_data.use_cache {
|
||||
return;
|
||||
|
@ -621,7 +675,7 @@ impl DuplicateFinder {
|
|||
}
|
||||
}
|
||||
// Size doesn't exists add results to files
|
||||
let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
||||
let mut temp_hashmap: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
|
||||
for file_entry in vec_file_entry {
|
||||
temp_hashmap.entry(file_entry.hash.clone()).or_default().push(file_entry);
|
||||
}
|
||||
|
@ -629,7 +683,7 @@ impl DuplicateFinder {
|
|||
}
|
||||
|
||||
// Must save all results to file, old loaded from file with all currently counted results
|
||||
let mut all_results: BTreeMap<String, FileEntry> = Default::default();
|
||||
let mut all_results: BTreeMap<String, DuplicateEntry> = Default::default();
|
||||
for (_size, vec_file_entry) in loaded_hash_map {
|
||||
for file_entry in vec_file_entry {
|
||||
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
||||
|
@ -653,7 +707,12 @@ impl DuplicateFinder {
|
|||
}
|
||||
|
||||
#[fun_time(message = "full_hashing", level = "debug")]
|
||||
fn full_hashing(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>, pre_checked_map: BTreeMap<u64, Vec<FileEntry>>) -> Option<()> {
|
||||
fn full_hashing(
|
||||
&mut self,
|
||||
stop_receiver: Option<&Receiver<()>>,
|
||||
progress_sender: Option<&Sender<ProgressData>>,
|
||||
pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>,
|
||||
) -> Option<()> {
|
||||
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
||||
prepare_thread_handler_common(progress_sender, 4, MAX_STAGE, 0, self.check_method, self.common_data.tool_type);
|
||||
|
||||
|
@ -675,10 +734,10 @@ impl DuplicateFinder {
|
|||
|
||||
let check_type = self.hash_type;
|
||||
debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::<usize>());
|
||||
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||
let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check
|
||||
.into_par_iter()
|
||||
.map(|(size, vec_file_entry)| {
|
||||
let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default();
|
||||
let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default();
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
let mut buffer = [0u8; 1024 * 16];
|
||||
|
||||
|
@ -748,7 +807,7 @@ impl DuplicateFinder {
|
|||
Some(all_results_with_same_size)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<Vec<(FileEntry, Vec<FileEntry>)>>>();
|
||||
.collect::<Vec<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>>();
|
||||
for vec_of_vec in vec {
|
||||
self.files_with_identical_hashes_referenced.insert(vec_of_vec[0].0.size, vec_of_vec);
|
||||
}
|
||||
|
@ -777,7 +836,7 @@ impl DuplicateFinder {
|
|||
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
||||
assert_eq!(self.check_method, CheckingMethod::Hash);
|
||||
|
||||
let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||
let mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default();
|
||||
let ret = self.prehashing(stop_receiver, progress_sender, &mut pre_checked_map);
|
||||
if ret.is_none() {
|
||||
return false;
|
||||
|
@ -813,7 +872,7 @@ impl DuplicateFinder {
|
|||
}
|
||||
CheckingMethod::Hash => {
|
||||
for vec_files in self.files_with_identical_hashes.values() {
|
||||
let vev: Vec<&Vec<FileEntry>> = vec_files.iter().collect::<Vec<_>>();
|
||||
let vev: Vec<&Vec<DuplicateEntry>> = vec_files.iter().collect::<Vec<_>>();
|
||||
delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
|
||||
}
|
||||
}
|
||||
|
@ -843,7 +902,7 @@ impl DuplicateFinder {
|
|||
self.minimal_prehash_cache_file_size = minimal_prehash_cache_file_size;
|
||||
}
|
||||
|
||||
pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> {
|
||||
pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<DuplicateEntry>> {
|
||||
&self.files_with_identical_names
|
||||
}
|
||||
|
||||
|
@ -851,15 +910,15 @@ impl DuplicateFinder {
|
|||
self.use_prehash_cache = use_prehash_cache;
|
||||
}
|
||||
|
||||
pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> {
|
||||
pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<DuplicateEntry>> {
|
||||
&self.files_with_identical_size
|
||||
}
|
||||
|
||||
pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<FileEntry>> {
|
||||
pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<DuplicateEntry>> {
|
||||
&self.files_with_identical_size_names
|
||||
}
|
||||
|
||||
pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<FileEntry>>> {
|
||||
pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<DuplicateEntry>>> {
|
||||
&self.files_with_identical_hashes
|
||||
}
|
||||
|
||||
|
@ -887,19 +946,19 @@ impl DuplicateFinder {
|
|||
self.common_data.use_reference_folders
|
||||
}
|
||||
|
||||
pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap<u64, Vec<(FileEntry, Vec<FileEntry>)>> {
|
||||
pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap<u64, Vec<(DuplicateEntry, Vec<DuplicateEntry>)>> {
|
||||
&self.files_with_identical_hashes_referenced
|
||||
}
|
||||
|
||||
pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap<String, (FileEntry, Vec<FileEntry>)> {
|
||||
pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap<String, (DuplicateEntry, Vec<DuplicateEntry>)> {
|
||||
&self.files_with_identical_names_referenced
|
||||
}
|
||||
|
||||
pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap<u64, (FileEntry, Vec<FileEntry>)> {
|
||||
pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap<u64, (DuplicateEntry, Vec<DuplicateEntry>)> {
|
||||
&self.files_with_identical_size_referenced
|
||||
}
|
||||
|
||||
pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (FileEntry, Vec<FileEntry>)> {
|
||||
pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (DuplicateEntry, Vec<DuplicateEntry>)> {
|
||||
&self.files_with_identical_size_names_referenced
|
||||
}
|
||||
}
|
||||
|
@ -1195,7 +1254,7 @@ pub trait MyHasher {
|
|||
fn finalize(&self) -> String;
|
||||
}
|
||||
|
||||
fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result<String, String> {
|
||||
fn hash_calculation(buffer: &mut [u8], file_entry: &DuplicateEntry, hash_type: &HashType, limit: u64) -> Result<String, String> {
|
||||
let mut file_handler = match File::open(&file_entry.path) {
|
||||
Ok(t) => t,
|
||||
Err(e) => return Err(format!("Unable to check hash of file {:?}, reason {e}", file_entry.path)),
|
||||
|
@ -1355,7 +1414,7 @@ mod tests {
|
|||
let src = dir.path().join("a");
|
||||
let mut file = File::create(&src)?;
|
||||
file.write_all(b"aa")?;
|
||||
let e = FileEntry { path: src, ..Default::default() };
|
||||
let e = DuplicateEntry { path: src, ..Default::default() };
|
||||
let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap();
|
||||
assert!(!r.is_empty());
|
||||
Ok(())
|
||||
|
@ -1368,7 +1427,7 @@ mod tests {
|
|||
let src = dir.path().join("a");
|
||||
let mut file = File::create(&src)?;
|
||||
file.write_all(b"aa")?;
|
||||
let e = FileEntry { path: src, ..Default::default() };
|
||||
let e = DuplicateEntry { path: src, ..Default::default() };
|
||||
let r1 = hash_calculation(&mut buf, &e, &HashType::Blake3, 1).unwrap();
|
||||
let r2 = hash_calculation(&mut buf, &e, &HashType::Blake3, 2).unwrap();
|
||||
let r3 = hash_calculation(&mut buf, &e, &HashType::Blake3, u64::MAX).unwrap();
|
||||
|
@ -1382,7 +1441,7 @@ mod tests {
|
|||
let dir = tempfile::Builder::new().tempdir()?;
|
||||
let mut buf = [0u8; 1 << 10];
|
||||
let src = dir.path().join("a");
|
||||
let e = FileEntry { path: src, ..Default::default() };
|
||||
let e = DuplicateEntry { path: src, ..Default::default() };
|
||||
let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap_err();
|
||||
assert!(!r.is_empty());
|
||||
Ok(())
|
||||
|
|
|
@ -13,7 +13,7 @@ use czkawka_core::bad_extensions::BadExtensions;
|
|||
use czkawka_core::big_file::BigFile;
|
||||
use czkawka_core::broken_files::BrokenFiles;
|
||||
use czkawka_core::common::{split_path, split_path_compare};
|
||||
use czkawka_core::common_dir_traversal::{CheckingMethod, FileEntry};
|
||||
use czkawka_core::common_dir_traversal::CheckingMethod;
|
||||
use czkawka_core::common_tool::CommonData;
|
||||
use czkawka_core::common_traits::ResultEntry;
|
||||
use czkawka_core::duplicate::DuplicateFinder;
|
||||
|
@ -23,8 +23,7 @@ use czkawka_core::invalid_symlinks::InvalidSymlinks;
|
|||
use czkawka_core::localizer_core::generate_translation_hashmap;
|
||||
use czkawka_core::same_music::{MusicSimilarity, SameMusic};
|
||||
use czkawka_core::similar_images;
|
||||
use czkawka_core::similar_images::ImagesEntry;
|
||||
use czkawka_core::similar_images::SimilarImages;
|
||||
use czkawka_core::similar_images::{ImagesEntry, SimilarImages};
|
||||
use czkawka_core::similar_videos::SimilarVideos;
|
||||
use czkawka_core::temporary::Temporary;
|
||||
|
||||
|
@ -1327,13 +1326,16 @@ fn computer_duplicate_finder(
|
|||
}
|
||||
}
|
||||
|
||||
fn vector_sort_unstable_entry_by_path(vector: &[FileEntry]) -> Vec<FileEntry> {
|
||||
fn vector_sort_unstable_entry_by_path<T>(vector: &[T]) -> Vec<T>
|
||||
where
|
||||
T: ResultEntry + Clone,
|
||||
{
|
||||
if vector.len() >= 2 {
|
||||
let mut vector = vector.to_owned();
|
||||
vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path()));
|
||||
let mut vector = vector.to_vec();
|
||||
vector.sort_unstable_by(|a, b| split_path_compare(a.get_path(), b.get_path()));
|
||||
vector
|
||||
} else {
|
||||
vector.to_owned()
|
||||
vector.to_vec()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,7 @@ use czkawka_core::common_dir_traversal::{FileEntry, ProgressData};
|
|||
use czkawka_core::common_tool::CommonData;
|
||||
use czkawka_core::common_traits::ResultEntry;
|
||||
use czkawka_core::empty_files::EmptyFiles;
|
||||
use czkawka_core::empty_folder::EmptyFolder;
|
||||
use czkawka_core::empty_folder::FolderEntry;
|
||||
use czkawka_core::empty_folder::{EmptyFolder, FolderEntry};
|
||||
use czkawka_core::similar_images;
|
||||
use czkawka_core::similar_images::SimilarImages;
|
||||
use humansize::{format_size, BINARY};
|
||||
|
|
Loading…
Reference in a new issue