Implementing Size+Name method
This commit is contained in:
parent
329fc8f93d
commit
8a54a5e3c6
|
@ -11,7 +11,7 @@ use std::os::unix::fs::MetadataExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::thread::sleep;
|
use std::thread::{sleep, JoinHandle};
|
||||||
use std::time::{Duration, SystemTime};
|
use std::time::{Duration, SystemTime};
|
||||||
use std::{fs, mem, thread};
|
use std::{fs, mem, thread};
|
||||||
|
|
||||||
|
@ -148,24 +148,24 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
match self.check_method {
|
match self.check_method {
|
||||||
CheckingMethod::Name => {
|
CheckingMethod::Name => {
|
||||||
if !self.check_files_name(stop_receiver, progress_sender) {
|
self.stopped_search = !self.check_files_name(stop_receiver, progress_sender);
|
||||||
self.stopped_search = true;
|
if self.stopped_search {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CheckingMethod::Size => {
|
CheckingMethod::Size => {
|
||||||
if !self.check_files_size(stop_receiver, progress_sender) {
|
self.stopped_search = !self.check_files_size(stop_receiver, progress_sender);
|
||||||
self.stopped_search = true;
|
if self.stopped_search {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CheckingMethod::Hash => {
|
CheckingMethod::Hash => {
|
||||||
if !self.check_files_size(stop_receiver, progress_sender) {
|
self.stopped_search = !self.check_files_size(stop_receiver, progress_sender);
|
||||||
self.stopped_search = true;
|
if self.stopped_search {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if !self.check_files_hash(stop_receiver, progress_sender) {
|
self.stopped_search = !self.check_files_hash(stop_receiver, progress_sender);
|
||||||
self.stopped_search = true;
|
if self.stopped_search {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -459,49 +459,8 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reference - only use in size, because later hash will be counted differently
|
self.check_references_for_size();
|
||||||
if self.use_reference_folders && self.check_method == CheckingMethod::Size {
|
self.calculate_size_stats();
|
||||||
let mut btree_map = Default::default();
|
|
||||||
mem::swap(&mut self.files_with_identical_size, &mut btree_map);
|
|
||||||
let reference_directories = self.directories.reference_directories.clone();
|
|
||||||
let vec = btree_map
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|(_size, vec_file_entry)| {
|
|
||||||
let mut files_from_referenced_folders = Vec::new();
|
|
||||||
let mut normal_files = Vec::new();
|
|
||||||
for file_entry in vec_file_entry {
|
|
||||||
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
|
|
||||||
files_from_referenced_folders.push(file_entry);
|
|
||||||
} else {
|
|
||||||
normal_files.push(file_entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
|
||||||
for (fe, vec_fe) in vec {
|
|
||||||
self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.use_reference_folders {
|
|
||||||
for (size, (_fe, vector)) in &self.files_with_identical_size_referenced {
|
|
||||||
self.information.number_of_duplicated_files_by_size += vector.len();
|
|
||||||
self.information.number_of_groups_by_size += 1;
|
|
||||||
self.information.lost_space_by_size += (vector.len() as u64) * size;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size, vector) in &self.files_with_identical_size {
|
|
||||||
self.information.number_of_duplicated_files_by_size += vector.len() - 1;
|
|
||||||
self.information.number_of_groups_by_size += 1;
|
|
||||||
self.information.lost_space_by_size += (vector.len() as u64 - 1) * size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Common::print_time(start_time, SystemTime::now(), "check_files_size");
|
Common::print_time(start_time, SystemTime::now(), "check_files_size");
|
||||||
true
|
true
|
||||||
|
@ -513,6 +472,90 @@ impl DuplicateFinder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn calculate_size_stats(&mut self) {
|
||||||
|
if self.use_reference_folders {
|
||||||
|
for (size, (_fe, vector)) in &self.files_with_identical_size_referenced {
|
||||||
|
self.information.number_of_duplicated_files_by_size += vector.len();
|
||||||
|
self.information.number_of_groups_by_size += 1;
|
||||||
|
self.information.lost_space_by_size += (vector.len() as u64) * size;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size, vector) in &self.files_with_identical_size {
|
||||||
|
self.information.number_of_duplicated_files_by_size += vector.len() - 1;
|
||||||
|
self.information.number_of_groups_by_size += 1;
|
||||||
|
self.information.lost_space_by_size += (vector.len() as u64 - 1) * size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This step check for references, only when checking for size.
|
||||||
|
/// This is needed, because later reference folders looks for hashes, not size
|
||||||
|
fn check_references_for_size(&mut self) {
|
||||||
|
if self.use_reference_folders && self.check_method == CheckingMethod::Size {
|
||||||
|
let mut btree_map = Default::default();
|
||||||
|
mem::swap(&mut self.files_with_identical_size, &mut btree_map);
|
||||||
|
let reference_directories = self.directories.reference_directories.clone();
|
||||||
|
let vec = btree_map
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|(_size, vec_file_entry)| {
|
||||||
|
let mut files_from_referenced_folders = Vec::new();
|
||||||
|
let mut normal_files = Vec::new();
|
||||||
|
for file_entry in vec_file_entry {
|
||||||
|
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
|
||||||
|
files_from_referenced_folders.push(file_entry);
|
||||||
|
} else {
|
||||||
|
normal_files.push(file_entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<(FileEntry, Vec<FileEntry>)>>();
|
||||||
|
for (fe, vec_fe) in vec {
|
||||||
|
self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO Generalize this if possible
|
||||||
|
fn prepare_hash_thread_handler(
|
||||||
|
&self,
|
||||||
|
progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>,
|
||||||
|
progress_thread_run: Arc<AtomicBool>,
|
||||||
|
atomic_counter: Arc<AtomicUsize>,
|
||||||
|
current_stage: u8,
|
||||||
|
max_stage: u8,
|
||||||
|
max_value: usize,
|
||||||
|
) -> JoinHandle<()> {
|
||||||
|
if let Some(progress_sender) = progress_sender {
|
||||||
|
let progress_send = progress_sender.clone();
|
||||||
|
let progress_thread_run = progress_thread_run;
|
||||||
|
let atomic_counter = atomic_counter;
|
||||||
|
let checking_method = self.check_method;
|
||||||
|
thread::spawn(move || loop {
|
||||||
|
progress_send
|
||||||
|
.unbounded_send(ProgressData {
|
||||||
|
checking_method,
|
||||||
|
current_stage,
|
||||||
|
max_stage,
|
||||||
|
entries_checked: atomic_counter.load(Ordering::Relaxed),
|
||||||
|
entries_to_check: max_value,
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
if !progress_thread_run.load(Ordering::Relaxed) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(LOOP_DURATION as u64));
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
thread::spawn(|| {})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// The slowest checking type, which must be applied after checking for size
|
/// The slowest checking type, which must be applied after checking for size
|
||||||
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&futures::channel::mpsc::UnboundedSender<ProgressData>>) -> bool {
|
||||||
assert_eq!(self.check_method, CheckingMethod::Hash);
|
assert_eq!(self.check_method, CheckingMethod::Hash);
|
||||||
|
@ -523,37 +566,16 @@ impl DuplicateFinder {
|
||||||
let check_was_stopped = AtomicBool::new(false); // Used for breaking from GUI and ending check thread
|
let check_was_stopped = AtomicBool::new(false); // Used for breaking from GUI and ending check thread
|
||||||
let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
|
||||||
|
|
||||||
//// PROGRESS THREAD START
|
|
||||||
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
||||||
|
|
||||||
let atomic_file_counter = Arc::new(AtomicUsize::new(0));
|
let atomic_file_counter = Arc::new(AtomicUsize::new(0));
|
||||||
|
let progress_thread_handle = self.prepare_hash_thread_handler(
|
||||||
let progress_thread_handle = if let Some(progress_sender) = progress_sender {
|
progress_sender,
|
||||||
let progress_send = progress_sender.clone();
|
progress_thread_run.clone(),
|
||||||
let progress_thread_run = progress_thread_run.clone();
|
atomic_file_counter.clone(),
|
||||||
let atomic_file_counter = atomic_file_counter.clone();
|
1,
|
||||||
let files_to_check = self.files_with_identical_size.values().map(Vec::len).sum();
|
2,
|
||||||
let checking_method = self.check_method;
|
self.files_with_identical_size.values().map(Vec::len).sum(),
|
||||||
thread::spawn(move || loop {
|
);
|
||||||
progress_send
|
|
||||||
.unbounded_send(ProgressData {
|
|
||||||
checking_method,
|
|
||||||
current_stage: 1,
|
|
||||||
max_stage: 2,
|
|
||||||
entries_checked: atomic_file_counter.load(Ordering::Relaxed),
|
|
||||||
entries_to_check: files_to_check,
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
if !progress_thread_run.load(Ordering::Relaxed) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sleep(Duration::from_millis(LOOP_DURATION as u64));
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
thread::spawn(|| {})
|
|
||||||
};
|
|
||||||
|
|
||||||
//// PROGRESS THREAD END
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////// PREHASHING START
|
///////////////////////////////////////////////////////////////////////////// PREHASHING START
|
||||||
{
|
{
|
||||||
|
@ -684,33 +706,16 @@ impl DuplicateFinder {
|
||||||
|
|
||||||
//// PROGRESS THREAD START
|
//// PROGRESS THREAD START
|
||||||
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
let progress_thread_run = Arc::new(AtomicBool::new(true));
|
||||||
|
|
||||||
let atomic_file_counter = Arc::new(AtomicUsize::new(0));
|
let atomic_file_counter = Arc::new(AtomicUsize::new(0));
|
||||||
|
|
||||||
let progress_thread_handle = if let Some(progress_sender) = progress_sender {
|
let progress_thread_handle = self.prepare_hash_thread_handler(
|
||||||
let progress_send = progress_sender.clone();
|
progress_sender,
|
||||||
let progress_thread_run = progress_thread_run.clone();
|
progress_thread_run.clone(),
|
||||||
let atomic_file_counter = atomic_file_counter.clone();
|
atomic_file_counter.clone(),
|
||||||
let files_to_check = pre_checked_map.values().map(Vec::len).sum();
|
2,
|
||||||
let checking_method = self.check_method;
|
2,
|
||||||
thread::spawn(move || loop {
|
pre_checked_map.values().map(Vec::len).sum(),
|
||||||
progress_send
|
);
|
||||||
.unbounded_send(ProgressData {
|
|
||||||
checking_method,
|
|
||||||
current_stage: 2,
|
|
||||||
max_stage: 2,
|
|
||||||
entries_checked: atomic_file_counter.load(Ordering::Relaxed),
|
|
||||||
entries_to_check: files_to_check,
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
if !progress_thread_run.load(Ordering::Relaxed) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sleep(Duration::from_millis(LOOP_DURATION as u64));
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
thread::spawn(|| {})
|
|
||||||
};
|
|
||||||
|
|
||||||
//// PROGRESS THREAD END
|
//// PROGRESS THREAD END
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue