1
0
Fork 0
mirror of synced 2024-05-17 19:03:08 +12:00

Reference folder and swap cleaning

This commit is contained in:
Rafał Mikrut 2023-05-06 18:57:03 +02:00
parent f121a57af9
commit fd38b4fac6
13 changed files with 45 additions and 109 deletions

View file

@ -7,7 +7,6 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use mime_guess::get_mime_extensions;
@ -328,8 +327,7 @@ impl BadExtensions {
CheckingMethod::None,
);
let mut files_to_check = Default::default();
mem::swap(&mut files_to_check, &mut self.files_to_check);
let files_to_check = mem::take(&mut self.files_to_check);
let mut hashmap_workarounds: HashMap<&str, Vec<&str>> = Default::default();
for (proper, found) in WORKAROUNDS {

View file

@ -7,14 +7,12 @@ use std::sync::atomic::AtomicBool;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use humansize::format_size;
use humansize::BINARY;
use rayon::prelude::*;
use crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path};
use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData};
use crate::common_directory::Directories;

View file

@ -407,8 +407,7 @@ impl BrokenFiles {
let mut records_already_cached: BTreeMap<String, FileEntry> = Default::default();
let mut non_cached_files_to_check: BTreeMap<String, FileEntry> = Default::default();
let mut files_to_check = Default::default();
mem::swap(&mut self.files_to_check, &mut files_to_check);
let files_to_check = mem::take(&mut self.files_to_check);
if self.use_cache {
loaded_hash_map = match load_cache_from_file(&mut self.text_messages, self.delete_outdated_cache) {

View file

@ -4,7 +4,7 @@ use std::fs::{DirEntry, Metadata, ReadDir};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::{UNIX_EPOCH};
use std::time::UNIX_EPOCH;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
@ -14,6 +14,7 @@ use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_traits::ResultEntry;
use crate::flc;
use crate::localizer_core::generate_translation_hashmap;
@ -43,6 +44,11 @@ pub struct FileEntry {
pub hash: String,
pub symlink_info: Option<SymlinkInfo>,
}
/// Makes `FileEntry` usable wherever code is written against the `ResultEntry` trait.
impl ResultEntry for FileEntry {
/// Returns a borrowed view of this entry's `path` field; the entry itself is not modified.
fn get_path(&self) -> &Path {
&self.path
}
}
// Symlinks

View file

@ -1,6 +1,3 @@
use crate::common_messages::Messages;
#[derive(Clone, Default)]

View file

@ -1,6 +1,5 @@
use std::path::Path;
use crate::common::Common;
use crate::common_messages::Messages;

View file

@ -380,21 +380,11 @@ impl DuplicateFinder {
// Reference - only use in size, because later hash will be counted differently
if self.use_reference_folders {
let mut btree_map = Default::default();
mem::swap(&mut self.files_with_identical_names, &mut btree_map);
let reference_directories = self.directories.reference_directories.clone();
let vec = btree_map
let vec = mem::take(&mut self.files_with_identical_names)
.into_iter()
.filter_map(|(_size, vec_file_entry)| {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
.filter_map(|(_name, vec_file_entry)| {
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
None
@ -470,21 +460,11 @@ impl DuplicateFinder {
// Reference - only use in size, because later hash will be counted differently
if self.use_reference_folders {
let mut btree_map = Default::default();
mem::swap(&mut self.files_with_identical_size_names, &mut btree_map);
let reference_directories = self.directories.reference_directories.clone();
let vec = btree_map
let vec = mem::take(&mut self.files_with_identical_size_names)
.into_iter()
.filter_map(|(_size, vec_file_entry)| {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
None
@ -554,8 +534,7 @@ impl DuplicateFinder {
self.text_messages.warnings.extend(warnings);
// Create a new BTreeMap without single-entry sizes (files that have no duplicates)
let mut old_map: BTreeMap<u64, Vec<FileEntry>> = Default::default();
mem::swap(&mut old_map, &mut self.files_with_identical_size);
let old_map: BTreeMap<u64, Vec<FileEntry>> = mem::take(&mut self.files_with_identical_size);
for (size, vec) in old_map {
if vec.len() <= 1 {
@ -601,21 +580,11 @@ impl DuplicateFinder {
/// This is needed because the later reference-folder handling looks for hashes, not sizes
fn filter_reference_folders_by_size(&mut self) {
if self.use_reference_folders && self.check_method == CheckingMethod::Size {
let mut btree_map = Default::default();
mem::swap(&mut self.files_with_identical_size, &mut btree_map);
let reference_directories = self.directories.reference_directories.clone();
let vec = btree_map
let vec = mem::take(&mut self.files_with_identical_size)
.into_iter()
.filter_map(|(_size, vec_file_entry)| {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
None
@ -946,23 +915,13 @@ impl DuplicateFinder {
fn hash_reference_folders(&mut self) {
// Reference - only use in size, because later hash will be counted differently
if self.use_reference_folders {
let mut btree_map = Default::default();
mem::swap(&mut self.files_with_identical_hashes, &mut btree_map);
let reference_directories = self.directories.reference_directories.clone();
let vec = btree_map
let vec = mem::take(&mut self.files_with_identical_hashes)
.into_iter()
.filter_map(|(_size, vec_vec_file_entry)| {
let mut all_results_with_same_size = Vec::new();
for vec_file_entry in vec_vec_file_entry {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
continue;

View file

@ -4,11 +4,9 @@ use std::io::prelude::*;
use std::io::BufWriter;
use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData};
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;

View file

@ -4,11 +4,9 @@ use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, FolderEmptiness, FolderEntry, ProgressData};
use crate::common_directory::Directories;
use crate::common_items::ExcludedItems;

View file

@ -4,11 +4,9 @@ use std::io::prelude::*;
use std::io::BufWriter;
use std::path::PathBuf;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, ErrorType, FileEntry, ProgressData};
use crate::common_directory::Directories;
use crate::common_extensions::Extensions;

View file

@ -30,7 +30,7 @@ use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
use crate::flc;
type ImHash = Vec<u8>;
@ -51,6 +51,11 @@ pub struct FileEntry {
pub hash: ImHash,
pub similarity: u32,
}
/// `ResultEntry` implementation for the image `FileEntry`, exposing where the entry lives on disk.
impl ResultEntry for FileEntry {
/// Borrows the `path` field and hands it back as a `&Path`.
fn get_path(&self) -> &Path {
&self.path
}
}
/// Used by CLI tool when we cannot use directly values
#[derive(Clone, Debug, Copy)]
@ -423,7 +428,7 @@ impl SimilarImages {
// Cache algorithm:
// - Load data from file
// - Remove from data to search, already loaded entries from cache(size and modified datamust match)
// - Remove from data to search, already loaded entries from cache(size and modified date must match)
// - Check hashes of files which don't have a saved entry
// - Join already read hashes with hashes which were read from file
// - Join all hashes and save it to file
@ -810,8 +815,7 @@ impl SimilarImages {
// Results
let mut collected_similar_images: HashMap<ImHash, Vec<FileEntry>> = Default::default();
let mut all_hashed_images = Default::default();
mem::swap(&mut all_hashed_images, &mut self.image_hashes);
let all_hashed_images = mem::take(&mut self.image_hashes);
let all_hashes: Vec<_> = all_hashed_images.clone().into_keys().collect();
@ -877,7 +881,7 @@ impl SimilarImages {
}
}
// Clean unused data
// Clean unused data to save ram
self.image_hashes = Default::default();
self.images_to_check = Default::default();
self.bktree = BKTree::new(Hamming);
@ -887,9 +891,7 @@ impl SimilarImages {
fn exclude_items_with_same_size(&mut self) {
if self.exclude_images_with_same_size {
let mut new_vector = Default::default();
mem::swap(&mut self.similar_vectors, &mut new_vector);
for vec_file_entry in new_vector {
for vec_file_entry in mem::take(&mut self.similar_vectors) {
let mut bt_sizes: BTreeSet<u64> = Default::default();
let mut vec_values = Vec::new();
for file_entry in vec_file_entry {
@ -907,21 +909,11 @@ impl SimilarImages {
fn check_for_reference_folders(&mut self) {
if self.use_reference_folders {
let mut similar_vector = Default::default();
mem::swap(&mut self.similar_vectors, &mut similar_vector);
let reference_directories = self.directories.reference_directories.clone();
self.similar_referenced_vectors = similar_vector
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
.into_iter()
.filter_map(|vec_file_entry| {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
None

View file

@ -24,7 +24,7 @@ use crate::common_directory::Directories;
use crate::common_extensions::Extensions;
use crate::common_items::ExcludedItems;
use crate::common_messages::Messages;
use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry, SaveResults};
use crate::flc;
use crate::localizer_core::generate_translation_hashmap;
@ -38,6 +38,11 @@ pub struct FileEntry {
pub vhash: VideoHash,
pub error: String,
}
/// `ResultEntry` implementation for the video `FileEntry`, so trait-based code can query its path.
impl ResultEntry for FileEntry {
/// Gives read-only access to the `path` field as a `&Path`.
fn get_path(&self) -> &Path {
&self.path
}
}
/// Distance metric to use with the BK-tree.
struct Hamming;
@ -508,21 +513,11 @@ impl SimilarVideos {
fn remove_from_reference_folders(&mut self) {
if self.use_reference_folders {
let mut similar_vector = Default::default();
mem::swap(&mut self.similar_vectors, &mut similar_vector);
let reference_directories = self.directories.reference_directories.clone();
self.similar_referenced_vectors = similar_vector
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
.into_iter()
.filter_map(|vec_file_entry| {
let mut files_from_referenced_folders = Vec::new();
let mut normal_files = Vec::new();
for file_entry in vec_file_entry {
if reference_directories.iter().any(|e| file_entry.path.starts_with(e)) {
files_from_referenced_folders.push(file_entry);
} else {
normal_files.push(file_entry);
}
}
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) =
vec_file_entry.into_iter().partition(|e| self.directories.is_referenced_directory(e.get_path()));
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
None

View file

@ -6,7 +6,6 @@ use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use crossbeam_channel::Receiver;
use futures::channel::mpsc::UnboundedSender;
use rayon::prelude::*;