1
0
Fork 0
mirror of synced 2024-05-18 19:32:43 +12:00

Support hard links for similar images and videos with -L

This ignores matches for files that have the same inode.

This only works on Unix.
This commit is contained in:
Thomas Jung 2024-01-29 11:03:50 +01:00
parent 6cde5ab7a0
commit 183f333f64
5 changed files with 234 additions and 12 deletions

View file

@ -212,6 +212,8 @@ pub struct SimilarImagesArgs {
#[clap(flatten)] #[clap(flatten)]
pub delete_method: DMethod, pub delete_method: DMethod,
#[clap(flatten)] #[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun, pub dry_run: DryRun,
#[clap( #[clap(
short = 'g', short = 'g',
@ -355,6 +357,8 @@ pub struct SimilarVideosArgs {
#[clap(flatten)] #[clap(flatten)]
pub delete_method: DMethod, pub delete_method: DMethod,
#[clap(flatten)] #[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun, pub dry_run: DryRun,
#[clap( #[clap(
short, short,

View file

@ -185,6 +185,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
hash_size, hash_size,
delete_method, delete_method,
dry_run, dry_run,
allow_hard_links,
} = similar_images; } = similar_images;
let mut item = SimilarImages::new(); let mut item = SimilarImages::new();
@ -198,6 +199,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
item.set_delete_method(delete_method.delete_method); item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run); item.set_dry_run(dry_run.dry_run);
item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size)); item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
item.find_similar_images(Some(stop_receiver), Some(progress_sender)); item.find_similar_images(Some(stop_receiver), Some(progress_sender));
@ -272,6 +274,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
maximal_file_size, maximal_file_size,
delete_method, delete_method,
dry_run, dry_run,
allow_hard_links,
} = similar_videos; } = similar_videos;
let mut item = SimilarVideos::new(); let mut item = SimilarVideos::new();
@ -282,6 +285,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
item.set_tolerance(tolerance); item.set_tolerance(tolerance);
item.set_delete_method(delete_method.delete_method); item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run); item.set_dry_run(dry_run.dry_run);
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
item.find_similar_videos(Some(stop_receiver), Some(progress_sender)); item.find_similar_videos(Some(stop_receiver), Some(progress_sender));

View file

@ -1,6 +1,8 @@
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fs; use std::fs;
use std::fs::{DirEntry, FileType, Metadata, ReadDir}; use std::fs::{DirEntry, FileType, Metadata};
#[cfg(target_family = "unix")]
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use std::time::UNIX_EPOCH; use std::time::UNIX_EPOCH;
@ -92,7 +94,7 @@ pub enum Collect {
Files, Files,
} }
#[derive(Eq, PartialEq, Copy, Clone)] #[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum EntryType { enum EntryType {
File, File,
Dir, Dir,
@ -546,9 +548,17 @@ fn process_symlink_in_symlink_mode(
fe_result.push(fe); fe_result.push(fe);
} }
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<ReadDir> { pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<Vec<Result<DirEntry, std::io::Error>>> {
match fs::read_dir(current_folder) { match fs::read_dir(current_folder) {
Ok(t) => Some(t), Ok(t) => {
// Make directory traversal order stable
let mut r: Vec<_> = t.collect();
r.sort_by_key(|d| match d {
Ok(f) => f.path(),
_ => PathBuf::new(),
});
Some(r)
}
Err(e) => { Err(e) => {
warnings.push(flc!( warnings.push(flc!(
"core_cannot_open_dir", "core_cannot_open_dir",
@ -634,3 +644,195 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
} }
} }
} }
/// Fallback for targets outside the Unix family: no inode number is available here,
/// so every entry maps to `None` and is never treated as a hard link of another entry.
///
/// Gated on "not Unix" rather than "Windows" so that the function exists on every
/// target, not only on the two most common families.
#[cfg(not(target_family = "unix"))]
pub fn inode(_fe: &FileEntry) -> Option<u64> {
    None
}

/// Returns the inode number of the file at `fe.path`, or `None` when its metadata
/// cannot be read (for example because the file no longer exists).
///
/// Paths that are hard links to the same file yield the same number, which makes the
/// result usable as a grouping key.
///
/// NOTE(review): inode numbers are only unique within a single filesystem; if scans can
/// span several devices, confirm whether the key should also take `dev()` into account.
#[cfg(target_family = "unix")]
pub fn inode(fe: &FileEntry) -> Option<u64> {
    fs::metadata(&fe.path).ok().map(|meta| meta.ino())
}
/// Collapses one `(inode, entries)` group to a single representative entry.
///
/// * When the key is `Some(_)`, every entry in the group refers to the same inode, so only
///   the first one is kept.
/// * When the key is `None`, no inode was available and nothing can be said about the
///   entries being hard links of each other, so the group is returned untouched.
///
/// An empty group is returned as-is for either kind of key.
pub fn take_1_per_inode((k, mut v): (Option<u64>, Vec<FileEntry>)) -> Vec<FileEntry> {
    if k.is_some() {
        // `truncate` is a no-op on an empty vector, whereas draining `1..` would panic.
        v.truncate(1);
    }
    v
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common_tool::*;
    use once_cell::sync::Lazy;
    use std::fs;
    use std::fs::File;
    use std::io;
    use std::io::prelude::*;
    use std::time::{Duration, SystemTime};
    use tempfile::TempDir;

    // Lets the traversal builder consume a bare `CommonToolData` in these tests.
    impl CommonData for CommonToolData {
        fn get_cd(&self) -> &CommonToolData {
            self
        }
        fn get_cd_mut(&mut self) -> &mut CommonToolData {
            self
        }
    }

    /// Fixed modification time stamped on every fixture file so expectations are stable.
    static NOW: Lazy<SystemTime> = Lazy::new(|| SystemTime::UNIX_EPOCH + Duration::new(100, 0));
    /// One-byte payload written to every fixture file.
    const CONTENT: &[u8; 1] = b"a";

    /// Creates three files in `dir`: `a`, `b` (a hard link to `a`) and `c` (an independent
    /// file with the same content), and returns their paths in that order.
    fn create_files(dir: &TempDir) -> io::Result<(PathBuf, PathBuf, PathBuf)> {
        let (src, hard, other) = (dir.path().join("a"), dir.path().join("b"), dir.path().join("c"));

        let mut file = File::create(&src)?;
        file.write_all(CONTENT)?;
        fs::hard_link(&src, &hard)?;
        file.set_modified(*NOW)?;

        let mut file = File::create(&other)?;
        file.write_all(CONTENT)?;
        file.set_modified(*NOW)?;

        Ok((src, hard, other))
    }

    /// `NOW` expressed as whole seconds since the Unix epoch.
    fn now_secs() -> u64 {
        NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs()
    }

    /// Tool configuration that scans only `dir` and applies no minimum file size.
    fn scan_config(dir: &TempDir) -> CommonToolData {
        let mut common_data = CommonToolData::new(ToolType::SimilarImages);
        common_data.directories.set_included_directory(vec![dir.path().to_owned()]);
        common_data.set_minimal_file_size(0);
        common_data
    }

    /// The entry the traversal is expected to report for one fixture file.
    fn expected_entry(path: PathBuf, secs: u64) -> FileEntry {
        FileEntry {
            path,
            size: 1,
            modified_date: secs,
        }
    }

    /// Without grouping, every file is reported, in traversal order.
    #[test]
    fn test_traversal() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let secs = now_secs();
        let common_data = scan_config(&dir);

        match DirTraversalBuilder::new().group_by(|_fe| ()).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles {
                warnings: _,
                grouped_file_entries,
            } => {
                let actual: Vec<_> = grouped_file_entries.into_values().flatten().collect();
                assert_eq!(
                    vec![expected_entry(src, secs), expected_entry(hard, secs), expected_entry(other, secs)],
                    actual
                );
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }

    /// On Unix, grouping by inode and keeping one entry per group drops the hard link.
    #[cfg(target_family = "unix")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, _, other) = create_files(&dir)?;
        let secs = now_secs();
        let common_data = scan_config(&dir);

        match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles {
                warnings: _,
                grouped_file_entries,
            } => {
                let mut actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
                // Groups come back ordered by inode number, which the filesystem assigns
                // freely; sort by path so the assertion does not depend on that order.
                actual.sort_by(|left, right| left.path.cmp(&right.path));
                assert_eq!(vec![expected_entry(src, secs), expected_entry(other, secs)], actual);
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }

    /// On Windows no inode is available, so all three files stay in the result.
    #[cfg(target_family = "windows")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let secs = now_secs();
        let common_data = scan_config(&dir);

        match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles {
                warnings: _,
                grouped_file_entries,
            } => {
                let actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
                assert_eq!(
                    vec![expected_entry(src, secs), expected_entry(hard, secs), expected_entry(other, secs)],
                    actual
                );
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }
}

View file

@ -23,7 +23,7 @@ use crate::common::{
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
}; };
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc; use crate::flc;
@ -122,6 +122,7 @@ pub struct SimilarImages {
hash_alg: HashAlg, hash_alg: HashAlg,
image_filter: FilterType, image_filter: FilterType,
exclude_images_with_same_size: bool, exclude_images_with_same_size: bool,
ignore_hard_links: bool,
} }
#[derive(Default)] #[derive(Default)]
@ -145,6 +146,7 @@ impl SimilarImages {
hash_alg: HashAlg::Gradient, hash_alg: HashAlg::Gradient,
image_filter: FilterType::Lanczos3, image_filter: FilterType::Lanczos3,
exclude_images_with_same_size: false, exclude_images_with_same_size: false,
ignore_hard_links: false,
} }
} }
@ -188,7 +190,7 @@ impl SimilarImages {
let heic_extensions = HEIC_EXTENSIONS.iter().collect::<HashSet<_>>(); let heic_extensions = HEIC_EXTENSIONS.iter().collect::<HashSet<_>>();
let result = DirTraversalBuilder::new() let result = DirTraversalBuilder::new()
.group_by(|_fe| ()) .group_by(inode)
.stop_receiver(stop_receiver) .stop_receiver(stop_receiver)
.progress_sender(progress_sender) .progress_sender(progress_sender)
.common_data(&self.common_data) .common_data(&self.common_data)
@ -199,8 +201,8 @@ impl SimilarImages {
match result { match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.images_to_check = grouped_file_entries self.images_to_check = grouped_file_entries
.into_values() .into_iter()
.flatten() .flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| { .map(|fe| {
let fe_str = fe.path.to_string_lossy().to_string(); let fe_str = fe.path.to_string_lossy().to_string();
let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase(); let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase();
@ -1090,6 +1092,10 @@ impl SimilarImages {
pub fn set_similarity(&mut self, similarity: u32) { pub fn set_similarity(&mut self, similarity: u32) {
self.similarity = similarity; self.similarity = similarity;
} }
/// Stores `ignore_hard_links` on this `SimilarImages` instance.
///
/// NOTE(review): the scan code in this file keeps every entry of an inode group when the
/// flag is `true` and passes groups through `take_1_per_inode` when it is `false`;
/// confirm that this polarity matches what callers expect from the name.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
} }
#[cfg(test)] #[cfg(test)]

View file

@ -16,7 +16,7 @@ use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS}; use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc; use crate::flc;
@ -83,6 +83,7 @@ pub struct SimilarVideos {
videos_to_check: BTreeMap<String, VideosEntry>, videos_to_check: BTreeMap<String, VideosEntry>,
tolerance: i32, tolerance: i32,
exclude_videos_with_same_size: bool, exclude_videos_with_same_size: bool,
ignore_hard_links: bool,
} }
impl CommonData for SimilarVideos { impl CommonData for SimilarVideos {
@ -111,6 +112,7 @@ impl SimilarVideos {
tolerance: 10, tolerance: 10,
exclude_videos_with_same_size: false, exclude_videos_with_same_size: false,
similar_referenced_vectors: vec![], similar_referenced_vectors: vec![],
ignore_hard_links: false,
} }
} }
@ -149,7 +151,7 @@ impl SimilarVideos {
} }
let result = DirTraversalBuilder::new() let result = DirTraversalBuilder::new()
.group_by(|_fe| ()) .group_by(inode)
.stop_receiver(stop_receiver) .stop_receiver(stop_receiver)
.progress_sender(progress_sender) .progress_sender(progress_sender)
.common_data(&self.common_data) .common_data(&self.common_data)
@ -160,8 +162,8 @@ impl SimilarVideos {
match result { match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.videos_to_check = grouped_file_entries self.videos_to_check = grouped_file_entries
.into_values() .into_iter()
.flatten() .flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry())) .map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
.collect(); .collect();
self.common_data.text_messages.warnings.extend(warnings); self.common_data.text_messages.warnings.extend(warnings);
@ -454,4 +456,8 @@ impl SimilarVideos {
pub fn get_use_reference(&self) -> bool { pub fn get_use_reference(&self) -> bool {
self.common_data.use_reference_folders self.common_data.use_reference_folders
} }
/// Stores `ignore_hard_links` on this `SimilarVideos` instance.
///
/// NOTE(review): the scan code in this file keeps every entry of an inode group when the
/// flag is `true` and passes groups through `take_1_per_inode` when it is `false`;
/// confirm that this polarity matches what callers expect from the name.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
} }