1
0
Fork 0
mirror of synced 2024-05-03 03:52:58 +12:00

Support hard links for similar images and videos with -L

This ignores matches for files that have the same inode.

This only works on Unix.
This commit is contained in:
Thomas Jung 2024-01-29 11:03:50 +01:00
parent 6cde5ab7a0
commit 183f333f64
5 changed files with 234 additions and 12 deletions

View file

@ -212,6 +212,8 @@ pub struct SimilarImagesArgs {
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(
short = 'g',
@ -355,6 +357,8 @@ pub struct SimilarVideosArgs {
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(
short,

View file

@ -185,6 +185,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
hash_size,
delete_method,
dry_run,
allow_hard_links,
} = similar_images;
let mut item = SimilarImages::new();
@ -198,6 +199,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
item.find_similar_images(Some(stop_receiver), Some(progress_sender));
@ -272,6 +274,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
maximal_file_size,
delete_method,
dry_run,
allow_hard_links,
} = similar_videos;
let mut item = SimilarVideos::new();
@ -282,6 +285,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
item.set_tolerance(tolerance);
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
item.find_similar_videos(Some(stop_receiver), Some(progress_sender));

View file

@ -1,6 +1,8 @@
use std::collections::BTreeMap;
use std::fs;
use std::fs::{DirEntry, FileType, Metadata, ReadDir};
use std::fs::{DirEntry, FileType, Metadata};
#[cfg(target_family = "unix")]
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;
use std::time::UNIX_EPOCH;
@ -92,7 +94,7 @@ pub enum Collect {
Files,
}
#[derive(Eq, PartialEq, Copy, Clone)]
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum EntryType {
File,
Dir,
@ -546,9 +548,17 @@ fn process_symlink_in_symlink_mode(
fe_result.push(fe);
}
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<ReadDir> {
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<Vec<Result<DirEntry, std::io::Error>>> {
match fs::read_dir(current_folder) {
Ok(t) => Some(t),
Ok(t) => {
// Make directory traversal order stable
let mut r: Vec<_> = t.collect();
r.sort_by_key(|d| match d {
Ok(f) => f.path(),
_ => PathBuf::new(),
});
Some(r)
}
Err(e) => {
warnings.push(flc!(
"core_cannot_open_dir",
@ -634,3 +644,195 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
}
}
}
/// Fallback grouping key for platforms that do not expose Unix inode numbers.
///
/// Always returns `None`, so every entry ends up under the same key and no
/// hard-link detection takes place. The gate is `not(unix)` rather than
/// `windows` so that targets which are neither (for example wasm) still get
/// a definition instead of failing to compile.
#[cfg(not(target_family = "unix"))]
pub fn inode(_fe: &FileEntry) -> Option<u64> {
    None
}

/// Returns the inode number of the file at `fe.path`.
///
/// The metadata is read with [`fs::metadata`], which follows symbolic links.
/// Returns `None` when the metadata cannot be read (missing file, permission
/// error, ...); the error itself is discarded.
///
/// NOTE(review): only the inode number is returned, not the device id. Inode
/// numbers are unique per filesystem only, so entries on different devices
/// could share a key - confirm whether scans spanning several filesystems
/// need `dev()` taken into account as well.
#[cfg(target_family = "unix")]
pub fn inode(fe: &FileEntry) -> Option<u64> {
    fs::metadata(&fe.path).ok().map(|meta| meta.ino())
}
/// Reduces one group of entries to a single representative when the group
/// has an inode key.
///
/// Takes the `(key, entries)` pair produced by iterating a map grouped by
/// [`inode`]:
/// * `Some(_)` key - every entry after the first is dropped, so at most one
///   entry is returned.
/// * `None` key - the entries are returned unchanged, because no inode was
///   available to tell them apart.
///
/// An empty `entries` vector is returned as is.
pub fn take_1_per_inode((k, mut v): (Option<u64>, Vec<FileEntry>)) -> Vec<FileEntry> {
    if k.is_some() {
        // `truncate` is a no-op on an empty vector, whereas `drain(1..)`
        // panics when the vector has no elements.
        v.truncate(1);
    }
    v
}
// Tests for directory traversal and for grouping traversal results by inode.
#[cfg(test)]
mod tests {
use super::*;
use crate::common_tool::*;
use once_cell::sync::Lazy;
use std::fs;
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::time::{Duration, SystemTime};
use tempfile::TempDir;
// Test-only impl: both accessors hand back the value itself, so a bare
// `CommonToolData` can be passed to `DirTraversalBuilder::common_data` below.
impl CommonData for CommonToolData {
fn get_cd(&self) -> &CommonToolData {
self
}
fn get_cd_mut(&mut self) -> &mut CommonToolData {
self
}
}
// Fixed modification time (100 seconds after the Unix epoch) applied to the
// fixture files so the expected `modified_date` is known in advance.
static NOW: Lazy<SystemTime> = Lazy::new(|| SystemTime::UNIX_EPOCH + Duration::new(100, 0));
// One byte of content, which is why the tests expect `size: 1`.
const CONTENT: &[u8; 1] = b"a";
// Creates three files in `dir`:
//   "a" - a regular file containing CONTENT,
//   "b" - a hard link to "a",
//   "c" - a separate regular file containing CONTENT.
// The modification time of "a" and of "c" is set to NOW through their open
// handles. Returns the paths in the order (a, b, c).
fn create_files(dir: &TempDir) -> io::Result<(PathBuf, PathBuf, PathBuf)> {
let (src, hard, other) = (dir.path().join("a"), dir.path().join("b"), dir.path().join("c"));
let mut file = File::create(&src)?;
file.write_all(CONTENT)?;
fs::hard_link(&src, &hard)?;
// Set through the handle of "a"; presumably "b" reports the same time
// because it is a hard link to the same file.
file.set_modified(*NOW)?;
let mut file = File::create(&other)?;
file.write_all(CONTENT)?;
file.set_modified(*NOW)?;
Ok((src, hard, other))
}
// Groups every entry under the unit key and expects all three files
// (a, b, c) to be reported, in that order.
#[test]
fn test_traversal() -> io::Result<()> {
let dir = tempfile::Builder::new().tempdir()?;
let (src, hard, other) = create_files(&dir)?;
let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
let mut common_data = CommonToolData::new(ToolType::SimilarImages);
common_data.directories.set_included_directory([dir.path().to_owned()].to_vec());
common_data.set_minimal_file_size(0);
match DirTraversalBuilder::new().group_by(|_fe| ()).common_data(&common_data).build().run() {
DirTraversalResult::SuccessFiles {
warnings: _,
grouped_file_entries,
} => {
let actual: Vec<_> = grouped_file_entries.into_values().flatten().collect();
assert_eq!(
[
FileEntry {
path: src,
size: 1,
modified_date: secs,
},
FileEntry {
path: hard,
size: 1,
modified_date: secs,
},
FileEntry {
path: other,
size: 1,
modified_date: secs,
},
]
.to_vec(),
actual
);
}
_ => {
panic!("Expect SuccessFiles.");
}
};
Ok(())
}
// Unix: groups entries by `inode` and keeps one entry per inode via
// `take_1_per_inode`, so the hard link "b" is expected to be dropped and only
// "a" and "c" remain.
// NOTE(review): the expected order (a before c) appears to depend on the
// iteration order of the inode-keyed groups, i.e. on "a" having the smaller
// inode number - confirm this is stable on the filesystems CI runs on.
#[cfg(target_family = "unix")]
#[test]
fn test_traversal_group_by_inode() -> io::Result<()> {
let dir = tempfile::Builder::new().tempdir()?;
let (src, _, other) = create_files(&dir)?;
let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
let mut common_data = CommonToolData::new(ToolType::SimilarImages);
common_data.directories.set_included_directory([dir.path().to_owned()].to_vec());
common_data.set_minimal_file_size(0);
match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
DirTraversalResult::SuccessFiles {
warnings: _,
grouped_file_entries,
} => {
let actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
assert_eq!(
[
FileEntry {
path: src,
size: 1,
modified_date: secs,
},
FileEntry {
path: other,
size: 1,
modified_date: secs,
},
]
.to_vec(),
actual
);
}
_ => {
panic!("Expect SuccessFiles.");
}
};
Ok(())
}
// Windows: `inode` always returns `None` there, and `take_1_per_inode` leaves
// `None`-keyed groups untouched, so all three files (a, b, c) are expected.
#[cfg(target_family = "windows")]
#[test]
fn test_traversal_group_by_inode() -> io::Result<()> {
let dir = tempfile::Builder::new().tempdir()?;
let (src, hard, other) = create_files(&dir)?;
let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
let mut common_data = CommonToolData::new(ToolType::SimilarImages);
common_data.directories.set_included_directory([dir.path().to_owned()].to_vec());
common_data.set_minimal_file_size(0);
match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
DirTraversalResult::SuccessFiles {
warnings: _,
grouped_file_entries,
} => {
let actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
assert_eq!(
[
FileEntry {
path: src,
size: 1,
modified_date: secs,
},
FileEntry {
path: hard,
size: 1,
modified_date: secs,
},
FileEntry {
path: other,
size: 1,
modified_date: secs,
},
]
.to_vec(),
actual
);
}
_ => {
panic!("Expect SuccessFiles.");
}
};
Ok(())
}
}

View file

@ -23,7 +23,7 @@ use crate::common::{
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
};
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
@ -122,6 +122,7 @@ pub struct SimilarImages {
hash_alg: HashAlg,
image_filter: FilterType,
exclude_images_with_same_size: bool,
ignore_hard_links: bool,
}
#[derive(Default)]
@ -145,6 +146,7 @@ impl SimilarImages {
hash_alg: HashAlg::Gradient,
image_filter: FilterType::Lanczos3,
exclude_images_with_same_size: false,
ignore_hard_links: false,
}
}
@ -188,7 +190,7 @@ impl SimilarImages {
let heic_extensions = HEIC_EXTENSIONS.iter().collect::<HashSet<_>>();
let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.group_by(inode)
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
@ -199,8 +201,8 @@ impl SimilarImages {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.images_to_check = grouped_file_entries
.into_values()
.flatten()
.into_iter()
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| {
let fe_str = fe.path.to_string_lossy().to_string();
let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase();
@ -1090,6 +1092,10 @@ impl SimilarImages {
pub fn set_similarity(&mut self, similarity: u32) {
self.similarity = similarity;
}
/// Stores the `ignore_hard_links` flag that the image scan reads when it
/// collects traversal results.
///
/// NOTE(review): in the traversal code of this impl a `true` flag appears to
/// keep every entry of an inode group, while `false` keeps only one entry per
/// inode - confirm that this matches the intended meaning of "ignore".
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
}
#[cfg(test)]

View file

@ -16,7 +16,7 @@ use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
@ -83,6 +83,7 @@ pub struct SimilarVideos {
videos_to_check: BTreeMap<String, VideosEntry>,
tolerance: i32,
exclude_videos_with_same_size: bool,
ignore_hard_links: bool,
}
impl CommonData for SimilarVideos {
@ -111,6 +112,7 @@ impl SimilarVideos {
tolerance: 10,
exclude_videos_with_same_size: false,
similar_referenced_vectors: vec![],
ignore_hard_links: false,
}
}
@ -149,7 +151,7 @@ impl SimilarVideos {
}
let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.group_by(inode)
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
@ -160,8 +162,8 @@ impl SimilarVideos {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.videos_to_check = grouped_file_entries
.into_values()
.flatten()
.into_iter()
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
.collect();
self.common_data.text_messages.warnings.extend(warnings);
@ -454,4 +456,8 @@ impl SimilarVideos {
pub fn get_use_reference(&self) -> bool {
self.common_data.use_reference_folders
}
/// Stores the `ignore_hard_links` flag that the video scan reads when it
/// collects traversal results.
///
/// NOTE(review): in the traversal code of this impl a `true` flag appears to
/// keep every entry of an inode group, while `false` keeps only one entry per
/// inode - confirm that this matches the intended meaning of "ignore".
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
}