Support hard links for similar images and videos with -L
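With -L, files that share the same inode (hard links to the same data) are collapsed to a single entry, so they are not reported as matches of each other. This only works on Unix; on Windows no inode is available and every file is kept.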
This commit is contained in:
parent
6cde5ab7a0
commit
183f333f64
|
@ -212,6 +212,8 @@ pub struct SimilarImagesArgs {
|
|||
#[clap(flatten)]
|
||||
pub delete_method: DMethod,
|
||||
#[clap(flatten)]
|
||||
pub allow_hard_links: AllowHardLinks,
|
||||
#[clap(flatten)]
|
||||
pub dry_run: DryRun,
|
||||
#[clap(
|
||||
short = 'g',
|
||||
|
@ -355,6 +357,8 @@ pub struct SimilarVideosArgs {
|
|||
#[clap(flatten)]
|
||||
pub delete_method: DMethod,
|
||||
#[clap(flatten)]
|
||||
pub allow_hard_links: AllowHardLinks,
|
||||
#[clap(flatten)]
|
||||
pub dry_run: DryRun,
|
||||
#[clap(
|
||||
short,
|
||||
|
|
|
@ -185,6 +185,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
|
|||
hash_size,
|
||||
delete_method,
|
||||
dry_run,
|
||||
allow_hard_links,
|
||||
} = similar_images;
|
||||
|
||||
let mut item = SimilarImages::new();
|
||||
|
@ -198,6 +199,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
|
|||
item.set_delete_method(delete_method.delete_method);
|
||||
item.set_dry_run(dry_run.dry_run);
|
||||
item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
|
||||
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
|
||||
|
||||
item.find_similar_images(Some(stop_receiver), Some(progress_sender));
|
||||
|
||||
|
@ -272,6 +274,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
|
|||
maximal_file_size,
|
||||
delete_method,
|
||||
dry_run,
|
||||
allow_hard_links,
|
||||
} = similar_videos;
|
||||
|
||||
let mut item = SimilarVideos::new();
|
||||
|
@ -282,6 +285,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
|
|||
item.set_tolerance(tolerance);
|
||||
item.set_delete_method(delete_method.delete_method);
|
||||
item.set_dry_run(dry_run.dry_run);
|
||||
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);
|
||||
|
||||
item.find_similar_videos(Some(stop_receiver), Some(progress_sender));
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::fs::{DirEntry, FileType, Metadata, ReadDir};
|
||||
use std::fs::{DirEntry, FileType, Metadata};
|
||||
#[cfg(target_family = "unix")]
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
@ -92,7 +94,7 @@ pub enum Collect {
|
|||
Files,
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq, Copy, Clone)]
|
||||
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
|
||||
enum EntryType {
|
||||
File,
|
||||
Dir,
|
||||
|
@ -546,9 +548,17 @@ fn process_symlink_in_symlink_mode(
|
|||
fe_result.push(fe);
|
||||
}
|
||||
|
||||
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<ReadDir> {
|
||||
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<Vec<Result<DirEntry, std::io::Error>>> {
|
||||
match fs::read_dir(current_folder) {
|
||||
Ok(t) => Some(t),
|
||||
Ok(t) => {
|
||||
// Make directory traversal order stable
|
||||
let mut r: Vec<_> = t.collect();
|
||||
r.sort_by_key(|d| match d {
|
||||
Ok(f) => f.path(),
|
||||
_ => PathBuf::new(),
|
||||
});
|
||||
Some(r)
|
||||
}
|
||||
Err(e) => {
|
||||
warnings.push(flc!(
|
||||
"core_cannot_open_dir",
|
||||
|
@ -634,3 +644,195 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Windows variant of the inode lookup used as a grouping key for hard-link detection.
///
/// No lookup is performed on this platform: the entry is unused and the result is
/// always `None`.
#[cfg(target_family = "windows")]
|
||||
pub fn inode(_fe: &FileEntry) -> Option<u64> {
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
pub fn inode(fe: &FileEntry) -> Option<u64> {
|
||||
if let Ok(meta) = fs::metadata(&fe.path) {
|
||||
Some(meta.ino())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Collapses one inode group to a single representative entry.
///
/// Takes a `(key, entries)` pair as produced by grouping file entries by inode:
/// - when the key is `Some`, only the first entry is kept;
/// - when the key is `None` (inode unknown), every entry is returned unchanged,
///   because unrelated files would otherwise be merged.
///
/// An empty group is returned as-is.
pub fn take_1_per_inode((k, mut v): (Option<u64>, Vec<FileEntry>)) -> Vec<FileEntry> {
    if k.is_some() {
        // `truncate` is a no-op on vectors shorter than the limit, whereas
        // `drain(1..)` would panic on an empty vector.
        v.truncate(1);
    }
    v
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::common_tool::*;
    use once_cell::sync::Lazy;
    use std::fs;
    use std::fs::File;
    use std::io;
    use std::io::prelude::*;
    use std::time::{Duration, SystemTime};
    use tempfile::TempDir;

    // Lets a bare `CommonToolData` be handed to the traversal builder in these tests.
    impl CommonData for CommonToolData {
        fn get_cd(&self) -> &CommonToolData {
            self
        }
        fn get_cd_mut(&mut self) -> &mut CommonToolData {
            self
        }
    }

    /// Fixed modification time stamped on every fixture file (100 s after the Unix epoch).
    static NOW: Lazy<SystemTime> = Lazy::new(|| SystemTime::UNIX_EPOCH + Duration::new(100, 0));
    /// One-byte payload written to every fixture file.
    const CONTENT: &[u8; 1] = b"a";

    /// Creates the fixture inside `dir`: file `a`, `b` as a hard link to `a`, and an
    /// independent file `c` with the same content. Returns `(a, b, c)`.
    fn create_files(dir: &TempDir) -> io::Result<(PathBuf, PathBuf, PathBuf)> {
        let (src, hard, other) = (dir.path().join("a"), dir.path().join("b"), dir.path().join("c"));

        let mut file = File::create(&src)?;
        file.write_all(CONTENT)?;
        fs::hard_link(&src, &hard)?;
        file.set_modified(*NOW)?;

        let mut file = File::create(&other)?;
        file.write_all(CONTENT)?;
        file.set_modified(*NOW)?;
        Ok((src, hard, other))
    }

    /// `NOW` expressed as whole seconds since the Unix epoch.
    fn now_secs() -> u64 {
        NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs()
    }

    /// The entry the traversal is expected to report for a fixture file at `path`.
    fn expected_entry(path: PathBuf) -> FileEntry {
        FileEntry {
            path,
            size: 1,
            modified_date: now_secs(),
        }
    }

    /// Tool data that scans only `dir` and accepts files of any size.
    fn common_data_for(dir: &TempDir) -> CommonToolData {
        let mut common_data = CommonToolData::new(ToolType::SimilarImages);
        common_data.directories.set_included_directory(vec![dir.path().to_owned()]);
        common_data.set_minimal_file_size(0);
        common_data
    }

    #[test]
    fn test_traversal() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        let result = DirTraversalBuilder::new().group_by(|_fe| ()).common_data(&common_data).build().run();
        let DirTraversalResult::SuccessFiles { grouped_file_entries, .. } = result else {
            panic!("Expect SuccessFiles.");
        };

        // Everything lands in one group, so this also checks the traversal order.
        let actual: Vec<_> = grouped_file_entries.into_values().flatten().collect();
        assert_eq!(vec![expected_entry(src), expected_entry(hard), expected_entry(other)], actual);
        Ok(())
    }

    #[cfg(target_family = "unix")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, _, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        let result = DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run();
        let DirTraversalResult::SuccessFiles { grouped_file_entries, .. } = result else {
            panic!("Expect SuccessFiles.");
        };

        let mut actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
        // Groups are keyed by inode number, so their relative order presumably follows
        // whichever file the filesystem gave the smaller inode. Sort by path so the
        // assertion does not depend on inode allocation.
        actual.sort_by(|a, b| a.path.cmp(&b.path));
        assert_eq!(vec![expected_entry(src), expected_entry(other)], actual);
        Ok(())
    }

    #[cfg(target_family = "windows")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        let result = DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run();
        let DirTraversalResult::SuccessFiles { grouped_file_entries, .. } = result else {
            panic!("Expect SuccessFiles.");
        };

        // `inode` yields `None` on Windows, so no entry is collapsed.
        let actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
        assert_eq!(vec![expected_entry(src), expected_entry(hard), expected_entry(other)], actual);
        Ok(())
    }
}
|
||||
|
|
|
@ -23,7 +23,7 @@ use crate::common::{
|
|||
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
|
||||
};
|
||||
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
||||
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
|
||||
use crate::flc;
|
||||
|
@ -122,6 +122,7 @@ pub struct SimilarImages {
|
|||
hash_alg: HashAlg,
|
||||
image_filter: FilterType,
|
||||
exclude_images_with_same_size: bool,
|
||||
ignore_hard_links: bool,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
@ -145,6 +146,7 @@ impl SimilarImages {
|
|||
hash_alg: HashAlg::Gradient,
|
||||
image_filter: FilterType::Lanczos3,
|
||||
exclude_images_with_same_size: false,
|
||||
ignore_hard_links: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -188,7 +190,7 @@ impl SimilarImages {
|
|||
let heic_extensions = HEIC_EXTENSIONS.iter().collect::<HashSet<_>>();
|
||||
|
||||
let result = DirTraversalBuilder::new()
|
||||
.group_by(|_fe| ())
|
||||
.group_by(inode)
|
||||
.stop_receiver(stop_receiver)
|
||||
.progress_sender(progress_sender)
|
||||
.common_data(&self.common_data)
|
||||
|
@ -199,8 +201,8 @@ impl SimilarImages {
|
|||
match result {
|
||||
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
||||
self.images_to_check = grouped_file_entries
|
||||
.into_values()
|
||||
.flatten()
|
||||
.into_iter()
|
||||
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
|
||||
.map(|fe| {
|
||||
let fe_str = fe.path.to_string_lossy().to_string();
|
||||
let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase();
|
||||
|
@ -1090,6 +1092,10 @@ impl SimilarImages {
|
|||
pub fn set_similarity(&mut self, similarity: u32) {
|
||||
self.similarity = similarity;
|
||||
}
|
||||
|
||||
/// Stores the hard-link handling flag in `self.ignore_hard_links`.
// NOTE(review): where this flag is read (the `flat_map` over the grouped traversal
// result), `true` keeps every entry and `false` keeps one entry per inode via
// `take_1_per_inode` — confirm this polarity is what the name is meant to convey.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
|
||||
self.ignore_hard_links = ignore_hard_links;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -16,7 +16,7 @@ use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
|||
|
||||
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
|
||||
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
||||
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
||||
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
||||
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
|
||||
use crate::flc;
|
||||
|
@ -83,6 +83,7 @@ pub struct SimilarVideos {
|
|||
videos_to_check: BTreeMap<String, VideosEntry>,
|
||||
tolerance: i32,
|
||||
exclude_videos_with_same_size: bool,
|
||||
ignore_hard_links: bool,
|
||||
}
|
||||
|
||||
impl CommonData for SimilarVideos {
|
||||
|
@ -111,6 +112,7 @@ impl SimilarVideos {
|
|||
tolerance: 10,
|
||||
exclude_videos_with_same_size: false,
|
||||
similar_referenced_vectors: vec![],
|
||||
ignore_hard_links: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -149,7 +151,7 @@ impl SimilarVideos {
|
|||
}
|
||||
|
||||
let result = DirTraversalBuilder::new()
|
||||
.group_by(|_fe| ())
|
||||
.group_by(inode)
|
||||
.stop_receiver(stop_receiver)
|
||||
.progress_sender(progress_sender)
|
||||
.common_data(&self.common_data)
|
||||
|
@ -160,8 +162,8 @@ impl SimilarVideos {
|
|||
match result {
|
||||
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
||||
self.videos_to_check = grouped_file_entries
|
||||
.into_values()
|
||||
.flatten()
|
||||
.into_iter()
|
||||
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
|
||||
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
|
||||
.collect();
|
||||
self.common_data.text_messages.warnings.extend(warnings);
|
||||
|
@ -454,4 +456,8 @@ impl SimilarVideos {
|
|||
pub fn get_use_reference(&self) -> bool {
|
||||
self.common_data.use_reference_folders
|
||||
}
|
||||
|
||||
/// Stores the hard-link handling flag in `self.ignore_hard_links`.
// NOTE(review): where this flag is read (the `flat_map` over the grouped traversal
// result), `true` keeps every entry and `false` keeps one entry per inode via
// `take_1_per_inode` — confirm this polarity is what the name is meant to convey.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
|
||||
self.ignore_hard_links = ignore_hard_links;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue