diff --git a/.github/workflows/linux_cli.yml b/.github/workflows/linux_cli.yml index 6db43c6..70f3c9b 100644 --- a/.github/workflows/linux_cli.yml +++ b/.github/workflows/linux_cli.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/checkout@v3 - name: Install basic libraries - run: sudo apt-get update; sudo apt install libheif-dev -y + run: sudo apt-get update; sudo apt install libheif-dev ffmpeg -y - name: Setup rust version run: rustup default ${{ matrix.toolchain }} diff --git a/ci_tester/src/main.rs b/ci_tester/src/main.rs index 7efe1a4..dc05e66 100644 --- a/ci_tester/src/main.rs +++ b/ci_tester/src/main.rs @@ -15,6 +15,7 @@ static CZKAWKA_PATH: state::InitCell = state::InitCell::new(); static COLLECTED_FILES: state::InitCell = state::InitCell::new(); const ATTEMPTS: u32 = 10; +const PRINT_MESSAGES_CZKAWKA: bool = true; // App runs - ./ci_tester PATH_TO_CZKAWKA fn main() { @@ -41,10 +42,113 @@ fn main() { test_remove_duplicates_one_newest(); test_remove_duplicates_all_expect_newest(); test_remove_duplicates_all_expect_oldest(); + test_remove_same_music_tags_one_oldest(); + test_remove_same_music_tags_one_newest(); + test_remove_same_music_tags_all_expect_oldest(); + test_remove_same_music_tags_all_expect_newest(); + test_remove_same_music_content_one_oldest(); + test_remove_same_music_content_all_expect_oldest(); + test_remove_same_music_content_one_newest(); + test_remove_same_music_content_all_expect_newest(); + test_remove_videos_one_oldest(); + test_remove_videos_one_newest(); + test_remove_videos_all_expect_oldest(); + test_remove_videos_all_expect_newest(); } println!("Completed checking"); } +fn test_remove_videos_one_oldest() { + info!("test_remove_videos_one_oldest"); + run_test(&["video", "-d", "TestFiles", "-D", "OO"], vec!["Videos/V3.webm"], vec![], vec![]); +} +fn test_remove_videos_one_newest() { + info!("test_remove_videos_one_newest"); + run_test(&["video", "-d", "TestFiles", "-D", "ON"], vec!["Videos/V5.mp4"], vec![], vec![]); +} +fn 
test_remove_videos_all_expect_oldest() { + info!("test_remove_videos_all_expect_oldest"); + run_test( + &["video", "-d", "TestFiles", "-D", "AEO"], + vec!["Videos/V1.mp4", "Videos/V2.mp4", "Videos/V5.mp4"], + vec![], + vec![], + ); +} +fn test_remove_videos_all_expect_newest() { + info!("test_remove_videos_all_expect_newest"); + run_test( + &["video", "-d", "TestFiles", "-D", "AEN"], + vec!["Videos/V1.mp4", "Videos/V2.mp4", "Videos/V3.webm"], + vec![], + vec![], + ); +} + +fn test_remove_same_music_content_one_newest() { + info!("test_remove_same_music_content_one_newest"); + run_test( + &["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "ON"], + vec!["Music/M2.mp3"], + vec![], + vec![], + ); +} +fn test_remove_same_music_content_all_expect_newest() { + info!("test_remove_same_music_content_all_expect_newest"); + run_test( + &["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEN"], + vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"], + vec![], + vec![], + ); +} + +fn test_remove_same_music_content_all_expect_oldest() { + info!("test_remove_same_music_content_all_expect_oldest"); + run_test( + &["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "AEO"], + vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"], + vec![], + vec![], + ); +} + +fn test_remove_same_music_content_one_oldest() { + info!("test_remove_same_music_content_one_oldest"); + run_test( + &["music", "-d", "TestFiles", "-s", "CONTENT", "-l", "2.0", "-D", "OO"], + vec!["Music/M5.mp3"], + vec![], + vec![], + ); +} +fn test_remove_same_music_tags_one_oldest() { + info!("test_remove_same_music_one_oldest"); + run_test(&["music", "-d", "TestFiles", "-D", "OO"], vec!["Music/M5.mp3"], vec![], vec![]); +} +fn test_remove_same_music_tags_one_newest() { + info!("test_remove_same_music_one_newest"); + run_test(&["music", "-d", "TestFiles", "-D", "ON"], vec!["Music/M2.mp3"], vec![], vec![]); +} +fn test_remove_same_music_tags_all_expect_oldest() { + 
info!("test_remove_same_music_all_expect_oldest"); + run_test( + &["music", "-d", "TestFiles", "-D", "AEO"], + vec!["Music/M1.mp3", "Music/M2.mp3", "Music/M3.flac"], + vec![], + vec![], + ); +} +fn test_remove_same_music_tags_all_expect_newest() { + info!("test_remove_same_music_all_expect_newest"); + run_test( + &["music", "-d", "TestFiles", "-D", "AEN"], + vec!["Music/M1.mp3", "Music/M3.flac", "Music/M5.mp3"], + vec![], + vec![], + ); +} fn test_remove_duplicates_all_expect_oldest() { info!("test_remove_duplicates_all_expect_oldest"); run_test( @@ -138,7 +242,7 @@ fn run_test(arguments: &[&str], expected_files_differences: Vec<&'static str>, e let mut all_arguments = vec![]; all_arguments.push(CZKAWKA_PATH.get().as_str()); all_arguments.extend_from_slice(arguments); - run_with_good_status(&all_arguments, true); + run_with_good_status(&all_arguments, PRINT_MESSAGES_CZKAWKA); file_folder_diffs( COLLECTED_FILES.get(), expected_files_differences, diff --git a/czkawka_cli/src/commands.rs b/czkawka_cli/src/commands.rs index ae633d5..a884122 100644 --- a/czkawka_cli/src/commands.rs +++ b/czkawka_cli/src/commands.rs @@ -127,20 +127,13 @@ pub struct DuplicatesArgs { short, long, default_value = "HASH", - value_parser = parse_checking_method, + value_parser = parse_checking_method_duplicate, help = "Search method (NAME, SIZE, HASH)", long_help = "Methods to search files.\nNAME - Fast but but rarely usable,\nSIZE - Fast but not accurate, checking by the file's size,\nHASH - The slowest method, checking by the hash of the entire file" )] pub search_method: CheckingMethod, - #[clap( - short = 'D', - long, - default_value = "NONE", - value_parser = parse_delete_method, - help = "Delete method (AEN, AEO, ON, OO, HARD)", - long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files" - )] - pub delete_method: 
DeleteMethod, + #[clap(flatten)] + pub delete_method: DMethod, #[clap( short = 't', long, @@ -165,7 +158,7 @@ pub struct DuplicatesArgs { #[clap(flatten)] pub allow_hard_links: AllowHardLinks, #[clap(flatten)] - pub dryrun: DryRun, + pub dry_run: DryRun, } #[derive(Debug, clap::Args)] @@ -314,6 +307,10 @@ pub struct SimilarImagesArgs { #[clap(flatten)] pub file_to_save: FileToSave, #[clap(flatten)] + pub delete_method: DMethod, + #[clap(flatten)] + pub dry_run: DryRun, + #[clap(flatten)] pub json_compact_file_to_save: JsonCompactFileToSave, #[clap(flatten)] pub json_pretty_file_to_save: JsonPrettyFileToSave, @@ -358,8 +355,10 @@ pub struct SameMusicArgs { pub excluded_directories: ExcludedDirectories, #[clap(flatten)] pub excluded_items: ExcludedItems, - // #[clap(short = 'D', long, help = "Delete found files")] - // delete_files: bool, TODO + #[clap(flatten)] + pub delete_method: DMethod, + #[clap(flatten)] + pub dry_run: DryRun, #[clap( short = 'z', long, @@ -369,6 +368,15 @@ pub struct SameMusicArgs { long_help = "Sets which rows must be equal to set this files as duplicates(may be mixed, but must be divided by commas)." 
/// Parses the value of the `--maximum-difference` option.
///
/// Only values strictly between 0.0 and 10.0 are accepted.
///
/// # Errors
/// Returns a human-readable message when `src` is not a float, is NaN,
/// or lies outside the open interval (0.0, 10.0).
fn parse_maximum_difference(src: &str) -> Result<f64, String> {
    let maximum_difference = src.parse::<f64>().map_err(|e| e.to_string())?;

    // "NaN" parses successfully and fails every comparison below, so without
    // this check it would slip through as a "valid" value.
    if maximum_difference.is_nan() {
        Err("Maximum difference must be a number".to_string())
    } else if maximum_difference <= 0.0 {
        Err("Maximum difference must be bigger than 0".to_string())
    } else if maximum_difference >= 10.0 {
        Err("Maximum difference must be smaller than 10.0".to_string())
    } else {
        Ok(maximum_difference)
    }
}

/// Parses the value of the `--minimum-segment-duration` option.
///
/// Only values strictly between 0.0 and 3600.0 are accepted.
///
/// # Errors
/// Returns a human-readable message when `src` is not a float, is NaN,
/// or lies outside the open interval (0.0, 3600.0).
fn parse_minimum_segment_duration(src: &str) -> Result<f32, String> {
    let minimum_segment_duration = src.parse::<f32>().map_err(|e| e.to_string())?;

    // Same NaN hole as in `parse_maximum_difference`: reject it explicitly.
    if minimum_segment_duration.is_nan() {
        Err("Minimum segment duration must be a number".to_string())
    } else if minimum_segment_duration <= 0.0 {
        Err("Minimum segment duration must be bigger than 0".to_string())
    } else if minimum_segment_duration >= 3600.0 {
        Err("Minimum segment duration must be smaller than 3600(greater values not have much sense)".to_string())
    } else {
        Ok(minimum_segment_duration)
    }
}
Ok(CheckingMethod::AudioContent), + _ => Err("Couldn't parse the searc method (allowed: TAGS, CONTENT)"), + } +} + fn parse_delete_method(src: &str) -> Result { match src.to_ascii_lowercase().as_str() { "none" => Ok(DeleteMethod::None), @@ -773,7 +851,7 @@ fn parse_image_hash_size(src: &str) -> Result { } fn parse_music_duplicate_type(src: &str) -> Result { - if src.is_empty() { + if src.trim().is_empty() { return Ok(MusicSimilarity::NONE); } @@ -781,22 +859,22 @@ fn parse_music_duplicate_type(src: &str) -> Result { let parts: Vec = src.split(',').map(|e| e.to_lowercase().replace('_', "")).collect(); - if parts.iter().any(|e| e.contains("tracktitle")) { + if parts.contains(&"tracktitle".into()) { similarity |= MusicSimilarity::TRACK_TITLE; } - if parts.iter().any(|e| e.contains("trackartist")) { + if parts.contains(&"trackartist".into()) { similarity |= MusicSimilarity::TRACK_ARTIST; } - if parts.iter().any(|e| e.contains("year")) { + if parts.contains(&"year".into()) { similarity |= MusicSimilarity::YEAR; } - if parts.iter().any(|e| e.contains("bitrate")) { + if parts.contains(&"bitrate".into()) { similarity |= MusicSimilarity::BITRATE; } - if parts.iter().any(|e| e.contains("genre")) { + if parts.contains(&"genre".into()) { similarity |= MusicSimilarity::GENRE; } - if parts.iter().any(|e| e.contains("length")) { + if parts.contains(&"length".into()) { similarity |= MusicSimilarity::LENGTH; } diff --git a/czkawka_cli/src/main.rs b/czkawka_cli/src/main.rs index 29c6228..5589559 100644 --- a/czkawka_cli/src/main.rs +++ b/czkawka_cli/src/main.rs @@ -75,7 +75,7 @@ fn duplicates(duplicates: DuplicatesArgs) { #[cfg(target_family = "unix")] exclude_other_filesystems, allow_hard_links, - dryrun, + dry_run, case_sensitive_name_comparison, } = duplicates; @@ -91,13 +91,13 @@ fn duplicates(duplicates: DuplicatesArgs) { item.set_minimal_cache_file_size(minimal_cached_file_size); item.set_allowed_extensions(allowed_extensions.allowed_extensions.join(",")); 
item.set_check_method(search_method); - item.set_delete_method(delete_method); + item.set_delete_method(delete_method.delete_method); item.set_hash_type(hash_type); item.set_recursive_search(!not_recursive.not_recursive); #[cfg(target_family = "unix")] item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems); item.set_ignore_hard_links(!allow_hard_links.allow_hard_links); - item.set_dryrun(dryrun.dryrun); + item.set_dry_run(dry_run.dry_run); item.set_case_sensitive_name_comparison(case_sensitive_name_comparison.case_sensitive_name_comparison); item.find_duplicates(None, None); @@ -131,7 +131,9 @@ fn empty_folders(empty_folders: EmptyFoldersArgs) { item.set_included_directory(directories.directories); item.set_excluded_directory(excluded_directories.excluded_directories); item.set_excluded_items(excluded_items.excluded_items); - item.set_delete_folder(delete_folders); + if delete_folders { + item.set_delete_method(DeleteMethod::Delete); + } #[cfg(target_family = "unix")] item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems); @@ -292,6 +294,8 @@ fn similar_images(similar_images: SimilarImagesArgs) { hash_alg, image_filter, hash_size, + delete_method, + dry_run, } = similar_images; set_number_of_threads(thread_number.thread_number); @@ -309,6 +313,8 @@ fn similar_images(similar_images: SimilarImagesArgs) { item.set_image_filter(image_filter); item.set_hash_alg(hash_alg); item.set_hash_size(hash_size); + item.set_delete_method(delete_method.delete_method); + item.set_dry_run(dry_run.dry_run); item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size)); @@ -328,7 +334,7 @@ fn same_music(same_music: SameMusicArgs) { directories, excluded_directories, excluded_items, - // delete_files, + delete_method, file_to_save, json_compact_file_to_save, json_pretty_file_to_save, @@ -338,6 +344,10 @@ fn same_music(same_music: SameMusicArgs) { minimal_file_size, maximal_file_size, 
music_similarity, + dry_run, + minimum_segment_duration, + maximum_difference, + search_method, } = same_music; set_number_of_threads(thread_number.thread_number); @@ -353,10 +363,11 @@ fn same_music(same_music: SameMusicArgs) { #[cfg(target_family = "unix")] item.set_exclude_other_filesystems(exclude_other_filesystems.exclude_other_filesystems); item.set_music_similarity(music_similarity); - - // if delete_files { - // // TODO item.set_delete_method(same_music::DeleteMethod::Delete); - // } + item.set_delete_method(delete_method.delete_method); + item.set_dry_run(dry_run.dry_run); + item.set_minimum_segment_duration(minimum_segment_duration); + item.set_maximum_difference(maximum_difference); + item.set_check_type(search_method); item.find_same_music(None, None); @@ -467,6 +478,8 @@ fn similar_videos(similar_videos: SimilarVideosArgs) { minimal_file_size, maximal_file_size, allowed_extensions, + delete_method, + dry_run, } = similar_videos; set_number_of_threads(thread_number.thread_number); @@ -483,6 +496,8 @@ fn similar_videos(similar_videos: SimilarVideosArgs) { item.set_minimal_file_size(minimal_file_size); item.set_maximal_file_size(maximal_file_size); item.set_tolerance(tolerance); + item.set_delete_method(delete_method.delete_method); + item.set_dry_run(dry_run.dry_run); item.find_similar_videos(None, None); diff --git a/czkawka_core/src/common.rs b/czkawka_core/src/common.rs index 3a74397..0580e8c 100644 --- a/czkawka_core/src/common.rs +++ b/czkawka_core/src/common.rs @@ -25,7 +25,10 @@ use log::{info, LevelFilter, Record}; use crate::common_dir_traversal::{CheckingMethod, ProgressData, ToolType}; use crate::common_directory::Directories; use crate::common_items::ExcludedItems; +use crate::common_messages::Messages; +use crate::common_tool::DeleteMethod; use crate::common_traits::ResultEntry; +use crate::duplicate::make_hard_link; use crate::CZKAWKA_VERSION; static NUMBER_OF_THREADS: state::InitCell = state::InitCell::new(); @@ -234,35 +237,6 @@ pub fn 
create_crash_message(library_name: &str, file_path: &str, home_library_ur } impl Common { - pub fn delete_multiple_entries(entries: &[String]) -> Vec { - let mut path: &Path; - let mut warnings: Vec = Vec::new(); - for entry in entries { - path = Path::new(entry); - if path.is_dir() { - if let Err(e) = fs::remove_dir_all(entry) { - warnings.push(format!("Failed to remove folder {entry}, reason {e}")); - } - } else if let Err(e) = fs::remove_file(entry) { - warnings.push(format!("Failed to remove file {entry}, reason {e}")); - } - } - warnings - } - - pub fn delete_one_entry(entry: &str) -> String { - let path: &Path = Path::new(entry); - let mut warning: String = String::new(); - if path.is_dir() { - if let Err(e) = fs::remove_dir_all(entry) { - warning = format!("Failed to remove folder {entry}, reason {e}"); - } - } else if let Err(e) = fs::remove_file(entry) { - warning = format!("Failed to remove file {entry}, reason {e}"); - } - warning - } - pub fn regex_check(expression: &str, directory: impl AsRef) -> bool { if expression == "*" { return true; @@ -374,6 +348,98 @@ pub fn check_folder_children( dir_result.push(next_folder); } +// Here we assume, that internal Vec<> have at least 1 object +#[allow(clippy::ptr_arg)] +pub fn delete_files_custom(items: &Vec<&Vec>, delete_method: &DeleteMethod, text_messages: &mut Messages, dry_run: bool) -> (u64, usize, usize) +where + T: ResultEntry + Clone, +{ + let res = items + .iter() + .map(|values| { + let mut gained_space: u64 = 0; + let mut removed_files: usize = 0; + let mut failed_to_remove_files: usize = 0; + let mut infos = Vec::new(); + let mut errors = Vec::new(); + + let mut all_values = (*values).clone(); + let len = all_values.len(); + + // Sorted from oldest to newest - from smallest value to bigger + all_values.sort_unstable_by_key(ResultEntry::get_modified_date); + + if delete_method == &DeleteMethod::HardLink { + let original_file = &all_values[0]; + for file_entry in &all_values[1..] 
{ + if dry_run { + infos.push(format!( + "dry_run - would create hardlink from {:?} to {:?}", + original_file.get_path(), + original_file.get_path() + )); + } else { + if dry_run { + infos.push(format!("Replace file {:?} with hard link to {:?}", original_file.get_path(), file_entry.get_path())); + } else { + if let Err(e) = make_hard_link(original_file.get_path(), file_entry.get_path()) { + errors.push(format!( + "Cannot create hard link from {:?} to {:?} - {}", + file_entry.get_path(), + original_file.get_path(), + e + )); + failed_to_remove_files += 1; + } else { + gained_space += 1; + removed_files += 1; + } + } + } + } + + return (infos, errors, gained_space, removed_files, failed_to_remove_files); + } + + let items = match delete_method { + DeleteMethod::Delete => &all_values, + DeleteMethod::AllExceptNewest => &all_values[..(len - 1)], + DeleteMethod::AllExceptOldest => &all_values[1..], + DeleteMethod::OneOldest => &all_values[..1], + DeleteMethod::OneNewest => &all_values[(len - 1)..], + DeleteMethod::HardLink | DeleteMethod::None => unreachable!("HardLink and None should be handled before"), + }; + + for i in items { + if dry_run { + infos.push(format!("dry_run - would delete file: {:?}", i.get_path())); + } else { + if let Err(e) = std::fs::remove_file(i.get_path()) { + errors.push(format!("Cannot delete file: {:?} - {e}", i.get_path())); + failed_to_remove_files += 1; + } else { + removed_files += 1; + gained_space += i.get_size(); + } + } + } + (infos, errors, gained_space, removed_files, failed_to_remove_files) + }) + .collect::>(); + + let mut gained_space = 0; + let mut removed_files = 0; + let mut failed_to_remove_files = 0; + for (infos, errors, gained_space_v, removed_files_v, failed_to_remove_files_v) in res { + text_messages.messages.extend(infos); + text_messages.errors.extend(errors); + gained_space += gained_space_v; + removed_files += removed_files_v; + failed_to_remove_files += failed_to_remove_files_v; + } + + (gained_space, removed_files, 
failed_to_remove_files) +} pub fn filter_reference_folders_generic(entries_to_check: Vec>, directories: &Directories) -> Vec<(T, Vec)> where T: ResultEntry, diff --git a/czkawka_core/src/common_dir_traversal.rs b/czkawka_core/src/common_dir_traversal.rs index 3cc59b9..8c052db 100644 --- a/czkawka_core/src/common_dir_traversal.rs +++ b/czkawka_core/src/common_dir_traversal.rs @@ -100,14 +100,14 @@ pub enum ErrorType { /// Enum with values which show if folder is empty. /// In function "`optimize_folders`" automatically "Maybe" is changed to "Yes", so it is not necessary to put it here -#[derive(Eq, PartialEq, Copy, Clone)] +#[derive(Eq, PartialEq, Copy, Clone, Debug)] pub(crate) enum FolderEmptiness { No, Maybe, } /// Struct assigned to each checked folder with parent path(used to ignore parent if children are not empty) and flag which shows if folder is empty -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct FolderEntry { pub(crate) parent_path: Option, // Usable only when finding diff --git a/czkawka_core/src/common_tool.rs b/czkawka_core/src/common_tool.rs index d7b843e..694abe0 100644 --- a/czkawka_core/src/common_tool.rs +++ b/czkawka_core/src/common_tool.rs @@ -22,6 +22,7 @@ pub struct CommonToolData { pub(crate) delete_outdated_cache: bool, pub(crate) save_also_as_json: bool, pub(crate) use_reference_folders: bool, + pub(crate) dry_run: bool, } #[derive(Eq, PartialEq, Clone, Debug, Copy, Default)] @@ -53,6 +54,7 @@ impl CommonToolData { delete_outdated_cache: true, save_also_as_json: false, use_reference_folders: false, + dry_run: false, } } } @@ -61,6 +63,13 @@ pub trait CommonData { fn get_cd(&self) -> &CommonToolData; fn get_cd_mut(&mut self) -> &mut CommonToolData; + fn set_dry_run(&mut self, dry_run: bool) { + self.get_cd_mut().dry_run = dry_run; + } + fn get_dry_run(&self) -> bool { + self.get_cd().dry_run + } + fn set_use_cache(&mut self, use_cache: bool) { self.get_cd_mut().use_cache = use_cache; } @@ -189,6 +198,8 @@ pub trait CommonData { 
println!("Delete outdated cache: {:?}", self.get_cd().delete_outdated_cache); println!("Save also as json: {:?}", self.get_cd().save_also_as_json); println!("Delete method: {:?}", self.get_cd().delete_method); + println!("Use reference folders: {:?}", self.get_cd().use_reference_folders); + println!("Dry run: {:?}", self.get_cd().dry_run); println!("---------------DEBUG PRINT MESSAGES---------------"); println!("Errors size - {}", self.get_cd().text_messages.errors.len()); diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index 96a34f4..5b1d4dd 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -1,5 +1,4 @@ -use std::collections::HashMap; -use std::collections::{BTreeMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Debug; use std::fs::File; use std::hash::Hasher; @@ -19,10 +18,9 @@ use log::debug; use rayon::prelude::*; use xxhash_rust::xxh3::Xxh3; -use crate::common::{prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; +use crate::common::{delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; use crate::common_cache::{get_duplicate_cache_file, load_cache_from_file_generalized_by_size, save_cache_to_file_generalized}; use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; -use crate::common_messages::Messages; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; @@ -82,7 +80,6 @@ pub struct DuplicateFinder { check_method: CheckingMethod, hash_type: HashType, ignore_hard_links: bool, - dryrun: bool, use_prehash_cache: bool, minimal_cache_file_size: u64, minimal_prehash_cache_file_size: u64, @@ -105,7 +102,6 @@ impl DuplicateFinder { check_method: CheckingMethod::None, ignore_hard_links: true, hash_type: HashType::Blake3, - dryrun: false, use_prehash_cache: true, minimal_cache_file_size: 1024 * 256, // 
By default cache only >= 256 KB files minimal_prehash_cache_file_size: 0, @@ -823,115 +819,26 @@ impl DuplicateFinder { match self.check_method { CheckingMethod::Name => { let vec_files = self.files_with_identical_names.values().collect::>(); - delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } CheckingMethod::SizeName => { let vec_files = self.files_with_identical_size_names.values().collect::>(); - delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } CheckingMethod::Hash => { for vec_files in self.files_with_identical_hashes.values() { let vev: Vec<&Vec> = vec_files.iter().collect::>(); - delete_files(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun); + delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } } CheckingMethod::Size => { let vec_files = self.files_with_identical_size.values().collect::>(); - delete_files(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.dryrun); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } _ => panic!(), } } } -// Here we assume, that internal Vec<> have at least 1 object -#[allow(clippy::ptr_arg)] -fn delete_files(items: &Vec<&Vec>, delete_method: &DeleteMethod, text_messages: &mut Messages, dryrun: bool) -> (u64, usize, usize) { - let res = items - .iter() - .map(|values| { - let mut gained_space: u64 = 0; - let mut removed_files: usize = 0; - let mut failed_to_remove_files: usize = 0; - let mut infos = Vec::new(); - let mut 
errors = Vec::new(); - - let mut all_values = (*values).clone(); - let len = all_values.len(); - - // Sorted from oldest to newest - from smallest value to bigger - all_values.sort_unstable_by_key(ResultEntry::get_modified_date); - - if delete_method == &DeleteMethod::HardLink { - let original_file = &all_values[0]; - for file_entry in &all_values[1..] { - if dryrun { - infos.push(format!( - "Dryrun - would create hardlink from {:?} to {:?}", - original_file.get_path(), - original_file.get_path() - )); - } else { - if dryrun { - infos.push(format!("Replace file {:?} with hard link to {:?}", original_file.get_path(), file_entry.get_path())); - } else { - if let Err(e) = make_hard_link(original_file.get_path(), file_entry.get_path()) { - errors.push(format!( - "Cannot create hard link from {:?} to {:?} - {}", - file_entry.get_path(), - original_file.get_path(), - e - )); - failed_to_remove_files += 1; - } else { - gained_space += 1; - removed_files += 1; - } - } - } - } - - return (infos, errors, gained_space, removed_files, failed_to_remove_files); - } - - let items = match delete_method { - DeleteMethod::Delete => &all_values, - DeleteMethod::AllExceptNewest => &all_values[..(len - 1)], - DeleteMethod::AllExceptOldest => &all_values[1..], - DeleteMethod::OneOldest => &all_values[..1], - DeleteMethod::OneNewest => &all_values[(len - 1)..], - DeleteMethod::HardLink | DeleteMethod::None => unreachable!("HardLink and None should be handled before"), - }; - - for i in items { - if dryrun { - infos.push(format!("Dryrun - would delete file: {:?}", i.get_path())); - } else { - if let Err(e) = std::fs::remove_file(i.get_path()) { - errors.push(format!("Cannot delete file: {:?} - {e}", i.get_path())); - failed_to_remove_files += 1; - } else { - removed_files += 1; - gained_space += i.get_size(); - } - } - } - (infos, errors, gained_space, removed_files, failed_to_remove_files) - }) - .collect::>(); - - let mut gained_space = 0; - let mut removed_files = 0; - let mut 
failed_to_remove_files = 0; - for (infos, errors, gained_space_v, removed_files_v, failed_to_remove_files_v) in res { - text_messages.messages.extend(infos); - text_messages.errors.extend(errors); - gained_space += gained_space_v; - removed_files += removed_files_v; - failed_to_remove_files += failed_to_remove_files_v; - } - - (gained_space, removed_files, failed_to_remove_files) -} impl DuplicateFinder { pub fn set_case_sensitive_name_comparison(&mut self, case_sensitive_name_comparison: bool) { @@ -982,8 +889,8 @@ impl DuplicateFinder { self.ignore_hard_links = ignore_hard_links; } - pub fn set_dryrun(&mut self, dryrun: bool) { - self.dryrun = dryrun; + pub fn set_dry_run(&mut self, dry_run: bool) { + self.common_data.dry_run = dry_run; } pub fn set_check_method(&mut self, check_method: CheckingMethod) { diff --git a/czkawka_core/src/empty_folder.rs b/czkawka_core/src/empty_folder.rs index 11f14f5..2a814db 100644 --- a/czkawka_core/src/empty_folder.rs +++ b/czkawka_core/src/empty_folder.rs @@ -1,6 +1,5 @@ use std::collections::BTreeMap; use std::fs; - use std::io::Write; use std::path::PathBuf; @@ -8,15 +7,15 @@ use crossbeam_channel::Receiver; use fun_time::fun_time; use futures::channel::mpsc::UnboundedSender; use log::debug; +use rayon::prelude::*; use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, FolderEmptiness, FolderEntry, ProgressData, ToolType}; -use crate::common_tool::{CommonData, CommonToolData}; +use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults}; pub struct EmptyFolder { common_data: CommonToolData, information: Info, - delete_folders: bool, empty_folder_list: BTreeMap, // Path, FolderEntry } @@ -30,7 +29,6 @@ impl EmptyFolder { Self { common_data: CommonToolData::new(ToolType::EmptyFolders), information: Default::default(), - delete_folders: false, empty_folder_list: Default::default(), } } @@ -51,9 +49,8 @@ impl EmptyFolder { return; } 
self.optimize_folders(); - if self.delete_folders { - self.delete_empty_folders(); - } + + self.delete_files(); self.debug_print(); } @@ -109,19 +106,24 @@ impl EmptyFolder { } } - #[fun_time(message = "delete_empty_folders")] - fn delete_empty_folders(&mut self) { - // Folders may be deleted or require too big privileges - for name in self.empty_folder_list.keys() { - match fs::remove_dir_all(name) { - Ok(()) => (), - Err(e) => self - .common_data - .text_messages - .warnings - .push(format!("Failed to remove folder {}, reason {}", name.display(), e)), - }; + // #[fun_time(message = "delete_files")] + fn delete_files(&mut self) { + if self.get_delete_method() == DeleteMethod::None { + return; } + let folders_to_remove = self.empty_folder_list.keys().collect::>(); + + let errors: Vec<_> = folders_to_remove + .into_par_iter() + .filter_map(|name| { + if let Err(e) = fs::remove_dir_all(name) { + Some(format!("Failed to remove folder {name:?}, reason {e}")) + } else { + None + } + }) + .collect(); + self.get_text_messages_mut().errors.extend(errors); } } @@ -172,8 +174,3 @@ impl CommonData for EmptyFolder { &mut self.common_data } } -impl EmptyFolder { - pub fn set_delete_folder(&mut self, delete_folder: bool) { - self.delete_folders = delete_folder; - } -} diff --git a/czkawka_core/src/same_music.rs b/czkawka_core/src/same_music.rs index 108dabc..ceb9f19 100644 --- a/czkawka_core/src/same_music.rs +++ b/czkawka_core/src/same_music.rs @@ -25,10 +25,12 @@ use symphonia::core::io::MediaSourceStream; use symphonia::core::meta::MetadataOptions; use symphonia::core::probe::Hint; -use crate::common::{create_crash_message, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS}; +use crate::common::{ + create_crash_message, delete_files_custom, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, +}; use 
crate::common_cache::{get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; -use crate::common_tool::{CommonData, CommonToolData}; +use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; bitflags! { @@ -123,7 +125,7 @@ impl SameMusic { music_to_check: Default::default(), approximate_comparison: true, duplicated_music_entries_referenced: vec![], - check_type: CheckingMethod::AudioContent, + check_type: CheckingMethod::AudioTags, hash_preset_config: Configuration::preset_test1(), // TODO allow to change this minimum_segment_duration: 10.0, maximum_difference: 2.0, @@ -653,20 +655,12 @@ impl SameMusic { #[fun_time(message = "delete_files")] fn delete_files(&mut self) { + if self.common_data.delete_method == DeleteMethod::None { + return; + } - // TODO - // match self.delete_method { - // DeleteMethod::Delete => { - // for file_entry in &self.music_entries { - // if fs::remove_file(file_entry.path.clone()).is_err() { - // self.common_data.text_messages.warnings.push(file_entry.path.display().to_string()); - // } - // } - // } - // DeleteMethod::None => { - // //Just do nothing - // } - // } + let vec_files = self.duplicated_music_entries.iter().collect::>(); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } } diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs index c797522..288e8b2 100644 --- a/czkawka_core/src/similar_images.rs +++ b/czkawka_core/src/similar_images.rs @@ -20,12 +20,12 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "heif")] use crate::common::get_dynamic_image_from_heic; use crate::common::{ - check_folder_children, create_crash_message, get_dynamic_image_from_raw_image, prepare_thread_handler_common, 
send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS, - IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, + check_folder_children, create_crash_message, delete_files_custom, get_dynamic_image_from_raw_image, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, + HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, }; use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType}; -use crate::common_tool::{CommonData, CommonToolData}; +use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::flc; @@ -140,6 +140,7 @@ impl SimilarImages { self.common_data.stopped_search = true; return; } + self.delete_files(); self.debug_print(); } @@ -807,6 +808,15 @@ impl SimilarImages { } assert!(!found, "Found Invalid entries, verify errors before"); // TODO crashes with empty result with reference folder, verify why } + + fn delete_files(&mut self) { + if self.common_data.delete_method == DeleteMethod::None { + return; + } + + let vec_files = self.similar_vectors.iter().collect::>(); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); + } } fn is_in_reference_folder(reference_directories: &[PathBuf], path: &Path) -> bool { diff --git a/czkawka_core/src/similar_videos.rs b/czkawka_core/src/similar_videos.rs index be197fd..6a63197 100644 --- a/czkawka_core/src/similar_videos.rs +++ b/czkawka_core/src/similar_videos.rs @@ -15,10 +15,10 @@ use serde::{Deserialize, Serialize}; use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo; use vid_dup_finder_lib::{NormalizedTolerance, VideoHash}; -use 
crate::common::{check_folder_children, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS}; +use crate::common::{check_folder_children, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS}; use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; use crate::common_dir_traversal::{common_get_entry_data_metadata, common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType}; -use crate::common_tool::{CommonData, CommonToolData}; +use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::flc; use crate::localizer_core::generate_translation_hashmap; @@ -123,6 +123,7 @@ impl SimilarVideos { return; } } + self.delete_files(); self.debug_print(); } @@ -401,6 +402,15 @@ impl SimilarVideos { .collect::)>>(); } } + + fn delete_files(&mut self) { + if self.common_data.delete_method == DeleteMethod::None { + return; + } + + let vec_files = self.similar_vectors.iter().collect::>(); + delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); + } } impl Default for SimilarVideos {