Add delete method to replace duplicate files with hard links (#236)

This yields the same space savings as deleting the duplicates, but every "deleted" file remains available under its original path as a hard link to the kept file.

$ mkdir hardlink
$ cd hardlink
$ echo a > a
$ cp a b
$ ln a c
$ touch -t 01010000 a
$ ls -il --time-style=full-iso .
25169323 -rw-rw-r-- 2 thomas thomas 2 2021-01-01 00:00:00.000000000 +0100 a
25169558 -rw-rw-r-- 1 thomas thomas 2 2021-01-31 18:11:08.298161098 +0100 b
25169323 -rw-rw-r-- 2 thomas thomas 2 2021-01-01 00:00:00.000000000 +0100 c

$ cd ..
$ czkawka_cli dup --directories $(pwd)/hardlink -m 1 -f hardlink.result --delete-method HARD

$ ls --time-style=full-iso -li hardlink
25169323 -rw-rw-r-- 3 thomas thomas 2 2021-01-01 00:00:00.000000000 +0100 a
25169323 -rw-rw-r-- 3 thomas thomas 2 2021-01-01 00:00:00.000000000 +0100 b
25169323 -rw-rw-r-- 3 thomas thomas 2 2021-01-01 00:00:00.000000000 +0100 c
This commit is contained in:
Thomas Andreas Jung 2021-02-05 17:59:34 +01:00 committed by GitHub
parent f490a1169c
commit 3ffa55b008
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 2 deletions

View File

@ -21,7 +21,7 @@ pub enum Commands {
allowed_extensions: AllowedExtensions,
#[structopt(short, long, default_value = "HASH", parse(try_from_str = parse_checking_method), help = "Search method (NAME, SIZE, HASH, HASHMB)", long_help = "Methods to search files.\nNAME - Fast but but rarely usable,\nSIZE - Fast but not accurate, checking by the file's size,\nHASHMB - More accurate but slower, checking by the hash of the file's first mebibyte or\nHASH - The slowest method, checking by the hash of the entire file")]
search_method: CheckingMethod,
#[structopt(short = "D", long, default_value = "NONE", parse(try_from_str = parse_delete_method), help = "Delete method (AEN, AEO, ON, OO)", long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nNONE - not delete files")]
#[structopt(short = "D", long, default_value = "NONE", parse(try_from_str = parse_delete_method), help = "Delete method (AEN, AEO, ON, OO, HARD)", long_help = "Methods to delete the files.\nAEN - All files except the newest,\nAEO - All files except the oldest,\nON - Only 1 file, the newest,\nOO - Only 1 file, the oldest\nHARD - create hard link\nNONE - not delete files")]
delete_method: DeleteMethod,
#[structopt(flatten)]
file_to_save: FileToSave,
@ -249,9 +249,10 @@ fn parse_delete_method(src: &str) -> Result<DeleteMethod, &'static str> {
"none" => Ok(DeleteMethod::None),
"aen" => Ok(DeleteMethod::AllExceptNewest),
"aeo" => Ok(DeleteMethod::AllExceptOldest),
"hard" => Ok(DeleteMethod::HardLink),
"on" => Ok(DeleteMethod::OneNewest),
"oo" => Ok(DeleteMethod::OneOldest),
_ => Err("Couldn't parse the delete method (allowed: AEN, AEO, ON, OO)"),
_ => Err("Couldn't parse the delete method (allowed: AEN, AEO, ON, OO, HARD)"),
}
}

View File

@ -57,6 +57,7 @@ pub enum DeleteMethod {
AllExceptOldest,
OneOldest,
OneNewest,
HardLink,
}
#[derive(Clone, Debug)]
@ -1269,6 +1270,26 @@ fn delete_files(vector: &[FileEntry], delete_method: &DeleteMethod, warnings: &m
}
}
}
DeleteMethod::HardLink => {
for (index, file) in vector.iter().enumerate() {
if q_time == 0 || q_time > file.modified_date {
q_time = file.modified_date;
q_index = index;
}
}
let src = vector[q_index].path.clone();
for (index, file) in vector.iter().enumerate() {
if q_index != index {
if fs::remove_file(file.path.clone()).and_then(|_| fs::hard_link(&src, &file.path)).is_ok() {
removed_files += 1;
gained_space += file.size;
} else {
failed_to_remove_files += 1;
warnings.push(format!("Failed to link {} -> {}", file.path.display(), src.display()));
}
}
}
}
DeleteMethod::None => {
// Just don't remove files
}