diff --git a/.github/workflows/linux_cli_eyra.yml b/.github/workflows/linux_cli_eyra.yml index 6370205..beda1f9 100644 --- a/.github/workflows/linux_cli_eyra.yml +++ b/.github/workflows/linux_cli_eyra.yml @@ -23,7 +23,7 @@ jobs: # New versions of nightly rust may call new unimplemented in eyra functions, so use const version - name: Setup rust version - run: rustup default nightly-2023-11-29 + run: rustup default nightly-2023-12-14 - name: Add eyra run: | diff --git a/Cargo.lock b/Cargo.lock index 1a87015..ebd4d95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -278,13 +278,13 @@ dependencies = [ [[package]] name = "async-global-executor" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4353121d5644cdf2beb5726ab752e79a8db1ebb52031770ec47db31d245526" +checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" dependencies = [ "async-channel 2.1.1", "async-executor", - "async-io 2.2.1", + "async-io 2.2.2", "async-lock 3.2.0", "blocking", "futures-lite 2.1.0", @@ -313,9 +313,9 @@ dependencies = [ [[package]] name = "async-io" -version = "2.2.1" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6d3b15875ba253d1110c740755e246537483f152fa334f91abd7fe84c88b3ff" +checksum = "6afaa937395a620e33dc6a742c593c01aced20aa376ffb0f628121198578ccc7" dependencies = [ "async-lock 3.2.0", "cfg-if", @@ -375,7 +375,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -384,7 +384,7 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e47d90f65a225c4527103a8d747001fc56e375203592b25ad103e1ca13124c5" dependencies = [ - "async-io 2.2.1", + "async-io 2.2.2", "async-lock 2.8.0", "atomic-waker", "cfg-if", @@ -436,7 +436,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -463,7 +463,7 @@ dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -512,7 +512,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.39", + "syn 2.0.41", "which", ] @@ -645,7 +645,7 @@ checksum = "965ab7eb5f8f97d2a083c799f3a1b994fc397b2fe2da5d1da1626ce15a39f2b1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -874,7 +874,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -994,7 +994,7 @@ checksum = "57aaaad9185d3bcb3afe63549d8ba60b2fb0ea8dc2da83f62dd56805edf56fd1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1128,9 +1128,9 @@ checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216" [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1138,9 +1138,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = 
"fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1149,22 +1149,21 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset 0.9.0", - "scopeguard", ] [[package]] name = "crossbeam-queue" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +checksum = "b9bcf5bdbfdd6030fb4a1c497b5d5fc5921aa2f60d359a17e249c0e6df3de153" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1172,9 +1171,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f" dependencies = [ "cfg-if", ] @@ -1206,12 +1205,12 @@ dependencies = [ [[package]] name = "ctor" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e366bff8cd32dd8754b0991fb66b279dc48f598c3a18914852a6673deef583" +checksum = "30d2b3721e861707777e3195b0158f950ae6dc4a27e4d02ff9f67e3eb3de199e" dependencies = [ "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1302,6 +1301,7 @@ dependencies = [ "log", "once_cell", "open", + "rayon", "regex", "rust-embed", "trash", @@ -1438,7 +1438,7 @@ checksum = "9abcad25e9720609ccb3dcdb795d845e37d8ce34183330a9f48b03a1a71c8e21" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1487,7 +1487,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1625,7 +1625,7 @@ checksum = "f95e2801cd355d4a1a3e3953ce6ee5ae9603a5c833455343a8bfe3f44d418246" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1636,7 +1636,7 @@ checksum = "c2ad8cef1d801a4686bfd8919f0b30eac4c8e48968c437a6405ded4fb5272d2b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -1964,7 +1964,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2085,7 +2085,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2339,7 +2339,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2659,11 +2659,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2840,7 +2840,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9148dba9642f055cdda5060f2e82be4a4c1c4a046ea1d08970e9279220b7ed13" dependencies = [ "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -2966,7 +2966,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.39", + "syn 2.0.41", "unic-langid", ] @@ -2980,7 +2980,7 @@ dependencies = [ "i18n-config", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -3386,9 +3386,9 @@ checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" [[package]] name = "libc" -version = "0.2.150" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libflate" @@ -3605,7 +3605,7 @@ checksum = "764b60e1ddd07e5665a6a17636a95cd7d8f3b86c73503a69c32979d05f72f3cf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -3719,9 +3719,9 @@ dependencies = [ [[package]] name = "memmap2" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deaba38d7abf1d4cca21cc89e932e542ba2b9258664d2a9ef0e61512039c9375" +checksum = "8f850157af41022bbb1b04ed15c011ce4d59520be82a4e3718b10c34b02cb85e" dependencies = [ "libc", ] @@ -3950,7 +3950,7 @@ dependencies = [ "proc-macro-crate 2.0.0", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -4260,7 +4260,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -4375,7 +4375,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -4753,7 +4753,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.39", + "syn 2.0.41", "walkdir", ] @@ -4977,7 +4977,7 @@ checksum = "1729a30a469de249c6effc17ec8d039b0aa29b3af79b819b7f51cb6ab8046a90" dependencies = [ "ab_glyph", "log", - "memmap2 0.9.0", + "memmap2 0.9.1", "smithay-client-toolkit", "tiny-skia", ] @@ -5026,7 +5026,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -5048,7 +5048,7 @@ checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -5247,7 +5247,7 @@ dependencies = [ "cursor-icon", "libc", "log", - "memmap2 0.9.0", + "memmap2 0.9.1", "rustix 0.38.28", "thiserror", "wayland-backend", @@ -5328,7 +5328,7 @@ dependencies = [ "foreign-types", "js-sys", "log", - "memmap2 0.9.0", + "memmap2 0.9.1", "objc", "raw-window-handle 0.5.2", "redox_syscall 0.4.1", @@ -5433,7 +5433,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -5648,9 +5648,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ "proc-macro2", "quote", @@ -5717,22 +5717,22 @@ checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" 
[[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -5933,7 +5933,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -6021,9 +6021,9 @@ checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" [[package]] name = "tuple" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39a40ba241047e1174c927dc5f61c141a166b938d61a2ff61838441368cc7d0e" +checksum = "9bb9f6bd73479481158ba8ee3edf17aca93354623d13f02e96a2014fdbc1c37e" dependencies = [ "num-traits", "serde", @@ -6066,28 +6066,29 @@ dependencies = [ [[package]] name = "uds_windows" -version = "1.0.2" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce65604324d3cce9b966701489fbd0cf318cb1f7bd9dd07ac9a4ee6fb791930d" +checksum = "89daebc3e6fd160ac4aa9fc8b3bf71e1f74fbf92367ae71fb83a037e8bf164b9" dependencies = [ + "memoffset 0.9.0", "tempfile", "winapi", ] [[package]] name = "unic-langid" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "887622f8e7b723780c5e64b04dcc0c9b8f426ada7cca6790cd3ea3bf0f08037a" +checksum = "238722e6d794ed130f91f4ea33e01fcff4f188d92337a21297892521c72df516" dependencies = [ "unic-langid-impl", ] [[package]] name = "unic-langid-impl" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5adeb847e35eed4efbffd9fb2e4d078b91ece56e4d6a3c0d2df55b3a1dac07d5" +checksum = "4bd55a2063fdea4ef1f8633243a7b0524cbeef1905ae04c31a1c9b9775c55bc6" dependencies = [ "serde", "tinystr", @@ -6348,7 +6349,7 @@ checksum = "6b2b8eecdb8e4284adf5546fc518f048f6dc33e7203dbe36fa93a4add39b31f6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] @@ -6423,7 +6424,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", "wasm-bindgen-shared", ] @@ -6457,7 +6458,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6906,7 +6907,7 @@ dependencies = [ "js-sys", "libc", "log", - "memmap2 0.9.0", + "memmap2 0.9.1", "ndk", "ndk-sys", "objc2", @@ -6936,9 +6937,9 @@ dependencies = [ [[package]] name = "winnow" -version = "0.5.26" +version = "0.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67b5f0a4e7a27a64c651977932b9dc5667ca7fc31ac44b03ed37a0cf42fdfff" +checksum = "6c830786f7720c2fd27a1a0e27a709dbd3c4d009b56d098fc742d4f4eab91fe2" dependencies = [ "memchr", ] @@ -7000,21 +7001,20 @@ dependencies = [ [[package]] name = "xattr" -version = "1.1.1" +version = "1.1.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc6ab6ec1907d1a901cdbcd2bd4cb9e7d64ce5c9739cbb97d3c391acd8c7fae" +checksum = "a7dae5072fe1f8db8f8d29059189ac175196e410e40ba42d5d4684ae2f750995" dependencies = [ "libc", + "linux-raw-sys 0.4.12", + "rustix 0.38.28", ] [[package]] name = "xcursor" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463705a63313cd4301184381c5e8042f0a7e9b4bb63653f216311d4ae74690b7" -dependencies = [ - "nom", -] +checksum = "6a0ccd7b4a5345edfcd0c3535718a4e9ff7798ffc536bb5b5a0e26ff84732911" [[package]] name = "xdg-home" @@ -7157,22 +7157,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.30" +version = "0.7.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7" +checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.30" +version = "0.7.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba" +checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.41", ] [[package]] diff --git a/Changelog.md b/Changelog.md index 984804a..75b1714 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,7 +5,7 @@ ### CLI - Providing full static rust binary with [Eyra](https://github.com/sunfishcode/eyra) - [#1102](https://github.com/qarmin/czkawka/pull/1102) -- Fixed duplicated `-c` argument, now saving as compact json is handled via `-C` - ???? +- Fixed duplicated `-c` argument, now saving as compact json is handled via `-C` - [#1153](https://github.com/qarmin/czkawka/pull/1153) ### Krokiet GUI - Initial release of new gui - [#1102](https://github.com/qarmin/czkawka/pull/1102) @@ -21,6 +21,8 @@ - Big speedup when searching for empty folders(especially with multithreading + cached FS schema) - [#1152](https://github.com/qarmin/czkawka/pull/1152) - Collecting files for scan can be a lot of faster due lazy file metadata gathering - [#1152](https://github.com/qarmin/czkawka/pull/1152) - Fixed recognizing not accessible folders as non-empty - [#1152](https://github.com/qarmin/czkawka/pull/1152) +- Unifying code for collecting files to scan - [#1159](https://github.com/qarmin/czkawka/pull/1159) +- Decrease memory usage when collecting files by removing unused fields in custom file entries structs - [#1159](https://github.com/qarmin/czkawka/pull/1159) ## Version 6.1.0 - 15.10.2023r - BREAKING CHANGE - Changed cache saving method, deduplicated, optimized and simplified procedure(all files needs to be hashed again) - [#1072](https://github.com/qarmin/czkawka/pull/1072), [#1086](https://github.com/qarmin/czkawka/pull/1086) diff --git a/czkawka_core/src/bad_extensions.rs b/czkawka_core/src/bad_extensions.rs index 04c6c5b..abbb8e3 100644 --- a/czkawka_core/src/bad_extensions.rs +++ b/czkawka_core/src/bad_extensions.rs @@ -210,17 +210,10 @@ impl BadExtensions { #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + .common_data(&self.common_data) .group_by(|_fe| ()) .stop_receiver(stop_receiver) 
.progress_sender(progress_sender) - .minimal_file_size(self.common_data.minimal_file_size) - .maximal_file_size(self.common_data.maximal_file_size) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .tool_type(self.common_data.tool_type) .build() .run(); diff --git a/czkawka_core/src/big_file.rs b/czkawka_core/src/big_file.rs index 3b471d8..d6a3239 100644 --- a/czkawka_core/src/big_file.rs +++ b/czkawka_core/src/big_file.rs @@ -1,30 +1,16 @@ -use std::collections::BTreeMap; use std::fs; -use std::fs::DirEntry; use std::io::Write; -use std::path::PathBuf; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use humansize::{format_size, BINARY}; use log::debug; use rayon::prelude::*; -use serde::{Deserialize, Serialize}; -use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, split_path_compare}; -use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType}; +use crate::common_dir_traversal::{DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults}; -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FileEntry { - pub path: PathBuf, - pub size: u64, - pub modified_date: u64, -} - #[derive(Copy, Clone, Eq, PartialEq)] pub enum SearchMode { BiggestFiles, @@ -66,134 +52,39 @@ impl BigFile { self.debug_print(); } - #[fun_time(message = "look_for_big_files", level = "debug")] + // #[fun_time(message = "look_for_big_files", level = "debug")] fn look_for_big_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { - let mut old_map: BTreeMap> = Default::default(); + let result = DirTraversalBuilder::new() + .group_by(|_fe| ()) + .stop_receiver(stop_receiver) + .progress_sender(progress_sender) + .common_data(&self.common_data) + .minimal_file_size(1) + .max_stage(0) + .build() + .run(); - let mut folders_to_check: Vec = self.common_data.directories.included_directories.clone(); + match result { + DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { + let mut all_files = grouped_file_entries.into_values().flatten().collect::>(); + all_files.par_sort_unstable_by_key(|fe| fe.size); - let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = - prepare_thread_handler_common(progress_sender, 0, 0, 0, CheckingMethod::None, self.common_data.tool_type); + if self.search_mode == SearchMode::BiggestFiles { + all_files.reverse(); + } - debug!("Starting to search for big files"); - while !folders_to_check.is_empty() { - if check_if_stop_received(stop_receiver) { - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - return false; - } + if all_files.len() > self.number_of_files_to_check { + all_files.truncate(self.number_of_files_to_check); + } - let segments: Vec<_> = folders_to_check - .into_par_iter() - .map(|current_folder| { - let mut dir_result = vec![]; - let mut warnings = vec![]; - let mut fe_result = vec![]; + self.big_files = all_files; - let Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else { - return (dir_result, 
warnings, fe_result); - }; - - // Check every sub folder/file/link etc. - for entry in read_dir { - let Ok(entry_data) = entry else { - continue; - }; - let Ok(file_type) = entry_data.file_type() else { - continue; - }; - - if file_type.is_dir() { - check_folder_children( - &mut dir_result, - &mut warnings, - &entry_data, - self.common_data.recursive_search, - &self.common_data.directories, - &self.common_data.excluded_items, - ); - } else if file_type.is_file() { - self.collect_file_entry(&atomic_counter, &entry_data, &mut fe_result, &mut warnings); - } - } - (dir_result, warnings, fe_result) - }) - .collect(); - - let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::(); - folders_to_check = Vec::with_capacity(required_size); - - // Process collected data - for (segment, warnings, fe_result) in segments { - folders_to_check.extend(segment); self.common_data.text_messages.warnings.extend(warnings); - for (size, fe) in fe_result { - old_map.entry(size).or_default().push(fe); - } + debug!("check_files - Found {} biggest/smallest files.", self.big_files.len()); + true } - } - debug!("Collected {} files", old_map.len()); - - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - - self.extract_n_biggest_files(old_map); - - true - } - - pub fn collect_file_entry(&self, atomic_counter: &Arc, entry_data: &DirEntry, fe_result: &mut Vec<(u64, FileEntry)>, warnings: &mut Vec) { - atomic_counter.fetch_add(1, Ordering::Relaxed); - if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) { - return; - } - - let current_file_name = entry_data.path(); - if self.common_data.excluded_items.is_excluded(¤t_file_name) { - return; - } - - let Ok(metadata) = entry_data.metadata() else { - return; - }; - - if metadata.len() == 0 { - return; - } - - let fe: FileEntry = FileEntry { - modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), - path: current_file_name, - size: metadata.len(), - }; - - fe_result.push((fe.size, fe)); - } - - #[fun_time(message = "extract_n_biggest_files", level = "debug")] - pub fn extract_n_biggest_files(&mut self, old_map: BTreeMap>) { - let iter: Box>; - if self.search_mode == SearchMode::SmallestFiles { - iter = Box::new(old_map.into_iter()); - } else { - iter = Box::new(old_map.into_iter().rev()); - } - - for (_size, mut vector) in iter { - if self.information.number_of_real_files < self.number_of_files_to_check { - if vector.len() > 1 { - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); - } - for file in vector { - if self.information.number_of_real_files < self.number_of_files_to_check { - self.big_files.push(file); - self.information.number_of_real_files += 1; - } else { - break; - } - } - } else { - break; - } + DirTraversalResult::Stopped => false, } } diff --git a/czkawka_core/src/broken_files.rs b/czkawka_core/src/broken_files.rs index bc5fb41..26f46b3 100644 --- a/czkawka_core/src/broken_files.rs +++ b/czkawka_core/src/broken_files.rs @@ -1,10 +1,9 @@ -use std::collections::BTreeMap; -use std::fs::{DirEntry, File}; +use std::collections::{BTreeMap, HashSet}; +use std::fs::File; use std::io::prelude::*; use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; +use std::sync::atomic::Ordering; use std::{fs, mem, panic}; use crossbeam_channel::{Receiver, Sender}; @@ -18,23 +17,23 @@ use rayon::prelude::*; use serde::{Deserialize, Serialize}; use crate::common::{ - 
check_folder_children, check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, + check_if_stop_received, create_crash_message, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, AUDIO_FILES_EXTENSIONS, IMAGE_RS_BROKEN_FILES_EXTENSIONS, PDF_FILES_EXTENSIONS, ZIP_FILES_EXTENSIONS, }; use crate::common_cache::{get_broken_files_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; -use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType}; +use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; #[derive(Clone, Serialize, Deserialize, Debug)] -pub struct FileEntry { +pub struct BrokenEntry { pub path: PathBuf, pub modified_date: u64, pub size: u64, pub type_of_file: TypeOfFile, pub error_string: String, } -impl ResultEntry for FileEntry { +impl ResultEntry for BrokenEntry { fn get_path(&self) -> &Path { &self.path } @@ -46,6 +45,19 @@ impl ResultEntry for FileEntry { } } +impl FileEntry { + fn into_broken_entry(self) -> BrokenEntry { + BrokenEntry { + size: self.size, + path: self.path, + modified_date: self.modified_date, + + type_of_file: TypeOfFile::Unknown, + error_string: String::new(), + } + } +} + #[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)] pub enum TypeOfFile { Unknown = -1, @@ -55,6 +67,8 @@ pub enum TypeOfFile { PDF, } +const MAX_BROKEN_FILES_STAGE: u8 = 1; + bitflags! { #[derive(PartialEq, Copy, Clone)] pub struct CheckedTypes : u32 { @@ -75,8 +89,8 @@ pub struct Info { pub struct BrokenFiles { common_data: CommonToolData, information: Info, - files_to_check: BTreeMap, - broken_files: Vec, + files_to_check: BTreeMap, + broken_files: Vec, checked_types: CheckedTypes, } @@ -108,111 +122,62 @@ impl BrokenFiles { #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { - let mut folders_to_check: Vec = self.common_data.directories.included_directories.clone(); + let zip_extensions = ZIP_FILES_EXTENSIONS.iter().collect::>(); + let audio_extensions = AUDIO_FILES_EXTENSIONS.iter().collect::>(); + let pdf_extensions = PDF_FILES_EXTENSIONS.iter().collect::>(); + let images_extensions = IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().collect::>(); - let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = - prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type); - - debug!("check_files - starting to collect files"); - while !folders_to_check.is_empty() { - if check_if_stop_received(stop_receiver) { - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - return false; + let mut extensions = Vec::new(); + let vec_extensions = [ + (CheckedTypes::PDF, PDF_FILES_EXTENSIONS), + (CheckedTypes::AUDIO, AUDIO_FILES_EXTENSIONS), + (CheckedTypes::ARCHIVE, ZIP_FILES_EXTENSIONS), + (CheckedTypes::IMAGE, IMAGE_RS_BROKEN_FILES_EXTENSIONS), + ]; + for (checked_type, extensions_to_add) in &vec_extensions { + if self.checked_types.contains(*checked_type) { + extensions.extend_from_slice(extensions_to_add); } + } - let segments: Vec<_> = folders_to_check - .into_par_iter() - .map(|current_folder| { - let mut dir_result 
= vec![]; - let mut warnings = vec![]; - let mut fe_result = vec![]; + self.common_data.allowed_extensions.set_and_validate_extensions(&extensions); + if !self.common_data.allowed_extensions.set_any_extensions() { + return true; + } - let Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else { - return (dir_result, warnings, fe_result); - }; + let result = DirTraversalBuilder::new() + .group_by(|_fe| ()) + .stop_receiver(stop_receiver) + .progress_sender(progress_sender) + .common_data(&self.common_data) + .max_stage(MAX_BROKEN_FILES_STAGE) + .build() + .run(); - // Check every sub folder/file/link etc. - for entry in read_dir { - let Ok(entry_data) = entry else { - continue; - }; - let Ok(file_type) = entry_data.file_type() else { - continue; - }; - - if file_type.is_dir() { - check_folder_children( - &mut dir_result, - &mut warnings, - &entry_data, - self.common_data.recursive_search, - &self.common_data.directories, - &self.common_data.excluded_items, - ); - } else if file_type.is_file() { - if let Some(file_entry) = self.get_file_entry(&atomic_counter, &entry_data, &mut warnings) { - fe_result.push((file_entry.path.to_string_lossy().to_string(), file_entry)); - } - } - } - (dir_result, warnings, fe_result) - }) - .collect(); - debug!("check_files - collected files"); - - let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::(); - folders_to_check = Vec::with_capacity(required_size); - - // Process collected data - for (segment, warnings, fe_result) in segments { - folders_to_check.extend(segment); + match result { + DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { + self.broken_files = grouped_file_entries + .into_values() + .flatten() + .map(|fe| { + let mut broken_entry = fe.into_broken_entry(); + broken_entry.type_of_file = check_extension_availability(broken_entry.get_path(), &images_extensions, &zip_extensions, &audio_extensions, &pdf_extensions); + broken_entry + }) + .collect(); self.common_data.text_messages.warnings.extend(warnings); - for (name, fe) in fe_result { - self.files_to_check.insert(name, fe); - } + debug!("check_files - Found {} image files.", self.broken_files.len()); + true } - } - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - true + DirTraversalResult::Stopped => false, + } } - fn get_file_entry(&self, atomic_counter: &Arc, entry_data: &DirEntry, warnings: &mut Vec) -> Option { - atomic_counter.fetch_add(1, Ordering::Relaxed); - if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) { - return None; - } - - let file_name_lowercase = entry_data.file_name().to_string_lossy().to_lowercase(); - let type_of_file = check_extension_availability(&file_name_lowercase); - - if !check_if_file_extension_is_allowed(&type_of_file, &self.checked_types) { - return None; - } - - let current_file_name = entry_data.path(); - if self.common_data.excluded_items.is_excluded(¤t_file_name) { - return None; - } - - let Ok(metadata) = entry_data.metadata() else { - return None; - }; - - let fe: FileEntry = FileEntry { - modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), - path: current_file_name, - size: metadata.len(), - type_of_file, - error_string: String::new(), - }; - Some(fe) - } - - fn check_broken_image(&self, mut file_entry: FileEntry) -> Option { + fn check_broken_image(&self, mut file_entry: BrokenEntry) -> Option { let mut file_entry_clone = file_entry.clone(); - let result = panic::catch_unwind(|| { + 
panic::catch_unwind(|| { if let Err(e) = image::open(&file_entry.path) { let error_string = e.to_string(); // This error is a problem with image library, remove check when https://github.com/image-rs/jpeg-decoder/issues/130 will be fixed @@ -222,19 +187,15 @@ impl BrokenFiles { file_entry.error_string = error_string; } Some(file_entry) - }); - - // If image crashed during opening, needs to be printed info about crashes thing - if let Ok(image_result) = result { - image_result - } else { + }) + .unwrap_or_else(|_| { let message = create_crash_message("Image-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/Serial-ATA/lofty-rs"); println!("{message}"); file_entry_clone.error_string = message; Some(file_entry_clone) - } + }) } - fn check_broken_zip(&self, mut file_entry: FileEntry) -> Option { + fn check_broken_zip(&self, mut file_entry: BrokenEntry) -> Option { match File::open(&file_entry.path) { Ok(file) => { if let Err(e) = zip::ZipArchive::new(file) { @@ -245,35 +206,32 @@ impl BrokenFiles { Err(_inspected) => None, } } - fn check_broken_audio(&self, mut file_entry: FileEntry) -> Option { + fn check_broken_audio(&self, mut file_entry: BrokenEntry) -> Option { match File::open(&file_entry.path) { Ok(file) => { let mut file_entry_clone = file_entry.clone(); - let result = panic::catch_unwind(|| { + panic::catch_unwind(|| { if let Err(e) = audio_checker::parse_audio_file(file) { file_entry.error_string = e.to_string(); } Some(file_entry) - }); - - if let Ok(audio_result) = result { - audio_result - } else { + }) + .unwrap_or_else(|_| { let message = create_crash_message("Symphonia", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdeljanov/Symphonia"); println!("{message}"); file_entry_clone.error_string = message; Some(file_entry_clone) - } + }) } Err(_inspected) => None, } } - fn check_broken_pdf(&self, mut file_entry: FileEntry) -> Option { + fn check_broken_pdf(&self, mut file_entry: BrokenEntry) -> Option { let parser_options = ParseOptions::tolerant(); // Only show as broken files with really big bugs let mut file_entry_clone = file_entry.clone(); - let result = panic::catch_unwind(|| { + panic::catch_unwind(|| { match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) { Ok(file) => { for idx in 0..file.num_pages() { @@ -297,27 +255,26 @@ impl BrokenFiles { } } Some(file_entry) - }); - if let Ok(pdf_result) = result { - pdf_result - } else { + }) + .unwrap_or_else(|_| { let message = create_crash_message("PDF-rs", &file_entry_clone.path.to_string_lossy(), "https://github.com/pdf-rs/pdf"); println!("{message}"); file_entry_clone.error_string = message; Some(file_entry_clone) - } + }) } #[fun_time(message = "load_cache", level = "debug")] - fn load_cache(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { + fn load_cache(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { let loaded_hash_map; - let mut records_already_cached: BTreeMap = Default::default(); - let mut non_cached_files_to_check: BTreeMap = Default::default(); + let mut records_already_cached: BTreeMap = Default::default(); + let mut non_cached_files_to_check: BTreeMap = Default::default(); let files_to_check = mem::take(&mut self.files_to_check); if self.common_data.use_cache { - let (messages, loaded_items) = load_cache_from_file_generalized_by_path::(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), &files_to_check); + let (messages, loaded_items) = + load_cache_from_file_generalized_by_path::(&get_broken_files_cache_file(), self.get_delete_outdated_cache(), 
&files_to_check); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); @@ -343,7 +300,7 @@ impl BrokenFiles { prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type); debug!("look_for_broken_files - started finding for broken files"); - let mut vec_file_entry: Vec = non_cached_files_to_check + let mut vec_file_entry: Vec = non_cached_files_to_check .into_par_iter() .map(|(_, file_entry)| { atomic_counter.fetch_add(1, Ordering::Relaxed); @@ -363,7 +320,7 @@ impl BrokenFiles { .while_some() .filter(Option::is_some) .map(Option::unwrap) - .collect::>(); + .collect::>(); debug!("look_for_broken_files - ended finding for broken files"); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); @@ -386,10 +343,10 @@ impl BrokenFiles { true } #[fun_time(message = "save_to_cache", level = "debug")] - fn save_to_cache(&mut self, vec_file_entry: &[FileEntry], loaded_hash_map: BTreeMap) { + fn save_to_cache(&mut self, vec_file_entry: &[BrokenEntry], loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results - let mut all_results: BTreeMap = Default::default(); + let mut all_results: BTreeMap = Default::default(); for file_entry in vec_file_entry.iter().cloned() { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); @@ -424,7 +381,7 @@ impl BrokenFiles { } impl BrokenFiles { - pub const fn get_broken_files(&self) -> &Vec { + pub const fn get_broken_files(&self) -> &Vec { &self.broken_files } @@ -478,27 +435,37 @@ impl PrintResults for BrokenFiles { } } -fn check_extension_availability(file_name_lowercase: &str) -> TypeOfFile { - if IMAGE_RS_BROKEN_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { +fn check_extension_availability( + full_name: &Path, + images_extensions: &HashSet<&&'static str>, + zip_extensions: &HashSet<&&'static str>, + audio_extensions: &HashSet<&&'static str>, + pdf_extensions: &HashSet<&&'static str>, +) -> TypeOfFile { + let Some(extension) = full_name.extension() else { + debug_assert!(false, "Missing extension"); + return TypeOfFile::Unknown; + }; + + let Some(extension_str) = extension.to_str() else { + debug_assert!(false, "Extension not really fully str"); + return TypeOfFile::Unknown; + }; + + if images_extensions.contains(&extension_str) { TypeOfFile::Image - } else if ZIP_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { + } else if zip_extensions.contains(&extension_str) { TypeOfFile::ArchiveZip - } else if AUDIO_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { + } else if audio_extensions.contains(&extension_str) { TypeOfFile::Audio - } else if PDF_FILES_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { + } else if pdf_extensions.contains(&extension_str) { TypeOfFile::PDF } else { + debug_assert!(false, "File with unknown extension"); TypeOfFile::Unknown } } -fn check_if_file_extension_is_allowed(type_of_file: &TypeOfFile, checked_types: &CheckedTypes) -> bool { - ((*type_of_file == TypeOfFile::Image) && ((*checked_types & CheckedTypes::IMAGE) == CheckedTypes::IMAGE)) - || ((*type_of_file == TypeOfFile::PDF) && ((*checked_types & CheckedTypes::PDF) == CheckedTypes::PDF)) - || ((*type_of_file == TypeOfFile::ArchiveZip) && ((*checked_types & CheckedTypes::ARCHIVE) == CheckedTypes::ARCHIVE)) - || ((*type_of_file == 
TypeOfFile::Audio) && ((*checked_types & CheckedTypes::AUDIO) == CheckedTypes::AUDIO)) -} - fn unpack_pdf_error(e: PdfError) -> PdfError { if let Try { file: _, @@ -514,7 +481,7 @@ fn unpack_pdf_error(e: PdfError) -> PdfError { } } -fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError { +fn validate_pdf_error(file_entry: &mut BrokenEntry, e: PdfError) -> PdfError { let mut error_string = e.to_string(); // Workaround for strange error message https://github.com/qarmin/czkawka/issues/898 if error_string.starts_with("Try at") { diff --git a/czkawka_core/src/common.rs b/czkawka_core/src/common.rs index 76958f5..8c2f961 100644 --- a/czkawka_core/src/common.rs +++ b/czkawka_core/src/common.rs @@ -123,30 +123,27 @@ pub fn set_number_of_threads(thread_number: usize) { } pub const RAW_IMAGE_EXTENSIONS: &[&str] = &[ - ".mrw", ".arw", ".srf", ".sr2", ".mef", ".orf", ".srw", ".erf", ".kdc", ".kdc", ".dcs", ".rw2", ".raf", ".dcr", ".dng", ".pef", ".crw", ".iiq", ".3fr", ".nrw", ".nef", ".mos", - ".cr2", ".ari", -]; -pub const IMAGE_RS_EXTENSIONS: &[&str] = &[ - ".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".webp", ".gif", ".ico", ".exr", ".qoi", + "mrw", "arw", "srf", "sr2", "mef", "orf", "srw", "erf", "kdc", "kdc", "dcs", "rw2", "raf", "dcr", "dng", "pef", "crw", "iiq", "3fr", "nrw", "nef", "mos", "cr2", "ari", ]; +pub const IMAGE_RS_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "bmp", "tiff", "tif", "tga", "ff", "jif", "jfi", "webp", "gif", "ico", "exr", "qoi"]; -pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &[".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".bmp", ".webp", ".exr", ".qoi"]; +pub const IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS: &[&str] = &["jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "bmp", "webp", "exr", "qoi"]; pub const IMAGE_RS_BROKEN_FILES_EXTENSIONS: &[&str] = &[ - ".jpg", ".jpeg", ".png", ".tiff", ".tif", ".tga", ".ff", ".jif", ".jfi", ".gif", ".bmp", ".ico", ".jfif", ".jpe", ".pnz", ".dib", ".webp", ".exr", + "jpg", "jpeg", "png", "tiff", "tif", "tga", "ff", "jif", "jfi", "gif", "bmp", "ico", "jfif", "jpe", "pnz", "dib", "webp", "exr", ]; -pub const HEIC_EXTENSIONS: &[&str] = &[".heif", ".heifs", ".heic", ".heics", ".avci", ".avcs", ".avifs"]; +pub const HEIC_EXTENSIONS: &[&str] = &["heif", "heifs", "heic", "heics", "avci", "avcs", "avifs"]; -pub const ZIP_FILES_EXTENSIONS: &[&str] = &[".zip", ".jar"]; +pub const ZIP_FILES_EXTENSIONS: &[&str] = &["zip", "jar"]; -pub const PDF_FILES_EXTENSIONS: &[&str] = &[".pdf"]; +pub const PDF_FILES_EXTENSIONS: &[&str] = &["pdf"]; pub const AUDIO_FILES_EXTENSIONS: &[&str] = &[ - ".mp3", ".flac", ".wav", ".ogg", ".m4a", ".aac", ".aiff", ".pcm", ".aif", ".aiff", ".aifc", ".m3a", ".mp2", ".mp4a", ".mp2a", ".mpga", ".wave", ".weba", ".wma", ".oga", + "mp3", "flac", "wav", "ogg", "m4a", "aac", "aiff", "pcm", "aif", "aiff", "aifc", "m3a", "mp2", "mp4a", "mp2a", "mpga", "wave", "weba", "wma", "oga", ]; pub const VIDEO_FILES_EXTENSIONS: &[&str] = &[ - ".mp4", ".mpv", ".flv", ".mp4a", ".webm", ".mpg", ".mp2", ".mpeg", ".m4p", ".m4v", ".avi", ".wmv", ".qt", ".mov", ".swf", ".mkv", + "mp4", "mpv", "flv", "mp4a", "webm", "mpg", "mp2", "mpeg", "m4p", "m4v", "avi", "wmv", "qt", "mov", "swf", "mkv", ]; pub const LOOP_DURATION: u32 = 20; //ms diff --git a/czkawka_core/src/common_cache.rs b/czkawka_core/src/common_cache.rs index ac29129..5260c24 100644 --- a/czkawka_core/src/common_cache.rs +++ b/czkawka_core/src/common_cache.rs @@ -203,11 +203,9 @@ where 
vec_loaded_entries = vec_loaded_entries .into_par_iter() .filter(|file_entry| { - if delete_outdated_cache && !file_entry.get_path().exists() { - return false; - } + let path = file_entry.get_path(); - let file_entry_path_str = file_entry.get_path().to_string_lossy().to_string(); + let file_entry_path_str = path.to_string_lossy().to_string(); if let Some(used_file) = used_files.get(&file_entry_path_str) { if file_entry.get_size() != used_file.get_size() { return false; @@ -217,6 +215,10 @@ where } } + if delete_outdated_cache && !path.exists() { + return false; + } + true }) .collect(); diff --git a/czkawka_core/src/common_dir_traversal.rs b/czkawka_core/src/common_dir_traversal.rs index 63df001..fe2414d 100644 --- a/czkawka_core/src/common_dir_traversal.rs +++ b/czkawka_core/src/common_dir_traversal.rs @@ -15,6 +15,7 @@ use crate::common::{check_if_stop_received, prepare_thread_handler_common, send_ use crate::common_directory::Directories; use crate::common_extensions::Extensions; use crate::common_items::ExcludedItems; +use crate::common_tool::CommonToolData; use crate::common_traits::ResultEntry; use crate::flc; use crate::localizer_core::generate_translation_hashmap; @@ -63,8 +64,6 @@ pub struct FileEntry { pub path: PathBuf, pub size: u64, pub modified_date: u64, - pub hash: String, - pub symlink_info: Option, } impl ResultEntry for FileEntry { @@ -81,14 +80,6 @@ impl ResultEntry for FileEntry { // Symlinks -const MAX_NUMBER_OF_SYMLINK_JUMPS: i32 = 20; - -#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] -pub struct SymlinkInfo { - pub destination_path: PathBuf, - pub type_of_error: ErrorType, -} - #[derive(Clone, Debug, PartialEq, Eq, Copy, Deserialize, Serialize)] pub enum ErrorType { InfiniteRecursion, @@ -176,6 +167,18 @@ impl<'a, 'b, F> DirTraversalBuilder<'a, 'b, F> { self } + pub fn common_data(mut self, common_tool_data: &CommonToolData) -> Self { + self.root_dirs = common_tool_data.directories.included_directories.clone(); + self.allowed_extensions = Some(common_tool_data.allowed_extensions.clone()); + self.excluded_items = Some(common_tool_data.excluded_items.clone()); + self.recursive_search = common_tool_data.recursive_search; + self.minimal_file_size = Some(common_tool_data.minimal_file_size); + self.maximal_file_size = Some(common_tool_data.maximal_file_size); + self.tool_type = common_tool_data.tool_type; + self.directories = Some(common_tool_data.directories.clone()); + self + } + pub fn stop_receiver(mut self, stop_receiver: Option<&'a Receiver<()>>) -> Self { self.stop_receiver = stop_receiver; self @@ -464,8 +467,6 @@ fn process_file_in_file_mode( size: metadata.len(), modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), path: current_file_name, - hash: String::new(), - symlink_info: None, }; fe_result.push(fe); @@ -535,51 +536,13 @@ fn process_symlink_in_symlink_mode( return; }; - let mut destination_path = PathBuf::new(); - let type_of_error; - - match current_file_name.read_link() { - Ok(t) => { - destination_path.push(t); - let mut number_of_loop = 0; - let mut current_path = current_file_name.clone(); - loop { - if number_of_loop == 0 && !current_path.exists() { - type_of_error = ErrorType::NonExistentFile; - break; - } - if number_of_loop == MAX_NUMBER_OF_SYMLINK_JUMPS { - type_of_error = ErrorType::InfiniteRecursion; - break; - } - - current_path = match current_path.read_link() { - Ok(t) => t, - Err(_inspected) => { - // Looks that some next symlinks are broken, but we do nothing with it - TODO why they are broken - 
return; - } - }; - - number_of_loop += 1; - } - } - Err(_inspected) => { - // Failed to load info about it - type_of_error = ErrorType::NonExistentFile; - } - } - // Creating new file entry let fe: FileEntry = FileEntry { + size: metadata.len(), modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), path: current_file_name, - size: 0, - hash: String::new(), - symlink_info: Some(SymlinkInfo { destination_path, type_of_error }), }; - // Adding files to Vector fe_result.push(fe); } @@ -671,18 +634,3 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec, curren } } } - -pub fn get_lowercase_name(entry_data: &DirEntry, warnings: &mut Vec) -> Option { - let name = match entry_data.file_name().into_string() { - Ok(t) => t, - Err(_inspected) => { - warnings.push(flc!( - "core_file_not_utf8_name", - generate_translation_hashmap(vec![("name", entry_data.path().to_string_lossy().to_string())]) - )); - return None; - } - } - .to_lowercase(); - Some(name) -} diff --git a/czkawka_core/src/common_directory.rs b/czkawka_core/src/common_directory.rs index b5e2a3e..f1a78b4 100644 --- a/czkawka_core/src/common_directory.rs +++ b/czkawka_core/src/common_directory.rs @@ -122,7 +122,6 @@ impl Directories { } // Try to canonicalize them - if cfg!(windows) { // Only canonicalize if it's not a network path // This can be check by checking if path starts with \\?\UNC\ diff --git a/czkawka_core/src/common_extensions.rs b/czkawka_core/src/common_extensions.rs index 0c4d9d9..d210fd3 100644 --- a/czkawka_core/src/common_extensions.rs +++ b/czkawka_core/src/common_extensions.rs @@ -79,14 +79,32 @@ impl Extensions { } } - pub fn using_custom_extensions(&self) -> bool { + pub fn set_any_extensions(&self) -> bool { !self.file_extensions_hashset.is_empty() } - pub fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) { + fn extend_allowed_extensions(&mut self, file_extensions: &[&str]) { for extension in file_extensions { let extension_without_dot = extension.trim_start_matches('.'); self.file_extensions_hashset.insert(extension_without_dot.to_string()); } } + + // E.g. 
when using similar videos, user can provide extensions like "mp4,flv", but if user provide "mp4,jpg" then + // it will be only "mp4" because "jpg" is not valid extension for videos + fn union_allowed_extensions(&mut self, file_extensions: &[&str]) { + let mut new_extensions = HashSet::new(); + for extension in file_extensions { + let extension_without_dot = extension.trim_start_matches('.'); + new_extensions.insert(extension_without_dot.to_string()); + } + } + + pub fn set_and_validate_extensions(&mut self, file_extensions: &[&str]) { + if self.file_extensions_hashset.is_empty() { + self.extend_allowed_extensions(file_extensions); + } else { + self.union_allowed_extensions(file_extensions); + } + } } diff --git a/czkawka_core/src/duplicate.rs b/czkawka_core/src/duplicate.rs index 96a3e7e..67cc9e4 100644 --- a/czkawka_core/src/duplicate.rs +++ b/czkawka_core/src/duplicate.rs @@ -6,7 +6,7 @@ use std::io::prelude::*; use std::io::{self, Error, ErrorKind}; #[cfg(target_family = "unix")] use std::os::unix::fs::MetadataExt; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; use std::{fs, mem}; @@ -15,6 +15,7 @@ use fun_time::fun_time; use humansize::{format_size, BINARY}; use log::debug; use rayon::prelude::*; +use serde::{Deserialize, Serialize}; use xxhash_rust::xxh3::Xxh3; use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; @@ -44,6 +45,36 @@ impl HashType { } } +#[derive(Clone, Serialize, Deserialize, Debug, Default)] +pub struct DuplicateEntry { + pub path: PathBuf, + pub modified_date: u64, + pub size: u64, + pub hash: String, +} +impl ResultEntry for DuplicateEntry { + fn get_path(&self) -> &Path { + &self.path + } + fn get_modified_date(&self) -> u64 { + self.modified_date + } + fn get_size(&self) -> u64 { + self.size + } +} + +impl FileEntry { + fn into_duplicate_entry(self) -> DuplicateEntry { + DuplicateEntry { + size: self.size, + path: self.path, + modified_date: self.modified_date, + hash: String::new(), + } + } +} + #[derive(Default)] pub struct Info { pub number_of_groups_by_size: usize, @@ -62,21 +93,21 @@ pub struct DuplicateFinder { common_data: CommonToolData, information: Info, // File Size, File Entry - files_with_identical_names: BTreeMap>, + files_with_identical_names: BTreeMap>, // File (Size, Name), File Entry - files_with_identical_size_names: BTreeMap<(u64, String), Vec>, + files_with_identical_size_names: BTreeMap<(u64, String), Vec>, // File Size, File Entry - files_with_identical_size: BTreeMap>, + files_with_identical_size: BTreeMap>, // File Size, next grouped by file size, next grouped by hash - files_with_identical_hashes: BTreeMap>>, + files_with_identical_hashes: BTreeMap>>, // File Size, File Entry - files_with_identical_names_referenced: BTreeMap)>, + files_with_identical_names_referenced: BTreeMap)>, // File (Size, Name), File Entry - files_with_identical_size_names_referenced: BTreeMap<(u64, String), (FileEntry, Vec)>, + files_with_identical_size_names_referenced: BTreeMap<(u64, String), (DuplicateEntry, Vec)>, // File Size, File Entry - files_with_identical_size_referenced: BTreeMap)>, + files_with_identical_size_referenced: BTreeMap)>, // File Size, next grouped by file size, next grouped by hash - files_with_identical_hashes_referenced: BTreeMap)>>, + files_with_identical_hashes_referenced: BTreeMap)>>, check_method: CheckingMethod, hash_type: HashType, ignore_hard_links: bool, @@ -158,18 +189,11 @@ impl DuplicateFinder { }; let 
result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + .common_data(&self.common_data) .group_by(group_by_func) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(CheckingMethod::Name) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .minimal_file_size(self.common_data.minimal_file_size) - .maximal_file_size(self.common_data.maximal_file_size) - .tool_type(self.common_data.tool_type) .build() .run(); @@ -178,7 +202,16 @@ impl DuplicateFinder { self.common_data.text_messages.warnings.extend(warnings); // Create new BTreeMap without single size entries(files have not duplicates) - self.files_with_identical_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect(); + self.files_with_identical_names = grouped_file_entries + .into_iter() + .filter_map(|(name, vector)| { + if vector.len() > 1 { + Some((name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect())) + } else { + None + } + }) + .collect(); // Reference - only use in size, because later hash will be counted differently if self.common_data.use_reference_folders { @@ -195,7 +228,7 @@ impl DuplicateFinder { Some((files_from_referenced_folders.pop().unwrap(), normal_files)) } }) - .collect::)>>(); + .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_names_referenced.insert(fe.path.to_string_lossy().to_string(), (fe, vec_fe)); } @@ -231,18 +264,11 @@ impl DuplicateFinder { }; let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + .common_data(&self.common_data) .group_by(group_by_func) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(CheckingMethod::Name) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .minimal_file_size(self.common_data.minimal_file_size) - .maximal_file_size(self.common_data.maximal_file_size) - .tool_type(self.common_data.tool_type) .build() .run(); @@ -250,7 +276,16 @@ impl DuplicateFinder { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { self.common_data.text_messages.warnings.extend(warnings); - self.files_with_identical_size_names = grouped_file_entries.into_iter().filter(|(_name, vector)| vector.len() > 1).collect(); + self.files_with_identical_size_names = grouped_file_entries + .into_iter() + .filter_map(|(size_name, vector)| { + if vector.len() > 1 { + Some((size_name, vector.into_iter().map(FileEntry::into_duplicate_entry).collect())) + } else { + None + } + }) + .collect(); // Reference - only use in size, because later hash will be counted differently if self.common_data.use_reference_folders { @@ -267,7 +302,7 @@ impl DuplicateFinder { Some((files_from_referenced_folders.pop().unwrap(), normal_files)) } }) - .collect::)>>(); + .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_size_names_referenced .insert((fe.size, fe.path.to_string_lossy().to_string()), (fe, vec_fe)); @@ -306,19 +341,12 @@ impl DuplicateFinder { _ => panic!(), }; let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + 
.common_data(&self.common_data) .group_by(|fe| fe.size) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .checking_method(self.check_method) .max_stage(max_stage) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .minimal_file_size(self.common_data.minimal_file_size) - .maximal_file_size(self.common_data.maximal_file_size) - .tool_type(self.common_data.tool_type) .build() .run(); @@ -334,7 +362,8 @@ impl DuplicateFinder { let vector = if self.ignore_hard_links { filter_hard_links(&vec) } else { vec }; if vector.len() > 1 { - self.files_with_identical_size.insert(size, vector); + self.files_with_identical_size + .insert(size, vector.into_iter().map(FileEntry::into_duplicate_entry).collect()); } } @@ -388,7 +417,7 @@ impl DuplicateFinder { Some((files_from_referenced_folders.pop().unwrap(), normal_files)) } }) - .collect::)>>(); + .collect::)>>(); for (fe, vec_fe) in vec { self.files_with_identical_size_referenced.insert(fe.size, (fe, vec_fe)); } @@ -396,17 +425,17 @@ impl DuplicateFinder { } #[fun_time(message = "prehash_load_cache_at_start", level = "debug")] - fn prehash_load_cache_at_start(&mut self) -> (BTreeMap>, BTreeMap>, BTreeMap>) { + fn prehash_load_cache_at_start(&mut self) -> (BTreeMap>, BTreeMap>, BTreeMap>) { // Cache algorithm // - Load data from cache - // - Convert from BT> to BT + // - Convert from BT> to BT // - Save to proper values let loaded_hash_map; - let mut records_already_cached: BTreeMap> = Default::default(); - let mut non_cached_files_to_check: BTreeMap> = Default::default(); + let mut records_already_cached: BTreeMap> = Default::default(); + let mut non_cached_files_to_check: BTreeMap> = Default::default(); if self.use_prehash_cache { - let (messages, loaded_items) = load_cache_from_file_generalized_by_size::( + let (messages, loaded_items) = load_cache_from_file_generalized_by_size::( &get_duplicate_cache_file(&self.hash_type, true), self.get_delete_outdated_cache(), &self.files_with_identical_size, @@ -418,7 +447,7 @@ impl DuplicateFinder { for (size, mut vec_file_entry) in mem::take(&mut self.files_with_identical_size) { if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) { // TODO maybe hashmap is not needed when using < 4 elements - let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new(); + let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new(); for file_entry in cached_vec_file_entry { cached_path_entries.insert(&file_entry.path, file_entry.clone()); } @@ -449,10 +478,14 @@ impl DuplicateFinder { } #[fun_time(message = "prehash_save_cache_at_exit", level = "debug")] - fn prehash_save_cache_at_exit(&mut self, loaded_hash_map: BTreeMap>, pre_hash_results: &Vec<(u64, BTreeMap>, Vec)>) { + fn prehash_save_cache_at_exit( + &mut self, + loaded_hash_map: BTreeMap>, + pre_hash_results: &Vec<(u64, BTreeMap>, Vec)>, + ) { if self.use_prehash_cache { // All results = records already cached + computed results - let mut save_cache_to_hashmap: BTreeMap = Default::default(); + let mut save_cache_to_hashmap: BTreeMap = Default::default(); for (size, vec_file_entry) in loaded_hash_map { if size >= self.minimal_prehash_cache_file_size { @@ -487,7 +520,7 @@ impl DuplicateFinder { &mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>, - pre_checked_map: &mut BTreeMap>, + pre_checked_map: &mut 
BTreeMap<u64, Vec<DuplicateEntry>>, ) -> Option<()> { let check_type = self.hash_type; let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = @@ -510,10 +543,10 @@ impl DuplicateFinder { debug!("Starting calculating prehash"); #[allow(clippy::type_complexity)] - let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check + let pre_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check .into_par_iter() .map(|(size, vec_file_entry)| { - let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default(); + let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default(); let mut errors: Vec<String> = Vec::new(); let mut buffer = [0u8; 1024 * 32]; @@ -526,7 +559,7 @@ impl DuplicateFinder { match hash_calculation(&mut buffer, &file_entry, &check_type, 0) { Ok(hash_string) => { file_entry.hash = hash_string.clone(); - hashmap_with_hash.entry(hash_string.clone()).or_default().push(file_entry); + hashmap_with_hash.entry(hash_string).or_default().push(file_entry); } Err(s) => errors.push(s), } @@ -573,16 +606,16 @@ impl DuplicateFinder { #[fun_time(message = "full_hashing_load_cache_at_start", level = "debug")] fn full_hashing_load_cache_at_start( &mut self, - mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>>, - ) -> (BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>, BTreeMap<u64, Vec<FileEntry>>) { + mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>, + ) -> (BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>, BTreeMap<u64, Vec<DuplicateEntry>>) { let loaded_hash_map; - let mut records_already_cached: BTreeMap<u64, Vec<FileEntry>> = Default::default(); - let mut non_cached_files_to_check: BTreeMap<u64, Vec<FileEntry>> = Default::default(); + let mut records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default(); + let mut non_cached_files_to_check: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default(); if self.common_data.use_cache { debug!("full_hashing_load_cache_at_start - using cache"); let (messages, loaded_items) = - load_cache_from_file_generalized_by_size::<FileEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map); + load_cache_from_file_generalized_by_size::<DuplicateEntry>(&get_duplicate_cache_file(&self.hash_type, false), self.get_delete_outdated_cache(), &pre_checked_map); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); @@ -590,7 +623,7 @@ for (size, mut vec_file_entry) in pre_checked_map { if let Some(cached_vec_file_entry) = loaded_hash_map.get(&size) { // TODO maybe hashmap is not needed when using < 4 elements - let mut cached_path_entries: HashMap<&Path, FileEntry> = HashMap::new(); + let mut cached_path_entries: HashMap<&Path, DuplicateEntry> = HashMap::new(); for file_entry in cached_vec_file_entry { cached_path_entries.insert(&file_entry.path, file_entry.clone()); } @@ -624,9 +657,9 @@ impl DuplicateFinder { #[fun_time(message = "full_hashing_save_cache_at_exit", level = "debug")] fn full_hashing_save_cache_at_exit( &mut self, - records_already_cached: BTreeMap<u64, Vec<FileEntry>>, - full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)>, - loaded_hash_map: BTreeMap<u64, Vec<FileEntry>>, + records_already_cached: BTreeMap<u64, Vec<DuplicateEntry>>, + full_hash_results: &mut Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)>, + loaded_hash_map: BTreeMap<u64, Vec<DuplicateEntry>>, ) { if !self.common_data.use_cache { return; @@ -642,7 +675,7 @@ } } // Size doesn't exists add results to files - let mut temp_hashmap: BTreeMap<String, Vec<FileEntry>> = Default::default(); + let mut temp_hashmap: BTreeMap<String, Vec<DuplicateEntry>> = Default::default(); for file_entry in vec_file_entry { temp_hashmap.entry(file_entry.hash.clone()).or_default().push(file_entry); } @@ -650,7 +683,7 @@ } // Must save all results to file, old loaded from file with all currently counted 
results - let mut all_results: BTreeMap<String, FileEntry> = Default::default(); + let mut all_results: BTreeMap<String, DuplicateEntry> = Default::default(); for (_size, vec_file_entry) in loaded_hash_map { for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); @@ -674,7 +707,12 @@ } #[fun_time(message = "full_hashing", level = "debug")] - fn full_hashing(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>, pre_checked_map: BTreeMap<u64, Vec<FileEntry>>) -> Option<()> { + fn full_hashing( + &mut self, + stop_receiver: Option<&Receiver<()>>, + progress_sender: Option<&Sender<ProgressData>>, + pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>>, + ) -> Option<()> { let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) = prepare_thread_handler_common(progress_sender, 4, MAX_STAGE, 0, self.check_method, self.common_data.tool_type); @@ -696,10 +734,10 @@ let check_type = self.hash_type; debug!("Starting full hashing of {} files", non_cached_files_to_check.values().map(Vec::len).sum::<usize>()); - let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<FileEntry>>, Vec<String>)> = non_cached_files_to_check + let mut full_hash_results: Vec<(u64, BTreeMap<String, Vec<DuplicateEntry>>, Vec<String>)> = non_cached_files_to_check .into_par_iter() .map(|(size, vec_file_entry)| { - let mut hashmap_with_hash: BTreeMap<String, Vec<FileEntry>> = Default::default(); + let mut hashmap_with_hash: BTreeMap<String, Vec<DuplicateEntry>> = Default::default(); let mut errors: Vec<String> = Vec::new(); let mut buffer = [0u8; 1024 * 16]; @@ -769,7 +807,7 @@ Some(all_results_with_same_size) } }) - .collect::<Vec<Vec<(FileEntry, Vec<FileEntry>)>>>(); + .collect::<Vec<Vec<(DuplicateEntry, Vec<DuplicateEntry>)>>>(); for vec_of_vec in vec { self.files_with_identical_hashes_referenced.insert(vec_of_vec[0].0.size, vec_of_vec); } @@ -798,7 +836,7 @@ fn check_files_hash(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool { assert_eq!(self.check_method, CheckingMethod::Hash); - let mut pre_checked_map: BTreeMap<u64, Vec<FileEntry>> = Default::default(); + let mut pre_checked_map: BTreeMap<u64, Vec<DuplicateEntry>> = Default::default(); let ret = self.prehashing(stop_receiver, progress_sender, &mut pre_checked_map); if ret.is_none() { return false; @@ -834,7 +872,7 @@ } CheckingMethod::Hash => { for vec_files in self.files_with_identical_hashes.values() { - let vev: Vec<&Vec<FileEntry>> = vec_files.iter().collect::<Vec<_>>(); + let vev: Vec<&Vec<DuplicateEntry>> = vec_files.iter().collect::<Vec<_>>(); delete_files_custom(&vev, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run); } } @@ -864,7 +902,7 @@ self.minimal_prehash_cache_file_size = minimal_prehash_cache_file_size; } - pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<FileEntry>> { + pub const fn get_files_sorted_by_names(&self) -> &BTreeMap<String, Vec<DuplicateEntry>> { &self.files_with_identical_names } @@ -872,15 +910,15 @@ self.use_prehash_cache = use_prehash_cache; } - pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<FileEntry>> { + pub const fn get_files_sorted_by_size(&self) -> &BTreeMap<u64, Vec<DuplicateEntry>> { &self.files_with_identical_size } - pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<FileEntry>> { + pub const fn get_files_sorted_by_size_name(&self) -> &BTreeMap<(u64, String), Vec<DuplicateEntry>> { &self.files_with_identical_size_names } - pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<FileEntry>>> { + pub const fn get_files_sorted_by_hash(&self) -> &BTreeMap<u64, Vec<Vec<DuplicateEntry>>> { &self.files_with_identical_hashes } @@ -908,19 +946,19 @@ self.common_data.use_reference_folders } - pub fn 
get_files_with_identical_hashes_referenced(&self) -> &BTreeMap)>> { + pub fn get_files_with_identical_hashes_referenced(&self) -> &BTreeMap)>> { &self.files_with_identical_hashes_referenced } - pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap)> { + pub fn get_files_with_identical_name_referenced(&self) -> &BTreeMap)> { &self.files_with_identical_names_referenced } - pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap)> { + pub fn get_files_with_identical_size_referenced(&self) -> &BTreeMap)> { &self.files_with_identical_size_referenced } - pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (FileEntry, Vec)> { + pub fn get_files_with_identical_size_names_referenced(&self) -> &BTreeMap<(u64, String), (DuplicateEntry, Vec)> { &self.files_with_identical_size_names_referenced } } @@ -1216,7 +1254,7 @@ pub trait MyHasher { fn finalize(&self) -> String; } -fn hash_calculation(buffer: &mut [u8], file_entry: &FileEntry, hash_type: &HashType, limit: u64) -> Result { +fn hash_calculation(buffer: &mut [u8], file_entry: &DuplicateEntry, hash_type: &HashType, limit: u64) -> Result { let mut file_handler = match File::open(&file_entry.path) { Ok(t) => t, Err(e) => return Err(format!("Unable to check hash of file {:?}, reason {e}", file_entry.path)), @@ -1376,7 +1414,7 @@ mod tests { let src = dir.path().join("a"); let mut file = File::create(&src)?; file.write_all(b"aa")?; - let e = FileEntry { path: src, ..Default::default() }; + let e = DuplicateEntry { path: src, ..Default::default() }; let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap(); assert!(!r.is_empty()); Ok(()) @@ -1389,7 +1427,7 @@ mod tests { let src = dir.path().join("a"); let mut file = File::create(&src)?; file.write_all(b"aa")?; - let e = FileEntry { path: src, ..Default::default() }; + let e = DuplicateEntry { path: src, ..Default::default() }; let r1 = hash_calculation(&mut buf, &e, &HashType::Blake3, 1).unwrap(); let r2 = hash_calculation(&mut buf, &e, &HashType::Blake3, 2).unwrap(); let r3 = hash_calculation(&mut buf, &e, &HashType::Blake3, u64::MAX).unwrap(); @@ -1403,7 +1441,7 @@ mod tests { let dir = tempfile::Builder::new().tempdir()?; let mut buf = [0u8; 1 << 10]; let src = dir.path().join("a"); - let e = FileEntry { path: src, ..Default::default() }; + let e = DuplicateEntry { path: src, ..Default::default() }; let r = hash_calculation(&mut buf, &e, &HashType::Blake3, 0).unwrap_err(); assert!(!r.is_empty()); Ok(()) diff --git a/czkawka_core/src/empty_files.rs b/czkawka_core/src/empty_files.rs index ce2647d..a396c26 100644 --- a/czkawka_core/src/empty_files.rs +++ b/czkawka_core/src/empty_files.rs @@ -53,17 +53,12 @@ impl EmptyFiles { #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + .common_data(&self.common_data) .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .minimal_file_size(0) .maximal_file_size(0) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .tool_type(self.common_data.tool_type) .build() .run(); @@ -87,7 +82,7 @@ impl EmptyFiles { match self.common_data.delete_method { 
DeleteMethod::Delete => { for file_entry in &self.empty_files { - if fs::remove_file(file_entry.path.clone()).is_err() { + if fs::remove_file(&file_entry.path).is_err() { self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string()); } } diff --git a/czkawka_core/src/empty_folder.rs b/czkawka_core/src/empty_folder.rs index 16e6f09..1e1346a 100644 --- a/czkawka_core/src/empty_folder.rs +++ b/czkawka_core/src/empty_folder.rs @@ -310,7 +310,7 @@ impl PrintResults for EmptyFolder { writeln!(writer, "--------------------------Empty folder list--------------------------")?; writeln!(writer, "Found {} empty folders", self.information.number_of_empty_folders)?; let mut empty_folder_list = self.empty_folder_list.keys().collect::>(); - empty_folder_list.sort_unstable(); + empty_folder_list.par_sort_unstable(); for name in empty_folder_list { writeln!(writer, "{name}")?; } diff --git a/czkawka_core/src/invalid_symlinks.rs b/czkawka_core/src/invalid_symlinks.rs index c5f5273..ba00b58 100644 --- a/czkawka_core/src/invalid_symlinks.rs +++ b/czkawka_core/src/invalid_symlinks.rs @@ -1,10 +1,12 @@ use std::fs; use std::io::prelude::*; +use std::path::{Path, PathBuf}; use crossbeam_channel::{Receiver, Sender}; use fun_time::fun_time; use log::debug; +use serde::{Deserialize, Serialize}; use crate::common_dir_traversal::{Collect, DirTraversalBuilder, DirTraversalResult, ErrorType, FileEntry, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; @@ -15,10 +17,50 @@ pub struct Info { pub number_of_invalid_symlinks: usize, } +const MAX_NUMBER_OF_SYMLINK_JUMPS: i32 = 20; + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct SymlinkInfo { + pub destination_path: PathBuf, + pub type_of_error: ErrorType, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct SymlinksFileEntry { + pub path: PathBuf, + pub size: u64, + pub modified_date: u64, + pub symlink_info: SymlinkInfo, +} + +impl ResultEntry for SymlinksFileEntry { + fn get_path(&self) -> &Path { + &self.path + } + fn get_modified_date(&self) -> u64 { + self.modified_date + } + fn get_size(&self) -> u64 { + self.size + } +} + +impl FileEntry { + fn into_symlinks_entry(self, symlink_info: SymlinkInfo) -> SymlinksFileEntry { + SymlinksFileEntry { + size: self.size, + path: self.path, + modified_date: self.modified_date, + + symlink_info, + } + } +} + pub struct InvalidSymlinks { common_data: CommonToolData, information: Info, - invalid_symlinks: Vec, + invalid_symlinks: Vec, } impl InvalidSymlinks { pub fn new() -> Self { @@ -43,22 +85,26 @@ impl InvalidSymlinks { #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) + .common_data(&self.common_data) .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) .collect(Collect::InvalidSymlinks) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .tool_type(self.common_data.tool_type) .build() .run(); match result { DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { - self.invalid_symlinks = grouped_file_entries.into_values().flatten().collect(); + 
self.invalid_symlinks = grouped_file_entries + .into_values() + .flatten() + .filter_map(|e| { + let Some((destination_path, type_of_error)) = Self::check_invalid_symlinks(&e.path) else { + return None; + }; + Some(e.into_symlinks_entry(SymlinkInfo { destination_path, type_of_error })) + }) + .collect(); self.information.number_of_invalid_symlinks = self.invalid_symlinks.len(); self.common_data.text_messages.warnings.extend(warnings); debug!("Found {} invalid symlinks.", self.information.number_of_invalid_symlinks); @@ -68,12 +114,50 @@ impl InvalidSymlinks { } } + fn check_invalid_symlinks(current_file_name: &Path) -> Option<(PathBuf, ErrorType)> { + let mut destination_path = PathBuf::new(); + let type_of_error; + + match current_file_name.read_link() { + Ok(t) => { + destination_path.push(t); + let mut number_of_loop = 0; + let mut current_path = current_file_name.to_path_buf(); + loop { + if number_of_loop == 0 && !current_path.exists() { + type_of_error = ErrorType::NonExistentFile; + break; + } + if number_of_loop == MAX_NUMBER_OF_SYMLINK_JUMPS { + type_of_error = ErrorType::InfiniteRecursion; + break; + } + + current_path = match current_path.read_link() { + Ok(t) => t, + Err(_inspected) => { + // Looks that some next symlinks are broken, but we do nothing with it - TODO why they are broken + return None; + } + }; + + number_of_loop += 1; + } + } + Err(_inspected) => { + // Failed to load info about it + type_of_error = ErrorType::NonExistentFile; + } + } + Some((destination_path, type_of_error)) + } + #[fun_time(message = "delete_files", level = "debug")] fn delete_files(&mut self) { match self.common_data.delete_method { DeleteMethod::Delete => { for file_entry in &self.invalid_symlinks { - if fs::remove_file(file_entry.path.clone()).is_err() { + if fs::remove_file(&file_entry.path).is_err() { self.common_data.text_messages.warnings.push(file_entry.path.to_string_lossy().to_string()); } } @@ -113,8 +197,8 @@ impl PrintResults for InvalidSymlinks { writer, "{:?}\t\t{:?}\t\t{}", file_entry.path, - file_entry.symlink_info.clone().expect("invalid traversal result").destination_path, - match file_entry.symlink_info.clone().expect("invalid traversal result").type_of_error { + file_entry.symlink_info.destination_path, + match file_entry.symlink_info.type_of_error { ErrorType::InfiniteRecursion => "Infinite Recursion", ErrorType::NonExistentFile => "Non Existent File", } @@ -142,7 +226,7 @@ impl CommonData for InvalidSymlinks { } impl InvalidSymlinks { - pub const fn get_invalid_symlinks(&self) -> &Vec { + pub const fn get_invalid_symlinks(&self) -> &Vec { &self.invalid_symlinks } diff --git a/czkawka_core/src/same_music.rs b/czkawka_core/src/same_music.rs index 693b7b1..62f9cdc 100644 --- a/czkawka_core/src/same_music.rs +++ b/czkawka_core/src/same_music.rs @@ -80,10 +80,10 @@ impl ResultEntry for MusicEntry { } impl FileEntry { - fn to_music_entry(&self) -> MusicEntry { + fn into_music_entry(self) -> MusicEntry { MusicEntry { size: self.size, - path: self.path.clone(), + path: self.path, modified_date: self.modified_date, fingerprint: vec![], @@ -177,13 +177,9 @@ impl SameMusic { #[fun_time(message = "check_files", level = "debug")] fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { - if !self.common_data.allowed_extensions.using_custom_extensions() { - self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS); - } else { - 
self.common_data.allowed_extensions.extend_allowed_extensions(AUDIO_FILES_EXTENSIONS); - if !self.common_data.allowed_extensions.using_custom_extensions() { - return true; - } + self.common_data.allowed_extensions.set_and_validate_extensions(AUDIO_FILES_EXTENSIONS); + if !self.common_data.allowed_extensions.set_any_extensions() { + return true; } let max_stage = match self.check_type { @@ -193,17 +189,10 @@ impl SameMusic { }; let result = DirTraversalBuilder::new() - .root_dirs(self.common_data.directories.included_directories.clone()) .group_by(|_fe| ()) .stop_receiver(stop_receiver) .progress_sender(progress_sender) - .minimal_file_size(self.common_data.minimal_file_size) - .maximal_file_size(self.common_data.maximal_file_size) - .directories(self.common_data.directories.clone()) - .allowed_extensions(self.common_data.allowed_extensions.clone()) - .excluded_items(self.common_data.excluded_items.clone()) - .recursive_search(self.common_data.recursive_search) - .tool_type(self.common_data.tool_type) + .common_data(&self.common_data) .max_stage(max_stage) .build() .run(); @@ -213,7 +202,7 @@ impl SameMusic { self.music_to_check = grouped_file_entries .into_values() .flatten() - .map(|fe| (fe.path.to_string_lossy().to_string(), fe.to_music_entry())) + .map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_music_entry())) .collect(); self.common_data.text_messages.warnings.extend(warnings); debug!("check_files - Found {} music files.", self.music_to_check.len()); @@ -834,7 +823,7 @@ fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool { return false; }; - let result = panic::catch_unwind(move || { + let Ok(possible_tagged_file) = panic::catch_unwind(move || { match read_from(&mut file) { Ok(t) => Some(t), Err(_inspected) => { @@ -842,21 +831,14 @@ fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool { None } } - }); - - let tagged_file = if let Ok(t) = result { - match t { - Some(r) => r, - None => { - return true; - } - } - } else { + }) else { let message = create_crash_message("Lofty", path, "https://github.com/image-rs/image/issues"); println!("{message}"); return false; }; + let Some(tagged_file) = possible_tagged_file else { return true }; + let properties = tagged_file.properties(); let mut track_title = String::new(); diff --git a/czkawka_core/src/similar_images.rs b/czkawka_core/src/similar_images.rs index e8921ca..750172c 100644 --- a/czkawka_core/src/similar_images.rs +++ b/czkawka_core/src/similar_images.rs @@ -1,5 +1,4 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; -use std::fs::DirEntry; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::atomic::Ordering; @@ -20,11 +19,11 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "heif")] use crate::common::get_dynamic_image_from_heic; use crate::common::{ - check_folder_children, check_if_stop_received, create_crash_message, delete_files_custom, get_dynamic_image_from_raw_image, prepare_thread_handler_common, - send_info_and_wait_for_ending_all_threads, HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, + check_if_stop_received, create_crash_message, delete_files_custom, get_dynamic_image_from_raw_image, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, + HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, }; use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; -use 
crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType}; +use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::flc; @@ -39,16 +38,17 @@ pub const SIMILAR_VALUES: [[u32; 6]; 4] = [ ]; #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FileEntry { +pub struct ImagesEntry { pub path: PathBuf, pub size: u64, pub dimensions: String, pub modified_date: u64, pub hash: ImHash, pub similarity: u32, + pub image_type: ImageType, } -impl ResultEntry for FileEntry { +impl ResultEntry for ImagesEntry { fn get_path(&self) -> &Path { &self.path } @@ -59,6 +59,30 @@ impl ResultEntry for FileEntry { self.size } } +impl FileEntry { + fn into_images_entry(self) -> ImagesEntry { + ImagesEntry { + size: self.size, + path: self.path, + modified_date: self.modified_date, + + dimensions: String::new(), + hash: Vec::new(), + similarity: 0, + image_type: ImageType::Unknown, + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum ImageType { + Normal, + Raw, + Heic, + Unknown, +} + +const MAX_IMAGE_STAGE: u8 = 2; #[derive(Clone, Debug, Copy)] pub enum SimilarityPreset { @@ -88,13 +112,13 @@ pub struct SimilarImages { common_data: CommonToolData, information: Info, bktree: BKTree, - similar_vectors: Vec>, - similar_referenced_vectors: Vec<(FileEntry, Vec)>, - image_hashes: HashMap>, + similar_vectors: Vec>, + similar_referenced_vectors: Vec<(ImagesEntry, Vec)>, // Hashmap with image hashes and Vector with names of files + image_hashes: HashMap>, similarity: u32, - images_to_check: BTreeMap, - pub hash_size: u8, // TODO to remove pub, this is needeed by new gui, because there is no way to check what exactly was seelected + images_to_check: BTreeMap, + hash_size: u8, hash_alg: HashAlg, image_filter: FilterType, exclude_images_with_same_size: bool, @@ -144,133 +168,71 @@ impl SimilarImages { self.debug_print(); } - #[fun_time(message = "check_for_similar_images", level = "debug")] + // #[fun_time(message = "check_for_similar_images", level = "debug")] fn check_for_similar_images(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { - let mut folders_to_check: Vec = self.common_data.directories.included_directories.clone(); - - if !self.common_data.allowed_extensions.using_custom_extensions() { - self.common_data.allowed_extensions.extend_allowed_extensions(IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS); - self.common_data.allowed_extensions.extend_allowed_extensions(RAW_IMAGE_EXTENSIONS); - #[cfg(feature = "heif")] - self.common_data.allowed_extensions.extend_allowed_extensions(HEIC_EXTENSIONS); + if cfg!(feature = "heif") { + self.common_data + .allowed_extensions + .set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat()); } else { self.common_data .allowed_extensions - .extend_allowed_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS, HEIC_EXTENSIONS].concat()); - if !self.common_data.allowed_extensions.using_custom_extensions() { - return true; - } + .set_and_validate_extensions(&[IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS].concat()); + } + if !self.common_data.allowed_extensions.set_any_extensions() { + return true; } - // Add root folders for finding - for id in 
&self.common_data.directories.included_directories { - folders_to_check.push(id.clone()); - } + let normal_image_extensions = IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS.iter().collect::>(); + let raw_image_extensions = RAW_IMAGE_EXTENSIONS.iter().collect::>(); + let heic_extensions = HEIC_EXTENSIONS.iter().collect::>(); - let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = - prepare_thread_handler_common(progress_sender, 0, 2, 0, CheckingMethod::None, self.common_data.tool_type); + let result = DirTraversalBuilder::new() + .group_by(|_fe| ()) + .stop_receiver(stop_receiver) + .progress_sender(progress_sender) + .common_data(&self.common_data) + .max_stage(MAX_IMAGE_STAGE) + .build() + .run(); - while !folders_to_check.is_empty() { - if check_if_stop_received(stop_receiver) { - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - return false; - } - - let segments: Vec<_> = folders_to_check - .into_par_iter() - .map(|current_folder| { - let mut dir_result = vec![]; - let mut warnings = vec![]; - let mut fe_result = vec![]; - - let Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else { - return (dir_result, warnings, fe_result); - }; - - for entry in read_dir { - let Ok(entry_data) = entry else { - continue; - }; - let Ok(file_type) = entry_data.file_type() else { - continue; - }; - if file_type.is_dir() { - check_folder_children( - &mut dir_result, - &mut warnings, - &entry_data, - self.common_data.recursive_search, - &self.common_data.directories, - &self.common_data.excluded_items, - ); - } else if file_type.is_file() { - atomic_counter.fetch_add(1, Ordering::Relaxed); - self.add_file_entry(&entry_data, &mut fe_result, &mut warnings); + match result { + DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { + self.images_to_check = grouped_file_entries + .into_values() + .flatten() + .map(|fe| { + let fe_str = fe.path.to_string_lossy().to_string(); + let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase(); + let mut image_entry = fe.into_images_entry(); + if normal_image_extensions.contains(&extension_lowercase.as_str()) { + image_entry.image_type = ImageType::Normal; + } else if raw_image_extensions.contains(&extension_lowercase.as_str()) { + image_entry.image_type = ImageType::Raw; + } else if heic_extensions.contains(&extension_lowercase.as_str()) { + image_entry.image_type = ImageType::Heic; } - } - (dir_result, warnings, fe_result) - }) - .collect(); - - let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::(); - folders_to_check = Vec::with_capacity(required_size); - - // Process collected data - for (segment, warnings, fe_result) in segments { - folders_to_check.extend(segment); + (fe_str, image_entry) + }) + .collect(); self.common_data.text_messages.warnings.extend(warnings); - for (name, fe) in fe_result { - self.images_to_check.insert(name, fe); - } + debug!("check_files - Found {} image files.", self.images_to_check.len()); + true } - } - eprintln!("Tested {} files", atomic_counter.load(Ordering::Relaxed)); - eprintln!("Imagest to check {}", self.images_to_check.len()); - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - - true - } - - fn add_file_entry(&self, entry_data: &DirEntry, fe_result: &mut Vec<(String, FileEntry)>, warnings: &mut Vec) { - if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) { - return; - } - - let current_file_name = 
entry_data.path(); - if self.common_data.excluded_items.is_excluded(¤t_file_name) { - return; - } - - let Ok(metadata) = entry_data.metadata() else { - return; - }; - - // Checking files - if (self.common_data.minimal_file_size..=self.common_data.maximal_file_size).contains(&metadata.len()) { - let path_str = current_file_name.to_string_lossy().to_string(); - let fe: FileEntry = FileEntry { - size: metadata.len(), - dimensions: String::new(), - modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), - path: current_file_name, - hash: Vec::new(), - similarity: 0, - }; - - fe_result.push((path_str, fe)); + DirTraversalResult::Stopped => false, } } #[fun_time(message = "hash_images_load_cache", level = "debug")] - fn hash_images_load_cache(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { + fn hash_images_load_cache(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { let loaded_hash_map; - let mut records_already_cached: BTreeMap = Default::default(); - let mut non_cached_files_to_check: BTreeMap = Default::default(); + let mut records_already_cached: BTreeMap = Default::default(); + let mut non_cached_files_to_check: BTreeMap = Default::default(); if self.common_data.use_cache { - let (messages, loaded_items) = load_cache_from_file_generalized_by_path::( + let (messages, loaded_items) = load_cache_from_file_generalized_by_path::( &get_similar_images_cache_file(&self.hash_size, &self.hash_alg, &self.image_filter), self.get_delete_outdated_cache(), &self.images_to_check, @@ -315,34 +277,35 @@ impl SimilarImages { prepare_thread_handler_common(progress_sender, 1, 2, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type); debug!("hash_images - start hashing images"); - let mut vec_file_entry: Vec<(FileEntry, ImHash)> = non_cached_files_to_check + let mut vec_file_entry: Vec = non_cached_files_to_check .into_par_iter() - .map(|(_s, file_entry)| { + .map(|(_s, mut file_entry)| { atomic_counter.fetch_add(1, Ordering::Relaxed); if check_if_stop_received(stop_receiver) { check_was_stopped.store(true, Ordering::Relaxed); return None; } - Some(Some(self.collect_image_file_entry(file_entry))) + self.collect_image_file_entry(&mut file_entry); + + Some(Some(file_entry)) }) .while_some() - .filter(Option::is_some) - .map(Option::unwrap) - .collect::>(); + .filter_map(|e| e) + .collect::>(); debug!("hash_images - end hashing images"); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Just connect loaded results with already calculated hashes for file_entry in records_already_cached.into_values() { - vec_file_entry.push((file_entry.clone(), file_entry.hash)); + vec_file_entry.push(file_entry); } // All valid entries are used to create bktree used to check for hash similarity - for (file_entry, buf) in &vec_file_entry { - // Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. contains a log of alpha channel) - if !(buf.is_empty() || buf.iter().all(|e| *e == 0) || buf.iter().all(|e| *e == 255)) { - self.image_hashes.entry(buf.clone()).or_default().push(file_entry.clone()); + for file_entry in &vec_file_entry { + // Only use to comparing, non broken hashes(all 0 or 255 hashes means that algorithm fails to decode them because e.g. 
contains a lot of alpha channel) + if !(file_entry.hash.is_empty() || file_entry.hash.iter().all(|e| *e == 0) || file_entry.hash.iter().all(|e| *e == 255)) { + self.image_hashes.entry(file_entry.hash.clone()).or_default().push(file_entry.clone()); } } @@ -357,11 +320,11 @@ impl SimilarImages { } #[fun_time(message = "save_to_cache", level = "debug")] - fn save_to_cache(&mut self, vec_file_entry: Vec<(FileEntry, ImHash)>, loaded_hash_map: BTreeMap) { + fn save_to_cache(&mut self, vec_file_entry: Vec, loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results - let mut all_results: BTreeMap = loaded_hash_map; - for (file_entry, _hash) in vec_file_entry { + let mut all_results: BTreeMap = loaded_hash_map; + for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } @@ -375,60 +338,61 @@ impl SimilarImages { } } - fn collect_image_file_entry(&self, mut file_entry: FileEntry) -> (FileEntry, ImHash) { - let file_name_lowercase = file_entry.path.to_string_lossy().to_lowercase(); - - let image; - - #[allow(clippy::never_loop)] // Required to implement nice if/else - 'krztyna: loop { - if RAW_IMAGE_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { - image = match get_dynamic_image_from_raw_image(&file_entry.path) { - Some(t) => t, - None => return (file_entry, Vec::new()), - }; - break 'krztyna; - } + fn collect_image_file_entry(&self, file_entry: &mut ImagesEntry) { + let img; + if file_entry.image_type == ImageType::Heic { #[cfg(feature = "heif")] - if HEIC_EXTENSIONS.iter().any(|e| file_name_lowercase.ends_with(e)) { - image = match get_dynamic_image_from_heic(&file_entry.path.to_string_lossy()) { + { + img = match get_dynamic_image_from_heic(&file_entry.path.to_string_lossy()) { Ok(t) => t, Err(_) => { - return (file_entry, Vec::new()); + return; } }; - break 'krztyna; } - - // Normal image extension, when any other fail, not using if/else - let result = panic::catch_unwind(|| { - match image::open(file_entry.path.clone()) { - Ok(t) => Ok(t), - // Err(_inspected) => return Some(None), // Something is wrong with image, - // For broken images empty hash is used, because without it will try to resecan files each time when it is called(missing cache file is responsible for it) - // This may cause problems(very rarely), when e.g. 
file was not available due lack of permissions, but it is available now - Err(_inspected) => Err(()), - } - }); - - // If image crashed during opening, we just skip checking its hash and go on - if let Ok(image_result) = result { - if let Ok(image2) = image_result { - image = image2; + #[cfg(not(feature = "heif"))] + { + if let Ok(image_result) = panic::catch_unwind(|| image::open(&file_entry.path)) { + if let Ok(image2) = image_result { + img = image2; + } else { + return; + } } else { - return (file_entry, Vec::new()); + let message = crate::common::create_crash_message("Image-rs", &file_entry.path.to_string_lossy(), "https://github.com/image-rs/image/issues"); + println!("{message}"); + return; + } + } + } else { + match file_entry.image_type { + ImageType::Normal | ImageType::Heic => { + if let Ok(image_result) = panic::catch_unwind(|| image::open(&file_entry.path)) { + if let Ok(image2) = image_result { + img = image2; + } else { + return; + } + } else { + let message = create_crash_message("Image-rs", &file_entry.path.to_string_lossy(), "https://github.com/image-rs/image/issues"); + println!("{message}"); + return; + } + } + ImageType::Raw => { + img = match get_dynamic_image_from_raw_image(&file_entry.path) { + Some(t) => t, + None => return, + }; + } + _ => { + unreachable!(); } - } else { - let message = create_crash_message("Image-rs", &file_entry.path.to_string_lossy(), "https://github.com/image-rs/image/issues"); - println!("{message}"); - return (file_entry, Vec::new()); } - - break 'krztyna; } - let dimensions = image.dimensions(); + let dimensions = img.dimensions(); file_entry.dimensions = format!("{}x{}", dimensions.0, dimensions.1); @@ -438,17 +402,13 @@ impl SimilarImages { .resize_filter(self.image_filter); let hasher = hasher_config.to_hasher(); - let hash = hasher.hash_image(&image); - let buf: ImHash = hash.as_bytes().to_vec(); - - file_entry.hash = buf.clone(); - - (file_entry, buf) + let hash = hasher.hash_image(&img); + file_entry.hash = hash.as_bytes().to_vec(); } // Split hashes at 2 parts, base hashes and hashes to compare, 3 argument is set of hashes with multiple images #[fun_time(message = "split_hashes", level = "debug")] - fn split_hashes(&mut self, all_hashed_images: &HashMap>) -> (Vec, HashSet) { + fn split_hashes(&mut self, all_hashed_images: &HashMap>) -> (Vec, HashSet) { let hashes_with_multiple_images: HashSet = all_hashed_images .iter() .filter_map(|(hash, vec_file_entry)| { @@ -460,8 +420,8 @@ impl SimilarImages { .collect(); let mut base_hashes = Vec::new(); // Initial hashes if self.common_data.use_reference_folders { - let mut files_from_referenced_folders: HashMap> = HashMap::new(); - let mut normal_files: HashMap> = HashMap::new(); + let mut files_from_referenced_folders: HashMap> = HashMap::new(); + let mut normal_files: HashMap> = HashMap::new(); all_hashed_images.clone().into_iter().for_each(|(hash, vec_file_entry)| { for file_entry in vec_file_entry { @@ -494,8 +454,8 @@ impl SimilarImages { &self, hashes_parents: HashMap, hashes_with_multiple_images: &HashSet, - all_hashed_images: &HashMap>, - collected_similar_images: &mut HashMap>, + all_hashed_images: &HashMap>, + collected_similar_images: &mut HashMap>, hashes_similarity: HashMap, ) { if self.common_data.use_reference_folders { @@ -552,8 +512,8 @@ impl SimilarImages { #[fun_time(message = "compare_hashes_with_non_zero_tolerance", level = "debug")] fn compare_hashes_with_non_zero_tolerance( &mut self, - all_hashed_images: &HashMap>, - collected_similar_images: &mut HashMap>, + 
all_hashed_images: &HashMap>, + collected_similar_images: &mut HashMap>, progress_sender: Option<&Sender>, stop_receiver: Option<&Receiver<()>>, tolerance: u32, @@ -697,7 +657,7 @@ impl SimilarImages { let tolerance = self.similarity; // Results - let mut collected_similar_images: HashMap> = Default::default(); + let mut collected_similar_images: HashMap> = Default::default(); let all_hashed_images = mem::take(&mut self.image_hashes); @@ -776,13 +736,13 @@ impl SimilarImages { Some((files_from_referenced_folders.pop().unwrap(), normal_files)) } }) - .collect::)>>(); + .collect::)>>(); } } #[allow(unused_variables)] // TODO this probably not works good when reference folders are used - pub fn verify_duplicated_items(&self, collected_similar_images: &HashMap>) { + pub fn verify_duplicated_items(&self, collected_similar_images: &HashMap>) { if !cfg!(debug_assertions) { return; } @@ -1028,7 +988,7 @@ fn debug_check_for_duplicated_things( use_reference_folders: bool, hashes_parents: &HashMap, hashes_similarity: &HashMap, - all_hashed_images: &HashMap>, + all_hashed_images: &HashMap>, numm: &str, ) { if !cfg!(debug_assertions) { @@ -1111,11 +1071,11 @@ impl SimilarImages { self.image_filter = image_filter; } - pub const fn get_similar_images(&self) -> &Vec> { + pub const fn get_similar_images(&self) -> &Vec> { &self.similar_vectors } - pub fn get_similar_images_referenced(&self) -> &Vec<(FileEntry, Vec)> { + pub fn get_similar_images_referenced(&self) -> &Vec<(ImagesEntry, Vec)> { &self.similar_referenced_vectors } @@ -1142,7 +1102,7 @@ mod tests { use crate::common_directory::Directories; use crate::common_tool::CommonToolData; - use crate::similar_images::{FileEntry, Hamming, ImHash, SimilarImages}; + use crate::similar_images::{Hamming, ImHash, ImageType, ImagesEntry, SimilarImages}; #[test] fn test_compare_no_images() { @@ -1568,20 +1528,21 @@ mod tests { assert_eq!(similarity, 1); } - fn add_hashes(hashmap: &mut HashMap>, file_entries: Vec) { + fn add_hashes(hashmap: &mut HashMap>, file_entries: Vec) { for fe in file_entries { hashmap.entry(fe.hash.clone()).or_default().push(fe); } } - fn create_random_file_entry(hash: Vec, name: &str) -> FileEntry { - FileEntry { + fn create_random_file_entry(hash: Vec, name: &str) -> ImagesEntry { + ImagesEntry { path: PathBuf::from(name.to_string()), size: 0, dimensions: String::new(), modified_date: 0, hash, similarity: 0, + image_type: ImageType::Normal, } } } diff --git a/czkawka_core/src/similar_videos.rs b/czkawka_core/src/similar_videos.rs index 1dd1a5f..de8391e 100644 --- a/czkawka_core/src/similar_videos.rs +++ b/czkawka_core/src/similar_videos.rs @@ -1,5 +1,4 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; -use std::fs::DirEntry; use std::io::Write; use std::mem; use std::path::{Path, PathBuf}; @@ -9,16 +8,15 @@ use crossbeam_channel::{Receiver, Sender}; use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound; use fun_time::fun_time; use humansize::{format_size, BINARY}; +use log::debug; use rayon::prelude::*; use serde::{Deserialize, Serialize}; use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo; use vid_dup_finder_lib::{NormalizedTolerance, VideoHash}; -use crate::common::{ - check_folder_children, check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS, -}; +use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS}; use 
crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized}; -use crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType}; +use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::{DebugPrint, PrintResults, ResultEntry}; use crate::flc; @@ -27,7 +25,7 @@ use crate::localizer_core::generate_translation_hashmap; pub const MAX_TOLERANCE: i32 = 20; #[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FileEntry { +pub struct VideosEntry { pub path: PathBuf, pub size: u64, pub modified_date: u64, @@ -35,7 +33,7 @@ pub struct FileEntry { pub error: String, } -impl ResultEntry for FileEntry { +impl ResultEntry for VideosEntry { fn get_path(&self) -> &Path { &self.path } @@ -47,6 +45,19 @@ impl ResultEntry for FileEntry { } } +impl FileEntry { + fn into_videos_entry(self) -> VideosEntry { + VideosEntry { + size: self.size, + path: self.path, + modified_date: self.modified_date, + + vhash: Default::default(), + error: String::new(), + } + } +} + struct Hamming; impl bk_tree::Metric> for Hamming { @@ -61,13 +72,15 @@ impl bk_tree::Metric> for Hamming { } } +const MAX_VIDEOS_STAGE: u8 = 1; + pub struct SimilarVideos { common_data: CommonToolData, information: Info, - similar_vectors: Vec>, - similar_referenced_vectors: Vec<(FileEntry, Vec)>, - videos_hashes: BTreeMap, Vec>, - videos_to_check: BTreeMap, + similar_vectors: Vec>, + similar_referenced_vectors: Vec<(VideosEntry, Vec)>, + videos_hashes: BTreeMap, Vec>, + videos_to_check: BTreeMap, tolerance: i32, exclude_videos_with_same_size: bool, } @@ -128,122 +141,47 @@ impl SimilarVideos { self.debug_print(); } - #[fun_time(message = "check_for_similar_videos", level = "debug")] + // #[fun_time(message = "check_for_similar_videos", level = "debug")] fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender>) -> bool { - let mut folders_to_check: Vec = self.common_data.directories.included_directories.clone(); - - if !self.common_data.allowed_extensions.using_custom_extensions() { - self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS); - } else { - self.common_data.allowed_extensions.extend_allowed_extensions(VIDEO_FILES_EXTENSIONS); - if !self.common_data.allowed_extensions.using_custom_extensions() { - return true; - } + self.common_data.allowed_extensions.set_and_validate_extensions(VIDEO_FILES_EXTENSIONS); + if !self.common_data.allowed_extensions.set_any_extensions() { + return true; } - let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) = - prepare_thread_handler_common(progress_sender, 0, 1, 0, CheckingMethod::None, self.common_data.tool_type); + let result = DirTraversalBuilder::new() + .group_by(|_fe| ()) + .stop_receiver(stop_receiver) + .progress_sender(progress_sender) + .common_data(&self.common_data) + .max_stage(MAX_VIDEOS_STAGE) + .build() + .run(); - while !folders_to_check.is_empty() { - if check_if_stop_received(stop_receiver) { - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - return false; - } - - let segments: Vec<_> = folders_to_check - .into_par_iter() - .map(|current_folder| { - let mut dir_result = vec![]; - let mut warnings = vec![]; - let mut fe_result = vec![]; - - let 
Some(read_dir) = common_read_dir(¤t_folder, &mut warnings) else { - return (dir_result, warnings, fe_result); - }; - - // Check every sub folder/file/link etc. - for entry in read_dir { - let Ok(entry_data) = entry else { - continue; - }; - let Ok(file_type) = entry_data.file_type() else { - continue; - }; - - if file_type.is_dir() { - check_folder_children( - &mut dir_result, - &mut warnings, - &entry_data, - self.common_data.recursive_search, - &self.common_data.directories, - &self.common_data.excluded_items, - ); - } else if file_type.is_file() { - atomic_counter.fetch_add(1, Ordering::Relaxed); - self.add_video_file_entry(&entry_data, &mut fe_result, &mut warnings); - } - } - (dir_result, warnings, fe_result) - }) - .collect(); - - let required_size = segments.iter().map(|(segment, _, _)| segment.len()).sum::(); - folders_to_check = Vec::with_capacity(required_size); - - // Process collected data - for (segment, warnings, fe_result) in segments { - folders_to_check.extend(segment); + match result { + DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => { + self.videos_to_check = grouped_file_entries + .into_values() + .flatten() + .map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry())) + .collect(); self.common_data.text_messages.warnings.extend(warnings); - for (name, fe) in fe_result { - self.videos_to_check.insert(name, fe); - } + debug!("check_files - Found {} video files.", self.videos_to_check.len()); + true } - } - send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); - - true - } - - fn add_video_file_entry(&self, entry_data: &DirEntry, fe_result: &mut Vec<(String, FileEntry)>, warnings: &mut Vec) { - if !self.common_data.allowed_extensions.check_if_entry_ends_with_extension(entry_data) { - return; - } - - let current_file_name = entry_data.path(); - if self.common_data.excluded_items.is_excluded(¤t_file_name) { - return; - } - let current_file_name_str = current_file_name.to_string_lossy().to_string(); - - let Ok(metadata) = entry_data.metadata() else { - return; - }; - - // Checking files - if (self.common_data.minimal_file_size..=self.common_data.maximal_file_size).contains(&metadata.len()) { - let fe: FileEntry = FileEntry { - size: metadata.len(), - modified_date: get_modified_time(&metadata, warnings, ¤t_file_name, false), - path: current_file_name, - vhash: Default::default(), - error: String::new(), - }; - - fe_result.push((current_file_name_str, fe)); + DirTraversalResult::Stopped => false, } } #[fun_time(message = "load_cache_at_start", level = "debug")] - fn load_cache_at_start(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { + fn load_cache_at_start(&mut self) -> (BTreeMap, BTreeMap, BTreeMap) { let loaded_hash_map; - let mut records_already_cached: BTreeMap = Default::default(); - let mut non_cached_files_to_check: BTreeMap = Default::default(); + let mut records_already_cached: BTreeMap = Default::default(); + let mut non_cached_files_to_check: BTreeMap = Default::default(); if self.common_data.use_cache { let (messages, loaded_items) = - load_cache_from_file_generalized_by_path::(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check); + load_cache_from_file_generalized_by_path::(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check); self.get_text_messages_mut().extend_with_another_messages(messages); loaded_hash_map = loaded_items.unwrap_or_default(); @@ -268,7 +206,7 @@ impl SimilarVideos { let (progress_thread_handle, 
progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type); - let mut vec_file_entry: Vec = non_cached_files_to_check + let mut vec_file_entry: Vec = non_cached_files_to_check .par_iter() .map(|file_entry| { atomic_counter.fetch_add(1, Ordering::Relaxed); @@ -293,14 +231,14 @@ impl SimilarVideos { Some(file_entry) }) .while_some() - .collect::>(); + .collect::>(); send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle); // Just connect loaded results with already calculated hashes vec_file_entry.extend(records_already_cached.into_values()); - let mut hashmap_with_file_entries: HashMap = Default::default(); + let mut hashmap_with_file_entries: HashMap = Default::default(); let mut vector_of_hashes: Vec = Vec::new(); for file_entry in &vec_file_entry { // 0 means that images was not hashed correctly, e.g. could be improperly @@ -342,10 +280,10 @@ impl SimilarVideos { } #[fun_time(message = "save_cache", level = "debug")] - fn save_cache(&mut self, vec_file_entry: Vec, loaded_hash_map: BTreeMap) { + fn save_cache(&mut self, vec_file_entry: Vec, loaded_hash_map: BTreeMap) { if self.common_data.use_cache { // Must save all results to file, old loaded from file with all currently counted results - let mut all_results: BTreeMap = loaded_hash_map; + let mut all_results: BTreeMap = loaded_hash_map; for file_entry in vec_file_entry { all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry); } @@ -356,11 +294,11 @@ impl SimilarVideos { } #[fun_time(message = "match_groups_of_videos", level = "debug")] - fn match_groups_of_videos(&mut self, vector_of_hashes: Vec, hashmap_with_file_entries: &HashMap) { + fn match_groups_of_videos(&mut self, vector_of_hashes: Vec, hashmap_with_file_entries: &HashMap) { let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64)); - let mut collected_similar_videos: Vec> = Default::default(); + let mut collected_similar_videos: Vec> = Default::default(); for i in match_group { - let mut temp_vector: Vec = Vec::new(); + let mut temp_vector: Vec = Vec::new(); let mut bt_size: BTreeSet = Default::default(); for j in i.duplicates() { let file_entry = hashmap_with_file_entries.get(&j.to_string_lossy().to_string()).unwrap(); @@ -397,7 +335,7 @@ impl SimilarVideos { Some((files_from_referenced_folders.pop().unwrap(), normal_files)) } }) - .collect::)>>(); + .collect::)>>(); } } @@ -493,7 +431,7 @@ impl SimilarVideos { self.tolerance = tolerance; } - pub const fn get_similar_videos(&self) -> &Vec> { + pub const fn get_similar_videos(&self) -> &Vec> { &self.similar_vectors } @@ -501,7 +439,7 @@ impl SimilarVideos { &self.information } - pub fn get_similar_videos_referenced(&self) -> &Vec<(FileEntry, Vec)> { + pub fn get_similar_videos_referenced(&self) -> &Vec<(VideosEntry, Vec)> { &self.similar_referenced_vectors } diff --git a/czkawka_core/src/temporary.rs b/czkawka_core/src/temporary.rs index 938665c..4042aa8 100644 --- a/czkawka_core/src/temporary.rs +++ b/czkawka_core/src/temporary.rs @@ -12,7 +12,7 @@ use rayon::prelude::*; use serde::Serialize; use crate::common::{check_folder_children, check_if_stop_received, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads}; -use crate::common_dir_traversal::{common_read_dir, get_lowercase_name, get_modified_time, CheckingMethod, ProgressData, ToolType}; +use 
crate::common_dir_traversal::{common_read_dir, get_modified_time, CheckingMethod, ProgressData, ToolType}; use crate::common_tool::{CommonData, CommonToolData, DeleteMethod}; use crate::common_traits::*; @@ -142,16 +142,18 @@ impl Temporary { pub fn get_file_entry(&self, atomic_counter: &Arc, entry_data: &DirEntry, warnings: &mut Vec) -> Option { atomic_counter.fetch_add(1, Ordering::Relaxed); - let file_name_lowercase = get_lowercase_name(entry_data, warnings)?; - - if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) { - return None; - } let current_file_name = entry_data.path(); if self.common_data.excluded_items.is_excluded(¤t_file_name) { return None; } + let file_name = entry_data.file_name(); + let file_name_ascii_lowercase = file_name.to_ascii_lowercase(); + let file_name_lowercase = file_name_ascii_lowercase.to_string_lossy(); + if !TEMP_EXTENSIONS.iter().any(|f| file_name_lowercase.ends_with(f)) { + return None; + } + let Ok(metadata) = entry_data.metadata() else { return None; }; diff --git a/czkawka_gui/Cargo.toml b/czkawka_gui/Cargo.toml index 72a7e5a..ef1b8e7 100644 --- a/czkawka_gui/Cargo.toml +++ b/czkawka_gui/Cargo.toml @@ -12,12 +12,13 @@ repository = "https://github.com/qarmin/czkawka" [dependencies] gdk4 = "0.7" glib = "0.18" +gtk4 = { version = "0.7", default-features = false, features = ["v4_6"] } humansize = "2.1" chrono = "0.4.31" # Used for sending stop signal across threads -crossbeam-channel = "0.5.8" +crossbeam-channel = "0.5" # For saving/loading config files to specific directories directories-next = "2.0" @@ -49,9 +50,9 @@ once_cell = "1.19" log = "0.4.20" handsome_logger = "0.8" fun_time = { version = "0.3", features = ["log"] } +rayon = "1.8" czkawka_core = { path = "../czkawka_core", version = "6.1.0", features = [] } -gtk4 = { version = "0.7", default-features = false, features = ["v4_6"] } [target.'cfg(windows)'.dependencies] winapi = { version = "0.3.9", features = ["combaseapi", "objbase", "shobjidl_core", "windef", "winerror", "wtypesbase", "winuser"] } diff --git a/czkawka_gui/src/compute_results.rs b/czkawka_gui/src/compute_results.rs index f10f85b..982633a 100644 --- a/czkawka_gui/src/compute_results.rs +++ b/czkawka_gui/src/compute_results.rs @@ -8,13 +8,15 @@ use glib::Receiver; use gtk4::prelude::*; use gtk4::{Entry, ListStore, TextView, TreeView, Widget}; use humansize::{format_size, BINARY}; +use rayon::prelude::*; use czkawka_core::bad_extensions::BadExtensions; use czkawka_core::big_file::BigFile; use czkawka_core::broken_files::BrokenFiles; use czkawka_core::common::{split_path, split_path_compare}; -use czkawka_core::common_dir_traversal::{CheckingMethod, FileEntry}; +use czkawka_core::common_dir_traversal::CheckingMethod; use czkawka_core::common_tool::CommonData; +use czkawka_core::common_traits::ResultEntry; use czkawka_core::duplicate::DuplicateFinder; use czkawka_core::empty_files::EmptyFiles; use czkawka_core::empty_folder::EmptyFolder; @@ -22,7 +24,7 @@ use czkawka_core::invalid_symlinks::InvalidSymlinks; use czkawka_core::localizer_core::generate_translation_hashmap; use czkawka_core::same_music::{MusicSimilarity, SameMusic}; use czkawka_core::similar_images; -use czkawka_core::similar_images::SimilarImages; +use czkawka_core::similar_images::{ImagesEntry, SimilarImages}; use czkawka_core::similar_videos::SimilarVideos; use czkawka_core::temporary::Temporary; @@ -264,7 +266,7 @@ fn computer_bad_extensions( // Sort let mut vector = vector.clone(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), 
b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); for file_entry in vector { let (directory, file) = split_path(&file_entry.path); @@ -336,7 +338,7 @@ fn computer_broken_files( // Sort let mut vector = vector.clone(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); for file_entry in vector { let (directory, file) = split_path(&file_entry.path); @@ -407,7 +409,7 @@ fn computer_invalid_symlinks( for file_entry in vector { let (directory, file) = split_path(&file_entry.path); - let symlink_info = file_entry.symlink_info.clone().expect("invalid traversal result"); + let symlink_info = file_entry.symlink_info; let values: [(u32, &dyn ToValue); COLUMNS_NUMBER] = [ (ColumnsInvalidSymlinks::SelectionButton as u32, &false), (ColumnsInvalidSymlinks::Name as u32, &file), @@ -499,7 +501,7 @@ fn computer_same_music( // Sort let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); - vec_file_entry.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vec_file_entry.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); vec_file_entry } else { vec_file_entry.clone() @@ -551,7 +553,7 @@ fn computer_same_music( // Sort let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); - vec_file_entry.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vec_file_entry.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); vec_file_entry } else { vec_file_entry.clone() @@ -657,7 +659,7 @@ fn computer_similar_videos( // Sort let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); - vec_file_entry.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vec_file_entry.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); vec_file_entry } else { vec_file_entry.clone() @@ -676,7 +678,7 @@ fn computer_similar_videos( // Sort let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); - vec_file_entry.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vec_file_entry.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); vec_file_entry } else { vec_file_entry.clone() @@ -749,13 +751,13 @@ fn computer_similar_images( let list_store = get_list_store(tree_view); if sf.get_use_reference() { - let vec_struct_similar: &Vec<(similar_images::FileEntry, Vec)> = sf.get_similar_images_referenced(); + let vec_struct_similar: &Vec<(ImagesEntry, Vec)> = sf.get_similar_images_referenced(); for (base_file_entry, vec_file_entry) in vec_struct_similar { // Sort let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); // Use comparison by similarity, because it is more important that path here - vec_file_entry.sort_unstable_by_key(|e| e.similarity); + vec_file_entry.par_sort_unstable_by_key(|e| e.similarity); vec_file_entry } else { vec_file_entry.clone() @@ -798,7 +800,7 @@ fn computer_similar_images( let vec_file_entry = if vec_file_entry.len() >= 2 { let mut vec_file_entry = vec_file_entry.clone(); // Use comparison by similarity, because it is more important that path here - 
vec_file_entry.sort_unstable_by_key(|e| e.similarity); + vec_file_entry.par_sort_unstable_by_key(|e| e.similarity); vec_file_entry } else { vec_file_entry.clone() @@ -876,7 +878,7 @@ fn computer_temporary_files( // Sort // TODO maybe simplify this via common file entry let mut vector = vector.clone(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); for file_entry in vector { let (directory, file) = split_path(&file_entry.path); @@ -1080,7 +1082,7 @@ fn computer_empty_folders( let hashmap = ef.get_empty_folder_list(); let mut vector = hashmap.values().collect::>(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); for fe in vector { let (directory, file) = split_path(&fe.path); @@ -1325,19 +1327,27 @@ fn computer_duplicate_finder( } } -fn vector_sort_unstable_entry_by_path(vector: &[FileEntry]) -> Vec { +fn vector_sort_unstable_entry_by_path(vector: &[T]) -> Vec +where + T: ResultEntry + Clone, + T: std::marker::Send, +{ if vector.len() >= 2 { - let mut vector = vector.to_owned(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + let mut vector = vector.to_vec(); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.get_path(), b.get_path())); vector } else { - vector.to_owned() + vector.to_vec() } } -fn vector_sort_simple_unstable_entry_by_path(vector: &[FileEntry]) -> Vec { - let mut vector = vector.to_owned(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); +fn vector_sort_simple_unstable_entry_by_path(vector: &[T]) -> Vec +where + T: ResultEntry + Clone, + T: std::marker::Send, +{ + let mut vector = vector.to_vec(); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.get_path(), b.get_path())); vector } diff --git a/czkawka_gui/src/connect_things/connect_button_compare.rs b/czkawka_gui/src/connect_things/connect_button_compare.rs index 4a114d7..07fd227 100644 --- a/czkawka_gui/src/connect_things/connect_button_compare.rs +++ b/czkawka_gui/src/connect_things/connect_button_compare.rs @@ -353,8 +353,11 @@ fn generate_cache_for_results(vector_with_path: Vec<(String, String, TreePath)>) let big_img = Image::new(); let mut pixbuf = get_pixbuf_from_dynamic_image(&DynamicImage::new_rgb8(1, 1)).unwrap(); - let name_lowercase = name.to_lowercase(); - let is_heic = HEIC_EXTENSIONS.iter().any(|extension| name_lowercase.ends_with(extension)); + let extension_lowercase = full_path.split('.').last().map(str::to_lowercase); + let is_heic = match extension_lowercase { + Some(extension) => HEIC_EXTENSIONS.iter().any(|e| e == &extension), + None => false, + }; if is_heic { #[allow(clippy::never_loop)] diff --git a/czkawka_gui/src/connect_things/connect_settings.rs b/czkawka_gui/src/connect_things/connect_settings.rs index 1f3a827..ee98c94 100644 --- a/czkawka_gui/src/connect_things/connect_settings.rs +++ b/czkawka_gui/src/connect_things/connect_settings.rs @@ -178,7 +178,7 @@ pub fn connect_settings(gui_data: &GuiData) { FilterType::Triangle, ] { for hash_alg in &[HashAlg::Blockhash, HashAlg::Gradient, HashAlg::DoubleGradient, HashAlg::VertGradient, HashAlg::Mean] { - let (mut messages, loaded_items) = load_cache_from_file_generalized_by_path::( + let (mut messages, loaded_items) = load_cache_from_file_generalized_by_path::( 
&get_similar_images_cache_file(hash_size, hash_alg, image_filter), true, &Default::default(), @@ -214,7 +214,7 @@ pub fn connect_settings(gui_data: &GuiData) { dialog.connect_response(move |dialog, response_type| { if response_type == ResponseType::Ok { let (mut messages, loaded_items) = - load_cache_from_file_generalized_by_path::(&get_similar_videos_cache_file(), true, &Default::default()); + load_cache_from_file_generalized_by_path::(&get_similar_videos_cache_file(), true, &Default::default()); if let Some(cache_entries) = loaded_items { let save_messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &cache_entries, false, 0); diff --git a/czkawka_gui/src/initialize_gui.rs b/czkawka_gui/src/initialize_gui.rs index 70bccee..79a3dd2 100644 --- a/czkawka_gui/src/initialize_gui.rs +++ b/czkawka_gui/src/initialize_gui.rs @@ -497,9 +497,9 @@ fn show_preview( let is_heic; if let Some(extension) = Path::new(&name).extension() { - let extension = format!(".{}", extension.to_string_lossy().to_lowercase()); - is_heic = HEIC_EXTENSIONS.contains(&extension.as_str()); - if !RAW_IMAGE_EXTENSIONS.contains(&extension.as_str()) && !IMAGE_RS_EXTENSIONS.contains(&extension.as_str()) && !is_heic { + let extension_lowercase = extension.to_string_lossy().to_lowercase(); + is_heic = HEIC_EXTENSIONS.contains(&extension_lowercase.as_str()); + if !RAW_IMAGE_EXTENSIONS.contains(&extension_lowercase.as_str()) && !IMAGE_RS_EXTENSIONS.contains(&extension_lowercase.as_str()) && !is_heic { break 'dir; } } else { diff --git a/krokiet/Cargo.toml b/krokiet/Cargo.toml index 2ae6ecf..bbd260e 100644 --- a/krokiet/Cargo.toml +++ b/krokiet/Cargo.toml @@ -24,7 +24,7 @@ rand = "0.8" czkawka_core = { version = "6.1.0", path = "../czkawka_core" } chrono = "0.4.31" open = "5.0" -crossbeam-channel = "0.5.8" +crossbeam-channel = "0.5" handsome_logger = "0.8" rfd = { version = "0.12", default-features = false, features = ["xdg-portal"] } home = "0.5" diff --git a/krokiet/src/connect_scan.rs b/krokiet/src/connect_scan.rs index 834a7f7..39649d9 100644 --- a/krokiet/src/connect_scan.rs +++ b/krokiet/src/connect_scan.rs @@ -7,11 +7,11 @@ use czkawka_core::common_dir_traversal::{FileEntry, ProgressData}; use czkawka_core::common_tool::CommonData; use czkawka_core::common_traits::ResultEntry; use czkawka_core::empty_files::EmptyFiles; -use czkawka_core::empty_folder::EmptyFolder; -use czkawka_core::empty_folder::FolderEntry; +use czkawka_core::empty_folder::{EmptyFolder, FolderEntry}; use czkawka_core::similar_images; use czkawka_core::similar_images::SimilarImages; use humansize::{format_size, BINARY}; +use rayon::prelude::*; use slint::{ComponentHandle, ModelRc, SharedString, VecModel, Weak}; use std::rc::Rc; use std::thread; @@ -75,10 +75,10 @@ fn scan_similar_images(a: Weak, progress_sender: Sender, progress_sender: Sender>, messages: String, hash_size: u8) { +fn write_similar_images_results(app: &MainWindow, vector: Vec>, messages: String, hash_size: u8) { let items_found = vector.len(); let items = Rc::new(VecModel::default()); for vec_fe in vector { @@ -121,7 +121,7 @@ fn scan_empty_files(a: Weak, progress_sender: Sender, let mut vector = finder.get_empty_files().clone(); let messages = finder.get_text_messages().create_messages_text(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); a.upgrade_in_event_loop(move |app| { write_empty_files_results(&app, vector, messages); @@ -158,7 
+158,7 @@ fn scan_empty_folders(a: Weak, progress_sender: Sender let mut vector = finder.get_empty_folder_list().values().cloned().collect::>(); let messages = finder.get_text_messages().create_messages_text(); - vector.sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); + vector.par_sort_unstable_by(|a, b| split_path_compare(a.path.as_path(), b.path.as_path())); a.upgrade_in_event_loop(move |app| { write_empty_folders_results(&app, vector, messages); diff --git a/krokiet/src/connect_show_preview.rs b/krokiet/src/connect_show_preview.rs index 30aeec3..dc4942b 100644 --- a/krokiet/src/connect_show_preview.rs +++ b/krokiet/src/connect_show_preview.rs @@ -48,10 +48,9 @@ fn load_image(image_path: &Path) -> Option<(Duration, image::DynamicImage)> { } let image_name = image_path.to_string_lossy().to_string(); let image_extension = image_path.extension()?.to_string_lossy().to_lowercase(); - let extension_with_dot = format!(".{}", image_extension); - let is_raw_image = RAW_IMAGE_EXTENSIONS.contains(&extension_with_dot.as_str()); - let is_normal_image = IMAGE_RS_EXTENSIONS.contains(&extension_with_dot.as_str()); + let is_raw_image = RAW_IMAGE_EXTENSIONS.contains(&image_extension.as_str()); + let is_normal_image = IMAGE_RS_EXTENSIONS.contains(&image_extension.as_str()); if !is_raw_image && !is_normal_image { return None; diff --git a/krokiet/src/settings.rs b/krokiet/src/settings.rs index 59c5e4f..c963e13 100644 --- a/krokiet/src/settings.rs +++ b/krokiet/src/settings.rs @@ -129,7 +129,7 @@ pub fn connect_changing_settings_preset(app: &MainWindow) { app.global::().on_changed_settings_preset(move || { let app = a.upgrade().unwrap(); let current_item = app.global::().get_settings_preset_idx(); - let loaded_data = load_data_from_file::(get_config_file(current_item)); + let loaded_data = load_data_from_file::(get_config_file(current_item + 1)); match loaded_data { Ok(loaded_data) => { set_settings_to_gui(&app, &loaded_data); @@ -138,6 +138,7 @@ pub fn connect_changing_settings_preset(app: &MainWindow) { Err(e) => { set_settings_to_gui(&app, &SettingsCustom::default()); app.set_text_summary_text(format!("Cannot change and load preset {} - reason {e}", current_item + 1).into()); + error!("Failed to change preset - {e}, using default instead"); } } });
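The recurring change in compute_results.rs and connect_scan.rs above is twofold: std's sort_unstable_by is swapped for rayon's par_sort_unstable_by, and the two sort helpers in compute_results.rs stop being hard-wired to FileEntry, accepting instead any entry type that implements ResultEntry (plus Clone and Send, which the parallel sort needs). A minimal sketch of that pattern follows; the ResultEntry trait and split_path_compare below are simplified stand-ins for the czkawka_core versions, not the real implementations:

```rust
// Sketch only: requires the rayon crate; ResultEntry and split_path_compare
// are illustrative stand-ins for the czkawka_core items of the same name.
use std::path::{Path, PathBuf};

use rayon::prelude::*; // provides par_sort_unstable_by on slices

// Assumed minimal shape of czkawka_core::common_traits::ResultEntry.
trait ResultEntry {
    fn get_path(&self) -> &Path;
}

#[derive(Clone)]
struct Entry {
    path: PathBuf,
}

impl ResultEntry for Entry {
    fn get_path(&self) -> &Path {
        &self.path
    }
}

// Illustrative stand-in: order by parent directory first, then by file name.
fn split_path_compare(a: &Path, b: &Path) -> std::cmp::Ordering {
    a.parent().cmp(&b.parent()).then_with(|| a.file_name().cmp(&b.file_name()))
}

// Generic helper in the spirit of vector_sort_unstable_entry_by_path:
// works for any Clone + Send entry exposing a path, sorts in parallel,
// and keeps the existing len() >= 2 guard so empty and single-element
// vectors are returned without sorting.
fn sort_entries_by_path<T>(entries: &[T]) -> Vec<T>
where
    T: ResultEntry + Clone + Send,
{
    let mut entries = entries.to_vec();
    if entries.len() >= 2 {
        entries.par_sort_unstable_by(|a, b| split_path_compare(a.get_path(), b.get_path()));
    }
    entries
}

fn main() {
    let entries = vec![
        Entry { path: PathBuf::from("/tmp/b/file.txt") },
        Entry { path: PathBuf::from("/tmp/a/file.txt") },
    ];
    let sorted = sort_entries_by_path(&entries);
    assert_eq!(sorted[0].path, PathBuf::from("/tmp/a/file.txt"));
}
```

The other repeated change (connect_button_compare.rs, initialize_gui.rs, connect_show_preview.rs) is how a file's type is detected for previews: instead of building a ".ext" string and calling ends_with()/contains() on the lowercased name, the new code lowercases only the extension and looks it up in the extension arrays, which are now assumed to hold dot-less entries. A hedged sketch of that check, where the HEIC_EXTENSIONS list is a placeholder rather than the actual czkawka_core constant:

```rust
use std::path::Path;

// Placeholder list; the real HEIC_EXTENSIONS / RAW_IMAGE_EXTENSIONS constants
// live in czkawka_core and are assumed here to store extensions without a dot.
const HEIC_EXTENSIONS: &[&str] = &["heic", "heif"];

fn is_heic(path: &Path) -> bool {
    path.extension()
        .map(|ext| ext.to_string_lossy().to_lowercase())
        .map_or(false, |ext| HEIC_EXTENSIONS.contains(&ext.as_str()))
}

fn main() {
    assert!(is_heic(Path::new("photo.HEIC")));
    assert!(!is_heic(Path::new("photo.jpg")));
    assert!(!is_heic(Path::new("no_extension")));
}
```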