Cache videos with serde to avoid forked vid_dup_finder_lib (#486)
* Cache videos with serde to avoid forked vid_dup_finder_lib
* Fix clippy lint.
* Replace json serialization with bincode for videos
This commit is contained in:
parent
875ed2c8a9
commit
88b8f11e07
14
Cargo.lock
generated
14
Cargo.lock
generated
|
@ -138,6 +138,15 @@ version = "0.13.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bincode"
|
||||||
|
version = "1.3.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bindgen"
|
name = "bindgen"
|
||||||
version = "0.56.0"
|
version = "0.56.0"
|
||||||
|
@ -479,6 +488,7 @@ name = "czkawka_core"
|
||||||
version = "3.3.1"
|
version = "3.3.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"audiotags",
|
"audiotags",
|
||||||
|
"bincode",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"bk-tree",
|
"bk-tree",
|
||||||
"blake3",
|
"blake3",
|
||||||
|
@ -493,6 +503,7 @@ dependencies = [
|
||||||
"img_hash",
|
"img_hash",
|
||||||
"rayon",
|
"rayon",
|
||||||
"rodio",
|
"rodio",
|
||||||
|
"serde",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"vid_dup_finder_lib",
|
"vid_dup_finder_lib",
|
||||||
"xxhash-rust",
|
"xxhash-rust",
|
||||||
|
@ -2270,7 +2281,8 @@ checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vid_dup_finder_lib"
|
name = "vid_dup_finder_lib"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = "git+https://github.com/qarmin/vid_dup_finder_lib#a4809772aea8f73c9a22da6fb43df50bfdd1b31d"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "64bf85b5022ad9df9ec55750118ec38f33406165fcf9edf19c71eb2bd6823232"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ffmpeg_cmdline_utils",
|
"ffmpeg_cmdline_utils",
|
||||||
"image",
|
"image",
|
||||||
|
|
|
@ -42,8 +42,11 @@ xxhash-rust = { version = "0.8.2", features = ["xxh3"] }
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
|
|
||||||
# Video Duplicates
|
# Video Duplicates
|
||||||
vid_dup_finder_lib = { git = "https://github.com/qarmin/vid_dup_finder_lib" }
|
vid_dup_finder_lib = "0.1.0"
|
||||||
ffmpeg_cmdline_utils = "0.1.0"
|
ffmpeg_cmdline_utils = "0.1.0"
|
||||||
|
serde = "1.0.130"
|
||||||
|
bincode = "1.3.3"
|
||||||
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
|
|
|
@ -15,6 +15,7 @@ use directories_next::ProjectDirs;
|
||||||
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
|
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
|
||||||
use humansize::{file_size_opts as options, FileSize};
|
use humansize::{file_size_opts as options, FileSize};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
||||||
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
||||||
|
|
||||||
|
@ -27,8 +28,6 @@ use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
|
||||||
|
|
||||||
pub const MAX_TOLERANCE: i32 = 20;
|
pub const MAX_TOLERANCE: i32 = 20;
|
||||||
|
|
||||||
const HASH_SIZE: usize = 19;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct ProgressData {
|
pub struct ProgressData {
|
||||||
pub current_stage: u8,
|
pub current_stage: u8,
|
||||||
|
@ -37,7 +36,7 @@ pub struct ProgressData {
|
||||||
pub videos_to_check: usize,
|
pub videos_to_check: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||||
pub struct FileEntry {
|
pub struct FileEntry {
|
||||||
pub path: PathBuf,
|
pub path: PathBuf,
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
|
@ -611,7 +610,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
|
||||||
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
|
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let cache_file = cache_dir.join("cache_similar_videos.txt");
|
let cache_file = cache_dir.join("cache_similar_videos.bin");
|
||||||
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
|
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
@ -619,31 +618,10 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut writer = BufWriter::new(file_handler);
|
|
||||||
|
|
||||||
for file_entry in hashmap.values() {
|
let writer = BufWriter::new(file_handler);
|
||||||
let mut string: String = String::with_capacity(256);
|
if let Err(e) = bincode::serialize_into(writer, hashmap) {
|
||||||
|
text_messages.messages.push(format!("cannot write data to cache file {}, reason {}", cache_file.display(), e));
|
||||||
string += format!(
|
|
||||||
"{}//{}//{}//{}//{}//{}",
|
|
||||||
file_entry.path.display(),
|
|
||||||
file_entry.size,
|
|
||||||
file_entry.modified_date,
|
|
||||||
file_entry.vhash.num_frames(),
|
|
||||||
file_entry.vhash.duration(),
|
|
||||||
file_entry.error
|
|
||||||
)
|
|
||||||
.as_str();
|
|
||||||
|
|
||||||
for i in file_entry.vhash.hash() {
|
|
||||||
string.push_str("//");
|
|
||||||
string.push_str(i.to_string().as_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Err(e) = writeln!(writer, "{}", string) {
|
|
||||||
text_messages.messages.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -651,7 +629,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
|
||||||
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
|
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
|
||||||
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
|
||||||
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
|
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
|
||||||
let cache_file = cache_dir.join("cache_similar_videos.txt");
|
let cache_file = cache_dir.join("cache_similar_videos.bin");
|
||||||
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
|
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(_inspected) => {
|
Err(_inspected) => {
|
||||||
|
@ -661,74 +639,16 @@ pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache
|
||||||
};
|
};
|
||||||
|
|
||||||
let reader = BufReader::new(file_handler);
|
let reader = BufReader::new(file_handler);
|
||||||
|
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = match bincode::deserialize_from(reader) {
|
||||||
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = Default::default();
|
|
||||||
|
|
||||||
// Read the file line by line using the lines() iterator from std::io::BufRead.
|
|
||||||
for (index, line) in reader.lines().enumerate() {
|
|
||||||
let line = match line {
|
|
||||||
Ok(t) => t,
|
Ok(t) => t,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
text_messages.warnings.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
|
text_messages.warnings.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let uuu = line.split("//").collect::<Vec<&str>>();
|
|
||||||
// Hash size + other things
|
|
||||||
if uuu.len() != (HASH_SIZE + 6) {
|
|
||||||
text_messages.warnings.push(format!(
|
|
||||||
"Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
|
|
||||||
index + 1,
|
|
||||||
line,
|
|
||||||
cache_file.display(),
|
|
||||||
HASH_SIZE + 6,
|
|
||||||
uuu.len(),
|
|
||||||
));
|
|
||||||
continue;
|
|
||||||
};
|
|
||||||
// Don't load cache data if destination file not exists
|
|
||||||
if !delete_outdated_cache || Path::new(uuu[0]).exists() {
|
|
||||||
let mut hash: [u64; 19] = [0; 19];
|
|
||||||
for i in 0..HASH_SIZE {
|
|
||||||
hash[i] = match uuu[6 + i as usize].parse::<u64>() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Found invalid hash value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
hashmap_loaded_entries.insert(
|
// Don't load cache data if destination file not exists
|
||||||
uuu[0].to_string(),
|
hashmap_loaded_entries.retain(|src_path, _file_entry| Path::new(src_path).exists() && !delete_outdated_cache);
|
||||||
FileEntry {
|
|
||||||
path: PathBuf::from(uuu[0]),
|
|
||||||
size: match uuu[1].parse::<u64>() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Found invalid size value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
modified_date: match uuu[2].parse::<u64>() {
|
|
||||||
Ok(t) => t,
|
|
||||||
Err(e) => {
|
|
||||||
text_messages
|
|
||||||
.warnings
|
|
||||||
.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
},
|
|
||||||
vhash: VideoHash::with_start_data(uuu[4].parse::<u32>().unwrap_or(0), uuu[0], hash, uuu[3].parse::<u32>().unwrap_or(10)),
|
|
||||||
error: uuu[5].to_string(),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Some(hashmap_loaded_entries);
|
return Some(hashmap_loaded_entries);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue