1
0
Fork 0
mirror of synced 2024-04-27 01:02:23 +12:00

Cache videos with serde to avoid forked vid_dup_finder_lib (#486)

* Cache videos with serde to avoid forked vid_dup_finder_lib

* Fix clippy lint.

* Replace json serialization with bincode for videos
This commit is contained in:
Farmadupe 2021-12-05 17:34:12 +00:00 committed by GitHub
parent 875ed2c8a9
commit 88b8f11e07
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 33 additions and 98 deletions

14
Cargo.lock generated
View file

@ -138,6 +138,15 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bindgen"
version = "0.56.0"
@ -479,6 +488,7 @@ name = "czkawka_core"
version = "3.3.1"
dependencies = [
"audiotags",
"bincode",
"bitflags",
"bk-tree",
"blake3",
@ -493,6 +503,7 @@ dependencies = [
"img_hash",
"rayon",
"rodio",
"serde",
"tempfile",
"vid_dup_finder_lib",
"xxhash-rust",
@ -2270,7 +2281,8 @@ checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "vid_dup_finder_lib"
version = "0.1.0"
source = "git+https://github.com/qarmin/vid_dup_finder_lib#a4809772aea8f73c9a22da6fb43df50bfdd1b31d"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64bf85b5022ad9df9ec55750118ec38f33406165fcf9edf19c71eb2bd6823232"
dependencies = [
"ffmpeg_cmdline_utils",
"image",

View file

@ -42,8 +42,11 @@ xxhash-rust = { version = "0.8.2", features = ["xxh3"] }
tempfile = "3.2.0"
# Video Duplicates
vid_dup_finder_lib = { git = "https://github.com/qarmin/vid_dup_finder_lib" }
vid_dup_finder_lib = "0.1.0"
ffmpeg_cmdline_utils = "0.1.0"
serde = "1.0.130"
bincode = "1.3.3"
[features]
default = []

View file

@ -15,6 +15,7 @@ use directories_next::ProjectDirs;
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
use humansize::{file_size_opts as options, FileSize};
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
@ -27,8 +28,6 @@ use crate::common_traits::{DebugPrint, PrintResults, SaveResults};
pub const MAX_TOLERANCE: i32 = 20;
const HASH_SIZE: usize = 19;
#[derive(Debug)]
pub struct ProgressData {
pub current_stage: u8,
@ -37,7 +36,7 @@ pub struct ProgressData {
pub videos_to_check: usize,
}
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FileEntry {
pub path: PathBuf,
pub size: u64,
@ -611,7 +610,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
text_messages.messages.push(format!("Cannot create config dir {}, reason {}", cache_dir.display(), e));
return;
}
let cache_file = cache_dir.join("cache_similar_videos.txt");
let cache_file = cache_dir.join("cache_similar_videos.bin");
let file_handler = match OpenOptions::new().truncate(true).write(true).create(true).open(&cache_file) {
Ok(t) => t,
Err(e) => {
@ -619,31 +618,10 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
return;
}
};
let mut writer = BufWriter::new(file_handler);
for file_entry in hashmap.values() {
let mut string: String = String::with_capacity(256);
string += format!(
"{}//{}//{}//{}//{}//{}",
file_entry.path.display(),
file_entry.size,
file_entry.modified_date,
file_entry.vhash.num_frames(),
file_entry.vhash.duration(),
file_entry.error
)
.as_str();
for i in file_entry.vhash.hash() {
string.push_str("//");
string.push_str(i.to_string().as_str());
}
if let Err(e) = writeln!(writer, "{}", string) {
text_messages.messages.push(format!("Failed to save some data to cache file {}, reason {}", cache_file.display(), e));
return;
};
let writer = BufWriter::new(file_handler);
if let Err(e) = bincode::serialize_into(writer, hashmap) {
text_messages.messages.push(format!("cannot write data to cache file {}, reason {}", cache_file.display(), e));
}
}
}
@ -651,7 +629,7 @@ pub fn save_hashes_to_file(hashmap: &BTreeMap<String, FileEntry>, text_messages:
pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache: bool) -> Option<BTreeMap<String, FileEntry>> {
if let Some(proj_dirs) = ProjectDirs::from("pl", "Qarmin", "Czkawka") {
let cache_dir = PathBuf::from(proj_dirs.cache_dir());
let cache_file = cache_dir.join("cache_similar_videos.txt");
let cache_file = cache_dir.join("cache_similar_videos.bin");
let file_handler = match OpenOptions::new().read(true).open(&cache_file) {
Ok(t) => t,
Err(_inspected) => {
@ -661,74 +639,16 @@ pub fn load_hashes_from_file(text_messages: &mut Messages, delete_outdated_cache
};
let reader = BufReader::new(file_handler);
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = Default::default();
// Read the file line by line using the lines() iterator from std::io::BufRead.
for (index, line) in reader.lines().enumerate() {
let line = match line {
Ok(t) => t,
Err(e) => {
text_messages.warnings.push(format!("Failed to load line number {} from cache file {}, reason {}", index + 1, cache_file.display(), e));
return None;
}
};
let uuu = line.split("//").collect::<Vec<&str>>();
// Hash size + other things
if uuu.len() != (HASH_SIZE + 6) {
text_messages.warnings.push(format!(
"Found invalid data in line {} - ({}) in cache file {}, expected {} values, found {}",
index + 1,
line,
cache_file.display(),
HASH_SIZE + 6,
uuu.len(),
));
continue;
};
// Don't load cache data if destination file not exists
if !delete_outdated_cache || Path::new(uuu[0]).exists() {
let mut hash: [u64; 19] = [0; 19];
for i in 0..HASH_SIZE {
hash[i] = match uuu[6 + i as usize].parse::<u64>() {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Found invalid hash value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
continue;
}
};
}
hashmap_loaded_entries.insert(
uuu[0].to_string(),
FileEntry {
path: PathBuf::from(uuu[0]),
size: match uuu[1].parse::<u64>() {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Found invalid size value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
continue;
}
},
modified_date: match uuu[2].parse::<u64>() {
Ok(t) => t,
Err(e) => {
text_messages
.warnings
.push(format!("Found invalid modified date value in line {} - ({}) in cache file {}, reason {}", index + 1, line, cache_file.display(), e));
continue;
}
},
vhash: VideoHash::with_start_data(uuu[4].parse::<u32>().unwrap_or(0), uuu[0], hash, uuu[3].parse::<u32>().unwrap_or(10)),
error: uuu[5].to_string(),
},
);
let mut hashmap_loaded_entries: BTreeMap<String, FileEntry> = match bincode::deserialize_from(reader) {
Ok(t) => t,
Err(e) => {
text_messages.warnings.push(format!("Failed to load data from cache file {}, reason {}", cache_file.display(), e));
return None;
}
}
};
// Drop cache entries whose source file no longer exists, but only when the caller asked for
// outdated entries to be deleted. When `delete_outdated_cache` is false every loaded entry is
// kept, matching the previous text-cache behaviour (`!delete_outdated_cache || path.exists()`).
// The `||` short-circuit also avoids touching the filesystem when no pruning was requested.
hashmap_loaded_entries.retain(|src_path, _file_entry| !delete_outdated_cache || Path::new(src_path).exists());
return Some(hashmap_loaded_entries);
}