2021-12-14 08:13:49 +13:00
|
|
|
use std::collections::{BTreeMap, BTreeSet, HashMap};
|
2023-10-11 07:54:41 +13:00
|
|
|
use std::io::Write;
|
2023-05-03 08:37:12 +12:00
|
|
|
use std::mem;
|
2021-11-23 23:10:24 +13:00
|
|
|
use std::path::{Path, PathBuf};
|
2023-05-08 06:54:05 +12:00
|
|
|
use std::sync::atomic::Ordering;
|
2021-11-28 08:49:20 +13:00
|
|
|
|
2023-12-04 00:06:42 +13:00
|
|
|
use crossbeam_channel::{Receiver, Sender};
|
2021-11-28 08:49:20 +13:00
|
|
|
use ffmpeg_cmdline_utils::FfmpegErrorKind::FfmpegNotFound;
|
2023-10-11 07:54:41 +13:00
|
|
|
use fun_time::fun_time;
|
2023-07-10 18:36:03 +12:00
|
|
|
use humansize::{format_size, BINARY};
|
2023-12-17 11:21:09 +13:00
|
|
|
use log::debug;
|
2021-11-28 08:49:20 +13:00
|
|
|
use rayon::prelude::*;
|
2021-12-06 06:34:12 +13:00
|
|
|
use serde::{Deserialize, Serialize};
|
2021-11-23 23:10:24 +13:00
|
|
|
use vid_dup_finder_lib::HashCreationErrorKind::DetermineVideo;
|
|
|
|
use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
|
2023-10-08 05:04:17 +13:00
|
|
|
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
2024-02-15 05:41:17 +13:00
|
|
|
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
2023-10-14 07:33:17 +13:00
|
|
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
2023-10-11 07:54:41 +13:00
|
|
|
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
|
2022-01-20 10:35:07 +13:00
|
|
|
use crate::flc;
|
2021-11-28 08:49:20 +13:00
|
|
|
|
2021-11-23 23:10:24 +13:00
|
|
|
/// Largest tolerance accepted by `SimilarVideos::set_tolerance`; that setter asserts
/// the value lies in `0..=MAX_TOLERANCE`.
pub const MAX_TOLERANCE: i32 = 20;
|
|
|
|
|
2021-12-06 06:34:12 +13:00
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
2023-12-17 11:21:09 +13:00
|
|
|
pub struct VideosEntry {
|
2021-11-23 23:10:24 +13:00
|
|
|
pub path: PathBuf,
|
|
|
|
pub size: u64,
|
|
|
|
pub modified_date: u64,
|
|
|
|
pub vhash: VideoHash,
|
2021-11-30 01:53:09 +13:00
|
|
|
pub error: String,
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-10-05 19:06:47 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
/// Plain field accessors required by the shared `ResultEntry` trait.
impl ResultEntry for VideosEntry {
    /// Borrows the path of the video file.
    fn get_path(&self) -> &Path {
        self.path.as_path()
    }

    /// Returns the stored modification timestamp.
    fn get_modified_date(&self) -> u64 {
        self.modified_date
    }

    /// Returns the stored file size.
    fn get_size(&self) -> u64 {
        self.size
    }
}
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
impl FileEntry {
|
|
|
|
fn into_videos_entry(self) -> VideosEntry {
|
|
|
|
VideosEntry {
|
|
|
|
size: self.size,
|
|
|
|
path: self.path,
|
|
|
|
modified_date: self.modified_date,
|
|
|
|
|
|
|
|
vhash: Default::default(),
|
|
|
|
error: String::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-23 23:10:24 +13:00
|
|
|
/// Unit type that implements `bk_tree::Metric<Vec<u8>>` using the bitwise Hamming distance.
struct Hamming;
|
|
|
|
|
|
|
|
impl bk_tree::Metric<Vec<u8>> for Hamming {
|
2023-01-29 06:54:02 +13:00
|
|
|
#[inline]
|
2021-11-23 23:10:24 +13:00
|
|
|
fn distance(&self, a: &Vec<u8>, b: &Vec<u8>) -> u32 {
|
|
|
|
hamming::distance_fast(a, b).unwrap() as u32
|
|
|
|
}
|
|
|
|
|
2023-01-29 06:54:02 +13:00
|
|
|
#[inline]
|
2021-11-23 23:10:24 +13:00
|
|
|
fn threshold_distance(&self, a: &Vec<u8>, b: &Vec<u8>, _threshold: u32) -> Option<u32> {
|
|
|
|
Some(self.distance(a, b))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
/// Value passed to `DirTraversalBuilder::max_stage` when scanning for video files.
const MAX_VIDEOS_STAGE: u8 = 1;
|
|
|
|
|
2021-11-23 23:10:24 +13:00
|
|
|
pub struct SimilarVideos {
|
2023-10-05 19:06:47 +13:00
|
|
|
common_data: CommonToolData,
|
2021-11-23 23:10:24 +13:00
|
|
|
information: Info,
|
2023-12-17 11:21:09 +13:00
|
|
|
similar_vectors: Vec<Vec<VideosEntry>>,
|
|
|
|
similar_referenced_vectors: Vec<(VideosEntry, Vec<VideosEntry>)>,
|
|
|
|
videos_hashes: BTreeMap<Vec<u8>, Vec<VideosEntry>>,
|
|
|
|
videos_to_check: BTreeMap<String, VideosEntry>,
|
2021-11-23 23:10:24 +13:00
|
|
|
tolerance: i32,
|
2021-12-14 08:13:49 +13:00
|
|
|
exclude_videos_with_same_size: bool,
|
2024-02-15 05:41:17 +13:00
|
|
|
ignore_hard_links: bool,
|
2023-10-05 19:06:47 +13:00
|
|
|
}
|
|
|
|
|
|
|
|
impl CommonData for SimilarVideos {
|
|
|
|
fn get_cd(&self) -> &CommonToolData {
|
|
|
|
&self.common_data
|
|
|
|
}
|
|
|
|
fn get_cd_mut(&mut self) -> &mut CommonToolData {
|
|
|
|
&mut self.common_data
|
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Summary counters produced by a finished search.
#[derive(Default)]
pub struct Info {
    /// Without reference folders: sum over all groups of `group size - 1`.
    /// With reference folders: sum of the number of non-reference files per group.
    pub number_of_duplicates: usize,
    /// Number of groups that were counted.
    pub number_of_groups: u64,
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2021-11-23 23:10:24 +13:00
|
|
|
impl SimilarVideos {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Self {
|
2023-10-05 19:06:47 +13:00
|
|
|
common_data: CommonToolData::new(ToolType::SimilarVideos),
|
2021-11-23 23:10:24 +13:00
|
|
|
information: Default::default(),
|
|
|
|
similar_vectors: vec![],
|
|
|
|
videos_hashes: Default::default(),
|
|
|
|
videos_to_check: Default::default(),
|
|
|
|
tolerance: 10,
|
2021-12-14 08:13:49 +13:00
|
|
|
exclude_videos_with_same_size: false,
|
2021-12-24 21:18:55 +13:00
|
|
|
similar_referenced_vectors: vec![],
|
2024-02-15 05:41:17 +13:00
|
|
|
ignore_hard_links: false,
|
2021-12-24 21:18:55 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "find_similar_videos", level = "info")]
|
2023-12-04 00:06:42 +13:00
|
|
|
pub fn find_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
|
2021-11-23 23:10:24 +13:00
|
|
|
if !check_if_ffmpeg_is_installed() {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.text_messages.errors.push(flc!("core_ffmpeg_not_found"));
|
2022-07-06 05:58:57 +12:00
|
|
|
#[cfg(target_os = "windows")]
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.text_messages.errors.push(flc!("core_ffmpeg_not_found_windows"));
|
2022-07-06 05:58:57 +12:00
|
|
|
#[cfg(target_os = "linux")]
|
2024-02-15 05:45:25 +13:00
|
|
|
self.common_data
|
|
|
|
.text_messages
|
|
|
|
.errors
|
|
|
|
.push(flc!("core_ffmpeg_missing_in_snap", url = "https://github.com/snapcrafters/ffmpeg/issues/73"));
|
2021-11-23 23:10:24 +13:00
|
|
|
} else {
|
2024-02-15 05:45:25 +13:00
|
|
|
self.prepare_items();
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty();
|
2021-11-23 23:10:24 +13:00
|
|
|
if !self.check_for_similar_videos(stop_receiver, progress_sender) {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.stopped_search = true;
|
2021-11-23 23:10:24 +13:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
if !self.sort_videos(stop_receiver, progress_sender) {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.stopped_search = true;
|
2021-11-23 23:10:24 +13:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2023-10-14 07:33:17 +13:00
|
|
|
self.delete_files();
|
2021-11-23 23:10:24 +13:00
|
|
|
self.debug_print();
|
|
|
|
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
// #[fun_time(message = "check_for_similar_videos", level = "debug")]
|
2023-12-04 00:06:42 +13:00
|
|
|
fn check_for_similar_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
2024-02-15 05:45:25 +13:00
|
|
|
self.common_data.extensions.set_and_validate_allowed_extensions(VIDEO_FILES_EXTENSIONS);
|
|
|
|
if !self.common_data.extensions.set_any_extensions() {
|
2023-12-17 11:21:09 +13:00
|
|
|
return true;
|
2022-01-01 10:34:24 +13:00
|
|
|
}
|
2021-12-18 07:29:37 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
let result = DirTraversalBuilder::new()
|
2024-02-15 05:41:17 +13:00
|
|
|
.group_by(inode)
|
2023-12-17 11:21:09 +13:00
|
|
|
.stop_receiver(stop_receiver)
|
|
|
|
.progress_sender(progress_sender)
|
|
|
|
.common_data(&self.common_data)
|
|
|
|
.max_stage(MAX_VIDEOS_STAGE)
|
|
|
|
.build()
|
|
|
|
.run();
|
|
|
|
|
|
|
|
match result {
|
|
|
|
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
|
|
|
self.videos_to_check = grouped_file_entries
|
2024-02-15 05:41:17 +13:00
|
|
|
.into_iter()
|
|
|
|
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
|
2023-12-17 11:21:09 +13:00
|
|
|
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
|
|
|
|
.collect();
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.text_messages.warnings.extend(warnings);
|
2023-12-17 11:21:09 +13:00
|
|
|
debug!("check_files - Found {} video files.", self.videos_to_check.len());
|
|
|
|
true
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-05-03 08:37:12 +12:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
DirTraversalResult::Stopped => false,
|
2023-05-03 08:37:12 +12:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "load_cache_at_start", level = "debug")]
|
2023-12-17 11:21:09 +13:00
|
|
|
fn load_cache_at_start(&mut self) -> (BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>, BTreeMap<String, VideosEntry>) {
|
2023-05-03 08:37:12 +12:00
|
|
|
let loaded_hash_map;
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut records_already_cached: BTreeMap<String, VideosEntry> = Default::default();
|
|
|
|
let mut non_cached_files_to_check: BTreeMap<String, VideosEntry> = Default::default();
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-10-05 19:06:47 +13:00
|
|
|
if self.common_data.use_cache {
|
2023-10-08 05:04:17 +13:00
|
|
|
let (messages, loaded_items) =
|
2023-12-17 11:21:09 +13:00
|
|
|
load_cache_from_file_generalized_by_path::<VideosEntry>(&get_similar_videos_cache_file(), self.get_delete_outdated_cache(), &self.videos_to_check);
|
2023-10-08 05:04:17 +13:00
|
|
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
|
|
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
|
|
|
|
|
|
|
for (name, file_entry) in mem::take(&mut self.videos_to_check) {
|
|
|
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
2023-12-08 07:38:41 +13:00
|
|
|
records_already_cached.insert(name, cached_file_entry.clone());
|
2021-11-23 23:10:24 +13:00
|
|
|
} else {
|
2023-10-08 05:04:17 +13:00
|
|
|
non_cached_files_to_check.insert(name, file_entry);
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
loaded_hash_map = Default::default();
|
|
|
|
mem::swap(&mut self.videos_to_check, &mut non_cached_files_to_check);
|
|
|
|
}
|
2023-05-03 08:37:12 +12:00
|
|
|
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
|
|
|
|
}
|
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "sort_videos", level = "debug")]
|
2023-12-04 00:06:42 +13:00
|
|
|
fn sort_videos(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
2023-05-03 08:37:12 +12:00
|
|
|
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache_at_start();
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-05-08 06:54:05 +12:00
|
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
2023-10-05 19:06:47 +13:00
|
|
|
prepare_thread_handler_common(progress_sender, 1, 1, non_cached_files_to_check.len(), CheckingMethod::None, self.common_data.tool_type);
|
2023-05-03 08:37:12 +12:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut vec_file_entry: Vec<VideosEntry> = non_cached_files_to_check
|
2021-11-23 23:10:24 +13:00
|
|
|
.par_iter()
|
|
|
|
.map(|file_entry| {
|
2023-05-03 08:37:12 +12:00
|
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
2023-10-15 04:48:57 +13:00
|
|
|
if check_if_stop_received(stop_receiver) {
|
2022-05-10 05:40:35 +12:00
|
|
|
check_was_stopped.store(true, Ordering::Relaxed);
|
2021-11-23 23:10:24 +13:00
|
|
|
return None;
|
|
|
|
}
|
|
|
|
let mut file_entry = file_entry.1.clone();
|
|
|
|
|
|
|
|
let vhash = match VideoHash::from_path(&file_entry.path) {
|
|
|
|
Ok(t) => t,
|
2021-11-30 01:53:09 +13:00
|
|
|
Err(e) => {
|
|
|
|
return {
|
2022-12-21 20:44:26 +13:00
|
|
|
file_entry.error = format!("Failed to hash file, reason {e}");
|
2021-11-30 01:53:09 +13:00
|
|
|
Some(file_entry)
|
2022-06-01 03:52:55 +12:00
|
|
|
};
|
2021-11-30 01:53:09 +13:00
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
};
|
|
|
|
|
|
|
|
file_entry.vhash = vhash;
|
|
|
|
|
2021-11-30 01:53:09 +13:00
|
|
|
Some(file_entry)
|
2021-11-23 23:10:24 +13:00
|
|
|
})
|
|
|
|
.while_some()
|
2023-12-17 11:21:09 +13:00
|
|
|
.collect::<Vec<VideosEntry>>();
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-05-03 08:37:12 +12:00
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
2021-11-23 23:10:24 +13:00
|
|
|
|
|
|
|
// Just connect loaded results with already calculated hashes
|
2023-05-08 06:54:05 +12:00
|
|
|
vec_file_entry.extend(records_already_cached.into_values());
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut hashmap_with_file_entries: HashMap<String, VideosEntry> = Default::default();
|
2021-11-23 23:10:24 +13:00
|
|
|
let mut vector_of_hashes: Vec<VideoHash> = Vec::new();
|
2021-11-30 01:53:09 +13:00
|
|
|
for file_entry in &vec_file_entry {
|
|
|
|
// 0 means that images was not hashed correctly, e.g. could be improperly
|
|
|
|
if file_entry.error.is_empty() {
|
|
|
|
hashmap_with_file_entries.insert(file_entry.vhash.src_path().to_string_lossy().to_string(), file_entry.clone());
|
|
|
|
vector_of_hashes.push(file_entry.vhash.clone());
|
|
|
|
} else {
|
2023-10-05 19:06:47 +13:00
|
|
|
self.common_data.text_messages.warnings.push(file_entry.error.clone());
|
2021-11-30 01:53:09 +13:00
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
|
2023-10-05 19:06:47 +13:00
|
|
|
self.save_cache(vec_file_entry, loaded_hash_map);
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2022-05-10 05:40:35 +12:00
|
|
|
// Break if stop was clicked after saving to cache
|
|
|
|
if check_was_stopped.load(Ordering::Relaxed) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-05-03 08:37:12 +12:00
|
|
|
self.match_groups_of_videos(vector_of_hashes, &hashmap_with_file_entries);
|
|
|
|
self.remove_from_reference_folders();
|
|
|
|
|
2023-10-05 19:06:47 +13:00
|
|
|
if self.common_data.use_reference_folders {
|
2023-05-03 08:37:12 +12:00
|
|
|
for (_fe, vector) in &self.similar_referenced_vectors {
|
|
|
|
self.information.number_of_duplicates += vector.len();
|
|
|
|
self.information.number_of_groups += 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
for vector in &self.similar_vectors {
|
|
|
|
self.information.number_of_duplicates += vector.len() - 1;
|
|
|
|
self.information.number_of_groups += 1;
|
|
|
|
}
|
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-05-03 08:37:12 +12:00
|
|
|
// Clean unused data
|
|
|
|
self.videos_hashes = Default::default();
|
|
|
|
self.videos_to_check = Default::default();
|
|
|
|
|
|
|
|
true
|
|
|
|
}
|
2023-10-11 07:54:41 +13:00
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "save_cache", level = "debug")]
|
2023-12-17 11:21:09 +13:00
|
|
|
fn save_cache(&mut self, vec_file_entry: Vec<VideosEntry>, loaded_hash_map: BTreeMap<String, VideosEntry>) {
|
2023-10-05 19:06:47 +13:00
|
|
|
if self.common_data.use_cache {
|
|
|
|
// Must save all results to file, old loaded from file with all currently counted results
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut all_results: BTreeMap<String, VideosEntry> = loaded_hash_map;
|
2023-10-05 19:06:47 +13:00
|
|
|
for file_entry in vec_file_entry {
|
|
|
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
|
|
|
}
|
2023-10-08 05:04:17 +13:00
|
|
|
|
|
|
|
let messages = save_cache_to_file_generalized(&get_similar_videos_cache_file(), &all_results, self.common_data.save_also_as_json, 0);
|
|
|
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
2023-10-05 19:06:47 +13:00
|
|
|
}
|
|
|
|
}
|
2023-05-03 08:37:12 +12:00
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "match_groups_of_videos", level = "debug")]
|
2023-12-17 11:21:09 +13:00
|
|
|
fn match_groups_of_videos(&mut self, vector_of_hashes: Vec<VideoHash>, hashmap_with_file_entries: &HashMap<String, VideosEntry>) {
|
2023-05-03 08:37:12 +12:00
|
|
|
let match_group = vid_dup_finder_lib::search(vector_of_hashes, NormalizedTolerance::new(self.tolerance as f64 / 100.0f64));
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut collected_similar_videos: Vec<Vec<VideosEntry>> = Default::default();
|
2021-11-23 23:10:24 +13:00
|
|
|
for i in match_group {
|
2023-12-17 11:21:09 +13:00
|
|
|
let mut temp_vector: Vec<VideosEntry> = Vec::new();
|
2021-12-14 08:13:49 +13:00
|
|
|
let mut bt_size: BTreeSet<u64> = Default::default();
|
2021-11-23 23:10:24 +13:00
|
|
|
for j in i.duplicates() {
|
2021-12-14 08:13:49 +13:00
|
|
|
let file_entry = hashmap_with_file_entries.get(&j.to_string_lossy().to_string()).unwrap();
|
|
|
|
if self.exclude_videos_with_same_size {
|
|
|
|
if !bt_size.contains(&file_entry.size) {
|
|
|
|
bt_size.insert(file_entry.size);
|
|
|
|
temp_vector.push(file_entry.clone());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
temp_vector.push(file_entry.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if temp_vector.len() > 1 {
|
|
|
|
collected_similar_videos.push(temp_vector);
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
self.similar_vectors = collected_similar_videos;
|
2023-05-03 08:37:12 +12:00
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "remove_from_reference_folders", level = "debug")]
|
2023-05-03 08:37:12 +12:00
|
|
|
fn remove_from_reference_folders(&mut self) {
|
2023-10-05 19:06:47 +13:00
|
|
|
if self.common_data.use_reference_folders {
|
2023-05-08 06:54:05 +12:00
|
|
|
self.similar_referenced_vectors = mem::take(&mut self.similar_vectors)
|
2021-12-24 21:18:55 +13:00
|
|
|
.into_iter()
|
|
|
|
.filter_map(|vec_file_entry| {
|
2023-10-05 19:06:47 +13:00
|
|
|
let (mut files_from_referenced_folders, normal_files): (Vec<_>, Vec<_>) = vec_file_entry
|
|
|
|
.into_iter()
|
|
|
|
.partition(|e| self.common_data.directories.is_in_referenced_directory(e.get_path()));
|
2021-12-24 21:18:55 +13:00
|
|
|
|
|
|
|
if files_from_referenced_folders.is_empty() || normal_files.is_empty() {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
Some((files_from_referenced_folders.pop().unwrap(), normal_files))
|
|
|
|
}
|
|
|
|
})
|
2023-12-17 11:21:09 +13:00
|
|
|
.collect::<Vec<(VideosEntry, Vec<VideosEntry>)>>();
|
2021-12-24 21:18:55 +13:00
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-10-14 07:33:17 +13:00
|
|
|
|
|
|
|
fn delete_files(&mut self) {
|
|
|
|
if self.common_data.delete_method == DeleteMethod::None {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
let vec_files = self.similar_vectors.iter().collect::<Vec<_>>();
|
|
|
|
delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
|
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2021-11-23 23:10:24 +13:00
|
|
|
impl Default for SimilarVideos {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl DebugPrint for SimilarVideos {
|
2023-10-15 04:48:57 +13:00
|
|
|
#[fun_time(message = "debug_print", level = "debug")]
|
2021-11-23 23:10:24 +13:00
|
|
|
fn debug_print(&self) {
|
2023-10-11 07:54:41 +13:00
|
|
|
if !cfg!(debug_assertions) {
|
2021-11-23 23:10:24 +13:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
println!("---------------DEBUG PRINT---------------");
|
2023-10-05 19:06:47 +13:00
|
|
|
println!("Included directories - {:?}", self.common_data.directories.included_directories);
|
|
|
|
self.debug_print_common();
|
2021-11-23 23:10:24 +13:00
|
|
|
println!("-----------------------------------------");
|
|
|
|
}
|
|
|
|
}
|
2021-11-28 08:57:10 +13:00
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
impl PrintResults for SimilarVideos {
|
|
|
|
fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
|
2021-11-23 23:10:24 +13:00
|
|
|
if !self.similar_vectors.is_empty() {
|
2023-10-11 07:54:41 +13:00
|
|
|
write!(writer, "{} videos which have similar friends\n\n", self.similar_vectors.len())?;
|
2021-11-23 23:10:24 +13:00
|
|
|
|
2023-01-29 06:54:02 +13:00
|
|
|
for struct_similar in &self.similar_vectors {
|
2023-10-11 07:54:41 +13:00
|
|
|
writeln!(writer, "Found {} videos which have similar friends", struct_similar.len())?;
|
2021-11-23 23:10:24 +13:00
|
|
|
for file_entry in struct_similar {
|
2023-12-08 07:38:41 +13:00
|
|
|
writeln!(writer, "{:?} - {}", file_entry.path, format_size(file_entry.size, BINARY))?;
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-10-11 07:54:41 +13:00
|
|
|
writeln!(writer)?;
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-10-13 05:48:46 +13:00
|
|
|
} else if !self.similar_referenced_vectors.is_empty() {
|
|
|
|
write!(writer, "{} videos which have similar friends\n\n", self.similar_referenced_vectors.len())?;
|
|
|
|
|
|
|
|
for (fe, struct_similar) in &self.similar_referenced_vectors {
|
|
|
|
writeln!(writer, "Found {} videos which have similar friends", struct_similar.len())?;
|
|
|
|
writeln!(writer)?;
|
2023-12-08 07:38:41 +13:00
|
|
|
writeln!(writer, "{:?} - {}", fe.path, format_size(fe.size, BINARY))?;
|
2023-10-13 05:48:46 +13:00
|
|
|
for file_entry in struct_similar {
|
2023-12-08 07:38:41 +13:00
|
|
|
writeln!(writer, "{:?} - {}", file_entry.path, format_size(file_entry.size, BINARY))?;
|
2023-10-13 05:48:46 +13:00
|
|
|
}
|
|
|
|
writeln!(writer)?;
|
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
} else {
|
2023-10-11 07:54:41 +13:00
|
|
|
write!(writer, "Not found any similar videos.")?;
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
|
2023-10-11 07:54:41 +13:00
|
|
|
Ok(())
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
2023-10-13 05:48:46 +13:00
|
|
|
|
|
|
|
fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
|
|
|
|
if self.get_use_reference() {
|
|
|
|
self.save_results_to_file_as_json_internal(file_name, &self.similar_referenced_vectors, pretty_print)
|
|
|
|
} else {
|
|
|
|
self.save_results_to_file_as_json_internal(file_name, &self.similar_vectors, pretty_print)
|
|
|
|
}
|
|
|
|
}
|
2021-11-23 23:10:24 +13:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn check_if_ffmpeg_is_installed() -> bool {
|
2022-12-23 07:59:50 +13:00
|
|
|
let vid = "9999czekoczekoczekolada999.txt";
|
2021-12-22 06:44:20 +13:00
|
|
|
if let Err(DetermineVideo {
|
|
|
|
src_path: _a,
|
|
|
|
error: FfmpegNotFound,
|
2022-11-24 08:23:17 +13:00
|
|
|
}) = VideoHash::from_path(vid)
|
2021-12-22 06:44:20 +13:00
|
|
|
{
|
2021-11-23 23:10:24 +13:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
true
|
|
|
|
}
|
2023-10-05 19:06:47 +13:00
|
|
|
|
|
|
|
impl SimilarVideos {
|
|
|
|
pub fn set_exclude_videos_with_same_size(&mut self, exclude_videos_with_same_size: bool) {
|
|
|
|
self.exclude_videos_with_same_size = exclude_videos_with_same_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_tolerance(&mut self, tolerance: i32) {
|
|
|
|
assert!((0..=MAX_TOLERANCE).contains(&tolerance));
|
|
|
|
self.tolerance = tolerance;
|
|
|
|
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
pub const fn get_similar_videos(&self) -> &Vec<Vec<VideosEntry>> {
|
2023-10-05 19:06:47 +13:00
|
|
|
&self.similar_vectors
|
|
|
|
}
|
|
|
|
|
|
|
|
pub const fn get_information(&self) -> &Info {
|
|
|
|
&self.information
|
|
|
|
}
|
|
|
|
|
2023-12-17 11:21:09 +13:00
|
|
|
pub fn get_similar_videos_referenced(&self) -> &Vec<(VideosEntry, Vec<VideosEntry>)> {
|
2023-10-05 19:06:47 +13:00
|
|
|
&self.similar_referenced_vectors
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_number_of_base_duplicated_files(&self) -> usize {
|
|
|
|
if self.common_data.use_reference_folders {
|
|
|
|
self.similar_referenced_vectors.len()
|
|
|
|
} else {
|
|
|
|
self.similar_vectors.len()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn get_use_reference(&self) -> bool {
|
|
|
|
self.common_data.use_reference_folders
|
|
|
|
}
|
2024-02-15 05:41:17 +13:00
|
|
|
|
|
|
|
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
|
|
|
|
self.ignore_hard_links = ignore_hard_links;
|
|
|
|
}
|
2023-10-05 19:06:47 +13:00
|
|
|
}
|