1052 lines
41 KiB
Rust
1052 lines
41 KiB
Rust
use std::cmp::max;
|
|
use std::collections::{BTreeMap, HashSet};
|
|
use std::fs::File;
|
|
use std::io::prelude::*;
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
use std::sync::Arc;
|
|
use std::{mem, panic};
|
|
|
|
use anyhow::Context;
|
|
use crossbeam_channel::{Receiver, Sender};
|
|
use fun_time::fun_time;
|
|
use humansize::{format_size, BINARY};
|
|
use lofty::{read_from, AudioFile, ItemKey, TaggedFileExt};
|
|
use log::debug;
|
|
use rayon::prelude::*;
|
|
use rusty_chromaprint::{match_fingerprints, Configuration, Fingerprinter};
|
|
use serde::{Deserialize, Serialize};
|
|
use symphonia::core::audio::SampleBuffer;
|
|
use symphonia::core::codecs::{DecoderOptions, CODEC_TYPE_NULL};
|
|
use symphonia::core::formats::FormatOptions;
|
|
use symphonia::core::io::MediaSourceStream;
|
|
use symphonia::core::meta::MetadataOptions;
|
|
use symphonia::core::probe::Hint;
|
|
|
|
use crate::common::{
|
|
check_if_stop_received, create_crash_message, delete_files_custom, filter_reference_folders_generic, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads,
|
|
AUDIO_FILES_EXTENSIONS,
|
|
};
|
|
use crate::common_cache::{get_similar_music_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
|
|
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
|
|
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
|
|
use crate::common_traits::*;
|
|
|
|
bitflags! {
|
|
#[derive(PartialEq, Copy, Clone, Debug)]
|
|
pub struct MusicSimilarity : u32 {
|
|
const NONE = 0;
|
|
|
|
const TRACK_TITLE = 0b1;
|
|
const TRACK_ARTIST = 0b10;
|
|
const YEAR = 0b100;
|
|
const LENGTH = 0b1000;
|
|
const GENRE = 0b10000;
|
|
const BITRATE = 0b10_0000;
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct MusicEntry {
|
|
pub size: u64,
|
|
|
|
pub path: PathBuf,
|
|
pub modified_date: u64,
|
|
pub fingerprint: Vec<u32>,
|
|
|
|
pub track_title: String,
|
|
pub track_artist: String,
|
|
pub year: String,
|
|
pub length: String,
|
|
pub genre: String,
|
|
pub bitrate: u32,
|
|
}
|
|
|
|
const MAX_STAGE_TAGS: u8 = 4;
|
|
const MAX_STAGE_CONTENT: u8 = 5;
|
|
|
|
impl ResultEntry for MusicEntry {
|
|
fn get_path(&self) -> &Path {
|
|
&self.path
|
|
}
|
|
fn get_modified_date(&self) -> u64 {
|
|
self.modified_date
|
|
}
|
|
fn get_size(&self) -> u64 {
|
|
self.size
|
|
}
|
|
}
|
|
|
|
impl FileEntry {
|
|
fn into_music_entry(self) -> MusicEntry {
|
|
MusicEntry {
|
|
size: self.size,
|
|
path: self.path,
|
|
modified_date: self.modified_date,
|
|
|
|
fingerprint: vec![],
|
|
track_title: String::new(),
|
|
track_artist: String::new(),
|
|
year: String::new(),
|
|
length: String::new(),
|
|
genre: String::new(),
|
|
bitrate: 0,
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Default)]
|
|
pub struct Info {
|
|
pub number_of_duplicates: usize,
|
|
pub number_of_groups: u64,
|
|
}
|
|
|
|
pub struct SameMusic {
|
|
common_data: CommonToolData,
|
|
information: Info,
|
|
music_to_check: BTreeMap<String, MusicEntry>,
|
|
music_entries: Vec<MusicEntry>,
|
|
duplicated_music_entries: Vec<Vec<MusicEntry>>,
|
|
duplicated_music_entries_referenced: Vec<(MusicEntry, Vec<MusicEntry>)>,
|
|
music_similarity: MusicSimilarity,
|
|
approximate_comparison: bool,
|
|
check_type: CheckingMethod,
|
|
hash_preset_config: Configuration,
|
|
minimum_segment_duration: f32,
|
|
maximum_difference: f64,
|
|
}
|
|
|
|
impl SameMusic {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
common_data: CommonToolData::new(ToolType::SameMusic),
|
|
information: Info::default(),
|
|
music_entries: Vec::with_capacity(2048),
|
|
music_similarity: MusicSimilarity::NONE,
|
|
duplicated_music_entries: vec![],
|
|
music_to_check: Default::default(),
|
|
approximate_comparison: true,
|
|
duplicated_music_entries_referenced: vec![],
|
|
check_type: CheckingMethod::AudioTags,
|
|
hash_preset_config: Configuration::preset_test1(), // TODO allow to change this
|
|
minimum_segment_duration: 10.0,
|
|
maximum_difference: 2.0,
|
|
}
|
|
}
|
|
|
|
#[fun_time(message = "find_same_music", level = "info")]
|
|
pub fn find_same_music(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) {
|
|
self.prepare_items();
|
|
self.common_data.use_reference_folders = !self.common_data.directories.reference_directories.is_empty();
|
|
if !self.check_files(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
match self.check_type {
|
|
CheckingMethod::AudioTags => {
|
|
if !self.read_tags(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
if !self.check_for_duplicate_tags(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
}
|
|
CheckingMethod::AudioContent => {
|
|
if !self.calculate_fingerprint(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
if !self.check_for_duplicate_fingerprints(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
if !self.read_tags_to_files_similar_by_content(stop_receiver, progress_sender) {
|
|
self.common_data.stopped_search = true;
|
|
return;
|
|
}
|
|
}
|
|
_ => panic!(),
|
|
}
|
|
self.delete_files();
|
|
self.debug_print();
|
|
}
|
|
|
|
#[fun_time(message = "check_files", level = "debug")]
|
|
fn check_files(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
self.common_data.extensions.set_and_validate_allowed_extensions(AUDIO_FILES_EXTENSIONS);
|
|
if !self.common_data.extensions.set_any_extensions() {
|
|
return true;
|
|
}
|
|
|
|
let max_stage = match self.check_type {
|
|
CheckingMethod::AudioTags => MAX_STAGE_TAGS,
|
|
CheckingMethod::AudioContent => MAX_STAGE_CONTENT,
|
|
_ => panic!(),
|
|
};
|
|
|
|
let result = DirTraversalBuilder::new()
|
|
.group_by(|_fe| ())
|
|
.stop_receiver(stop_receiver)
|
|
.progress_sender(progress_sender)
|
|
.common_data(&self.common_data)
|
|
.checking_method(self.check_type)
|
|
.max_stage(max_stage)
|
|
.build()
|
|
.run();
|
|
|
|
match result {
|
|
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
|
|
self.music_to_check = grouped_file_entries
|
|
.into_values()
|
|
.flatten()
|
|
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_music_entry()))
|
|
.collect();
|
|
self.common_data.text_messages.warnings.extend(warnings);
|
|
debug!("check_files - Found {} music files.", self.music_to_check.len());
|
|
true
|
|
}
|
|
|
|
DirTraversalResult::Stopped => false,
|
|
}
|
|
}
|
|
|
|
#[fun_time(message = "load_cache", level = "debug")]
|
|
fn load_cache(&mut self, checking_tags: bool) -> (BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>, BTreeMap<String, MusicEntry>) {
|
|
let loaded_hash_map;
|
|
|
|
let mut records_already_cached: BTreeMap<String, MusicEntry> = Default::default();
|
|
let mut non_cached_files_to_check: BTreeMap<String, MusicEntry> = Default::default();
|
|
|
|
if self.common_data.use_cache {
|
|
let (messages, loaded_items) =
|
|
load_cache_from_file_generalized_by_path::<MusicEntry>(&get_similar_music_cache_file(checking_tags), self.get_delete_outdated_cache(), &self.music_to_check);
|
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
|
loaded_hash_map = loaded_items.unwrap_or_default();
|
|
|
|
debug!("load_cache - Starting to check for differences");
|
|
for (name, file_entry) in mem::take(&mut self.music_to_check) {
|
|
if let Some(cached_file_entry) = loaded_hash_map.get(&name) {
|
|
records_already_cached.insert(name, cached_file_entry.clone());
|
|
} else {
|
|
non_cached_files_to_check.insert(name, file_entry);
|
|
}
|
|
}
|
|
debug!(
|
|
"load_cache - completed diff between loaded and prechecked files, {}({}) - non cached, {}({}) - already cached",
|
|
non_cached_files_to_check.len(),
|
|
format_size(non_cached_files_to_check.values().map(|e| e.size).sum::<u64>(), BINARY),
|
|
records_already_cached.len(),
|
|
format_size(records_already_cached.values().map(|e| e.size).sum::<u64>(), BINARY),
|
|
);
|
|
} else {
|
|
loaded_hash_map = Default::default();
|
|
mem::swap(&mut self.music_to_check, &mut non_cached_files_to_check);
|
|
}
|
|
(loaded_hash_map, records_already_cached, non_cached_files_to_check)
|
|
}
|
|
|
|
#[fun_time(message = "save_cache", level = "debug")]
|
|
fn save_cache(&mut self, vec_file_entry: Vec<MusicEntry>, loaded_hash_map: BTreeMap<String, MusicEntry>, checking_tags: bool) {
|
|
if !self.common_data.use_cache {
|
|
return;
|
|
}
|
|
// Must save all results to file, old loaded from file with all currently counted results
|
|
let mut all_results: BTreeMap<String, MusicEntry> = loaded_hash_map;
|
|
|
|
for file_entry in vec_file_entry {
|
|
all_results.insert(file_entry.path.to_string_lossy().to_string(), file_entry);
|
|
}
|
|
|
|
let messages = save_cache_to_file_generalized(&get_similar_music_cache_file(checking_tags), &all_results, self.common_data.save_also_as_json, 0);
|
|
self.get_text_messages_mut().extend_with_another_messages(messages);
|
|
}
|
|
|
|
#[fun_time(message = "calculate_fingerprint", level = "debug")]
|
|
fn calculate_fingerprint(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 1, MAX_STAGE_CONTENT, 0, self.check_type, self.common_data.tool_type);
|
|
|
|
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(false);
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
if check_if_stop_received(stop_receiver) {
|
|
return false;
|
|
}
|
|
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common(
|
|
progress_sender,
|
|
2,
|
|
MAX_STAGE_CONTENT,
|
|
non_cached_files_to_check.len(),
|
|
self.check_type,
|
|
self.common_data.tool_type,
|
|
);
|
|
let configuration = &self.hash_preset_config;
|
|
|
|
debug!("calculate_fingerprint - starting fingerprinting");
|
|
let mut vec_file_entry = non_cached_files_to_check
|
|
.into_par_iter()
|
|
.map(|(path, mut music_entry)| {
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
if check_if_stop_received(stop_receiver) {
|
|
check_was_stopped.store(true, Ordering::Relaxed);
|
|
return None;
|
|
}
|
|
|
|
let Ok(fingerprint) = calc_fingerprint_helper(path, configuration) else {
|
|
return Some(None);
|
|
};
|
|
music_entry.fingerprint = fingerprint;
|
|
|
|
Some(Some(music_entry))
|
|
})
|
|
.while_some()
|
|
.filter(Option::is_some)
|
|
.map(Option::unwrap)
|
|
.collect::<Vec<_>>();
|
|
debug!("calculate_fingerprint - ended fingerprinting");
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 3, MAX_STAGE_CONTENT, 0, self.check_type, self.common_data.tool_type);
|
|
|
|
// Just connect loaded results with already calculated
|
|
vec_file_entry.extend(records_already_cached.into_values());
|
|
|
|
self.music_entries = vec_file_entry.clone();
|
|
|
|
self.save_cache(vec_file_entry, loaded_hash_map, false);
|
|
|
|
// Break if stop was clicked after saving to cache
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
if check_was_stopped.load(Ordering::Relaxed) || check_if_stop_received(stop_receiver) {
|
|
return false;
|
|
}
|
|
true
|
|
}
|
|
|
|
#[fun_time(message = "read_tags", level = "debug")]
|
|
fn read_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 1, MAX_STAGE_TAGS, 0, self.check_type, self.common_data.tool_type);
|
|
|
|
let (loaded_hash_map, records_already_cached, non_cached_files_to_check) = self.load_cache(true);
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
if check_if_stop_received(stop_receiver) {
|
|
return false;
|
|
}
|
|
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) = prepare_thread_handler_common(
|
|
progress_sender,
|
|
2,
|
|
MAX_STAGE_TAGS,
|
|
non_cached_files_to_check.len(),
|
|
self.check_type,
|
|
self.common_data.tool_type,
|
|
);
|
|
|
|
debug!("read_tags - starting reading tags");
|
|
// Clean for duplicate files
|
|
let mut vec_file_entry = non_cached_files_to_check
|
|
.into_par_iter()
|
|
.map(|(path, mut music_entry)| {
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
if check_if_stop_received(stop_receiver) {
|
|
check_was_stopped.store(true, Ordering::Relaxed);
|
|
return None;
|
|
}
|
|
if read_single_file_tag(&path, &mut music_entry) {
|
|
Some(Some(music_entry))
|
|
} else {
|
|
Some(None)
|
|
}
|
|
})
|
|
.while_some()
|
|
.filter(Option::is_some)
|
|
.map(Option::unwrap)
|
|
.collect::<Vec<_>>();
|
|
debug!("read_tags - ended reading tags");
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
let (progress_thread_handle, progress_thread_run, _atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 3, MAX_STAGE_TAGS, 0, self.check_type, self.common_data.tool_type);
|
|
|
|
// Just connect loaded results with already calculated
|
|
vec_file_entry.extend(records_already_cached.into_values());
|
|
|
|
self.music_entries = vec_file_entry.clone();
|
|
|
|
self.save_cache(vec_file_entry, loaded_hash_map, true);
|
|
|
|
// Break if stop was clicked after saving to cache
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
if check_was_stopped.load(Ordering::Relaxed) {
|
|
return false;
|
|
}
|
|
|
|
true
|
|
}
|
|
|
|
#[fun_time(message = "check_for_duplicate_tags", level = "debug")]
|
|
fn check_for_duplicate_tags(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 4, MAX_STAGE_TAGS, self.music_to_check.len(), self.check_type, self.common_data.tool_type);
|
|
|
|
let mut old_duplicates: Vec<Vec<MusicEntry>> = vec![self.music_entries.clone()];
|
|
let mut new_duplicates: Vec<Vec<MusicEntry>> = Vec::new();
|
|
|
|
if (self.music_similarity & MusicSimilarity::TRACK_TITLE) == MusicSimilarity::TRACK_TITLE {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
|
|
old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.track_title, self.approximate_comparison);
|
|
}
|
|
if (self.music_similarity & MusicSimilarity::TRACK_ARTIST) == MusicSimilarity::TRACK_ARTIST {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
|
|
old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.track_artist, self.approximate_comparison);
|
|
}
|
|
if (self.music_similarity & MusicSimilarity::YEAR) == MusicSimilarity::YEAR {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
|
|
old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.year, false);
|
|
}
|
|
if (self.music_similarity & MusicSimilarity::LENGTH) == MusicSimilarity::LENGTH {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
|
|
old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.length, false);
|
|
}
|
|
if (self.music_similarity & MusicSimilarity::GENRE) == MusicSimilarity::GENRE {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
|
|
old_duplicates = self.check_music_item(old_duplicates, &atomic_counter, |fe| &fe.genre, false);
|
|
}
|
|
if (self.music_similarity & MusicSimilarity::BITRATE) == MusicSimilarity::BITRATE {
|
|
if check_if_stop_received(stop_receiver) {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
}
|
|
let old_duplicates_len = old_duplicates.len();
|
|
for vec_file_entry in old_duplicates {
|
|
let mut hash_map: BTreeMap<String, Vec<MusicEntry>> = Default::default();
|
|
for file_entry in vec_file_entry {
|
|
if file_entry.bitrate != 0 {
|
|
let thing = file_entry.bitrate.to_string();
|
|
if !thing.is_empty() {
|
|
hash_map.entry(thing.clone()).or_default().push(file_entry);
|
|
}
|
|
}
|
|
}
|
|
for (_title, vec_file_entry) in hash_map {
|
|
if vec_file_entry.len() > 1 {
|
|
new_duplicates.push(vec_file_entry);
|
|
}
|
|
}
|
|
}
|
|
atomic_counter.fetch_add(old_duplicates_len, Ordering::Relaxed);
|
|
old_duplicates = new_duplicates;
|
|
}
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
|
|
self.duplicated_music_entries = old_duplicates;
|
|
|
|
if self.common_data.use_reference_folders {
|
|
self.duplicated_music_entries_referenced = filter_reference_folders_generic(mem::take(&mut self.duplicated_music_entries), &self.common_data.directories);
|
|
}
|
|
|
|
if self.common_data.use_reference_folders {
|
|
for (_fe, vector) in &self.duplicated_music_entries_referenced {
|
|
self.information.number_of_duplicates += vector.len();
|
|
self.information.number_of_groups += 1;
|
|
}
|
|
} else {
|
|
for vector in &self.duplicated_music_entries {
|
|
self.information.number_of_duplicates += vector.len() - 1;
|
|
self.information.number_of_groups += 1;
|
|
}
|
|
}
|
|
|
|
// Clear unused data
|
|
self.music_entries.clear();
|
|
|
|
true
|
|
}
|
|
#[fun_time(message = "read_tags_to_files_similar_by_content", level = "debug")]
|
|
fn read_tags_to_files_similar_by_content(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
let groups_to_check = max(self.duplicated_music_entries.len(), self.duplicated_music_entries_referenced.len());
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 5, MAX_STAGE_CONTENT, groups_to_check, self.check_type, self.common_data.tool_type);
|
|
|
|
if !self.duplicated_music_entries.is_empty() {
|
|
let _: Vec<_> = self
|
|
.duplicated_music_entries
|
|
.par_iter_mut()
|
|
.map(|vec_me| {
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
if check_if_stop_received(stop_receiver) {
|
|
check_was_stopped.store(true, Ordering::Relaxed);
|
|
return None;
|
|
}
|
|
for me in vec_me {
|
|
let me_path = me.path.to_string_lossy().to_string();
|
|
read_single_file_tag(&me_path, me);
|
|
}
|
|
Some(())
|
|
})
|
|
.while_some()
|
|
.collect();
|
|
} else {
|
|
let _: Vec<_> = self
|
|
.duplicated_music_entries_referenced
|
|
.par_iter_mut()
|
|
.map(|(me_o, vec_me)| {
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
if check_if_stop_received(stop_receiver) {
|
|
check_was_stopped.store(true, Ordering::Relaxed);
|
|
return None;
|
|
}
|
|
let me_o_path = me_o.path.to_string_lossy().to_string();
|
|
read_single_file_tag(&me_o_path, me_o);
|
|
for me in vec_me {
|
|
let me_path = me.path.to_string_lossy().to_string();
|
|
read_single_file_tag(&me_path, me);
|
|
}
|
|
Some(())
|
|
})
|
|
.while_some()
|
|
.collect();
|
|
}
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
|
|
!check_was_stopped.load(Ordering::Relaxed)
|
|
}
|
|
|
|
fn split_fingerprints_to_check(&mut self) -> (Vec<MusicEntry>, Vec<MusicEntry>) {
|
|
let base_files: Vec<MusicEntry>;
|
|
let files_to_compare: Vec<MusicEntry>;
|
|
|
|
if self.common_data.use_reference_folders {
|
|
(base_files, files_to_compare) = mem::take(&mut self.music_entries)
|
|
.into_iter()
|
|
.partition(|f| self.common_data.directories.is_in_referenced_directory(f.get_path()));
|
|
} else {
|
|
base_files = self.music_entries.clone();
|
|
files_to_compare = mem::take(&mut self.music_entries);
|
|
}
|
|
|
|
(base_files, files_to_compare)
|
|
}
|
|
|
|
#[fun_time(message = "compare_fingerprints", level = "debug")]
|
|
fn compare_fingerprints(
|
|
&mut self,
|
|
stop_receiver: Option<&Receiver<()>>,
|
|
atomic_counter: &Arc<AtomicUsize>,
|
|
base_files: Vec<MusicEntry>,
|
|
files_to_compare: &[MusicEntry],
|
|
) -> Option<Vec<Vec<MusicEntry>>> {
|
|
let mut used_paths: HashSet<String> = Default::default();
|
|
|
|
let configuration = &self.hash_preset_config;
|
|
let minimum_segment_duration = self.minimum_segment_duration;
|
|
let maximum_difference = self.maximum_difference;
|
|
|
|
let mut duplicated_music_entries = Vec::new();
|
|
|
|
for f_entry in base_files {
|
|
atomic_counter.fetch_add(1, Ordering::Relaxed);
|
|
if check_if_stop_received(stop_receiver) {
|
|
return None;
|
|
}
|
|
|
|
let f_string = f_entry.path.to_string_lossy().to_string();
|
|
if used_paths.contains(&f_string) {
|
|
continue;
|
|
}
|
|
|
|
let mut collected_similar_items = files_to_compare
|
|
.par_iter()
|
|
.filter_map(|e_entry| {
|
|
let e_string = e_entry.path.to_string_lossy().to_string();
|
|
if used_paths.contains(&e_string) || e_string == f_string {
|
|
return None;
|
|
}
|
|
let mut segments = match_fingerprints(&f_entry.fingerprint, &e_entry.fingerprint, configuration).unwrap();
|
|
segments.retain(|s| s.duration(configuration) > minimum_segment_duration && s.score < maximum_difference);
|
|
if segments.is_empty() {
|
|
None
|
|
} else {
|
|
Some((e_string, e_entry))
|
|
}
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
collected_similar_items.retain(|(path, _entry)| !used_paths.contains(path));
|
|
if !collected_similar_items.is_empty() {
|
|
let mut music_entries = Vec::new();
|
|
for (path, entry) in collected_similar_items {
|
|
used_paths.insert(path);
|
|
music_entries.push(entry.clone());
|
|
}
|
|
used_paths.insert(f_string);
|
|
music_entries.push(f_entry);
|
|
duplicated_music_entries.push(music_entries);
|
|
}
|
|
}
|
|
Some(duplicated_music_entries)
|
|
}
|
|
|
|
#[fun_time(message = "check_for_duplicate_fingerprints", level = "debug")]
|
|
fn check_for_duplicate_fingerprints(&mut self, stop_receiver: Option<&Receiver<()>>, progress_sender: Option<&Sender<ProgressData>>) -> bool {
|
|
let (base_files, files_to_compare) = self.split_fingerprints_to_check();
|
|
let (progress_thread_handle, progress_thread_run, atomic_counter, _check_was_stopped) =
|
|
prepare_thread_handler_common(progress_sender, 2, 3, base_files.len(), self.check_type, self.common_data.tool_type);
|
|
|
|
let Some(duplicated_music_entries) = self.compare_fingerprints(stop_receiver, &atomic_counter, base_files, &files_to_compare) else {
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
return false;
|
|
};
|
|
|
|
send_info_and_wait_for_ending_all_threads(&progress_thread_run, progress_thread_handle);
|
|
|
|
self.duplicated_music_entries = duplicated_music_entries;
|
|
|
|
if self.common_data.use_reference_folders {
|
|
self.duplicated_music_entries_referenced = filter_reference_folders_generic(mem::take(&mut self.duplicated_music_entries), &self.common_data.directories);
|
|
}
|
|
|
|
if self.common_data.use_reference_folders {
|
|
for (_fe, vector) in &self.duplicated_music_entries_referenced {
|
|
self.information.number_of_duplicates += vector.len();
|
|
self.information.number_of_groups += 1;
|
|
}
|
|
} else {
|
|
for vector in &self.duplicated_music_entries {
|
|
self.information.number_of_duplicates += vector.len() - 1;
|
|
self.information.number_of_groups += 1;
|
|
}
|
|
}
|
|
|
|
// Clear unused data
|
|
self.music_entries.clear();
|
|
|
|
true
|
|
}
|
|
|
|
#[fun_time(message = "check_music_item", level = "debug")]
|
|
fn check_music_item(
|
|
&self,
|
|
old_duplicates: Vec<Vec<MusicEntry>>,
|
|
atomic_counter: &Arc<AtomicUsize>,
|
|
get_item: fn(&MusicEntry) -> &str,
|
|
approximate_comparison: bool,
|
|
) -> Vec<Vec<MusicEntry>> {
|
|
let mut new_duplicates: Vec<_> = Default::default();
|
|
let old_duplicates_len = old_duplicates.len();
|
|
for vec_file_entry in old_duplicates {
|
|
let mut hash_map: BTreeMap<String, Vec<MusicEntry>> = Default::default();
|
|
for file_entry in vec_file_entry {
|
|
let mut thing = get_item(&file_entry).trim().to_lowercase();
|
|
if approximate_comparison {
|
|
get_approximate_conversion(&mut thing);
|
|
}
|
|
if !thing.is_empty() {
|
|
hash_map.entry(thing).or_default().push(file_entry);
|
|
}
|
|
}
|
|
for (_title, vec_file_entry) in hash_map {
|
|
if vec_file_entry.len() > 1 {
|
|
new_duplicates.push(vec_file_entry);
|
|
}
|
|
}
|
|
}
|
|
atomic_counter.fetch_add(old_duplicates_len, Ordering::Relaxed);
|
|
|
|
new_duplicates
|
|
}
|
|
|
|
#[fun_time(message = "delete_files", level = "debug")]
|
|
fn delete_files(&mut self) {
|
|
if self.common_data.delete_method == DeleteMethod::None {
|
|
return;
|
|
}
|
|
|
|
let vec_files = self.duplicated_music_entries.iter().collect::<Vec<_>>();
|
|
delete_files_custom(&vec_files, &self.common_data.delete_method, &mut self.common_data.text_messages, self.common_data.dry_run);
|
|
}
|
|
}
|
|
|
|
impl SameMusic {
|
|
pub const fn get_duplicated_music_entries(&self) -> &Vec<Vec<MusicEntry>> {
|
|
&self.duplicated_music_entries
|
|
}
|
|
|
|
pub const fn get_music_similarity(&self) -> &MusicSimilarity {
|
|
&self.music_similarity
|
|
}
|
|
|
|
pub const fn get_information(&self) -> &Info {
|
|
&self.information
|
|
}
|
|
|
|
pub fn set_approximate_comparison(&mut self, approximate_comparison: bool) {
|
|
self.approximate_comparison = approximate_comparison;
|
|
}
|
|
|
|
pub fn set_maximum_difference(&mut self, maximum_difference: f64) {
|
|
self.maximum_difference = maximum_difference;
|
|
}
|
|
pub fn set_minimum_segment_duration(&mut self, minimum_segment_duration: f32) {
|
|
self.minimum_segment_duration = minimum_segment_duration;
|
|
}
|
|
|
|
pub fn set_check_type(&mut self, check_type: CheckingMethod) {
|
|
assert!([CheckingMethod::AudioTags, CheckingMethod::AudioContent].contains(&check_type));
|
|
self.check_type = check_type;
|
|
}
|
|
|
|
pub fn get_check_type(&self) -> CheckingMethod {
|
|
self.check_type
|
|
}
|
|
|
|
pub fn set_music_similarity(&mut self, music_similarity: MusicSimilarity) {
|
|
self.music_similarity = music_similarity;
|
|
}
|
|
|
|
pub fn get_similar_music_referenced(&self) -> &Vec<(MusicEntry, Vec<MusicEntry>)> {
|
|
&self.duplicated_music_entries_referenced
|
|
}
|
|
|
|
pub fn get_number_of_base_duplicated_files(&self) -> usize {
|
|
if self.common_data.use_reference_folders {
|
|
self.duplicated_music_entries_referenced.len()
|
|
} else {
|
|
self.duplicated_music_entries.len()
|
|
}
|
|
}
|
|
|
|
pub fn get_use_reference(&self) -> bool {
|
|
self.common_data.use_reference_folders
|
|
}
|
|
}
|
|
|
|
// TODO this should be taken from rusty-chromaprint repo, not reimplemented here
|
|
fn calc_fingerprint_helper(path: impl AsRef<Path>, config: &Configuration) -> anyhow::Result<Vec<u32>> {
|
|
let path = path.as_ref();
|
|
let src = File::open(path).context("failed to open file")?;
|
|
let mss = MediaSourceStream::new(Box::new(src), Default::default());
|
|
|
|
let mut hint = Hint::new();
|
|
if let Some(ext) = path.extension().and_then(std::ffi::OsStr::to_str) {
|
|
hint.with_extension(ext);
|
|
}
|
|
|
|
let meta_opts: MetadataOptions = Default::default();
|
|
let fmt_opts: FormatOptions = Default::default();
|
|
|
|
let probed = symphonia::default::get_probe().format(&hint, mss, &fmt_opts, &meta_opts).context("unsupported format")?;
|
|
|
|
let mut format = probed.format;
|
|
|
|
let track = format
|
|
.tracks()
|
|
.iter()
|
|
.find(|t| t.codec_params.codec != CODEC_TYPE_NULL)
|
|
.context("no supported audio tracks")?;
|
|
|
|
let dec_opts: DecoderOptions = Default::default();
|
|
|
|
let mut decoder = symphonia::default::get_codecs().make(&track.codec_params, &dec_opts).context("unsupported codec")?;
|
|
|
|
let track_id = track.id;
|
|
|
|
let mut printer = Fingerprinter::new(config);
|
|
let sample_rate = track.codec_params.sample_rate.context("missing sample rate")?;
|
|
let channels = track.codec_params.channels.context("missing audio channels")?.count() as u32;
|
|
printer.start(sample_rate, channels).context("initializing fingerprinter")?;
|
|
|
|
let mut sample_buf = None;
|
|
|
|
loop {
|
|
let Ok(packet) = format.next_packet() else {
|
|
break;
|
|
};
|
|
|
|
if packet.track_id() != track_id {
|
|
continue;
|
|
}
|
|
|
|
match decoder.decode(&packet) {
|
|
Ok(audio_buf) => {
|
|
if sample_buf.is_none() {
|
|
let spec = *audio_buf.spec();
|
|
let duration = audio_buf.capacity() as u64;
|
|
sample_buf = Some(SampleBuffer::<i16>::new(duration, spec));
|
|
}
|
|
|
|
if let Some(buf) = &mut sample_buf {
|
|
buf.copy_interleaved_ref(audio_buf);
|
|
printer.consume(buf.samples());
|
|
}
|
|
}
|
|
Err(symphonia::core::errors::Error::DecodeError(_)) => (),
|
|
Err(_) => break,
|
|
}
|
|
}
|
|
|
|
printer.finish();
|
|
Ok(printer.fingerprint().to_vec())
|
|
}
|
|
|
|
fn read_single_file_tag(path: &str, music_entry: &mut MusicEntry) -> bool {
|
|
let Ok(mut file) = File::open(path) else {
|
|
return false;
|
|
};
|
|
|
|
let Ok(possible_tagged_file) = panic::catch_unwind(move || {
|
|
match read_from(&mut file) {
|
|
Ok(t) => Some(t),
|
|
Err(_inspected) => {
|
|
// println!("Failed to open {}", path);
|
|
None
|
|
}
|
|
}
|
|
}) else {
|
|
let message = create_crash_message("Lofty", path, "https://github.com/image-rs/image/issues");
|
|
println!("{message}");
|
|
return false;
|
|
};
|
|
|
|
let Some(tagged_file) = possible_tagged_file else { return true };
|
|
|
|
let properties = tagged_file.properties();
|
|
|
|
let mut track_title = String::new();
|
|
let mut track_artist = String::new();
|
|
let mut year = String::new();
|
|
let mut genre = String::new();
|
|
|
|
let bitrate = properties.audio_bitrate().unwrap_or(0);
|
|
let mut length = properties.duration().as_millis().to_string();
|
|
|
|
if let Some(tag) = tagged_file.primary_tag() {
|
|
track_title = tag.get_string(&ItemKey::TrackTitle).unwrap_or("").to_string();
|
|
track_artist = tag.get_string(&ItemKey::TrackArtist).unwrap_or("").to_string();
|
|
year = tag.get_string(&ItemKey::Year).unwrap_or("").to_string();
|
|
genre = tag.get_string(&ItemKey::Genre).unwrap_or("").to_string();
|
|
}
|
|
|
|
for tag in tagged_file.tags() {
|
|
if track_title.is_empty() {
|
|
if let Some(tag_value) = tag.get_string(&ItemKey::TrackTitle) {
|
|
track_title = tag_value.to_string();
|
|
}
|
|
}
|
|
if track_artist.is_empty() {
|
|
if let Some(tag_value) = tag.get_string(&ItemKey::TrackArtist) {
|
|
track_artist = tag_value.to_string();
|
|
}
|
|
}
|
|
if year.is_empty() {
|
|
if let Some(tag_value) = tag.get_string(&ItemKey::Year) {
|
|
year = tag_value.to_string();
|
|
}
|
|
}
|
|
if genre.is_empty() {
|
|
if let Some(tag_value) = tag.get_string(&ItemKey::Genre) {
|
|
genre = tag_value.to_string();
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Ok(old_length_number) = length.parse::<u32>() {
|
|
let length_number = old_length_number / 60;
|
|
let minutes = length_number / 1000;
|
|
let seconds = (length_number % 1000) * 6 / 100;
|
|
if minutes != 0 || seconds != 0 {
|
|
length = format!("{minutes}:{seconds:02}");
|
|
} else if old_length_number > 0 {
|
|
// That means, that audio have length smaller that second, but length was properly read
|
|
length = "0:01".to_string();
|
|
} else {
|
|
length = String::new();
|
|
}
|
|
} else {
|
|
length = String::new();
|
|
}
|
|
|
|
music_entry.track_title = track_title;
|
|
music_entry.track_artist = track_artist;
|
|
music_entry.year = year;
|
|
music_entry.length = length;
|
|
music_entry.genre = genre;
|
|
music_entry.bitrate = bitrate;
|
|
|
|
true
|
|
}
|
|
|
|
impl Default for SameMusic {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl DebugPrint for SameMusic {
|
|
#[fun_time(message = "debug_print", level = "debug")]
|
|
fn debug_print(&self) {
|
|
if !cfg!(debug_assertions) {
|
|
return;
|
|
}
|
|
println!("---------------DEBUG PRINT---------------");
|
|
println!("Found files music - {}", self.music_entries.len());
|
|
println!("Found duplicated files music - {}", self.duplicated_music_entries.len());
|
|
self.debug_print_common();
|
|
println!("-----------------------------------------");
|
|
}
|
|
}
|
|
|
|
impl PrintResults for SameMusic {
|
|
fn write_results<T: Write>(&self, writer: &mut T) -> std::io::Result<()> {
|
|
if !self.duplicated_music_entries.is_empty() {
|
|
writeln!(writer, "{} music files which have similar friends\n\n.", self.duplicated_music_entries.len())?;
|
|
|
|
for vec_file_entry in &self.duplicated_music_entries {
|
|
writeln!(writer, "Found {} music files which have similar friends", vec_file_entry.len())?;
|
|
for file_entry in vec_file_entry {
|
|
writeln!(
|
|
writer,
|
|
"TT: {} - TA: {} - Y: {} - L: {} - G: {} - B: {} - P: {:?}",
|
|
file_entry.track_title, file_entry.track_artist, file_entry.year, file_entry.length, file_entry.genre, file_entry.bitrate, file_entry.path
|
|
)?;
|
|
}
|
|
writeln!(writer)?;
|
|
}
|
|
} else if !self.duplicated_music_entries_referenced.is_empty() {
|
|
writeln!(writer, "{} music files which have similar friends\n\n.", self.duplicated_music_entries_referenced.len())?;
|
|
for (file_entry, vec_file_entry) in &self.duplicated_music_entries_referenced {
|
|
writeln!(writer, "Found {} music files which have similar friends", vec_file_entry.len())?;
|
|
writeln!(writer)?;
|
|
writeln!(
|
|
writer,
|
|
"TT: {} - TA: {} - Y: {} - L: {} - G: {} - B: {} - P: {:?}",
|
|
file_entry.track_title, file_entry.track_artist, file_entry.year, file_entry.length, file_entry.genre, file_entry.bitrate, file_entry.path
|
|
)?;
|
|
for file_entry in vec_file_entry {
|
|
writeln!(
|
|
writer,
|
|
"TT: {} - TA: {} - Y: {} - L: {} - G: {} - B: {} - P: {:?}",
|
|
file_entry.track_title, file_entry.track_artist, file_entry.year, file_entry.length, file_entry.genre, file_entry.bitrate, file_entry.path
|
|
)?;
|
|
}
|
|
writeln!(writer)?;
|
|
}
|
|
} else {
|
|
write!(writer, "Not found any similar music files.")?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn save_results_to_file_as_json(&self, file_name: &str, pretty_print: bool) -> std::io::Result<()> {
|
|
if self.get_use_reference() {
|
|
self.save_results_to_file_as_json_internal(file_name, &self.duplicated_music_entries_referenced, pretty_print)
|
|
} else {
|
|
self.save_results_to_file_as_json_internal(file_name, &self.duplicated_music_entries, pretty_print)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn get_approximate_conversion(what: &mut String) {
|
|
let mut new_what = String::with_capacity(what.len());
|
|
let mut tab_number = 0;
|
|
let mut space_before = true;
|
|
for character in what.chars() {
|
|
match character {
|
|
'(' | '[' => {
|
|
tab_number += 1;
|
|
}
|
|
')' | ']' => {
|
|
if tab_number == 0 {
|
|
// Nothing to do, not even save it to output
|
|
} else {
|
|
tab_number -= 1;
|
|
}
|
|
}
|
|
' ' => {
|
|
if !space_before {
|
|
new_what.push(' ');
|
|
space_before = true;
|
|
}
|
|
}
|
|
ch => {
|
|
if tab_number == 0 {
|
|
// Ignore all non alphabetic ascii characters like " or .
|
|
if !ch.is_ascii() || ch.is_ascii_alphabetic() {
|
|
space_before = false;
|
|
new_what.push(ch);
|
|
} else if !space_before {
|
|
new_what.push(' ');
|
|
space_before = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if new_what.ends_with(' ') {
|
|
new_what.pop();
|
|
}
|
|
*what = new_what;
|
|
}
|
|
|
|
impl CommonData for SameMusic {
|
|
fn get_cd(&self) -> &CommonToolData {
|
|
&self.common_data
|
|
}
|
|
fn get_cd_mut(&mut self) -> &mut CommonToolData {
|
|
&mut self.common_data
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use crate::same_music::get_approximate_conversion;
|
|
|
|
#[test]
|
|
fn test_strings() {
|
|
let mut what = "roman ( ziemniak ) ".to_string();
|
|
get_approximate_conversion(&mut what);
|
|
assert_eq!(what, "roman");
|
|
|
|
let mut what = " HH) ".to_string();
|
|
get_approximate_conversion(&mut what);
|
|
assert_eq!(what, "HH");
|
|
|
|
let mut what = " fsf.f. ".to_string();
|
|
get_approximate_conversion(&mut what);
|
|
assert_eq!(what, "fsf f");
|
|
|
|
let mut what = "Kekistan (feat. roman) [Mix on Mix]".to_string();
|
|
get_approximate_conversion(&mut what);
|
|
assert_eq!(what, "Kekistan");
|
|
}
|
|
}
|