From 269f6b36ec829b16991362a1b5d6cdac3c5b26d0 Mon Sep 17 00:00:00 2001
From: Andrew Beveridge
Date: Mon, 3 Mar 2025 00:34:31 -0500
Subject: [PATCH] Began adding new scores

---
 audio_separator/models-scores.json      |  59 ++++++++-
 tests/model-metrics/test-all-models.py  | 171 ++++++++++++++++++-------
 2 files changed, 179 insertions(+), 51 deletions(-)

diff --git a/audio_separator/models-scores.json b/audio_separator/models-scores.json
index 009a194..b896a27 100644
--- a/audio_separator/models-scores.json
+++ b/audio_separator/models-scores.json
@@ -45778,8 +45778,23 @@
     },
     "model_bs_roformer_ep_937_sdr_10.5309.ckpt": {
         "model_name": "Roformer Model: BS-Roformer-Viperx-1053",
-        "track_scores": [],
-        "median_scores": {},
+        "track_scores": [
+            {
+                "track_name": "A Classic Education - NightOwl",
+                "scores": {
+                    "seconds_per_minute_m3": 13.3
+                }
+            },
+            {
+                "track_name": "A Classic Education - NightOwl",
+                "scores": {
+                    "seconds_per_minute_m3": 13.3
+                }
+            }
+        ],
+        "median_scores": {
+            "seconds_per_minute_m3": 13.3
+        },
         "stems": [
             "no drum-bass",
             "drum-bass"
@@ -53525,5 +53540,45 @@
             "instrumental"
         ],
         "target_stem": null
+    },
+    "mel_band_roformer_karaoke_gabox.ckpt": {
+        "model_name": "Roformer Model: MelBand Roformer | Karaoke by Gabox",
+        "track_scores": [
+            {
+                "track_name": "A Classic Education - NightOwl",
+                "scores": {
+                    "vocals": {
+                        "SDR": 6.88608,
+                        "SIR": 27.7786,
+                        "SAR": 6.37612,
+                        "ISR": 9.25014
+                    },
+                    "instrumental": {
+                        "SDR": 16.4048,
+                        "SIR": 21.2481,
+                        "SAR": 19.3112,
+                        "ISR": 19.5408
+                    },
+                    "seconds_per_minute_m3": 17.5
+                }
+            }
+        ],
+        "median_scores": {
+            "vocals": {
+                "SDR": 6.88608,
+                "SIR": 27.7786,
+                "SAR": 6.37612,
+                "ISR": 9.25014
+            },
+            "instrumental": {
+                "SDR": 16.4048,
+                "SIR": 21.2481,
+                "SAR": 19.3112,
+                "ISR": 19.5408
+            },
+            "seconds_per_minute_m3": 17.5
+        },
+        "stems": [],
+        "target_stem": null
     }
 }
\ No newline at end of file
diff --git a/tests/model-metrics/test-all-models.py b/tests/model-metrics/test-all-models.py
index 9681ced..ac3503b 100644
--- a/tests/model-metrics/test-all-models.py
+++ b/tests/model-metrics/test-all-models.py
@@ -105,6 +105,13 @@ def evaluate_track(track_name, track_path, test_model, mus_db):
     track_duration_minutes = get_track_duration(track_path)
     logger.info(f"Track duration: {track_duration_minutes:.2f} minutes")
 
+    # Initialize variables to track processing time
+    processing_time = 0
+    seconds_per_minute = 0
+
+    # Create a basic result structure that will be returned even if evaluation fails
+    basic_model_results = {"track_name": track_name, "scores": {}}
+
     # Check if evaluation results already exist in combined file
     museval_results = load_combined_results()
     if test_model in museval_results and track_name in museval_results[test_model]:
@@ -112,9 +119,35 @@
         track_data = museval_results[test_model][track_name]
         scores = museval.TrackStore(track_name)
         scores.scores = track_data
+
+        # Try to extract existing speed metrics if available
+        try:
+            if isinstance(track_data, dict) and "targets" in track_data:
+                for target in track_data["targets"]:
+                    if "metrics" in target and "seconds_per_minute_m3" in target["metrics"]:
+                        basic_model_results["scores"]["seconds_per_minute_m3"] = target["metrics"]["seconds_per_minute_m3"]
+                        break
+        except Exception:
+            pass  # Ignore errors in extracting existing speed metrics
     else:
-        # Expanded stem mapping to include "no-stem" outputs
-        stem_mapping = {"Vocals": "vocals", "Instrumental": "instrumental", "Drums": "drums", "Bass": "bass", "Other": "other", "No Drums": "nodrums", "No Bass": "nobass", "No Other": "noother"}
Drums": "nodrums", "No Bass": "nobass", "No Other": "noother"} + # Expanded stem mapping to include "no-stem" outputs and custom stem formats + stem_mapping = { + # Standard stems + "Vocals": "vocals", + "Instrumental": "instrumental", + "Drums": "drums", + "Bass": "bass", + "Other": "other", + # No-stem variants + "No Drums": "nodrums", + "No Bass": "nobass", + "No Other": "noother", + # Custom stem formats (with hyphens) + "Drum-Bass": "drumbass", + "No Drum-Bass": "nodrumbass", + "Vocals-Other": "vocalsother", + "No Vocals-Other": "novocalsother", + } # Create a temporary directory for separation files with tempfile.TemporaryDirectory() as temp_dir: @@ -135,26 +168,41 @@ def evaluate_track(track_name, track_path, test_model, mus_db): logger.info(f"Separation completed in {processing_time:.2f} seconds") logger.info(f"Processing speed: {seconds_per_minute:.2f} seconds per minute of audio") - # Check which stems were actually created and pair them appropriately - available_stems = {} - stem_pairs = {"drums": "nodrums", "bass": "nobass", "other": "noother", "vocals": "instrumental"} + # Always add the speed metric to our basic results + basic_model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1) - for main_stem, no_stem in stem_pairs.items(): - # Construct full file paths for both the isolated stem and its complement - main_path = os.path.join(temp_dir, f"{main_stem}.wav") - no_stem_path = os.path.join(temp_dir, f"{no_stem}.wav") + # Check which stems were actually created + wav_files = [f for f in os.listdir(temp_dir) if f.endswith(".wav")] + logger.info(f"Found WAV files: {wav_files}") - # Only process this pair if both files exist - if os.path.exists(main_path) and os.path.exists(no_stem_path): - # Add the main stem with its path to available_stems - available_stems[main_stem] = main_path # This is already using the correct musdb name + # Determine if this is a standard vocal/instrumental model that can be evaluated with museval + standard_model = False + if len(wav_files) == 2: + # Check if one of the files is named vocals.wav or instrumental.wav + if "vocals.wav" in wav_files and "instrumental.wav" in wav_files: + standard_model = True + logger.info("Detected standard vocals/instrumental model, will run museval evaluation") - # For the complement stem, always use "accompaniment" as that's what museval expects - available_stems["accompaniment"] = no_stem_path + # If not a standard model, skip museval evaluation and just return speed metrics + if not standard_model: + logger.info(f"Non-standard stem configuration detected for model {test_model}, skipping museval evaluation") - if not available_stems: - logger.info(f"No evaluatable stems found for model {test_model}, skipping evaluation") - return None, None + # Store the speed metric in the combined results + if test_model not in museval_results: + museval_results[test_model] = {} + + # Create a minimal structure for the speed metric + minimal_results = {"targets": [{"name": "speed_metrics_only", "metrics": {"seconds_per_minute_m3": round(seconds_per_minute, 1)}}]} + + museval_results[test_model][track_name] = minimal_results + save_combined_results(museval_results) + + return None, basic_model_results + + # For standard models, proceed with museval evaluation + available_stems = {} + available_stems["vocals"] = os.path.join(temp_dir, "vocals.wav") + available_stems["accompaniment"] = os.path.join(temp_dir, "instrumental.wav") # Get track from MUSDB track = next((t for t in mus_db if t.name == track_name), None) 
@@ -171,39 +219,64 @@ def evaluate_track(track_name, track_path, test_model, mus_db):
 
             # Evaluate using museval
             logger.info(f"Evaluating stems: {list(estimates.keys())}")
-            scores = museval.eval_mus_track(track, estimates, output_dir=temp_dir, mode="v4")
-
-            # Update the combined results file with the new evaluation
-            if test_model not in museval_results:
-                museval_results[test_model] = {}
-            museval_results[test_model][track_name] = scores.scores
-            save_combined_results(museval_results)
-
-            # Calculate aggregate scores for available stems
-            results_store = museval.EvalStore()
-            results_store.add_track(scores.df)
-            methods = museval.MethodStore()
-            methods.add_evalstore(results_store, name=test_model)
-            agg_scores = methods.agg_frames_tracks_scores()
-
-            # Return the aggregate scores in a structured format with 6 significant figures
-            model_results = {"track_name": track_name, "scores": {}}
-
-            for stem in ["vocals", "drums", "bass", "other", "accompaniment"]:
-                try:
-                    stem_scores = {metric: float(f"{agg_scores.loc[(test_model, stem, metric)]:.6g}") for metric in ["SDR", "SIR", "SAR", "ISR"]}
-                    # Rename 'accompaniment' to 'instrumental' in the output
-                    output_stem = "instrumental" if stem == "accompaniment" else stem
-                    model_results["scores"][output_stem] = stem_scores
-                except KeyError:
-                    continue
+            try:
+                scores = museval.eval_mus_track(track, estimates, output_dir=temp_dir, mode="v4")
+
+                # Add the speed metric to the scores
+                if not hasattr(scores, "speed_metric_added"):
+                    for target in scores.scores["targets"]:
+                        if "metrics" not in target:
+                            target["metrics"] = {}
+                        target["metrics"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
+                    scores.speed_metric_added = True
+
+                # Update the combined results file with the new evaluation
+                if test_model not in museval_results:
+                    museval_results[test_model] = {}
+                museval_results[test_model][track_name] = scores.scores
+                save_combined_results(museval_results)
+            except Exception as e:
+                logger.error(f"Error during museval evaluation: {str(e)}")
+                logger.exception("Evaluation exception details:")
+                # Return basic results with just the speed metric
+                return None, basic_model_results
 
-            # Add the seconds_per_minute_m3 metric if it was calculated
-            if "processing_time" in locals() and track_duration_minutes > 0:
-                seconds_per_minute = processing_time / track_duration_minutes
-                model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
+    try:
+        # Only process museval results if we have them
+        if "scores" in locals() and scores is not None:
+            # Calculate aggregate scores for available stems
+            results_store = museval.EvalStore()
+            results_store.add_track(scores.df)
+            methods = museval.MethodStore()
+            methods.add_evalstore(results_store, name=test_model)
+            agg_scores = methods.agg_frames_tracks_scores()
+
+            # Return the aggregate scores in a structured format with 6 significant figures
+            model_results = {"track_name": track_name, "scores": {}}
+
+            for stem in ["vocals", "drums", "bass", "other", "accompaniment"]:
+                try:
+                    stem_scores = {metric: float(f"{agg_scores.loc[(test_model, stem, metric)]:.6g}") for metric in ["SDR", "SIR", "SAR", "ISR"]}
+                    # Rename 'accompaniment' to 'instrumental' in the output
+                    output_stem = "instrumental" if stem == "accompaniment" else stem
+                    model_results["scores"][output_stem] = stem_scores
+                except KeyError:
+                    continue
+
+            # Add the seconds_per_minute_m3 metric if it was calculated
+            if processing_time > 0 and track_duration_minutes > 0:
+                model_results["scores"]["seconds_per_minute_m3"] = round(seconds_per_minute, 1)
+
+            return scores, model_results if model_results["scores"] else basic_model_results
+        else:
+            # If we don't have scores, just return the basic results with speed metrics
+            return None, basic_model_results
 
-    return scores, model_results if model_results["scores"] else None
+    except Exception as e:
+        logger.error(f"Error processing evaluation results: {str(e)}")
+        logger.exception("Results processing exception details:")
+        # Return basic results with just the speed metric
+        return None, basic_model_results
 
 
 def convert_decimal_to_float(obj):
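
Note: the core of the new seconds_per_minute_m3 metric is wall-clock separation
time normalized by track length. A minimal sketch of that calculation outside
the test harness (the helper name and the separate_fn callback are
illustrative, not part of this repo):

    import time

    def speed_metric(separate_fn, track_path, track_duration_minutes):
        """Seconds of processing per minute of audio, rounded to one
        decimal place, as stored under the "seconds_per_minute_m3" key."""
        start_time = time.time()
        separate_fn(track_path)  # run the separator on this track
        processing_time = time.time() - start_time

        # Mirror the patch's guard: only report the metric when both the
        # processing time and the track duration are positive
        if processing_time > 0 and track_duration_minutes > 0:
            return round(processing_time / track_duration_minutes, 1)
        return 0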
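The museval-vs-skip decision above reduces to a single rule: run museval only
when the separator emitted exactly a vocals.wav/instrumental.wav pair. The
same check, condensed into a standalone sketch (hypothetical helper name):

    import os

    def is_standard_vocal_model(temp_dir):
        """True only for the one output layout museval evaluation supports
        here: exactly vocals.wav plus instrumental.wav."""
        wav_files = [f for f in os.listdir(temp_dir) if f.endswith(".wav")]
        return len(wav_files) == 2 and "vocals.wav" in wav_files and "instrumental.wav" in wav_files

Every other stem layout (e.g. the new drum-bass/no drum-bass pairs) now still
yields a speed score instead of being dropped entirely.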
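With the models-scores.json schema above, each model entry exposes
median_scores holding per-stem metrics plus the speed figure, so quality/speed
trade-offs can be read straight from the file. A sketch of such a lookup,
assuming it runs from the repository root:

    import json

    # Load the scores file this patch extends
    with open("audio_separator/models-scores.json") as f:
        all_scores = json.load(f)

    entry = all_scores["mel_band_roformer_karaoke_gabox.ckpt"]
    medians = entry["median_scores"]

    print(entry["model_name"])
    print(f'vocals SDR: {medians["vocals"]["SDR"]}')                    # 6.88608
    print(f'sec per min of audio: {medians["seconds_per_minute_m3"]}')  # 17.5

Speed-only entries (like the BS-Roformer-Viperx-1053 one above) carry no
per-stem keys, so a consumer should check medians.get("vocals") before
indexing into stem metrics.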