diff --git a/README.md b/README.md index 826023f..6991e54 100644 --- a/README.md +++ b/README.md @@ -39,18 +39,24 @@ Download [model weights](https://huggingface.co/spaces/Audio-AGI/AudioSep/tree/m from pipeline import build_audiosep, inference import torch + # Check if a CUDA-compatible GPU is available, and set the device accordingly device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + # Build an audio separation model by providing a configuration YAML file, a checkpoint path, + # and the chosen device model = build_audiosep( config_yaml='config/audiosep_base.yaml', checkpoint_path='checkpoint/audiosep_base_4M_steps.ckpt', device=device) + # Define the paths and information for audio separation audio_file = 'path_to_audio_file' text = 'textual_description' output_file='separated_audio.wav' - # AudioSep processes the audio at 32 kHz sampling rate + # Note: The 'inference' function processes audio at a 32 kHz sampling rate + # Perform audio separation using the 'inference' function with the specified model, audio + # file, text, output file, and device inference(model, audio_file, text, output_file, device) ``` @@ -63,17 +69,25 @@ To load directly from Hugging Face, you can do the following: from utils import get_ss_model import torch + # Check if a CUDA-compatible GPU is available and set the device accordingly device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + # Get the source separation model configuration from the specified YAML file ss_model = get_ss_model('config/audiosep_base.yaml') + # Create an instance of the AudioSep model from a pretrained model checkpoint, + # while providing the source separation model and setting the device model = AudioSep.from_pretrained("nielsr/audiosep-demo", ss_model=ss_model) + # Define the paths and information for audio separation audio_file = 'path_to_audio_file' text = 'textual_description' output_file='separated_audio.wav' - # AudioSep processes the audio at 32 kHz sampling rate + + # 
Note: The 'inference' function processes audio at a 32 kHz sampling rate + # Perform audio separation using the 'inference' function with the specified model, audio + # file, text, output file, and device inference(model, audio_file, text, output_file, device) ```