Removed auditok to drop an OS-specific dependency

nomadkaraoke · Dec 4, 2024 · 5810df4 · 5810df4
1 parent 9f0ef2b
commit 5810df4
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 620 deletions.
diff --git a/.github/removetritonandpyaudio.patch b/.github/removetritonandpyaudio.patch
diff --git a/lyrics_transcriber/transcriber.py b/lyrics_transcriber/transcriber.py
@@ -969,7 +969,10 @@ def transcribe(self):
             self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
             audio = whisper.load_audio(self.audio_filepath)
             model = whisper.load_model(self.transcription_model, device="cpu")
-            transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
+            transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)
+
+            # auditok provided voice activity detection (VAD), but it has OS-level package dependencies that are hard to install on some platforms, so VAD is disabled for now; the original call is kept below for reference
+            # transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)
 
             # Remove segments with no words, only music
             transcription_data["segments"] = [segment for segment in transcription_data["segments"] if segment["text"].strip() != "Music"]