Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated output files to be compatible for karaoke-prep standard output file naming
Browse files Browse the repository at this point in the history
beveradb committed Dec 27, 2024
1 parent 5810df4 commit 4757ac1
Showing 3 changed files with 47 additions and 24 deletions.
9 changes: 8 additions & 1 deletion lyrics_transcriber/audioshake_transcriber.py
Original file line number Diff line number Diff line change
@@ -5,10 +5,11 @@


class AudioShakeTranscriber:
def __init__(self, api_token, logger):
def __init__(self, api_token, logger, output_prefix):
self.api_token = api_token
self.base_url = "https://groovy.audioshake.ai"
self.logger = logger
self.output_prefix = output_prefix

def transcribe(self, audio_filepath):
self.logger.info(f"Transcribing {audio_filepath} using AudioShake API")
@@ -103,4 +104,10 @@ def _process_result(self, job_data):
if "text" not in segment:
segment["text"] = " ".join(word["text"] for word in segment["words"])

transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")

return transcription_data

def get_output_filename(self, suffix):
    """Build an output filename from the shared prefix and a purpose suffix.

    The result is ``self.output_prefix`` immediately followed by ``suffix``.
    No separator is inserted, so the caller supplies any leading space in
    the suffix itself, e.g. ``" (AudioShake)"``.
    """
    return "{}{}".format(self.output_prefix, suffix)
60 changes: 38 additions & 22 deletions lyrics_transcriber/transcriber.py
Original file line number Diff line number Diff line change
@@ -166,6 +166,8 @@ def __init__(

self.create_folders()

self.output_prefix = f"{artist} - {title}"

def generate(self):
self.logger.debug(f"audio_filepath is set: {self.audio_filepath}, beginning initial whisper transcription")

@@ -294,7 +296,7 @@ def write_corrected_lyrics_data_file(self):

self.logger.debug("write_corrected_lyrics_data_file initiating OpenAI client")

corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.json")
corrected_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).json"))

if os.path.isfile(corrected_lyrics_data_json_cache_filepath):
self.logger.debug(
@@ -331,9 +333,7 @@ def write_corrected_lyrics_data_file(self):
# TODO: Possibly add a step after segment-based correct to get the LLM to self-analyse the diff

self.outputs["llm_transcript"] = ""
self.outputs["llm_transcript_filepath"] = os.path.join(
self.cache_dir, "lyrics-" + self.get_song_slug() + "-llm-correction-transcript.txt"
)
self.outputs["llm_transcript_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (LLM Transcript).txt"))

total_segments = len(self.outputs["transcription_data_dict"]["segments"])
self.logger.info(f"Beginning correction using LLM, total segments: {total_segments}")
@@ -466,7 +466,9 @@ def write_corrected_lyrics_plain_text(self):
if self.outputs["corrected_lyrics_data_dict"]:
self.logger.debug(f"corrected_lyrics_data_dict exists, writing plain text lyrics file")

corrected_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-corrected.txt")
corrected_lyrics_text_filepath = os.path.join(
self.cache_dir, self.get_output_filename(" (Lyrics Corrected).txt") # Updated to use consistent naming
)
self.outputs["corrected_lyrics_text_filepath"] = corrected_lyrics_text_filepath

self.outputs["corrected_lyrics_text"] = ""
@@ -475,7 +477,7 @@ def write_corrected_lyrics_plain_text(self):
with open(corrected_lyrics_text_filepath, "w", encoding="utf-8") as f:
for corrected_segment in self.outputs["corrected_lyrics_data_dict"]["segments"]:
self.outputs["corrected_lyrics_text"] += corrected_segment["text"].strip() + "\n"
f.write(corrected_segment["text".strip()] + "\n")
f.write(corrected_segment["text"].strip() + "\n")

def write_spotify_lyrics_data_file(self):
if self.spotify_cookie and self.song_known:
@@ -484,7 +486,9 @@ def write_spotify_lyrics_data_file(self):
self.logger.warning(f"skipping spotify fetch as not all spotify params were set")
return

spotify_lyrics_data_json_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.json")
spotify_lyrics_data_json_cache_filepath = os.path.join(
self.cache_dir, self.get_output_filename(" (Lyrics Spotify).json") # Updated to use consistent naming
)

if os.path.isfile(spotify_lyrics_data_json_cache_filepath):
self.logger.debug(
@@ -531,7 +535,9 @@ def write_spotify_lyrics_plain_text(self):
if self.outputs["spotify_lyrics_data_dict"]:
self.logger.debug(f"spotify_lyrics data found, checking/writing plain text lyrics file")

spotify_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-spotify.txt")
spotify_lyrics_text_filepath = os.path.join(
self.cache_dir, self.get_output_filename(" (Lyrics Spotify).txt") # Updated to use consistent naming
)
self.outputs["spotify_lyrics_text_filepath"] = spotify_lyrics_text_filepath

lines = self.outputs["spotify_lyrics_data_dict"]["lyrics"]["lines"]
@@ -561,7 +567,7 @@ def write_genius_lyrics_file(self):
self.logger.warning(f"skipping genius fetch as not all genius params were set")
return

genius_lyrics_cache_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + "-genius.txt")
genius_lyrics_cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Genius).txt"))

if os.path.isfile(genius_lyrics_cache_filepath):
self.logger.debug(f"found existing file at genius_lyrics_cache_filepath, reading: {genius_lyrics_cache_filepath}")
@@ -635,7 +641,9 @@ def calculate_singing_percentage(self):
# then loops over each word and writes all words with MidiCo segment start/end formatting
# and word-level timestamps to a MidiCo-compatible LRC file
def write_midico_lrc_file(self):
self.outputs["midico_lrc_filepath"] = self.get_cache_filepath(".lrc")
self.outputs["midico_lrc_filepath"] = os.path.join(
self.cache_dir, self.get_output_filename(" (Lyrics Corrected).lrc") # Updated suffix
)

lrc_filename = self.outputs["midico_lrc_filepath"]
self.logger.debug(f"writing midico formatted word timestamps to LRC file: {lrc_filename}")
@@ -692,9 +700,15 @@ def create_screens(self):
self.logger.debug("Reset current line")

current_line_text += (" " if current_line_text else "") + word["text"]

# fmt: off
lyric_segment = subtitles.LyricSegment(
text=word["text"], ts=timedelta(seconds=word["start"]), end_ts=timedelta(seconds=word["end"])
text=word["text"],
ts=timedelta(seconds=word["start"]),
end_ts=timedelta(seconds=word["end"])
)
# fmt: on

current_line.segments.append(lyric_segment)
self.logger.debug(f"Added word to current line. Current line: '{current_line_text}'")

@@ -706,7 +720,7 @@ def create_screens(self):
return screens

def write_ass_file(self):
self.outputs["ass_subtitles_filepath"] = self.get_cache_filepath(".ass")
self.outputs["ass_subtitles_filepath"] = os.path.join(self.cache_dir, self.get_output_filename(" (Lyrics Corrected).ass"))

ass_filepath = self.outputs["ass_subtitles_filepath"]
self.logger.debug(f"writing ASS formatted subtitle file: {ass_filepath}")
@@ -832,10 +846,10 @@ def format_time_lrc(self, duration):

def write_transcribed_lyrics_plain_text(self):
if self.outputs["transcription_data_dict"]:
transcription_cache_suffix = "-audioshake-transcribed.txt" if self.audioshake_api_token else "-whisper-transcribed.txt"
transcription_cache_suffix = " (Lyrics AudioShake).txt" if self.audioshake_api_token else " (Lyrics Whisper).txt"
self.logger.debug(f"transcription_cache_suffix: {transcription_cache_suffix}")

transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, "lyrics-" + self.get_song_slug() + transcription_cache_suffix)
transcribed_lyrics_text_filepath = os.path.join(self.cache_dir, self.get_output_filename(transcription_cache_suffix))
self.outputs["transcribed_lyrics_text_filepath"] = transcribed_lyrics_text_filepath

self.outputs["transcribed_lyrics_text"] = ""
@@ -949,8 +963,8 @@ def split_long_segments(self, segments, max_length):
return new_segments

def transcribe(self):
transcription_cache_suffix = "-audioshake" if self.audioshake_api_token else "-whisper"
self.outputs["transcription_data_filepath"] = self.get_cache_filepath(f"{transcription_cache_suffix}.json")
transcription_cache_suffix = " (AudioShake).json" if self.audioshake_api_token else " (Whisper).json"
self.outputs["transcription_data_filepath"] = self.get_cache_filepath(transcription_cache_suffix)

transcription_cache_filepath = self.outputs["transcription_data_filepath"]
if os.path.isfile(transcription_cache_filepath):
@@ -963,14 +977,14 @@ def transcribe(self):
self.logger.debug(f"Using AudioShake API for transcription")
from .audioshake_transcriber import AudioShakeTranscriber

audioshake = AudioShakeTranscriber(self.audioshake_api_token, logger=self.logger)
audioshake = AudioShakeTranscriber(api_token=self.audioshake_api_token, logger=self.logger, output_prefix=self.output_prefix)
transcription_data = audioshake.transcribe(self.audio_filepath)
else:
self.logger.debug(f"Using Whisper for transcription with model: {self.transcription_model}")
audio = whisper.load_audio(self.audio_filepath)
model = whisper.load_model(self.transcription_model, device="cpu")
transcription_data = whisper.transcribe(model, audio, language="en", beam_size=5, temperature=0.2, best_of=5)

# auditok is needed for voice activity detection, but it has OS package dependencies that are hard to install on some platforms
# transcription_data = whisper.transcribe(model, audio, language="en", vad="auditok", beam_size=5, temperature=0.2, best_of=5)

@@ -990,10 +1004,8 @@ def transcribe(self):
self.outputs["transcription_data_dict"] = transcription_data

def get_cache_filepath(self, extension):
filename = os.path.split(self.audio_filepath)[1]
filename_slug = slugify.slugify(filename, lowercase=False)
hash_value = self.get_file_hash(self.audio_filepath)
cache_filepath = os.path.join(self.cache_dir, filename_slug + "_" + hash_value + extension)
# Instead of using slugify and hash, use the consistent naming pattern
cache_filepath = os.path.join(self.cache_dir, self.get_output_filename(extension))
self.logger.debug(f"get_cache_filepath returning cache_filepath: {cache_filepath}")
return cache_filepath

@@ -1014,3 +1026,7 @@ def create_folders(self):

if self.output_dir is not None:
os.makedirs(self.output_dir, exist_ok=True)

def get_output_filename(self, suffix):
    """Build an output filename from the shared prefix and a purpose suffix.

    The result is ``self.output_prefix`` immediately followed by ``suffix``.
    Nothing is inserted between the two parts, so the suffix carries its own
    leading space and file extension, e.g. ``" (Lyrics Corrected).txt"``.
    """
    return "{}{}".format(self.output_prefix, suffix)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lyrics-transcriber"
version = "0.18.0"
version = "0.19.0"
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
license = "MIT"

0 comments on commit 4757ac1

Please sign in to comment.