Skip to content

Commit

Permalink
Refactored output config slightly to make it cleaner to add more CLI args in future, added all outputs to CLI logs, made CDG and video generation both optional
Browse files (browse the repository at this point in the history)
  • Loading branch information
beveradb committed Jan 19, 2025
1 parent 110ec9c commit a49ffeb
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 150 deletions.
3 changes: 2 additions & 1 deletion lyrics_transcriber/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from lyrics_transcriber.core.controller import LyricsTranscriber, TranscriberConfig, LyricsConfig, OutputConfig
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
from lyrics_transcriber.core.controller import LyricsTranscriber

__all__ = ["LyricsTranscriber", "TranscriberConfig", "LyricsConfig", "OutputConfig"]
22 changes: 17 additions & 5 deletions lyrics_transcriber/cli/cli_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def create_arg_parser() -> argparse.ArgumentParser:
type=Path,
help="JSON file containing output style configurations for CDG and video generation",
)
output_group.add_argument("--generate_cdg", action="store_true", help="Generate CDG karaoke files")

# Video options
video_group = parser.add_argument_group("Video Options")
Expand Down Expand Up @@ -135,6 +136,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
cache_dir=str(args.cache_dir),
render_video=args.render_video,
generate_cdg=args.generate_cdg,
video_resolution=args.video_resolution,
)

Expand Down Expand Up @@ -191,18 +193,28 @@ def main() -> None:
# Log results
logger.info("*** Success! ***")

# Log all generated output files
if results.original_txt:
logger.info(f"Generated original transcription: {results.original_txt}")
if results.corrections_json:
logger.info(f"Generated corrections data: {results.corrections_json}")

if results.corrected_txt:
logger.info(f"Generated corrected lyrics: {results.corrected_txt}")
if results.lrc_filepath:
logger.info(f"Generated LRC file: {results.lrc_filepath}")
if results.mp3_filepath:
logger.info(f"Generated MP3 file: {results.mp3_filepath}")

if results.cdg_filepath:
logger.info(f"Generated CDG file: {results.cdg_filepath}")
if results.mp3_filepath:
logger.info(f"Generated MP3 file: {results.mp3_filepath}")
if results.cdg_zip_filepath:
logger.info(f"Generated CDG ZIP file: {results.cdg_zip_filepath}")
logger.info(f"Generated CDG ZIP archive: {results.cdg_zip_filepath}")

if results.ass_filepath:
logger.info(f"Generated ASS file: {results.ass_filepath}")
logger.info(f"Generated ASS subtitles: {results.ass_filepath}")
if results.video_filepath:
logger.info(f"Generated MKV video file: {results.video_filepath}")
logger.info(f"Generated video: {results.video_filepath}")

except Exception as e:
# Get the full exception traceback
Expand Down
34 changes: 34 additions & 0 deletions lyrics_transcriber/core/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
from dataclasses import dataclass, field
from typing import Any, Dict, Optional


@dataclass
class TranscriberConfig:
    """Configuration for transcription services.

    Every field is optional and defaults to ``None``.

    NOTE(review): how a missing value is handled (e.g. whether the matching
    transcription backend is skipped) is decided by whoever consumes this
    config and is not visible here -- confirm against the controller.
    """

    # API token for AudioShake (per the field name).
    audioshake_api_token: Optional[str] = None
    # API key for RunPod (per the field name).
    runpod_api_key: Optional[str] = None
    # Presumably the RunPod endpoint ID that hosts Whisper -- TODO confirm.
    whisper_runpod_id: Optional[str] = None


@dataclass
class LyricsConfig:
    """Configuration for lyrics services.

    Both fields are optional and default to ``None``.

    NOTE(review): the effect of leaving a field unset (e.g. whether the
    matching lyrics provider is disabled) is determined by the consumer of
    this config and is not visible here -- confirm against the controller.
    """

    # API token for Genius (per the field name).
    genius_api_token: Optional[str] = None
    # Presumably a Spotify session cookie used for lyrics lookup -- TODO confirm.
    spotify_cookie: Optional[str] = None


@dataclass
class OutputConfig:
    """Configuration for output generation.

    The ``output_dir`` and ``cache_dir`` defaults are resolved each time an
    instance is created. Writing them as plain defaults (``= os.getcwd()``)
    would evaluate them once, when this module is first imported, so a later
    ``os.chdir`` or a later change to ``LYRICS_TRANSCRIBER_CACHE_DIR`` would
    be silently ignored.
    """

    # Path to the output styles JSON file; the only required field.
    # NOTE(review): no existence check is performed here -- confirm the
    # consumer validates the path before reading it.
    output_styles_json: str
    # Presumably the maximum characters per rendered lyric line -- TODO confirm.
    max_line_length: int = 36
    # Fresh dict per instance. Presumably holds the parsed contents of
    # ``output_styles_json``, filled in elsewhere -- TODO confirm.
    styles: Dict[str, Any] = field(default_factory=dict)
    # Defaults to the working directory at instantiation time.
    output_dir: Optional[str] = field(default_factory=os.getcwd)
    # Defaults to $LYRICS_TRANSCRIBER_CACHE_DIR, read at instantiation time,
    # falling back to a fixed directory under /tmp.
    cache_dir: str = field(
        default_factory=lambda: os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
    )
    # Opt-in switch for video rendering.
    render_video: bool = False
    # Opt-in switch for generating CDG karaoke files.
    generate_cdg: bool = False
    # Resolution label for rendered video; accepted values are defined by the
    # consumer and not visible here.
    video_resolution: str = "360p"
72 changes: 12 additions & 60 deletions lyrics_transcriber/core/controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import logging
import json
from dataclasses import dataclass, field
from typing import Dict, Optional, List
from lyrics_transcriber.types import (
Expand All @@ -14,41 +13,9 @@
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
from lyrics_transcriber.lyrics.genius import GeniusProvider
from lyrics_transcriber.lyrics.spotify import SpotifyProvider
from lyrics_transcriber.output.generator import OutputGenerator, OutputGeneratorConfig
from lyrics_transcriber.output.generator import OutputGenerator
from lyrics_transcriber.correction.corrector import LyricsCorrector


@dataclass
class TranscriberConfig:
"""Configuration for transcription services."""

audioshake_api_token: Optional[str] = None
runpod_api_key: Optional[str] = None
whisper_runpod_id: Optional[str] = None


@dataclass
class LyricsConfig:
"""Configuration for lyrics services."""

genius_api_token: Optional[str] = None
spotify_cookie: Optional[str] = None


@dataclass
class OutputConfig:
"""Configuration for output generation."""

output_styles_json: str
output_dir: Optional[str] = os.getcwd()
cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
render_video: bool = False
video_resolution: str = "360p"

def __post_init__(self):
"""Validate configuration after initialization."""
if self.output_styles_json and not os.path.isfile(self.output_styles_json):
raise FileNotFoundError(f"Output styles JSON file not found: {self.output_styles_json}")
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig


@dataclass
Expand All @@ -69,6 +36,9 @@ class LyricsControllerResult:
mp3_filepath: Optional[str] = None
cdg_filepath: Optional[str] = None
cdg_zip_filepath: Optional[str] = None
original_txt: Optional[str] = None
corrected_txt: Optional[str] = None
corrections_json: Optional[str] = None


class LyricsTranscriber:
Expand Down Expand Up @@ -202,26 +172,7 @@ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:

def _initialize_output_generator(self) -> OutputGenerator:
"""Initialize output generation service."""
# Load output styles from JSON
try:
with open(self.output_config.output_styles_json, "r") as f:
styles = json.load(f)
self.logger.debug(f"Loaded output styles from: {self.output_config.output_styles_json}")
except json.JSONDecodeError as e:
raise ValueError(f"Invalid JSON in output styles file: {str(e)}")
except Exception as e:
raise ValueError(f"Failed to load output styles file: {str(e)}")

# Convert OutputConfig to OutputGeneratorConfig
generator_config = OutputGeneratorConfig(
output_dir=self.output_config.output_dir,
cache_dir=self.output_config.cache_dir,
styles=styles,
video_resolution=self.output_config.video_resolution,
)

# Initialize output generator
return OutputGenerator(config=generator_config, logger=self.logger)
return OutputGenerator(config=self.output_config, logger=self.logger)

def process(self) -> LyricsControllerResult:
"""
Expand Down Expand Up @@ -328,17 +279,18 @@ def generate_outputs(self) -> None:
audio_filepath=self.audio_filepath,
artist=self.artist,
title=self.title,
render_video=self.output_config.render_video,
)

# Store output paths - access attributes directly instead of using .get()
# Store all output paths in results
self.results.lrc_filepath = output_files.lrc
self.results.ass_filepath = output_files.ass

self.results.mp3_filepath = output_files.mp3
self.results.video_filepath = output_files.video
self.results.original_txt = output_files.original_txt
self.results.corrected_txt = output_files.corrected_txt
self.results.corrections_json = output_files.corrections_json
self.results.cdg_filepath = output_files.cdg
self.results.mp3_filepath = output_files.mp3
self.results.cdg_zip_filepath = output_files.cdg_zip
self.results.video_filepath = output_files.video

except Exception as e:
self.logger.error(f"Failed to generate outputs: {str(e)}")
Expand Down
9 changes: 0 additions & 9 deletions lyrics_transcriber/correction/corrector.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,6 @@ def _preserve_formatting(self, original: str, new_word: str) -> str:
trailing_space = " " if original != original.rstrip() else ""
return leading_space + new_word.strip() + trailing_space

def _try_correct_word(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
"""Attempt to correct a word using available handlers."""
for handler in self.handlers:
if handler.can_handle(gap, current_word_idx):
correction = handler.handle(gap, word, current_word_idx, segment_idx)
if correction:
return correction
return None

def _process_corrections(
self, segments: List[LyricsSegment], gap_sequences: List[GapSequence]
) -> Tuple[List[WordCorrection], List[LyricsSegment]]:
Expand Down
30 changes: 15 additions & 15 deletions lyrics_transcriber/output/cdg.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _convert_segments_to_lyrics_data(self, segments: List[LyricsSegment]) -> Lis
# Convert time from seconds to centiseconds
timestamp = int(word.start_time * 100)
lyrics_data.append({"timestamp": timestamp, "text": word.text.upper()}) # CDG format expects uppercase text
self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
# self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")

# Sort by timestamp to ensure correct order
lyrics_data.sort(key=lambda x: x["timestamp"])
Expand Down Expand Up @@ -312,20 +312,20 @@ def _format_lyrics_data(self, lyrics_data: List[dict], instrumentals: List[dict]
formatted_lyrics = []

for i, lyric in enumerate(lyrics_data):
self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
# self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")

if i == 0 or lyric["timestamp"] - lyrics_data[i - 1]["timestamp"] >= cdg_styles["lead_in_threshold"]:
lead_in_start = lyric["timestamp"] - cdg_styles["lead_in_total"]
self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
# self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
for j, symbol in enumerate(cdg_styles["lead_in_symbols"]):
sync_time = lead_in_start + j * cdg_styles["lead_in_duration"]
sync_times.append(sync_time)
formatted_lyrics.append(symbol)
self.logger.debug(f" Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
# self.logger.debug(f" Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")

sync_times.append(lyric["timestamp"])
formatted_lyrics.append(lyric["text"])
self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
# self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")

formatted_text = self.format_lyrics(
formatted_lyrics,
Expand Down Expand Up @@ -446,24 +446,24 @@ def format_lyrics(self, lyrics_data, instrumentals, sync_times, font_path=None,
page_number = 1

for i, text in enumerate(lyrics_data):
self.logger.debug(f"Processing text {i}: '{text}' (sync time: {sync_times[i]})")
# self.logger.debug(f"Processing text {i}: '{text}' (sync time: {sync_times[i]})")

if text.startswith("/"):
if current_line:
wrapped_lines = get_wrapped_text(current_line.strip(), font, self.cdg_visible_width).split("\n")
for wrapped_line in wrapped_lines:
formatted_lyrics.append(wrapped_line)
lines_on_page += 1
self.logger.debug(f"Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
# self.logger.debug(f"Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
if lines_on_page == 4:
lines_on_page = 0
page_number += 1
self.logger.debug(f"Page full. New page number: {page_number}")
# self.logger.debug(f"Page full. New page number: {page_number}")
current_line = ""
text = text[1:]

current_line += text + " "
self.logger.debug(f"Current line: '{current_line}'")
# self.logger.debug(f"Current line: '{current_line}'")

is_last_before_instrumental = any(
inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
Expand All @@ -475,29 +475,29 @@ def format_lyrics(self, lyrics_data, instrumentals, sync_times, font_path=None,
for wrapped_line in wrapped_lines:
formatted_lyrics.append(wrapped_line)
lines_on_page += 1
self.logger.debug(f"Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}")
# self.logger.debug(f"Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}")
if lines_on_page == 4:
lines_on_page = 0
page_number += 1
self.logger.debug(f"Page full. New page number: {page_number}")
# self.logger.debug(f"Page full. New page number: {page_number}")
current_line = ""

if is_last_before_instrumental:
blank_lines_needed = 4 - lines_on_page
if blank_lines_needed < 4:
formatted_lyrics.extend(["~"] * blank_lines_needed)
self.logger.debug(f"Added {blank_lines_needed} empty lines before instrumental. Lines on page was {lines_on_page}")
# self.logger.debug(f"Added {blank_lines_needed} empty lines before instrumental. Lines on page was {lines_on_page}")
lines_on_page = 0
page_number += 1
self.logger.debug(f"Reset lines_on_page to 0. New page number: {page_number}")
# self.logger.debug(f"Reset lines_on_page to 0. New page number: {page_number}")

final_lyrics = []
for line in formatted_lyrics:
final_lyrics.append(line)
if line.endswith(("!", "?", ".")) and not line == "~":
final_lyrics.append("~")
self.logger.debug("Added empty line after punctuation")
# self.logger.debug("Added empty line after punctuation")

result = "\n".join(final_lyrics)
self.logger.debug(f"Final formatted lyrics:\n{result}")
# self.logger.debug(f"Final formatted lyrics:\n{result}")
return result
20 changes: 10 additions & 10 deletions lyrics_transcriber/output/cdgmaker/composer.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,11 +969,11 @@ def _compose_lyric(
# Queue the erasing of this line if necessary
if should_erase_this_line:
assert line_erase_info is not None
logger.debug(
f"t={self.writer.packets_queued}: erasing lyric "
f"{line_erase_info.lyric_index} line "
f"{line_erase_info.line_index}"
)
# logger.debug(
# f"t={self.writer.packets_queued}: erasing lyric "
# f"{line_erase_info.lyric_index} line "
# f"{line_erase_info.line_index}"
# )
if line_erase_info.text.strip():
state.draw_queue.extend(line_image_to_packets(
line_erase_info.image,
Expand All @@ -987,11 +987,11 @@ def _compose_lyric(
# Queue the drawing of this line if necessary
if should_draw_this_line:
assert line_draw_info is not None
logger.debug(
f"t={self.writer.packets_queued}: drawing lyric "
f"{line_draw_info.lyric_index} line "
f"{line_draw_info.line_index}"
)
# logger.debug(
# f"t={self.writer.packets_queued}: drawing lyric "
# f"{line_draw_info.lyric_index} line "
# f"{line_draw_info.line_index}"
# )
if line_draw_info.text.strip():
state.draw_queue.extend(line_image_to_packets(
line_draw_info.image,
Expand Down
Loading

0 comments on commit a49ffeb

Please sign in to comment.