Refactored output config slightly to make it cleaner to add more CLI args in future, added all outputs to CLI logs, made CDG and video generation both optional
nomadkaraoke · Jan 19, 2025 · a49ffeb · a49ffeb
1 parent 110ec9c
commit a49ffeb
Show file tree

Hide file tree

Showing 8 changed files with 137 additions and 150 deletions.
diff --git a/lyrics_transcriber/__init__.py b/lyrics_transcriber/__init__.py
@@ -1,3 +1,4 @@
-from lyrics_transcriber.core.controller import LyricsTranscriber, TranscriberConfig, LyricsConfig, OutputConfig
+from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
+from lyrics_transcriber.core.controller import LyricsTranscriber
 
 __all__ = ["LyricsTranscriber", "TranscriberConfig", "LyricsConfig", "OutputConfig"]
diff --git a/lyrics_transcriber/cli/cli_main.py b/lyrics_transcriber/cli/cli_main.py
@@ -67,6 +67,7 @@ def create_arg_parser() -> argparse.ArgumentParser:
         type=Path,
         help="JSON file containing output style configurations for CDG and video generation",
     )
+    output_group.add_argument("--generate_cdg", action="store_true", help="Generate CDG karaoke files")
 
     # Video options
     video_group = parser.add_argument_group("Video Options")
@@ -135,6 +136,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
         output_dir=str(args.output_dir) if args.output_dir else os.getcwd(),
         cache_dir=str(args.cache_dir),
         render_video=args.render_video,
+        generate_cdg=args.generate_cdg,
         video_resolution=args.video_resolution,
     )
 
@@ -191,18 +193,28 @@ def main() -> None:
         # Log results
         logger.info("*** Success! ***")
 
+        # Log all generated output files
+        if results.original_txt:
+            logger.info(f"Generated original transcription: {results.original_txt}")
+        if results.corrections_json:
+            logger.info(f"Generated corrections data: {results.corrections_json}")
+
+        if results.corrected_txt:
+            logger.info(f"Generated corrected lyrics: {results.corrected_txt}")
         if results.lrc_filepath:
             logger.info(f"Generated LRC file: {results.lrc_filepath}")
-        if results.mp3_filepath:
-            logger.info(f"Generated MP3 file: {results.mp3_filepath}")
+
         if results.cdg_filepath:
             logger.info(f"Generated CDG file: {results.cdg_filepath}")
+        if results.mp3_filepath:
+            logger.info(f"Generated MP3 file: {results.mp3_filepath}")
         if results.cdg_zip_filepath:
-            logger.info(f"Generated CDG ZIP file: {results.cdg_zip_filepath}")
+            logger.info(f"Generated CDG ZIP archive: {results.cdg_zip_filepath}")
+
         if results.ass_filepath:
-            logger.info(f"Generated ASS file: {results.ass_filepath}")
+            logger.info(f"Generated ASS subtitles: {results.ass_filepath}")
         if results.video_filepath:
-            logger.info(f"Generated MKV video file: {results.video_filepath}")
+            logger.info(f"Generated video: {results.video_filepath}")
 
     except Exception as e:
         # Get the full exception traceback

diff --git a/lyrics_transcriber/core/config.py b/lyrics_transcriber/core/config.py
@@ -0,0 +1,34 @@
+import os
+from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
+
+
+@dataclass
+class TranscriberConfig:
+    """Configuration for transcription services."""
+
+    audioshake_api_token: Optional[str] = None
+    runpod_api_key: Optional[str] = None
+    whisper_runpod_id: Optional[str] = None
+
+
+@dataclass
+class LyricsConfig:
+    """Configuration for lyrics services."""
+
+    genius_api_token: Optional[str] = None
+    spotify_cookie: Optional[str] = None
+
+
+@dataclass
+class OutputConfig:
+    """Configuration for output generation."""
+
+    output_styles_json: str
+    max_line_length: int = 36
+    styles: Dict[str, Any] = field(default_factory=dict)
+    output_dir: Optional[str] = os.getcwd()
+    cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
+    render_video: bool = False
+    generate_cdg: bool = False
+    video_resolution: str = "360p"
diff --git a/lyrics_transcriber/core/controller.py b/lyrics_transcriber/core/controller.py
@@ -1,6 +1,5 @@
 import os
 import logging
-import json
 from dataclasses import dataclass, field
 from typing import Dict, Optional, List
 from lyrics_transcriber.types import (
@@ -14,41 +13,9 @@
 from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
 from lyrics_transcriber.lyrics.genius import GeniusProvider
 from lyrics_transcriber.lyrics.spotify import SpotifyProvider
-from lyrics_transcriber.output.generator import OutputGenerator, OutputGeneratorConfig
+from lyrics_transcriber.output.generator import OutputGenerator
 from lyrics_transcriber.correction.corrector import LyricsCorrector
-
-
-@dataclass
-class TranscriberConfig:
-    """Configuration for transcription services."""
-
-    audioshake_api_token: Optional[str] = None
-    runpod_api_key: Optional[str] = None
-    whisper_runpod_id: Optional[str] = None
-
-
-@dataclass
-class LyricsConfig:
-    """Configuration for lyrics services."""
-
-    genius_api_token: Optional[str] = None
-    spotify_cookie: Optional[str] = None
-
-
-@dataclass
-class OutputConfig:
-    """Configuration for output generation."""
-
-    output_styles_json: str
-    output_dir: Optional[str] = os.getcwd()
-    cache_dir: str = os.getenv("LYRICS_TRANSCRIBER_CACHE_DIR", "/tmp/lyrics-transcriber-cache/")
-    render_video: bool = False
-    video_resolution: str = "360p"
-
-    def __post_init__(self):
-        """Validate configuration after initialization."""
-        if self.output_styles_json and not os.path.isfile(self.output_styles_json):
-            raise FileNotFoundError(f"Output styles JSON file not found: {self.output_styles_json}")
+from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
 
 
 @dataclass
@@ -69,6 +36,9 @@ class LyricsControllerResult:
     mp3_filepath: Optional[str] = None
     cdg_filepath: Optional[str] = None
     cdg_zip_filepath: Optional[str] = None
+    original_txt: Optional[str] = None
+    corrected_txt: Optional[str] = None
+    corrections_json: Optional[str] = None
 
 
 class LyricsTranscriber:
@@ -202,26 +172,7 @@ def _initialize_lyrics_providers(self) -> Dict[str, BaseLyricsProvider]:
 
     def _initialize_output_generator(self) -> OutputGenerator:
         """Initialize output generation service."""
-        # Load output styles from JSON
-        try:
-            with open(self.output_config.output_styles_json, "r") as f:
-                styles = json.load(f)
-            self.logger.debug(f"Loaded output styles from: {self.output_config.output_styles_json}")
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Invalid JSON in output styles file: {str(e)}")
-        except Exception as e:
-            raise ValueError(f"Failed to load output styles file: {str(e)}")
-
-        # Convert OutputConfig to OutputGeneratorConfig
-        generator_config = OutputGeneratorConfig(
-            output_dir=self.output_config.output_dir,
-            cache_dir=self.output_config.cache_dir,
-            styles=styles,
-            video_resolution=self.output_config.video_resolution,
-        )
-
-        # Initialize output generator
-        return OutputGenerator(config=generator_config, logger=self.logger)
+        return OutputGenerator(config=self.output_config, logger=self.logger)
 
     def process(self) -> LyricsControllerResult:
         """
@@ -328,17 +279,18 @@ def generate_outputs(self) -> None:
                 audio_filepath=self.audio_filepath,
                 artist=self.artist,
                 title=self.title,
-                render_video=self.output_config.render_video,
             )
 
-            # Store output paths - access attributes directly instead of using .get()
+            # Store all output paths in results
             self.results.lrc_filepath = output_files.lrc
             self.results.ass_filepath = output_files.ass
-
-            self.results.mp3_filepath = output_files.mp3
+            self.results.video_filepath = output_files.video
+            self.results.original_txt = output_files.original_txt
+            self.results.corrected_txt = output_files.corrected_txt
+            self.results.corrections_json = output_files.corrections_json
             self.results.cdg_filepath = output_files.cdg
+            self.results.mp3_filepath = output_files.mp3
             self.results.cdg_zip_filepath = output_files.cdg_zip
-            self.results.video_filepath = output_files.video
 
         except Exception as e:
             self.logger.error(f"Failed to generate outputs: {str(e)}")

diff --git a/lyrics_transcriber/correction/corrector.py b/lyrics_transcriber/correction/corrector.py
@@ -94,15 +94,6 @@ def _preserve_formatting(self, original: str, new_word: str) -> str:
         trailing_space = " " if original != original.rstrip() else ""
         return leading_space + new_word.strip() + trailing_space
 
-    def _try_correct_word(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Attempt to correct a word using available handlers."""
-        for handler in self.handlers:
-            if handler.can_handle(gap, current_word_idx):
-                correction = handler.handle(gap, word, current_word_idx, segment_idx)
-                if correction:
-                    return correction
-        return None
-
     def _process_corrections(
         self, segments: List[LyricsSegment], gap_sequences: List[GapSequence]
     ) -> Tuple[List[WordCorrection], List[LyricsSegment]]:

diff --git a/lyrics_transcriber/output/cdg.py b/lyrics_transcriber/output/cdg.py
@@ -88,7 +88,7 @@ def _convert_segments_to_lyrics_data(self, segments: List[LyricsSegment]) -> Lis
                 # Convert time from seconds to centiseconds
                 timestamp = int(word.start_time * 100)
                 lyrics_data.append({"timestamp": timestamp, "text": word.text.upper()})  # CDG format expects uppercase text
-                self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
+                # self.logger.debug(f"Added lyric: timestamp {timestamp}, text '{word.text}'")
 
         # Sort by timestamp to ensure correct order
         lyrics_data.sort(key=lambda x: x["timestamp"])
@@ -312,20 +312,20 @@ def _format_lyrics_data(self, lyrics_data: List[dict], instrumentals: List[dict]
         formatted_lyrics = []
 
         for i, lyric in enumerate(lyrics_data):
-            self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
+            # self.logger.debug(f"Processing lyric {i}: timestamp {lyric['timestamp']}, text '{lyric['text']}'")
 
             if i == 0 or lyric["timestamp"] - lyrics_data[i - 1]["timestamp"] >= cdg_styles["lead_in_threshold"]:
                 lead_in_start = lyric["timestamp"] - cdg_styles["lead_in_total"]
-                self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
+                # self.logger.debug(f"Adding lead-in before lyric {i} at timestamp {lead_in_start}")
                 for j, symbol in enumerate(cdg_styles["lead_in_symbols"]):
                     sync_time = lead_in_start + j * cdg_styles["lead_in_duration"]
                     sync_times.append(sync_time)
                     formatted_lyrics.append(symbol)
-                    self.logger.debug(f"  Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
+                    # self.logger.debug(f"  Added lead-in symbol {j+1}: '{symbol}' at {sync_time}")
 
             sync_times.append(lyric["timestamp"])
             formatted_lyrics.append(lyric["text"])
-            self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
+            # self.logger.debug(f"Added lyric: '{lyric['text']}' at {lyric['timestamp']}")
 
         formatted_text = self.format_lyrics(
             formatted_lyrics,
@@ -446,24 +446,24 @@ def format_lyrics(self, lyrics_data, instrumentals, sync_times, font_path=None,
         page_number = 1
 
         for i, text in enumerate(lyrics_data):
-            self.logger.debug(f"Processing text {i}: '{text}' (sync time: {sync_times[i]})")
+            # self.logger.debug(f"Processing text {i}: '{text}' (sync time: {sync_times[i]})")
 
             if text.startswith("/"):
                 if current_line:
                     wrapped_lines = get_wrapped_text(current_line.strip(), font, self.cdg_visible_width).split("\n")
                     for wrapped_line in wrapped_lines:
                         formatted_lyrics.append(wrapped_line)
                         lines_on_page += 1
-                        self.logger.debug(f"Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
+                        # self.logger.debug(f"Added wrapped line: '{wrapped_line}'. Lines on page: {lines_on_page}")
                         if lines_on_page == 4:
                             lines_on_page = 0
                             page_number += 1
-                            self.logger.debug(f"Page full. New page number: {page_number}")
+                            # self.logger.debug(f"Page full. New page number: {page_number}")
                     current_line = ""
                 text = text[1:]
 
             current_line += text + " "
-            self.logger.debug(f"Current line: '{current_line}'")
+            # self.logger.debug(f"Current line: '{current_line}'")
 
             is_last_before_instrumental = any(
                 inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
@@ -475,29 +475,29 @@ def format_lyrics(self, lyrics_data, instrumentals, sync_times, font_path=None,
                     for wrapped_line in wrapped_lines:
                         formatted_lyrics.append(wrapped_line)
                         lines_on_page += 1
-                        self.logger.debug(f"Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}")
+                        # self.logger.debug(f"Added wrapped line at end of section: '{wrapped_line}'. Lines on page: {lines_on_page}")
                         if lines_on_page == 4:
                             lines_on_page = 0
                             page_number += 1
-                            self.logger.debug(f"Page full. New page number: {page_number}")
+                            # self.logger.debug(f"Page full. New page number: {page_number}")
                     current_line = ""
 
                 if is_last_before_instrumental:
                     blank_lines_needed = 4 - lines_on_page
                     if blank_lines_needed < 4:
                         formatted_lyrics.extend(["~"] * blank_lines_needed)
-                        self.logger.debug(f"Added {blank_lines_needed} empty lines before instrumental. Lines on page was {lines_on_page}")
+                        # self.logger.debug(f"Added {blank_lines_needed} empty lines before instrumental. Lines on page was {lines_on_page}")
                     lines_on_page = 0
                     page_number += 1
-                    self.logger.debug(f"Reset lines_on_page to 0. New page number: {page_number}")
+                    # self.logger.debug(f"Reset lines_on_page to 0. New page number: {page_number}")
 
         final_lyrics = []
         for line in formatted_lyrics:
             final_lyrics.append(line)
             if line.endswith(("!", "?", ".")) and not line == "~":
                 final_lyrics.append("~")
-                self.logger.debug("Added empty line after punctuation")
+                # self.logger.debug("Added empty line after punctuation")
 
         result = "\n".join(final_lyrics)
-        self.logger.debug(f"Final formatted lyrics:\n{result}")
+        # self.logger.debug(f"Final formatted lyrics:\n{result}")
         return result
diff --git a/lyrics_transcriber/output/cdgmaker/composer.py b/lyrics_transcriber/output/cdgmaker/composer.py
@@ -969,11 +969,11 @@ def _compose_lyric(
         # Queue the erasing of this line if necessary
         if should_erase_this_line:
             assert line_erase_info is not None
-            logger.debug(
-                f"t={self.writer.packets_queued}: erasing lyric "
-                f"{line_erase_info.lyric_index} line "
-                f"{line_erase_info.line_index}"
-            )
+            # logger.debug(
+            #     f"t={self.writer.packets_queued}: erasing lyric "
+            #     f"{line_erase_info.lyric_index} line "
+            #     f"{line_erase_info.line_index}"
+            # )
             if line_erase_info.text.strip():
                 state.draw_queue.extend(line_image_to_packets(
                     line_erase_info.image,
@@ -987,11 +987,11 @@ def _compose_lyric(
         # Queue the drawing of this line if necessary
         if should_draw_this_line:
             assert line_draw_info is not None
-            logger.debug(
-                f"t={self.writer.packets_queued}: drawing lyric "
-                f"{line_draw_info.lyric_index} line "
-                f"{line_draw_info.line_index}"
-            )
+            # logger.debug(
+            #     f"t={self.writer.packets_queued}: drawing lyric "
+            #     f"{line_draw_info.lyric_index} line "
+            #     f"{line_draw_info.line_index}"
+            # )
             if line_draw_info.text.strip():
                 state.draw_queue.extend(line_image_to_packets(
                     line_draw_info.image,