Made styles configurable by JSON config file with combined config for prep and CDG in finalise, etc.
nomadkaraoke · Dec 28, 2024 · 5d8b697 · 5d8b697
1 parent 94ea24b
commit 5d8b697
Showing 9 changed files with 312 additions and 334 deletions.
diff --git a/cdgmaker/composer.py b/cdgmaker/composer.py
@@ -1558,8 +1558,8 @@ def _compose_intro(self):
         background_image = self._load_image(
             self.config.title_screen_background,
             [
-                (17, 20, 39),  # background
-                (255, 170, 204),  # border
+                self.config.background,  # background
+                self.config.border,  # border
                 self.config.title_color,   # title color
                 self.config.artist_color,  # artist color
             ],
@@ -1658,8 +1658,9 @@ def _compose_intro(self):
         for coord in self._gradient_to_tile_positions(transition):
             self.writer.queue_packets(packets.get(coord, []))
 
-        INTRO_DURATION = 5 * CDG_FPS  # 5 seconds * 300 frames per second = 1500 frames
-        FIRST_SYLLABLE_BUFFER = 3 * CDG_FPS  # 3 seconds * 300 frames per second = 900 frames
+        # Replace hardcoded values with configured ones
+        INTRO_DURATION = int(self.config.intro_duration_seconds * CDG_FPS)
+        FIRST_SYLLABLE_BUFFER = int(self.config.first_syllable_buffer_seconds * CDG_FPS)
 
         # Queue the intro screen for the configured intro duration
         end_time = INTRO_DURATION
@@ -1676,14 +1677,14 @@ def _compose_intro(self):
         logger.debug(f"first syllable starts at {first_syllable_start_offset}")
 
         MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE = INTRO_DURATION + FIRST_SYLLABLE_BUFFER
-        # If the first syllable is within 8 seconds, add 5 seconds of silence
+        # If the first syllable is within buffer+intro time, add silence
         # Otherwise, don't add any silence
         if first_syllable_start_offset < MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE:
             self.intro_delay = MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE
-            logger.info(f"First syllable within 8 seconds. Adding {self.intro_delay} frames of silence.")
+            logger.info(f"First syllable within {self.config.intro_duration_seconds + self.config.first_syllable_buffer_seconds} seconds. Adding {self.intro_delay} frames of silence.")
         else:
             self.intro_delay = 0
-            logger.info("First syllable after 8 seconds. No additional silence needed.")
+            logger.info("First syllable after buffer period. No additional silence needed.")
 
     def _compose_outro(self, end: int):
         # TODO Make it so the outro screen is not hardcoded
@@ -1695,12 +1696,12 @@ def _compose_outro(self, end: int):
         logger.debug("loading outro background image")
         # Load background image
         background_image = self._load_image(
-            self.config.title_screen_background,
+            self.config.outro_background,
             [
-                (17, 20, 39),  # background
-                (255, 170, 204),  # border
-                self.config.artist_color,  # "Thank you for singing" color
-                self.config.title_color,   # "nomadkaraoke.com" color
+                self.config.background,  # background
+                self.config.border,  # border
+                self.config.outro_line1_color,
+                self.config.outro_line2_color,
             ],
         )
 
@@ -1712,33 +1713,41 @@ def _compose_outro(self, end: int):
         text_image = Image.new("P", (CDG_VISIBLE_WIDTH, MAX_HEIGHT * 2), 0)
         y = 0
 
-        # Render "Thank you for singing" text
+        # Render first line of outro text
+        outro_text_line1 = self.config.outro_text_line1.replace("$artist", self.config.artist).replace("$title", self.config.title)
+
         for image in render_lines(
             get_wrapped_text(
-                "THANK YOU FOR SINGING!",
+                outro_text_line1,
                 font=smallfont,
                 width=text_image.width,
             ).split("\n"),
             font=smallfont,
         ):
             text_image.paste(
-                image.point(lambda v: v and 2, "P"),  # Use index 2 for artist color
+                image.point(lambda v: v and 2, "P"),  # Use index 2 for line 1 color
                 ((text_image.width - image.width) // 2, y),
                 mask=image.point(lambda v: v and 255, "1"),
             )
             y += int(smallfont.size)
 
-        # Render "nomadkaraoke.com" text
+
+        # Add vertical gap between outro line 1 and line 2 using configured value
+        y += self.config.outro_line1_line2_gap
+
+        # Render second line of outro text
+        outro_text_line2 = self.config.outro_text_line2.replace("$artist", self.config.artist).replace("$title", self.config.title)
+
         for image in render_lines(
             get_wrapped_text(
-                "nomadkaraoke.com",
+                outro_text_line2,
                 font=smallfont,
                 width=text_image.width,
             ).split("\n"),
             font=smallfont,
         ):
             text_image.paste(
-                image.point(lambda v: v and 3, "P"),  # Use index 3 for title color
+                image.point(lambda v: v and 3, "P"),  # Use index 3 for line 2 color
                 ((text_image.width - image.width) // 2, y),
                 mask=image.point(lambda v: v and 255, "1"),
             )
@@ -1778,7 +1787,7 @@ def _compose_outro(self, end: int):
 
         # Queue background image packets (and apply transition)
         transition = Image.open(
-            package_dir / "transitions" / f"{self.config.title_screen_transition}.png"
+            package_dir / "transitions" / f"{self.config.outro_transition}.png"
         )
         for coord in self._gradient_to_tile_positions(transition):
             self.writer.queue_packets(packets.get(coord, []))

diff --git a/cdgmaker/config.py b/cdgmaker/config.py
@@ -104,6 +104,7 @@ class Settings:
     file: Path
     font: Path
     title_screen_background: Path
+    outro_background: Path
 
     outname: str = "output"
     clear_mode: LyricClearMode = LyricClearMode.LINE_DELAYED
@@ -126,6 +127,16 @@ class Settings:
     title_screen_transition: str = "centertexttoplogobottomtext"
     title_artist_gap: int = 30
 
+    intro_duration_seconds: float = 5.0
+    first_syllable_buffer_seconds: float = 3.0
+
+    outro_transition: str = "centertexttoplogobottomtext"
+    outro_text_line1: str = "THANK YOU FOR SINGING!"
+    outro_text_line2: str = "nomadkaraoke.com"
+    outro_line1_line2_gap: int = 30
+    outro_line1_color: RGBColor = field(converter=to_rgbcolor, default="#ffffff")
+    outro_line2_color: RGBColor = field(converter=to_rgbcolor, default="#ffffff")
+
 
 __all__ = [
     "RGBColor",

diff --git a/cdgmaker/fonts/Oswald-SemiBold.ttf b/cdgmaker/fonts/Oswald-SemiBold.ttf
diff --git a/cdgmaker/lrc_to_cdg.py b/cdgmaker/lrc_to_cdg.py
@@ -124,6 +124,15 @@ def generate_toml(
             - lead_in_duration
             - lead_in_total
             - title_artist_gap
+            - intro_duration_seconds
+            - first_syllable_buffer_seconds
+            - outro_background
+            - outro_transition
+            - outro_text_line1
+            - outro_text_line2
+            - outro_line1_color
+            - outro_line2_color
+            - outro_line1_line2_gap
     """
     # Validate required style parameters
     required_styles = {
@@ -156,6 +165,15 @@ def generate_toml(
         "lead_in_duration",
         "lead_in_total",
         "title_artist_gap",
+        "intro_duration_seconds",
+        "first_syllable_buffer_seconds",
+        "outro_background",
+        "outro_transition",
+        "outro_text_line1",
+        "outro_text_line2",
+        "outro_line1_color",
+        "outro_line2_color",
+        "outro_line1_line2_gap",
     }
 
     missing_styles = required_styles - set(cdg_styles.keys())
@@ -241,6 +259,15 @@ def generate_toml(
         "title_screen_background": cdg_styles["title_screen_background"],
         "title_screen_transition": cdg_styles["title_screen_transition"],
         "instrumentals": instrumentals,
+        "intro_duration_seconds": cdg_styles["intro_duration_seconds"],
+        "first_syllable_buffer_seconds": cdg_styles["first_syllable_buffer_seconds"],
+        "outro_background": cdg_styles["outro_background"],
+        "outro_transition": cdg_styles["outro_transition"],
+        "outro_text_line1": cdg_styles["outro_text_line1"],
+        "outro_text_line2": cdg_styles["outro_text_line2"],
+        "outro_line1_color": cdg_styles["outro_line1_color"],
+        "outro_line2_color": cdg_styles["outro_line2_color"],
+        "outro_line1_line2_gap": cdg_styles["outro_line1_line2_gap"],
     }
 
     with open(output_file, "w", encoding="utf-8") as f:

diff --git a/karaoke_prep/karaoke_prep.py b/karaoke_prep/karaoke_prep.py
@@ -15,56 +15,40 @@
 class KaraokePrep:
     def __init__(
         self,
+        # Basic inputs
         input_media=None,
         artist=None,
         title=None,
         filename_pattern=None,
+        # Logging & Debugging
         dry_run=False,
         log_level=logging.DEBUG,
         log_formatter=None,
+        render_bounding_boxes=False,
+        # Input/Output Configuration
+        output_dir=".",
+        create_track_subfolders=False,
+        lossless_output_format="FLAC",
+        output_png=True,
+        output_jpg=True,
+        # Audio Processing Configuration
         clean_instrumental_model="model_bs_roformer_ep_317_sdr_12.9755.ckpt",
         backing_vocals_models=["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt"],
         other_stems_models=["htdemucs_6s.yaml"],
         model_file_dir=os.path.join(tempfile.gettempdir(), "audio-separator-models"),
-        output_dir=".",
         existing_instrumental=None,
-        lossless_output_format="FLAC",
+        denoise_enabled=True,
+        normalization_enabled=True,
+        # Hardware Acceleration
         use_cuda=False,
         use_coreml=False,
-        normalization_enabled=True,
-        denoise_enabled=True,
-        create_track_subfolders=False,
+        # Lyrics Configuration
         lyrics_artist=None,
         lyrics_title=None,
         skip_lyrics=False,
         skip_transcription=False,
-        output_png=True,
-        output_jpg=True,
-        render_bounding_boxes=False,
-        existing_title_image=None,
-        intro_video_duration=5,
-        intro_background_color="#000000",
-        intro_background_image=None,
-        intro_font="Montserrat-Bold.ttf",
-        intro_artist_color="#ffffff",
-        intro_title_color="#ffdf6b",
-        intro_extra_text=None,
-        intro_extra_text_color="#ffffff",
-        intro_extra_text_region=None,
-        intro_title_region=None,
-        intro_artist_region=None,
-        existing_end_image=None,
-        end_video_duration=5,
-        end_background_color="#000000",
-        end_background_image=None,
-        end_font="Montserrat-Bold.ttf",
-        end_artist_color="#ffffff",
-        end_title_color="#ffdf6b",
-        end_extra_text="THANK YOU FOR SINGING!",
-        end_extra_text_color="#ffffff",
-        end_extra_text_region=None,
-        end_title_region=None,
-        end_artist_region=None,
+        # Style Configuration
+        style_params=None,
     ):
         self.logger = logging.getLogger(__name__)
         self.logger.setLevel(log_level)
@@ -82,39 +66,111 @@ def __init__(
         self.logger.debug(f"KaraokePrep instantiating with input_media: {input_media} artist: {artist} title: {title}")
 
         self.dry_run = dry_run
-
         self.extractor = None
         self.media_id = None
         self.url = None
         self.input_media = input_media
         self.artist = artist
         self.title = title
         self.filename_pattern = filename_pattern
+
+        # Audio Processing
         self.clean_instrumental_model = clean_instrumental_model
         self.backing_vocals_models = backing_vocals_models
         self.other_stems_models = other_stems_models
         self.model_file_dir = model_file_dir
+        self.existing_instrumental = existing_instrumental
+        self.denoise_enabled = denoise_enabled
+        self.normalization_enabled = normalization_enabled
+
+        # Input/Output
         self.output_dir = output_dir
         self.lossless_output_format = lossless_output_format.lower()
+        self.create_track_subfolders = create_track_subfolders
+        self.output_png = output_png
+        self.output_jpg = output_jpg
+
+        # Hardware
         self.use_cuda = use_cuda
         self.use_coreml = use_coreml
-        self.normalization_enabled = normalization_enabled
-        self.denoise_enabled = denoise_enabled
-        self.create_track_subfolders = create_track_subfolders
-        self.existing_instrumental = existing_instrumental
-        self.existing_title_image = existing_title_image
 
+        # Lyrics
         self.lyrics = None
         self.lyrics_artist = lyrics_artist
         self.lyrics_title = lyrics_title
         self.skip_lyrics = skip_lyrics
-        self.render_bounding_boxes = render_bounding_boxes
-        self.output_png = output_png
-        self.output_jpg = output_jpg
-        self.intro_video_duration = intro_video_duration
-        self.end_video_duration = end_video_duration
         self.skip_transcription = skip_transcription
 
+        # Style
+        self.render_bounding_boxes = render_bounding_boxes
+
+        # Set default style parameters if none provided
+        if style_params is None:
+            style_params = {
+                "intro": {
+                    "video_duration": 5,
+                    "existing_image": None,
+                    "background_color": "#000000",
+                    "background_image": None,
+                    "font": "Montserrat-Bold.ttf",
+                    "artist_color": "#ffdf6b",
+                    "title_color": "#ffffff",
+                    "title_region": "370, 200, 3100, 480",
+                    "artist_region": "370, 700, 3100, 480",
+                    "extra_text": None,
+                    "extra_text_color": "#ffffff",
+                    "extra_text_region": "370, 1200, 3100, 480",
+                },
+                "end": {
+                    "video_duration": 5,
+                    "existing_image": None,
+                    "background_color": "#000000",
+                    "background_image": None,
+                    "font": "Montserrat-Bold.ttf",
+                    "artist_color": "#ffdf6b",
+                    "title_color": "#ffffff",
+                    "title_region": None,
+                    "artist_region": None,
+                    "extra_text": "THANK YOU FOR SINGING!",
+                    "extra_text_color": "#ff7acc",
+                    "extra_text_region": None,
+                },
+            }
+
+        # Set up title format from style params
+        self.title_format = {
+            "background_color": style_params["intro"]["background_color"],
+            "background_image": style_params["intro"]["background_image"],
+            "font": style_params["intro"]["font"],
+            "artist_color": style_params["intro"]["artist_color"],
+            "title_color": style_params["intro"]["title_color"],
+            "extra_text": style_params["intro"]["extra_text"],
+            "extra_text_color": style_params["intro"]["extra_text_color"],
+            "extra_text_region": style_params["intro"]["extra_text_region"],
+            "title_region": style_params["intro"]["title_region"],
+            "artist_region": style_params["intro"]["artist_region"],
+        }
+
+        # Set up end format from style params
+        self.end_format = {
+            "background_color": style_params["end"]["background_color"],
+            "background_image": style_params["end"]["background_image"],
+            "font": style_params["end"]["font"],
+            "artist_color": style_params["end"]["artist_color"],
+            "title_color": style_params["end"]["title_color"],
+            "extra_text": style_params["end"]["extra_text"],
+            "extra_text_color": style_params["end"]["extra_text_color"],
+            "extra_text_region": style_params["end"]["extra_text_region"],
+            "title_region": style_params["end"]["title_region"],
+            "artist_region": style_params["end"]["artist_region"],
+        }
+
+        # Store video durations and existing images
+        self.intro_video_duration = style_params["intro"]["video_duration"]
+        self.end_video_duration = style_params["end"]["video_duration"]
+        self.existing_title_image = style_params["intro"]["existing_image"]
+        self.existing_end_image = style_params["end"]["existing_image"]
+
         # Path to the Windows PyInstaller frozen bundled ffmpeg.exe, or the system-installed FFmpeg binary on Mac/Linux
         ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg.exe") if getattr(sys, "frozen", False) else "ffmpeg"
 
@@ -125,40 +181,12 @@ def __init__(
         else:
             self.ffmpeg_base_command += " -loglevel fatal"
 
-        self.title_format = {
-            "background_color": intro_background_color,
-            "background_image": intro_background_image,
-            "font": intro_font,
-            "artist_color": intro_artist_color,
-            "title_color": intro_title_color,
-            "extra_text": intro_extra_text,
-            "extra_text_color": intro_extra_text_color,
-            "extra_text_region": self.parse_region(intro_extra_text_region) or (370, 1200, 3100, 480),
-            "title_region": self.parse_region(intro_title_region) or (370, 200, 3100, 480),
-            "artist_region": self.parse_region(intro_artist_region) or (370, 700, 3100, 480),
-        }
-
         self.logger.debug(f"Initialized title_format with extra_text: {self.title_format['extra_text']}")
         self.logger.debug(f"Initialized title_format with extra_text_region: {self.title_format['extra_text_region']}")
 
-        self.end_format = {
-            "background_color": end_background_color,
-            "background_image": end_background_image,
-            "font": end_font,
-            "artist_color": end_artist_color,
-            "title_color": end_title_color,
-            "extra_text": end_extra_text,
-            "extra_text_color": end_extra_text_color,
-            "extra_text_region": self.parse_region(end_extra_text_region) or (370, 300, 3100, 400),
-            "title_region": self.parse_region(end_title_region) or (370, 800, 3100, 400),
-            "artist_region": self.parse_region(end_artist_region) or (370, 1300, 3100, 400),
-        }
-
         self.logger.debug(f"Initialized end_format with extra_text: {self.end_format['extra_text']}")
         self.logger.debug(f"Initialized end_format with extra_text_region: {self.end_format['extra_text_region']}")
 
-        self.existing_end_image = existing_end_image
-
         self.extracted_info = None
         self.persistent_artist = None
 
@@ -262,6 +290,13 @@ def copy_input_media(self, input_media, output_filename_no_extension):
         self.logger.debug(f"Copying media from local path {input_media} to filename {output_filename_no_extension} + existing extension")
 
         copied_file_name = output_filename_no_extension + os.path.splitext(input_media)[1]
+        self.logger.debug(f"Target filename: {copied_file_name}")
+
+        # Check if source and destination are the same
+        if os.path.abspath(input_media) == os.path.abspath(copied_file_name):
+            self.logger.info("Source and destination are the same file, skipping copy")
+            return input_media
+
         self.logger.debug(f"Copying {input_media} to {copied_file_name}")
         shutil.copy2(input_media, copied_file_name)
 
@@ -673,20 +708,23 @@ def _create_background(self, format, resolution):
     def _render_all_text(self, draw, font_path, title_text, artist_text, format, render_bounding_boxes):
         """Render all text elements on the image."""
         # Render title
-        region = self._render_text_in_region(draw, title_text.upper(), font_path, format["title_region"], format["title_color"])
-        if render_bounding_boxes:
-            self._draw_bounding_box(draw, region, format["title_color"])
+        if format["title_region"]:
+            region_parsed = self.parse_region(format["title_region"])
+            region = self._render_text_in_region(draw, title_text.upper(), font_path, region_parsed, format["title_color"])
+            if render_bounding_boxes:
+                self._draw_bounding_box(draw, region, format["title_color"])
 
         # Render artist
-        region = self._render_text_in_region(draw, artist_text.upper(), font_path, format["artist_region"], format["artist_color"])
-        if render_bounding_boxes:
-            self._draw_bounding_box(draw, region, format["artist_color"])
+        if format["artist_region"]:
+            region_parsed = self.parse_region(format["artist_region"])
+            region = self._render_text_in_region(draw, artist_text.upper(), font_path, region_parsed, format["artist_color"])
+            if render_bounding_boxes:
+                self._draw_bounding_box(draw, region, format["artist_color"])
 
         # Render extra text if provided
         if format["extra_text"]:
-            region = self._render_text_in_region(
-                draw, format["extra_text"], font_path, format["extra_text_region"], format["extra_text_color"]
-            )
+            region_parsed = self.parse_region(format["extra_text_region"])
+            region = self._render_text_in_region(draw, format["extra_text"], font_path, region_parsed, format["extra_text_color"])
             if render_bounding_boxes:
                 self._draw_bounding_box(draw, region, format["extra_text_color"])
 

diff --git a/karaoke_prep/resources/Oswald-SemiBold.ttf b/karaoke_prep/resources/Oswald-SemiBold.ttf
diff --git a/karaoke_prep/utils/finalise_cli.py b/karaoke_prep/utils/finalise_cli.py
@@ -121,9 +121,9 @@ def main():
     )
 
     parser.add_argument(
-        "--cdg_styles_json",
+        "--style_params_json",
         default=None,
-        help="Optional: Path to JSON file containing CDG style configuration. Required if --enable_cdg is used. Example: --cdg_styles_json='/path/to/cdg_styles.json'",
+        help="Optional: Path to JSON file containing CDG style configuration. Required if --enable_cdg is used. Example: --style_params_json='/path/to/cdg_styles.json'",
     )
 
     args = parser.parse_args()
@@ -136,14 +136,15 @@ def main():
     # Load CDG styles if CDG generation is enabled
     cdg_styles = None
     if args.enable_cdg:
-        if not args.cdg_styles_json:
-            logger.error("CDG styles JSON file path (--cdg_styles_json) is required when --enable_cdg is used")
+        if not args.style_params_json:
+            logger.error("CDG styles JSON file path (--style_params_json) is required when --enable_cdg is used")
             sys.exit(1)
         try:
-            with open(args.cdg_styles_json, "r") as f:
-                cdg_styles = json.loads(f.read())
+            with open(args.style_params_json, "r") as f:
+                style_params = json.loads(f.read())
+                cdg_styles = style_params["cdg"]
         except FileNotFoundError:
-            logger.error(f"CDG styles configuration file not found: {args.cdg_styles_json}")
+            logger.error(f"CDG styles configuration file not found: {args.style_params_json}")
             sys.exit(1)
         except json.JSONDecodeError as e:
             logger.error(f"Invalid JSON in CDG styles configuration file: {e}")

diff --git a/karaoke_prep/utils/prep_cli.py b/karaoke_prep/utils/prep_cli.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "karaoke-prep"
-version = "0.32.0"
+version = "0.33.0"
 description = "Prepare for karaoke video creation, by downloading audio and lyrics for a specified song or playlist from youtube and separating audio stems. After syncing, finalise the video with a title screen!"
 authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
 license = "MIT"