Skip to content

Commit

Permalink
Made styles configurable by JSON config file with combined config for…
Browse files Browse the repository at this point in the history
… prep and CDG in finalise, etc.
beveradb committed Dec 28, 2024
1 parent 94ea24b commit 5d8b697
Showing 9 changed files with 312 additions and 334 deletions.
47 changes: 28 additions & 19 deletions cdgmaker/composer.py
Original file line number Diff line number Diff line change
@@ -1558,8 +1558,8 @@ def _compose_intro(self):
background_image = self._load_image(
self.config.title_screen_background,
[
(17, 20, 39), # background
(255, 170, 204), # border
self.config.background, # background
self.config.border, # border
self.config.title_color, # title color
self.config.artist_color, # artist color
],
@@ -1658,8 +1658,9 @@ def _compose_intro(self):
for coord in self._gradient_to_tile_positions(transition):
self.writer.queue_packets(packets.get(coord, []))

INTRO_DURATION = 5 * CDG_FPS # 5 seconds * 300 frames per second = 1500 frames
FIRST_SYLLABLE_BUFFER = 3 * CDG_FPS # 3 seconds * 300 frames per second = 900 frames
# Intro timing comes from config: seconds are converted to CDG frames
INTRO_DURATION = int(self.config.intro_duration_seconds * CDG_FPS)
FIRST_SYLLABLE_BUFFER = int(self.config.first_syllable_buffer_seconds * CDG_FPS)

# Queue the intro screen for the configured intro duration
end_time = INTRO_DURATION
@@ -1676,14 +1677,14 @@ def _compose_intro(self):
logger.debug(f"first syllable starts at {first_syllable_start_offset}")

MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE = INTRO_DURATION + FIRST_SYLLABLE_BUFFER
# If the first syllable is within 8 seconds, add 5 seconds of silence
# If the first syllable is within buffer+intro time, add silence
# Otherwise, don't add any silence
if first_syllable_start_offset < MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE:
self.intro_delay = MINIMUM_FIRST_SYLLABLE_TIME_FOR_NO_SILENCE
logger.info(f"First syllable within 8 seconds. Adding {self.intro_delay} frames of silence.")
logger.info(f"First syllable within {self.config.intro_duration_seconds + self.config.first_syllable_buffer_seconds} seconds. Adding {self.intro_delay} frames of silence.")
else:
self.intro_delay = 0
logger.info("First syllable after 8 seconds. No additional silence needed.")
logger.info("First syllable after buffer period. No additional silence needed.")

def _compose_outro(self, end: int):
# TODO Make it so the outro screen is not hardcoded
@@ -1695,12 +1696,12 @@ def _compose_outro(self, end: int):
logger.debug("loading outro background image")
# Load background image
background_image = self._load_image(
self.config.title_screen_background,
self.config.outro_background,
[
(17, 20, 39), # background
(255, 170, 204), # border
self.config.artist_color, # "Thank you for singing" color
self.config.title_color, # "nomadkaraoke.com" color
self.config.background, # background
self.config.border, # border
self.config.outro_line1_color,
self.config.outro_line2_color,
],
)

@@ -1712,33 +1713,41 @@ def _compose_outro(self, end: int):
text_image = Image.new("P", (CDG_VISIBLE_WIDTH, MAX_HEIGHT * 2), 0)
y = 0

# Render "Thank you for singing" text
# Render first line of outro text
outro_text_line1 = self.config.outro_text_line1.replace("$artist", self.config.artist).replace("$title", self.config.title)

for image in render_lines(
get_wrapped_text(
"THANK YOU FOR SINGING!",
outro_text_line1,
font=smallfont,
width=text_image.width,
).split("\n"),
font=smallfont,
):
text_image.paste(
image.point(lambda v: v and 2, "P"), # Use index 2 for artist color
image.point(lambda v: v and 2, "P"), # Use index 2 for line 1 color
((text_image.width - image.width) // 2, y),
mask=image.point(lambda v: v and 255, "1"),
)
y += int(smallfont.size)

# Render "nomadkaraoke.com" text

# Add vertical gap between outro line 1 and line 2 using configured value
y += self.config.outro_line1_line2_gap

# Render second line of outro text
outro_text_line2 = self.config.outro_text_line2.replace("$artist", self.config.artist).replace("$title", self.config.title)

for image in render_lines(
get_wrapped_text(
"nomadkaraoke.com",
outro_text_line2,
font=smallfont,
width=text_image.width,
).split("\n"),
font=smallfont,
):
text_image.paste(
image.point(lambda v: v and 3, "P"), # Use index 3 for title color
image.point(lambda v: v and 3, "P"), # Use index 3 for line 2 color
((text_image.width - image.width) // 2, y),
mask=image.point(lambda v: v and 255, "1"),
)
@@ -1778,7 +1787,7 @@ def _compose_outro(self, end: int):

# Queue background image packets (and apply transition)
transition = Image.open(
package_dir / "transitions" / f"{self.config.title_screen_transition}.png"
package_dir / "transitions" / f"{self.config.outro_transition}.png"
)
for coord in self._gradient_to_tile_positions(transition):
self.writer.queue_packets(packets.get(coord, []))
11 changes: 11 additions & 0 deletions cdgmaker/config.py
Original file line number Diff line number Diff line change
@@ -104,6 +104,7 @@ class Settings:
file: Path
font: Path
title_screen_background: Path
outro_background: Path

outname: str = "output"
clear_mode: LyricClearMode = LyricClearMode.LINE_DELAYED
@@ -126,6 +127,16 @@ class Settings:
title_screen_transition: str = "centertexttoplogobottomtext"
title_artist_gap: int = 30

intro_duration_seconds: float = 5.0
first_syllable_buffer_seconds: float = 3.0

outro_transition: str = "centertexttoplogobottomtext"
outro_text_line1: str = "THANK YOU FOR SINGING!"
outro_text_line2: str = "nomadkaraoke.com"
outro_line1_line2_gap: int = 30
outro_line1_color: RGBColor = field(converter=to_rgbcolor, default="#ffffff")
outro_line2_color: RGBColor = field(converter=to_rgbcolor, default="#ffffff")


__all__ = [
"RGBColor",
Binary file added cdgmaker/fonts/Oswald-SemiBold.ttf
Binary file not shown.
27 changes: 27 additions & 0 deletions cdgmaker/lrc_to_cdg.py
Original file line number Diff line number Diff line change
@@ -124,6 +124,15 @@ def generate_toml(
- lead_in_duration
- lead_in_total
- title_artist_gap
- intro_duration_seconds
- first_syllable_buffer_seconds
- outro_background
- outro_transition
- outro_text_line1
- outro_text_line2
- outro_line1_color
- outro_line2_color
- outro_line1_line2_gap
"""
# Validate required style parameters
required_styles = {
@@ -156,6 +165,15 @@ def generate_toml(
"lead_in_duration",
"lead_in_total",
"title_artist_gap",
"intro_duration_seconds",
"first_syllable_buffer_seconds",
"outro_background",
"outro_transition",
"outro_text_line1",
"outro_text_line2",
"outro_line1_color",
"outro_line2_color",
"outro_line1_line2_gap",
}

missing_styles = required_styles - set(cdg_styles.keys())
@@ -241,6 +259,15 @@ def generate_toml(
"title_screen_background": cdg_styles["title_screen_background"],
"title_screen_transition": cdg_styles["title_screen_transition"],
"instrumentals": instrumentals,
"intro_duration_seconds": cdg_styles["intro_duration_seconds"],
"first_syllable_buffer_seconds": cdg_styles["first_syllable_buffer_seconds"],
"outro_background": cdg_styles["outro_background"],
"outro_transition": cdg_styles["outro_transition"],
"outro_text_line1": cdg_styles["outro_text_line1"],
"outro_text_line2": cdg_styles["outro_text_line2"],
"outro_line1_color": cdg_styles["outro_line1_color"],
"outro_line2_color": cdg_styles["outro_line2_color"],
"outro_line1_line2_gap": cdg_styles["outro_line1_line2_gap"],
}

with open(output_file, "w", encoding="utf-8") as f:
198 changes: 118 additions & 80 deletions karaoke_prep/karaoke_prep.py
Original file line number Diff line number Diff line change
@@ -15,56 +15,40 @@
class KaraokePrep:
def __init__(
self,
# Basic inputs
input_media=None,
artist=None,
title=None,
filename_pattern=None,
# Logging & Debugging
dry_run=False,
log_level=logging.DEBUG,
log_formatter=None,
render_bounding_boxes=False,
# Input/Output Configuration
output_dir=".",
create_track_subfolders=False,
lossless_output_format="FLAC",
output_png=True,
output_jpg=True,
# Audio Processing Configuration
clean_instrumental_model="model_bs_roformer_ep_317_sdr_12.9755.ckpt",
backing_vocals_models=["mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt"],
other_stems_models=["htdemucs_6s.yaml"],
model_file_dir=os.path.join(tempfile.gettempdir(), "audio-separator-models"),
output_dir=".",
existing_instrumental=None,
lossless_output_format="FLAC",
denoise_enabled=True,
normalization_enabled=True,
# Hardware Acceleration
use_cuda=False,
use_coreml=False,
normalization_enabled=True,
denoise_enabled=True,
create_track_subfolders=False,
# Lyrics Configuration
lyrics_artist=None,
lyrics_title=None,
skip_lyrics=False,
skip_transcription=False,
output_png=True,
output_jpg=True,
render_bounding_boxes=False,
existing_title_image=None,
intro_video_duration=5,
intro_background_color="#000000",
intro_background_image=None,
intro_font="Montserrat-Bold.ttf",
intro_artist_color="#ffffff",
intro_title_color="#ffdf6b",
intro_extra_text=None,
intro_extra_text_color="#ffffff",
intro_extra_text_region=None,
intro_title_region=None,
intro_artist_region=None,
existing_end_image=None,
end_video_duration=5,
end_background_color="#000000",
end_background_image=None,
end_font="Montserrat-Bold.ttf",
end_artist_color="#ffffff",
end_title_color="#ffdf6b",
end_extra_text="THANK YOU FOR SINGING!",
end_extra_text_color="#ffffff",
end_extra_text_region=None,
end_title_region=None,
end_artist_region=None,
# Style Configuration
style_params=None,
):
self.logger = logging.getLogger(__name__)
self.logger.setLevel(log_level)
@@ -82,39 +66,111 @@ def __init__(
self.logger.debug(f"KaraokePrep instantiating with input_media: {input_media} artist: {artist} title: {title}")

self.dry_run = dry_run

self.extractor = None
self.media_id = None
self.url = None
self.input_media = input_media
self.artist = artist
self.title = title
self.filename_pattern = filename_pattern

# Audio Processing
self.clean_instrumental_model = clean_instrumental_model
self.backing_vocals_models = backing_vocals_models
self.other_stems_models = other_stems_models
self.model_file_dir = model_file_dir
self.existing_instrumental = existing_instrumental
self.denoise_enabled = denoise_enabled
self.normalization_enabled = normalization_enabled

# Input/Output
self.output_dir = output_dir
self.lossless_output_format = lossless_output_format.lower()
self.create_track_subfolders = create_track_subfolders
self.output_png = output_png
self.output_jpg = output_jpg

# Hardware
self.use_cuda = use_cuda
self.use_coreml = use_coreml
self.normalization_enabled = normalization_enabled
self.denoise_enabled = denoise_enabled
self.create_track_subfolders = create_track_subfolders
self.existing_instrumental = existing_instrumental
self.existing_title_image = existing_title_image

# Lyrics
self.lyrics = None
self.lyrics_artist = lyrics_artist
self.lyrics_title = lyrics_title
self.skip_lyrics = skip_lyrics
self.render_bounding_boxes = render_bounding_boxes
self.output_png = output_png
self.output_jpg = output_jpg
self.intro_video_duration = intro_video_duration
self.end_video_duration = end_video_duration
self.skip_transcription = skip_transcription

# Style
self.render_bounding_boxes = render_bounding_boxes

# Set default style parameters if none provided
if style_params is None:
style_params = {
"intro": {
"video_duration": 5,
"existing_image": None,
"background_color": "#000000",
"background_image": None,
"font": "Montserrat-Bold.ttf",
"artist_color": "#ffdf6b",
"title_color": "#ffffff",
"title_region": "370, 200, 3100, 480",
"artist_region": "370, 700, 3100, 480",
"extra_text": None,
"extra_text_color": "#ffffff",
"extra_text_region": "370, 1200, 3100, 480",
},
"end": {
"video_duration": 5,
"existing_image": None,
"background_color": "#000000",
"background_image": None,
"font": "Montserrat-Bold.ttf",
"artist_color": "#ffdf6b",
"title_color": "#ffffff",
"title_region": None,
"artist_region": None,
"extra_text": "THANK YOU FOR SINGING!",
"extra_text_color": "#ff7acc",
"extra_text_region": None,
},
}

# Set up title format from style params
self.title_format = {
"background_color": style_params["intro"]["background_color"],
"background_image": style_params["intro"]["background_image"],
"font": style_params["intro"]["font"],
"artist_color": style_params["intro"]["artist_color"],
"title_color": style_params["intro"]["title_color"],
"extra_text": style_params["intro"]["extra_text"],
"extra_text_color": style_params["intro"]["extra_text_color"],
"extra_text_region": style_params["intro"]["extra_text_region"],
"title_region": style_params["intro"]["title_region"],
"artist_region": style_params["intro"]["artist_region"],
}

# Set up end format from style params
self.end_format = {
"background_color": style_params["end"]["background_color"],
"background_image": style_params["end"]["background_image"],
"font": style_params["end"]["font"],
"artist_color": style_params["end"]["artist_color"],
"title_color": style_params["end"]["title_color"],
"extra_text": style_params["end"]["extra_text"],
"extra_text_color": style_params["end"]["extra_text_color"],
"extra_text_region": style_params["end"]["extra_text_region"],
"title_region": style_params["end"]["title_region"],
"artist_region": style_params["end"]["artist_region"],
}

# Store video durations and existing images
self.intro_video_duration = style_params["intro"]["video_duration"]
self.end_video_duration = style_params["end"]["video_duration"]
self.existing_title_image = style_params["intro"]["existing_image"]
self.existing_end_image = style_params["end"]["existing_image"]

# Path to the Windows PyInstaller frozen bundled ffmpeg.exe, or the system-installed FFmpeg binary on Mac/Linux
ffmpeg_path = os.path.join(sys._MEIPASS, "ffmpeg.exe") if getattr(sys, "frozen", False) else "ffmpeg"

@@ -125,40 +181,12 @@ def __init__(
else:
self.ffmpeg_base_command += " -loglevel fatal"

self.title_format = {
"background_color": intro_background_color,
"background_image": intro_background_image,
"font": intro_font,
"artist_color": intro_artist_color,
"title_color": intro_title_color,
"extra_text": intro_extra_text,
"extra_text_color": intro_extra_text_color,
"extra_text_region": self.parse_region(intro_extra_text_region) or (370, 1200, 3100, 480),
"title_region": self.parse_region(intro_title_region) or (370, 200, 3100, 480),
"artist_region": self.parse_region(intro_artist_region) or (370, 700, 3100, 480),
}

self.logger.debug(f"Initialized title_format with extra_text: {self.title_format['extra_text']}")
self.logger.debug(f"Initialized title_format with extra_text_region: {self.title_format['extra_text_region']}")

self.end_format = {
"background_color": end_background_color,
"background_image": end_background_image,
"font": end_font,
"artist_color": end_artist_color,
"title_color": end_title_color,
"extra_text": end_extra_text,
"extra_text_color": end_extra_text_color,
"extra_text_region": self.parse_region(end_extra_text_region) or (370, 300, 3100, 400),
"title_region": self.parse_region(end_title_region) or (370, 800, 3100, 400),
"artist_region": self.parse_region(end_artist_region) or (370, 1300, 3100, 400),
}

self.logger.debug(f"Initialized end_format with extra_text: {self.end_format['extra_text']}")
self.logger.debug(f"Initialized end_format with extra_text_region: {self.end_format['extra_text_region']}")

self.existing_end_image = existing_end_image

self.extracted_info = None
self.persistent_artist = None

@@ -262,6 +290,13 @@ def copy_input_media(self, input_media, output_filename_no_extension):
self.logger.debug(f"Copying media from local path {input_media} to filename {output_filename_no_extension} + existing extension")

copied_file_name = output_filename_no_extension + os.path.splitext(input_media)[1]
self.logger.debug(f"Target filename: {copied_file_name}")

# Check if source and destination are the same
if os.path.abspath(input_media) == os.path.abspath(copied_file_name):
self.logger.info("Source and destination are the same file, skipping copy")
return input_media

self.logger.debug(f"Copying {input_media} to {copied_file_name}")
shutil.copy2(input_media, copied_file_name)

@@ -673,20 +708,23 @@ def _create_background(self, format, resolution):
def _render_all_text(self, draw, font_path, title_text, artist_text, format, render_bounding_boxes):
"""Render all text elements on the image."""
# Render title
region = self._render_text_in_region(draw, title_text.upper(), font_path, format["title_region"], format["title_color"])
if render_bounding_boxes:
self._draw_bounding_box(draw, region, format["title_color"])
if format["title_region"]:
region_parsed = self.parse_region(format["title_region"])
region = self._render_text_in_region(draw, title_text.upper(), font_path, region_parsed, format["title_color"])
if render_bounding_boxes:
self._draw_bounding_box(draw, region, format["title_color"])

# Render artist
region = self._render_text_in_region(draw, artist_text.upper(), font_path, format["artist_region"], format["artist_color"])
if render_bounding_boxes:
self._draw_bounding_box(draw, region, format["artist_color"])
if format["artist_region"]:
region_parsed = self.parse_region(format["artist_region"])
region = self._render_text_in_region(draw, artist_text.upper(), font_path, region_parsed, format["artist_color"])
if render_bounding_boxes:
self._draw_bounding_box(draw, region, format["artist_color"])

# Render extra text if provided
if format["extra_text"]:
region = self._render_text_in_region(
draw, format["extra_text"], font_path, format["extra_text_region"], format["extra_text_color"]
)
region_parsed = self.parse_region(format["extra_text_region"])
region = self._render_text_in_region(draw, format["extra_text"], font_path, region_parsed, format["extra_text_color"])
if render_bounding_boxes:
self._draw_bounding_box(draw, region, format["extra_text_color"])

Binary file added karaoke_prep/resources/Oswald-SemiBold.ttf
Binary file not shown.
15 changes: 8 additions & 7 deletions karaoke_prep/utils/finalise_cli.py
Original file line number Diff line number Diff line change
@@ -121,9 +121,9 @@ def main():
)

parser.add_argument(
"--cdg_styles_json",
"--style_params_json",
default=None,
help="Optional: Path to JSON file containing CDG style configuration. Required if --enable_cdg is used. Example: --cdg_styles_json='/path/to/cdg_styles.json'",
help="Optional: Path to JSON file containing CDG style configuration. Required if --enable_cdg is used. Example: --style_params_json='/path/to/cdg_styles.json'",
)

args = parser.parse_args()
@@ -136,14 +136,15 @@ def main():
# Load CDG styles if CDG generation is enabled
cdg_styles = None
if args.enable_cdg:
if not args.cdg_styles_json:
logger.error("CDG styles JSON file path (--cdg_styles_json) is required when --enable_cdg is used")
if not args.style_params_json:
logger.error("CDG styles JSON file path (--style_params_json) is required when --enable_cdg is used")
sys.exit(1)
try:
with open(args.cdg_styles_json, "r") as f:
cdg_styles = json.loads(f.read())
with open(args.style_params_json, "r") as f:
style_params = json.loads(f.read())
cdg_styles = style_params["cdg"]
except FileNotFoundError:
logger.error(f"CDG styles configuration file not found: {args.cdg_styles_json}")
logger.error(f"CDG styles configuration file not found: {args.style_params_json}")
sys.exit(1)
except json.JSONDecodeError as e:
logger.error(f"Invalid JSON in CDG styles configuration file: {e}")
346 changes: 119 additions & 227 deletions karaoke_prep/utils/prep_cli.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "karaoke-prep"
version = "0.32.0"
version = "0.33.0"
description = "Prepare for karaoke video creation, by downloading audio and lyrics for a specified song or playlist from youtube and separating audio stems. After syncing, finalise the video with a title screen!"
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
license = "MIT"

0 comments on commit 5d8b697

Please sign in to comment.