Added support for background image, hard-coded nomad branded lyrics colours, simplified ASS config, improved output video quality to high quality 4k with no compression artifacts
nomadkaraoke · Nov 15, 2023 · a69b044 · a69b044
1 parent 52b9852
commit a69b044
Show file tree

Hide file tree

Showing 6 changed files with 399 additions and 345 deletions.
diff --git a/lyrics_transcriber/transcriber.py b/lyrics_transcriber/transcriber.py
@@ -26,6 +26,7 @@ def __init__(
         cache_dir="/tmp/lyrics-transcriber-cache/",
         log_level=logging.DEBUG,
         log_formatter=None,
+        background=None,
     ):
         self.logger = logging.getLogger(__name__)
         self.logger.setLevel(log_level)
@@ -46,6 +47,8 @@ def __init__(
         self.output_dir = output_dir
         self.audio_filepath = audio_filepath
 
+        self.background = background
+
         self.artist = artist
         self.title = title
         self.song_known = self.artist is not None and self.title is not None
@@ -341,20 +344,7 @@ def write_ass_file(self):
         intial_screens = self.create_screens()
         screens = subtitles.set_segment_end_times(intial_screens, int(self.result_metadata["song_duration"]))
         screens = subtitles.set_screen_start_times(screens)
-        lyric_subtitles_ass = subtitles.create_subtitles(
-            screens,
-            {
-                # "FontName": "Arial Narrow",
-                # "FontSize": 20,
-                # "PrimaryColor": (255, 0, 255, 255),
-                # "SecondaryColor": (0, 255, 255, 255),
-                "FontName": "Avenir Next",
-                "FontSize": 30,
-                "PrimaryColor": (4, 51, 255, 255),
-                "SecondaryColor": (255, 255, 255, 255),
-            },
-        )
-
+        lyric_subtitles_ass = subtitles.create_styled_subtitles(screens)
         lyric_subtitles_ass.write(ass_filepath)
 
     def create_video(self):
@@ -373,34 +363,79 @@ def create_video(self):
 
         ffmpeg_cmd = [
             "ffmpeg",
-            # Describe a video stream that is a black background
-            "-f",
-            "lavfi",
-            "-i",
-            # "color=c=black:s=1280x720:r=20",
-            "color=c=black:s=1920x1080:r=20",
-            # "color=c=black:s=3840x2160:r=30",
+            "-r", "30",  # Set frame rate to 30 fps for the following input
+        ]
+
+        background = False
+
+        # Check if a background image is provided, else use black background
+        if self.background:
+            self.logger.debug(f"self.background set to path: {self.background}")
+            if os.path.isfile(self.background):
+                self.logger.debug(f"background is valid file path: {self.background}")
+                background = self.background
+            else:
+                self.logger.error(f"background_image is NOT valid file path, falling back to black")
+
+        # fmt: off
+        if background:
+            self.logger.debug(f"background set: {background}")
+            ffmpeg_cmd += [
+                # Use provided image as background
+                "-loop", "1",  # Loop the image
+                "-i", self.background,  # Input image file
+            ]
+        else:
+            self.logger.debug(f"background not set, using solid black background")
+            ffmpeg_cmd += [
+                # Use black color as background
+                "-f", "lavfi",
+                # "-i", "color=c=black:s=1280x720:r=20",
+                # "-i", "color=c=black:s=1920x1080:r=20",
+                "-i", "color=c=black:s=3840x2160:r=30",
+            ]
+
+        video_codec = "libx264"
+        ffmpeg_codes = subprocess.getoutput("ffmpeg -codecs")
+
+        if "h264_videotoolbox" in ffmpeg_codes:
+            video_codec = "h264_videotoolbox"
+            self.logger.info(f"video codec set to hardware accelerated h264_videotoolbox")
+        elif "h264_qsv" in ffmpeg_codes:
+            video_codec = "h264_qsv"
+            self.logger.info(f"video codec set to hardware accelerated h264_qsv")
+
+        ffmpeg_cmd += [
             # Use accompaniment track as audio
-            "-i",
-            self.audio_filepath,
+            "-i", self.audio_filepath,
             # Set audio delay if needed
             # https://ffmpeg.org/ffmpeg-filters.html#adelay
             # "-af",
             # f"adelay=delays={audio_delay_ms}:all=1",
             # Re-encode audio as AAC
-            "-c:a",
-            "aac",
+            "-c:a", "aac",
             # Add subtitles
-            "-vf",
-            "ass=" + self.result_metadata["ass_subtitles_filepath"],
+            "-vf", "ass=" + self.result_metadata["ass_subtitles_filepath"],
+            # Encode as H264 using hardware acceleration if available
+            "-c:v", video_codec,
+            # Increase output video quality
+            "-preset", "slow",  # Use a slower preset for better compression efficiency
+            # "-crf", "1",  # Lower CRF for higher quality. Adjust as needed, lower is better quality
+            "-b:v", "5000k",  # Set the video bitrate, for example, 5000 kbps
+            "-minrate", "5000k",  # Minimum bitrate
+            "-maxrate", "20000k",  # Maximum bitrate
+            "-bufsize", "10000k",  # Set the rate-control buffer size (here 2x the target bitrate)
             # End encoding after the shortest stream
             "-shortest",
             # Overwrite files without asking
             "-y",
+            # Only encode the first 30 seconds (for testing, fast iteration when editing this)
+            # "-t", "30",
             *video_metadata,
             # Output path of video
             self.result_metadata["karaoke_video_filepath"],
         ]
+        # fmt: on
 
         self.logger.debug(f"running ffmpeg command to generate video: {ffmpeg_cmd}")
         ffmpeg_output = subprocess.check_output(ffmpeg_cmd, universal_newlines=True)

diff --git a/lyrics_transcriber/tuul_utils/ass.py b/lyrics_transcriber/tuul_utils/ass.py
@@ -145,14 +145,7 @@ def tag_argument_to_number(cls, arg, default_value=None):
             return float(match.group(1))
 
     class Style:
-        aliases = {
-            "PrimaryColour": "PrimaryColor",
-            "SecondaryColour": "SecondaryColor",
-            "TertiaryColor": "OutlineColor",
-            "TertiaryColour": "OutlineColor",
-            "OutlineColour": "OutlineColor",
-            "BackColour": "BackColor",
-        }
+        aliases = {}
         formatters = None
         order = [
             "Name",
@@ -188,10 +181,10 @@ def __init__(self):
             self.Name = ""
             self.Fontname = ""
             self.Fontsize = 1.0
-            self.PrimaryColor = (255, 255, 255, 255)
-            self.SecondaryColor = (255, 255, 255, 255)
-            self.OutlineColor = (255, 255, 255, 255)
-            self.BackColor = (255, 255, 255, 255)
+            self.PrimaryColour = (255, 255, 255, 255)
+            self.SecondaryColour = (255, 255, 255, 255)
+            self.OutlineColour = (255, 255, 255, 255)
+            self.BackColour = (255, 255, 255, 255)
             self.Bold = False
             self.Italic = False
             self.Underline = False
@@ -249,10 +242,10 @@ def copy(self, other=None):
             obj1.Name = obj2.Name
             obj1.Fontname = obj2.Fontname
             obj1.Fontsize = obj2.Fontsize
-            obj1.PrimaryColor = obj2.PrimaryColor
-            obj1.SecondaryColor = obj2.SecondaryColor
-            obj1.OutlineColor = obj2.OutlineColor
-            obj1.BackColor = obj2.BackColor
+            obj1.PrimaryColour = obj2.PrimaryColour
+            obj1.SecondaryColour = obj2.SecondaryColour
+            obj1.OutlineColour = obj2.OutlineColour
+            obj1.BackColour = obj2.BackColour
             obj1.Bold = obj2.Bold
             obj1.Italic = obj2.Italic
             obj1.Underline = obj2.Underline
@@ -281,10 +274,10 @@ def equals(self, other, names_can_differ=False):
                 and (names_can_differ or self.Name == other.Name)
                 and self.Fontname == other.Fontname
                 and self.Fontsize == other.Fontsize
-                and self.PrimaryColor == other.PrimaryColor
-                and self.SecondaryColor == other.SecondaryColor
-                and self.OutlineColor == other.OutlineColor
-                and self.BackColor == other.BackColor
+                and self.PrimaryColour == other.PrimaryColour
+                and self.SecondaryColour == other.SecondaryColour
+                and self.OutlineColour == other.OutlineColour
+                and self.BackColour == other.BackColour
                 and self.Bold == other.Bold
                 and self.Italic == other.Italic
                 and self.Underline == other.Underline
@@ -307,10 +300,10 @@ def equals(self, other, names_can_differ=False):
         "Name": (Formatters.same, Formatters.same),
         "Fontname": (Formatters.same, Formatters.same),
         "Fontsize": (Formatters.str_to_number, Formatters.number_to_str),
-        "PrimaryColor": (Formatters.str_to_color, Formatters.color_to_str),
-        "SecondaryColor": (Formatters.str_to_color, Formatters.color_to_str),
-        "OutlineColor": (Formatters.str_to_color, Formatters.color_to_str),
-        "BackColor": (Formatters.str_to_color, Formatters.color_to_str),
+        "PrimaryColour": (Formatters.str_to_color, Formatters.color_to_str),
+        "SecondaryColour": (Formatters.str_to_color, Formatters.color_to_str),
+        "OutlineColour": (Formatters.str_to_color, Formatters.color_to_str),
+        "BackColour": (Formatters.str_to_color, Formatters.color_to_str),
         "Bold": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),
         "Italic": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),
         "Underline": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),

diff --git a/lyrics_transcriber/tuul_utils/subtitles.py b/lyrics_transcriber/tuul_utils/subtitles.py
@@ -228,32 +228,64 @@ def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
     return screens
 
 
-def create_subtitles(lyric_screens: List[LyricsScreen], display_params: Dict) -> ass.ASS:
+def create_styled_subtitles(lyric_screens: List[LyricsScreen]) -> ass.ASS:
     a = ass.ASS()
+
     a.styles_format = [
-        "Name",
-        "Alignment",
-        "Fontname",
-        "Fontsize",
-        "PrimaryColour",
-        "SecondaryColour",
-        "Bold",
-        "ScaleX",
-        "ScaleY",
-        "Spacing",
-        "MarginL",
-        "MarginR",
-        "Encoding",
+        "Name",  # The name of the Style. Case sensitive. Cannot include commas.
+        "Fontname",  # The fontname as used by Windows. Case-sensitive.
+        "Fontsize",  # Font size
+        "PrimaryColour",  # This is the colour that a subtitle will normally appear in.
+        "SecondaryColour",  # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collision, to distinguish the different subtitles.
+        "OutlineColour",  # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collision, to distinguish the different subtitles.
+        "BackColour",  # This is the colour of the subtitle outline or shadow, if these are used
+        "Bold",  # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
+        "Italic",  # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
+        "Underline",  # [-1 or 0]
+        "StrikeOut",  # [-1 or 0]
+        "ScaleX",  # Modifies the width of the font. [percent]
+        "ScaleY",  # Modifies the height of the font. [percent]
+        "Spacing",  # Extra space between characters. [pixels]
+        "Angle",  # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
+        "BorderStyle",  # 1=Outline + drop shadow, 3=Opaque box
+        "Outline",  # If BorderStyle is 1,  then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
+        "Shadow",  # If BorderStyle is 1,  then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
+        "Alignment",  # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
+        "MarginL",  #
+        "MarginR",  #
+        "MarginV",  #
+        "Encoding",  #
     ]
+
     style = ass.ASS.Style()
     style.type = "Style"
-    style.Name = "Default"
-    style.Fontname = display_params["FontName"]
-    style.Fontsize = display_params["FontSize"]
-    style.Bold = True
-    style.PrimaryColor = display_params["PrimaryColor"]
-    style.SecondaryColor = display_params["SecondaryColor"]
-    style.Alignment = ass.ASS.ALIGN_TOP_CENTER
+    style.Name = "Nomad"
+    style.Fontname = "Avenir Next Bold"
+    style.Fontsize = 32
+
+    style.PrimaryColour = (112, 112, 247, 255)
+    style.SecondaryColour = (255, 255, 255, 255)
+    style.OutlineColour = (26, 58, 235, 255)
+    style.BackColour = (0, 255, 0, 255) # (26, 58, 235, 255)
+
+    style.Bold = False
+    style.Italic = False
+    style.Underline = False
+    style.StrikeOut = False
+
+    style.ScaleX = 100
+    style.ScaleY = 100
+    style.Spacing = 0
+    style.Angle = 0.0
+    style.BorderStyle = 1
+    style.Outline = 1
+    style.Shadow = 0
+    style.Alignment = ass.ASS.ALIGN_MIDDLE_CENTER
+    style.MarginL = 0
+    style.MarginR = 0
+    style.MarginV = 0
+    style.Encoding = 0
+
     a.add_style(style)
 
     a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]

diff --git a/lyrics_transcriber/utils/cli.py b/lyrics_transcriber/utils/cli.py
@@ -58,6 +58,12 @@ def main():
         help="Optional: directory where the output lyrics files will be saved. Default: current directory",
     )
 
+    parser.add_argument(
+        "--background",
+        default=None,
+        help="Optional: image file path to use for karaoke video background. Default: solid black",
+    )
+
     args = parser.parse_args()
 
     log_level = getattr(logging, args.log_level.upper())
@@ -84,6 +90,7 @@ def main():
         cache_dir=args.cache_dir,
         log_formatter=log_formatter,
         log_level=log_level,
+        background=args.background,
     )
 
     result_metadata = transcriber.generate()