Skip to content

Commit

Permalink
Added support for background image, hard-coded nomad branded lyrics c…
Browse files Browse the repository at this point in the history
…olours, simplified ASS config, improved output video quality to high quality 4k with no compression artifacts
  • Loading branch information
beveradb committed Nov 15, 2023
1 parent 52b9852 commit a69b044
Show file tree
Hide file tree
Showing 6 changed files with 399 additions and 345 deletions.
89 changes: 62 additions & 27 deletions lyrics_transcriber/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(
cache_dir="/tmp/lyrics-transcriber-cache/",
log_level=logging.DEBUG,
log_formatter=None,
background=None,
):
self.logger = logging.getLogger(__name__)
self.logger.setLevel(log_level)
Expand All @@ -46,6 +47,8 @@ def __init__(
self.output_dir = output_dir
self.audio_filepath = audio_filepath

self.background = background

self.artist = artist
self.title = title
self.song_known = self.artist is not None and self.title is not None
Expand Down Expand Up @@ -341,20 +344,7 @@ def write_ass_file(self):
intial_screens = self.create_screens()
screens = subtitles.set_segment_end_times(intial_screens, int(self.result_metadata["song_duration"]))
screens = subtitles.set_screen_start_times(screens)
lyric_subtitles_ass = subtitles.create_subtitles(
screens,
{
# "FontName": "Arial Narrow",
# "FontSize": 20,
# "PrimaryColor": (255, 0, 255, 255),
# "SecondaryColor": (0, 255, 255, 255),
"FontName": "Avenir Next",
"FontSize": 30,
"PrimaryColor": (4, 51, 255, 255),
"SecondaryColor": (255, 255, 255, 255),
},
)

lyric_subtitles_ass = subtitles.create_styled_subtitles(screens)
lyric_subtitles_ass.write(ass_filepath)

def create_video(self):
Expand All @@ -373,34 +363,79 @@ def create_video(self):

ffmpeg_cmd = [
"ffmpeg",
# Describe a video stream that is a black background
"-f",
"lavfi",
"-i",
# "color=c=black:s=1280x720:r=20",
"color=c=black:s=1920x1080:r=20",
# "color=c=black:s=3840x2160:r=30",
"-r", "30", # Set frame rate to 30 fps for the following input
]

background = False

# Check if a background image is provided, else use black background
if self.background:
self.logger.debug(f"self.background set to path: {self.background}")
if os.path.isfile(self.background):
self.logger.debug(f"background is valid file path: {self.background}")
background = self.background
else:
self.logger.error(f"background_image is NOT valid file path, falling back to black")

# fmt: off
if background:
self.logger.debug(f"background set: {background}")
ffmpeg_cmd += [
# Use provided image as background
"-loop", "1", # Loop the image
"-i", self.background, # Input image file
]
else:
self.logger.debug(f"background not set, using solid black background")
ffmpeg_cmd += [
# Use black color as background
"-f", "lavfi",
# "-i", "color=c=black:s=1280x720:r=20",
# "-i", "color=c=black:s=1920x1080:r=20",
"-i", "color=c=black:s=3840x2160:r=30",
]

video_codec = "libx264"
ffmpeg_codes = subprocess.getoutput("ffmpeg -codecs")

if "h264_videotoolbox" in ffmpeg_codes:
video_codec = "h264_videotoolbox"
self.logger.info(f"video codec set to hardware accelerated h264_videotoolbox")
elif "h264_qsv" in ffmpeg_codes:
video_codec = "h264_qsv"
self.logger.info(f"video codec set to hardware accelerated h264_qsv")

ffmpeg_cmd += [
# Use accompaniment track as audio
"-i",
self.audio_filepath,
"-i", self.audio_filepath,
# Set audio delay if needed
# https://ffmpeg.org/ffmpeg-filters.html#adelay
# "-af",
# f"adelay=delays={audio_delay_ms}:all=1",
# Re-encode audio as AAC
"-c:a",
"aac",
"-c:a", "aac",
# Add subtitles
"-vf",
"ass=" + self.result_metadata["ass_subtitles_filepath"],
"-vf", "ass=" + self.result_metadata["ass_subtitles_filepath"],
# Encode as H264 using hardware acceleration if available
"-c:v", video_codec,
# Increase output video quality
"-preset", "slow", # Use a slower preset for better compression efficiency
# "-crf", "1", # Lower CRF for higher quality. Adjust as needed, lower is better quality
"-b:v", "5000k", # Set the video bitrate, for example, 5000 kbps
"-minrate", "5000k", # Minimum bitrate
"-maxrate", "20000k", # Maximum bitrate
"-bufsize", "10000k", # Set the rate-control buffer size (here 0.5x maxrate)
# End encoding after the shortest stream
"-shortest",
# Overwrite files without asking
"-y",
# Only encode the first 30 seconds (for testing, fast iteration when editing this)
# "-t", "30",
*video_metadata,
# Output path of video
self.result_metadata["karaoke_video_filepath"],
]
# fmt: on

self.logger.debug(f"running ffmpeg command to generate video: {ffmpeg_cmd}")
ffmpeg_output = subprocess.check_output(ffmpeg_cmd, universal_newlines=True)
Expand Down
41 changes: 17 additions & 24 deletions lyrics_transcriber/tuul_utils/ass.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,7 @@ def tag_argument_to_number(cls, arg, default_value=None):
return float(match.group(1))

class Style:
aliases = {
"PrimaryColour": "PrimaryColor",
"SecondaryColour": "SecondaryColor",
"TertiaryColor": "OutlineColor",
"TertiaryColour": "OutlineColor",
"OutlineColour": "OutlineColor",
"BackColour": "BackColor",
}
aliases = {}
formatters = None
order = [
"Name",
Expand Down Expand Up @@ -188,10 +181,10 @@ def __init__(self):
self.Name = ""
self.Fontname = ""
self.Fontsize = 1.0
self.PrimaryColor = (255, 255, 255, 255)
self.SecondaryColor = (255, 255, 255, 255)
self.OutlineColor = (255, 255, 255, 255)
self.BackColor = (255, 255, 255, 255)
self.PrimaryColour = (255, 255, 255, 255)
self.SecondaryColour = (255, 255, 255, 255)
self.OutlineColour = (255, 255, 255, 255)
self.BackColour = (255, 255, 255, 255)
self.Bold = False
self.Italic = False
self.Underline = False
Expand Down Expand Up @@ -249,10 +242,10 @@ def copy(self, other=None):
obj1.Name = obj2.Name
obj1.Fontname = obj2.Fontname
obj1.Fontsize = obj2.Fontsize
obj1.PrimaryColor = obj2.PrimaryColor
obj1.SecondaryColor = obj2.SecondaryColor
obj1.OutlineColor = obj2.OutlineColor
obj1.BackColor = obj2.BackColor
obj1.PrimaryColour = obj2.PrimaryColour
obj1.SecondaryColour = obj2.SecondaryColour
obj1.OutlineColour = obj2.OutlineColour
obj1.BackColour = obj2.BackColour
obj1.Bold = obj2.Bold
obj1.Italic = obj2.Italic
obj1.Underline = obj2.Underline
Expand Down Expand Up @@ -281,10 +274,10 @@ def equals(self, other, names_can_differ=False):
and (names_can_differ or self.Name == other.Name)
and self.Fontname == other.Fontname
and self.Fontsize == other.Fontsize
and self.PrimaryColor == other.PrimaryColor
and self.SecondaryColor == other.SecondaryColor
and self.OutlineColor == other.OutlineColor
and self.BackColor == other.BackColor
and self.PrimaryColour == other.PrimaryColour
and self.SecondaryColour == other.SecondaryColour
and self.OutlineColour == other.OutlineColour
and self.BackColour == other.BackColour
and self.Bold == other.Bold
and self.Italic == other.Italic
and self.Underline == other.Underline
Expand All @@ -307,10 +300,10 @@ def equals(self, other, names_can_differ=False):
"Name": (Formatters.same, Formatters.same),
"Fontname": (Formatters.same, Formatters.same),
"Fontsize": (Formatters.str_to_number, Formatters.number_to_str),
"PrimaryColor": (Formatters.str_to_color, Formatters.color_to_str),
"SecondaryColor": (Formatters.str_to_color, Formatters.color_to_str),
"OutlineColor": (Formatters.str_to_color, Formatters.color_to_str),
"BackColor": (Formatters.str_to_color, Formatters.color_to_str),
"PrimaryColour": (Formatters.str_to_color, Formatters.color_to_str),
"SecondaryColour": (Formatters.str_to_color, Formatters.color_to_str),
"OutlineColour": (Formatters.str_to_color, Formatters.color_to_str),
"BackColour": (Formatters.str_to_color, Formatters.color_to_str),
"Bold": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),
"Italic": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),
"Underline": (Formatters.str_to_n1bool, Formatters.n1bool_to_str),
Expand Down
74 changes: 53 additions & 21 deletions lyrics_transcriber/tuul_utils/subtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,32 +228,64 @@ def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
return screens


def create_subtitles(lyric_screens: List[LyricsScreen], display_params: Dict) -> ass.ASS:
def create_styled_subtitles(lyric_screens: List[LyricsScreen]) -> ass.ASS:
a = ass.ASS()

a.styles_format = [
"Name",
"Alignment",
"Fontname",
"Fontsize",
"PrimaryColour",
"SecondaryColour",
"Bold",
"ScaleX",
"ScaleY",
"Spacing",
"MarginL",
"MarginR",
"Encoding",
"Name", # The name of the Style. Case sensitive. Cannot include commas.
"Fontname", # The fontname as used by Windows. Case-sensitive.
"Fontsize", # Font size
"PrimaryColour", # This is the colour that a subtitle will normally appear in.
"SecondaryColour", # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collision, to distinguish the different subtitles.
"OutlineColour", # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collision, to distinguish the different subtitles.
"BackColour", # This is the colour of the subtitle outline or shadow, if these are used
"Bold", # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
"Italic", # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
"Underline", # [-1 or 0]
"StrikeOut", # [-1 or 0]
"ScaleX", # Modifies the width of the font. [percent]
"ScaleY", # Modifies the height of the font. [percent]
"Spacing", # Extra space between characters. [pixels]
"Angle", # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
"BorderStyle", # 1=Outline + drop shadow, 3=Opaque box
"Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
"Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
"Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle. NOTE(review): these are the legacy SSA values; ASS V4+ styles use numpad-style values (1-3 bottom, 4-6 middle, 7-9 top) - confirm which scheme the ass.ASS.ALIGN_* constants emit.
"MarginL", # Left margin. [pixels]
"MarginR", # Right margin. [pixels]
"MarginV", # Vertical margin. [pixels]
"Encoding", # Font character set / encoding
]
]

style = ass.ASS.Style()
style.type = "Style"
style.Name = "Default"
style.Fontname = display_params["FontName"]
style.Fontsize = display_params["FontSize"]
style.Bold = True
style.PrimaryColor = display_params["PrimaryColor"]
style.SecondaryColor = display_params["SecondaryColor"]
style.Alignment = ass.ASS.ALIGN_TOP_CENTER
style.Name = "Nomad"
style.Fontname = "Avenir Next Bold"
style.Fontsize = 32

style.PrimaryColour = (112, 112, 247, 255)
style.SecondaryColour = (255, 255, 255, 255)
style.OutlineColour = (26, 58, 235, 255)
style.BackColour = (0, 255, 0, 255) # (26, 58, 235, 255)

style.Bold = False
style.Italic = False
style.Underline = False
style.StrikeOut = False

style.ScaleX = 100
style.ScaleY = 100
style.Spacing = 0
style.Angle = 0.0
style.BorderStyle = 1
style.Outline = 1
style.Shadow = 0
style.Alignment = ass.ASS.ALIGN_MIDDLE_CENTER
style.MarginL = 0
style.MarginR = 0
style.MarginV = 0
style.Encoding = 0

a.add_style(style)

a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
Expand Down
7 changes: 7 additions & 0 deletions lyrics_transcriber/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ def main():
help="Optional: directory where the output lyrics files will be saved. Default: current directory",
)

parser.add_argument(
"--background",
default=None,
help="Optional: image file path to use for karaoke video background. Default: solid black",
)

args = parser.parse_args()

log_level = getattr(logging, args.log_level.upper())
Expand All @@ -84,6 +90,7 @@ def main():
cache_dir=args.cache_dir,
log_formatter=log_formatter,
log_level=log_level,
background=args.background,
)

result_metadata = transcriber.generate()
Expand Down
Loading

0 comments on commit a69b044

Please sign in to comment.