Skip to content

Commit

Permalink
Removed some unnecessary subtitle screen formatting, increased ASS resolution to match video resolution, updated dependencies to latest whisper etc.
Browse files Browse the repository at this point in the history
beveradb committed Nov 21, 2023
1 parent d9e3fee commit 3c21424
Showing 5 changed files with 408 additions and 334 deletions.
45 changes: 17 additions & 28 deletions .github/removetriton.patch
Original file line number Diff line number Diff line change
@@ -1,41 +1,30 @@
712d711
< triton = "2.0.0"
1206,1232d1204
<
< [[package]]
1040d1039
< triton = ">=2.0.0,<3"
1657d1655
< triton = {version = "2.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
1685,1709d1682
< name = "triton"
< version = "2.0.0"
< version = "2.1.0"
< description = "A language and compiler for custom Deep Learning operations"
< optional = false
< python-versions = "*"
< files = [
< {file = "triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38806ee9663f4b0f7cd64790e96c579374089e58f49aac4a6608121aa55e2505"},
< {file = "triton-2.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:226941c7b8595219ddef59a1fdb821e8c744289a132415ddd584facedeb475b1"},
< {file = "triton-2.0.0-1-cp36-cp36m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4c9fc8c89874bc48eb7e7b2107a9b8d2c0bf139778637be5bfccb09191685cfd"},
< {file = "triton-2.0.0-1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d2684b6a60b9f174f447f36f933e9a45f31db96cb723723ecd2dcfd1c57b778b"},
< {file = "triton-2.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9d4978298b74fcf59a75fe71e535c092b023088933b2f1df933ec32615e4beef"},
< {file = "triton-2.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:74f118c12b437fb2ca25e1a04759173b517582fcf4c7be11913316c764213656"},
< {file = "triton-2.0.0-1-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9618815a8da1d9157514f08f855d9e9ff92e329cd81c0305003eb9ec25cc5add"},
< {file = "triton-2.0.0-1-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1aca3303629cd3136375b82cb9921727f804e47ebee27b2677fef23005c3851a"},
< {file = "triton-2.0.0-1-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e3e13aa8b527c9b642e3a9defcc0fbd8ffbe1c80d8ac8c15a01692478dc64d8a"},
< {file = "triton-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f05a7e64e4ca0565535e3d5d3405d7e49f9d308505bb7773d21fb26a4c008c2"},
< {file = "triton-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb4b99ca3c6844066e516658541d876c28a5f6e3a852286bbc97ad57134827fd"},
< {file = "triton-2.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47b4d70dc92fb40af553b4460492c31dc7d3a114a979ffb7a5cdedb7eb546c08"},
< {file = "triton-2.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fedce6a381901b1547e0e7e1f2546e4f65dca6d91e2d8a7305a2d1f5551895be"},
< {file = "triton-2.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75834f27926eab6c7f00ce73aaf1ab5bfb9bec6eb57ab7c0bfc0a23fac803b4c"},
< {file = "triton-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0117722f8c2b579cd429e0bee80f7731ae05f63fe8e9414acd9a679885fcbf42"},
< {file = "triton-2.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcd9be5d0c2e45d2b7e6ddc6da20112b6862d69741576f9c3dbaf941d745ecae"},
< {file = "triton-2.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42a0d2c3fc2eab4ba71384f2e785fbfd47aa41ae05fa58bf12cb31dcbd0aeceb"},
< {file = "triton-2.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52c47b72c72693198163ece9d90a721299e4fb3b8e24fd13141e384ad952724f"},
< {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"},
< {file = "triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:919b06453f0033ea52c13eaf7833de0e57db3178d23d4e04f9fc71c4f2c32bf8"},
< {file = "triton-2.1.0-0-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae4bb8a91de790e1866405211c4d618379781188f40d5c4c399766914e84cd94"},
< {file = "triton-2.1.0-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39f6fb6bdccb3e98f3152e3fbea724f1aeae7d749412bbb1fa9c441d474eba26"},
< {file = "triton-2.1.0-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21544e522c02005a626c8ad63d39bdff2f31d41069592919ef281e964ed26446"},
< {file = "triton-2.1.0-0-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:143582ca31dd89cd982bd3bf53666bab1c7527d41e185f9e3d8a3051ce1b663b"},
< {file = "triton-2.1.0-0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82fc5aeeedf6e36be4e4530cbdcba81a09d65c18e02f52dc298696d45721f3bd"},
< {file = "triton-2.1.0-0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81a96d110a738ff63339fc892ded095b31bd0d205e3aace262af8400d40b6fa8"},
< ]
1234,1243d1205
<
< [package.dependencies]
< cmake = "*"
< filelock = "*"
< lit = "*"
< torch = "*"
<
< [package.extras]
< build = ["cmake (>=3.18)", "lit"]
< tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"]
< tutorials = ["matplotlib", "pandas", "tabulate"]
<
< [[package]]
16 changes: 11 additions & 5 deletions lyrics_transcriber/transcriber.py
Original file line number Diff line number Diff line change
@@ -76,17 +76,21 @@ def __init__(

match video_resolution:
case "4k":
self.video_resolution_num = ("3840", "2160")
self.video_resolution_num = (3840, 2160)
self.font_size = 250
self.line_height = 250
case "1080p":
self.video_resolution_num = ("1920", "1080")
self.font_size = 140
self.video_resolution_num = (1920, 1080)
self.font_size = 120
self.line_height = 120
case "720p":
self.video_resolution_num = ("1280", "720")
self.video_resolution_num = (1280, 720)
self.font_size = 100
self.line_height = 100
case "360p":
self.video_resolution_num = ("640", "360")
self.video_resolution_num = (640, 360)
self.font_size = 50
self.line_height = 50
case _:
raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")

@@ -577,6 +581,8 @@ def create_screens(self):
if screen is None:
self.logger.debug(f"screen is none, creating new LyricsScreen")
screen = subtitles.LyricsScreen()
screen.video_size = self.video_resolution_num
screen.line_height = self.line_height
if line is None:
self.logger.debug(f"line is none, creating new LyricsLine")
line = subtitles.LyricsLine()
71 changes: 14 additions & 57 deletions lyrics_transcriber/utils/subtitles.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass, field
from datetime import timedelta
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Tuple
import json
import itertools
from pathlib import Path
@@ -13,14 +13,6 @@
Functions for generating ASS subtitles from lyric data
"""

VIDEO_SIZE = (400, 320)
LINE_HEIGHT = 30


class LyricMarker(IntEnum):
SEGMENT_START = 1
SEGMENT_END = 2


class LyricSegmentIterator:
def __init__(self, lyrics_segments: List[str]):
@@ -143,16 +135,17 @@ def from_dict(cls, data: dict) -> "LyricsLine":
class LyricsScreen:
lines: List[LyricsLine] = field(default_factory=list)
start_ts: Optional[timedelta] = None
video_size: Tuple[int, int] = None
line_height: int = None

@property
def end_ts(self) -> timedelta:
return self.lines[-1].end_ts

def get_line_y(self, line_num: int) -> int:
_, h = VIDEO_SIZE
_, h = self.video_size
line_count = len(self.lines)
line_height = LINE_HEIGHT
return (h / 2) - (line_count * line_height / 2) + (line_num * line_height)
return (h / 2) - (line_count * self.line_height / 2) + (line_num * self.line_height)

def as_ass_events(self, style: ass.ASS.Style) -> List[ass.ASS.Event]:
return [line.as_ass_event(self.start_ts, self.end_ts, style, self.get_line_y(i)) for i, line in enumerate(self.lines)]
@@ -185,46 +178,6 @@ def default(self, o):
return super().default(o)


# Build a list of LyricsScreen objects by walking `events_tuples` in order and
# consuming one entry of `lyrics_segments` for every SEGMENT_START event.
# Returns the list of screens collected so far, even if the segments run out early.
# NOTE(review): indentation is flattened in this view, so the exact nesting of the
# statements below (notably the two flush checks just before `except`) cannot be
# confirmed from here; verify against the original file before relying on it.
def create_screens(logger, lyrics_segments, events_tuples):
segments = iter(LyricSegmentIterator(lyrics_segments=lyrics_segments))
events = iter(events_tuples)
screens: List[LyricsScreen] = []
# Running state: the most recently started segment, and the line/screen being filled.
prev_segment: Optional[LyricSegment] = None
line: Optional[LyricsLine] = None
screen: Optional[LyricsScreen] = None

try:
for event in events:
# Each event is indexed as (timestamp, marker).
ts = event[0]
marker = event[1]
if marker == LyricMarker.SEGMENT_START:
# next() raises StopIteration once the segments are exhausted; handled below.
segment_text: str = next(segments)
segment = LyricSegment(segment_text, ts)
# Lazily open a new screen / line when none is in progress.
if screen is None:
screen = LyricsScreen()
if line is None:
line = LyricsLine()
line.segments.append(segment)
# A segment ending in a newline closes the current line.
if segment_text.endswith("\n"):
screen.lines.append(line)
line = None
# A segment ending in two newlines also closes the current screen.
if segment_text.endswith("\n\n"):
screens.append(screen)
screen = None
prev_segment = segment
elif marker == LyricMarker.SEGMENT_END:
# An end marker stamps its timestamp on the most recently started segment.
if prev_segment is not None:
prev_segment.end_ts = ts
# Flush a line that was never closed by a newline, then a non-empty open screen.
if line is not None:
screen.lines.append(line) # type: ignore[union-attr]
if screen is not None and len(screen.lines) > 0:
screens.append(screen) # type: ignore[arg-type]
# `si` is bound but not used. The list(...) calls in the message drain whatever
# remains in both iterators.
except StopIteration as si:
logger.error(f"Reached end of segments before end of events. Events: {list(events)}, lyrics: {list(segments)}")

return screens


def set_segment_end_times(screens: List[LyricsScreen], song_duration_seconds: int) -> List[LyricsScreen]:
"""
Infer end times of lines for screens where they are not already set.
@@ -254,10 +207,14 @@ def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
return screens


def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fontsize) -> ass.ASS:
def create_styled_subtitles(
lyric_screens: List[LyricsScreen],
resolution,
fontsize,
) -> ass.ASS:
a = ass.ASS()
a.set_resolution(resolution)

a.styles_format = [
"Name", # The name of the Style. Case sensitive. Cannot include commas.
"Fontname", # The fontname as used by Windows. Case-sensitive.
@@ -278,9 +235,9 @@ def create_styled_subtitles(lyric_screens: List[LyricsScreen], resolution, fonts
"Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
"Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
"Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
"MarginL", #
"MarginR", #
"MarginV", #
"MarginL", # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen. The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
"MarginR", # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
"MarginV", # This defines the vertical margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred.
"Encoding", #
]

Loading

0 comments on commit 3c21424

Please sign in to comment.