Skip to content

Commit

Permalink
Fixed max lyrics lines on screen issue
Browse files Browse the repository at this point in the history
  • Loading branch information
beveradb committed Jan 10, 2025
1 parent 124d137 commit 0cf0688
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 39 deletions.
118 changes: 101 additions & 17 deletions lyrics_transcriber/output/ass/lyrics_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def _create_karaoke_text(self, start_ts: timedelta) -> str:
# self.logger.debug(f"Added word: '{word.text}' with duration {duration}")

text_stripped = text.rstrip()
self.logger.debug(f"Created karaoke text for {len(self.segment.words)} words: {text_stripped}")
# self.logger.debug(f"Created karaoke text for {len(self.segment.words)} words: {text_stripped}")

return text_stripped

Expand All @@ -102,8 +102,12 @@ class LyricsScreen:
line_height: int
lines: List[LyricsLine] = field(default_factory=list)
logger: Optional[logging.Logger] = None
PRE_ROLL_TIME = 5.0
MAX_VISIBLE_LINES = 4
SCREEN_GAP_THRESHOLD = 5.0
POST_ROLL_TIME = 2.0
FADE_IN_MS = 300
FADE_OUT_MS = 300
TARGET_PRESHOW_TIME = 5.0

def __post_init__(self):
if self.logger is None:
Expand All @@ -112,38 +116,118 @@ def __post_init__(self):
@property
def start_ts(self) -> timedelta:
"""Get screen start timestamp."""
earliest_ts = min(line.segment.start_time for line in self.lines)
return timedelta(seconds=max(0, earliest_ts - self.PRE_ROLL_TIME))
return timedelta(seconds=min(line.segment.start_time for line in self.lines))

@property
def end_ts(self) -> timedelta:
"""Get screen end timestamp."""
return timedelta(seconds=max(line.segment.end_time for line in self.lines))

def as_ass_events(self, style: Style, next_screen_start: Optional[timedelta] = None, is_unified_screen: bool = False) -> List[Event]:
"""Convert screen to ASS events."""
latest_ts = max(line.segment.end_time for line in self.lines)
return timedelta(seconds=latest_ts + self.POST_ROLL_TIME)

def _visualize_timeline(
    self,
    active_lines: List[Tuple[float, int, str]],
    current_time: float,
    new_line_end: float,
    new_line_text: str,
) -> None:
    """Log an ASCII summary of the lines currently on screen at DEBUG level.

    Purely diagnostic: the method only formats a message and hands it to
    ``self.logger.debug``; it returns nothing and does not modify its inputs.

    Args:
        active_lines: ``(end_time, y_position, text)`` tuples for the lines
            that are currently considered visible.
        current_time: Candidate time (seconds) at which the new line would
            be faded in.
        new_line_end: Time (seconds) at which the new line would stop
            being visible.
        new_line_text: Text of the line being placed.
    """
    # This helper runs on every pass of the line-placement loop, so skip
    # all string building when the message would be discarded anyway.
    if not self.logger.isEnabledFor(logging.DEBUG):
        return

    # Row index of the first line slot; identical for every active line,
    # so compute it once instead of once per loop iteration.
    first_row = self._calculate_first_line_position() // self.line_height

    timeline = [
        "Timeline:",
        f"Current time: {current_time:.2f}s",
        "Active lines:",
    ]
    # Report each active line by its row relative to the first line slot.
    timeline.extend(
        f" Line {y_pos // self.line_height - first_row}: '{text}' ends at {end_time:.2f}s"
        for end_time, y_pos, text in active_lines
    )
    timeline.append(f"New line would end at: {new_line_end:.2f}s ('{new_line_text}')")
    self.logger.debug("\n".join(timeline))

def as_ass_events(
self,
style: Style,
next_screen_start: Optional[timedelta] = None,
is_unified_screen: bool = False,
previous_active_lines: List[Tuple[float, int, str]] = None,
) -> Tuple[List[Event], List[Tuple[float, int, str]]]:
"""Convert screen to ASS events. Returns (events, active_lines)."""
events = []
y_position = self._calculate_first_line_position()

screen_fade_in_start = self.start_ts if is_unified_screen else None
self.logger.debug(f"Screen fade in start: {screen_fade_in_start} (unified_screen: {is_unified_screen})")
# Initialize active_lines with any still-visible lines from previous screens
active_lines = previous_active_lines.copy() if previous_active_lines else []
self.logger.debug(f"Starting with {len(active_lines)} previous active lines:")
for end, pos, text in active_lines:
self.logger.debug(f" - '{text}' ends at {end:.2f}s")

if is_unified_screen:
screen_fade_in_start = self.start_ts - timedelta(seconds=self.TARGET_PRESHOW_TIME)
self.logger.debug(f"Unified screen fade in at {screen_fade_in_start} (all lines together)")

for line in self.lines:
line_end = line.segment.end_time + self.POST_ROLL_TIME + (self.FADE_OUT_MS / 1000)
self.logger.debug(f"Adding unified line: '{line.segment.text}' ending at {line_end:.2f}s")
active_lines.append((line_end, y_position, line.segment.text))
events.append(
self._create_line_event(line=line, y_position=y_position, style=style, screen_fade_in_start=screen_fade_in_start)
)
y_position += self.line_height

self.logger.debug(f"After unified screen, active lines: {[(end, text) for end, _, text in active_lines]}")
else:
for i, line in enumerate(self.lines):
line_end = line.segment.end_time + self.POST_ROLL_TIME + (self.FADE_OUT_MS / 1000)
target_start = line.segment.start_time - self.TARGET_PRESHOW_TIME

self.logger.debug(f"Processing line {i+1}: '{line.segment.text}'")
self.logger.debug(f"Target start: {target_start:.2f}s, Must show by: {line.segment.start_time:.2f}s")

# Start with target time
fade_in_time = target_start

while True:
# Remove expired lines at this point in time
prev_active_count = len(active_lines)
active_lines = [(end, pos, text) for end, pos, text in active_lines if end > fade_in_time]
if prev_active_count != len(active_lines):
self.logger.debug(f"Removed {prev_active_count - len(active_lines)} expired lines at {fade_in_time:.2f}s")

self._visualize_timeline(active_lines, fade_in_time, line_end, line.segment.text)

if len(active_lines) < self.MAX_VISIBLE_LINES:
self.logger.debug(f"Found space for line at {fade_in_time:.2f}s ({len(active_lines)} active lines)")
break

# No room yet, wait until next line expires
if active_lines:
prev_time = fade_in_time
fade_in_time = active_lines[0][0] # Wait until earliest line expires
self.logger.debug(f"No space available at {prev_time:.2f}s, waiting until {fade_in_time:.2f}s")

# Safety check: ensure we don't delay past when line needs to be shown
if fade_in_time > line.segment.start_time:
self.logger.error(
f"Cannot find space for line before it needs to be shown! Required: {line.segment.start_time:.2f}s"
)
break

# Add this line to active lines
active_lines.append((line_end, y_position, line.segment.text))
active_lines.sort() # Sort by end time

self.logger.debug(f"Final decision: fade in at {fade_in_time:.2f}s with {len(active_lines)} active lines")

for line in self.lines:
events.append(self._create_line_event(line=line, y_position=y_position, style=style, screen_fade_in_start=screen_fade_in_start))
y_position += self.line_height
events.append(
self._create_line_event(
line=line, y_position=y_position, style=style, screen_fade_in_start=timedelta(seconds=fade_in_time)
)
)
y_position += self.line_height

return events
return events, active_lines

def _create_line_event(self, line: LyricsLine, y_position: int, style: Style, screen_fade_in_start: Optional[timedelta]) -> Event:
"""Create ASS event for a single line, handling screen transitions."""
if screen_fade_in_start is not None:
# During screen transitions, all lines appear at once
start_time = screen_fade_in_start
end_time = timedelta(seconds=line.segment.end_time + line.POST_ROLL_TIME)
end_time = timedelta(seconds=line.segment.end_time + self.POST_ROLL_TIME)
else:
# Normal line timing with individual pre-roll
start_time = timedelta(seconds=max(0, line.segment.start_time - line.PRE_ROLL_TIME))
end_time = timedelta(seconds=line.segment.end_time + line.POST_ROLL_TIME)
start_time = timedelta(seconds=max(0, line.segment.start_time - self.PRE_ROLL_TIME))
end_time = timedelta(seconds=line.segment.end_time + self.POST_ROLL_TIME)

e = Event()
e.type = "Dialogue"
Expand Down
4 changes: 2 additions & 2 deletions lyrics_transcriber/output/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,8 @@ def generate_outputs(
outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)

# Generate ASS
# outputs.ass = self.subtitle.generate_ass(resized_segments[:8], output_prefix)
outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix)
outputs.ass = self.subtitle.generate_ass(resized_segments[:16], output_prefix)
# outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix)

# Generate video if requested
if render_video and outputs.ass:
Expand Down
40 changes: 20 additions & 20 deletions lyrics_transcriber/output/subtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,29 +158,29 @@ def _create_styled_subtitles(
resolution,
fontsize,
) -> ASS:
"""Create styled ASS subtitles."""
a, style = self._create_styled_ass_instance(resolution, fontsize)

# First pass: identify screens that should fade in as a unit
unified_screens = set() # Set of indices for screens that should fade in together

for i, screen in enumerate(screens):
if i == 0:
# First screen always fades in as a unit
unified_screens.add(i)
continue

# Check gap from previous screen
prev_screen = screens[i - 1]
last_line_end = max(line.segment.end_time for line in prev_screen.lines)
effective_screen_end = last_line_end + prev_screen.lines[0].POST_ROLL_TIME + (prev_screen.lines[0].FADE_OUT_MS / 1000)
gap_to_current = screen.start_ts.total_seconds() - effective_screen_end

if gap_to_current >= screen.SCREEN_GAP_THRESHOLD:
unified_screens.add(i)

active_lines = []
for i, screen in enumerate(screens):
next_screen_start = screens[i + 1].start_ts if i < len(screens) - 1 else None
is_unified = i in unified_screens
[a.add(event) for event in screen.as_ass_events(style, next_screen_start, is_unified)]
is_unified = i == 0 or (
next_screen_start is not None and (screen.start_ts - screens[i - 1].end_ts).total_seconds() >= screen.SCREEN_GAP_THRESHOLD
)

self.logger.debug(f"Processing screen {i+1}:")
self.logger.debug(f"Active lines before processing: {[(end, text) for end, _, text in active_lines]}")

# Get events and updated active lines
events, active_lines = screen.as_ass_events(style, next_screen_start, is_unified, active_lines)
for event in events:
a.add(event)

# Update active_lines for next screen
prev_count = len(active_lines)
active_lines = [(end, pos, text) for end, pos, text in active_lines if isinstance(end, float)]
self.logger.debug(
f"Active lines after processing: {[(end, text) for end, _, text in active_lines]} (removed {prev_count - len(active_lines)} lines)"
)

return a

0 comments on commit 0cf0688

Please sign in to comment.