Skip to content

Commit

Permalink
Fixed model downloading
Browse files Browse the repository at this point in the history
  • Loading branch information
beveradb committed Jan 28, 2025
1 parent b63a62c commit 3663aff
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
24 changes: 22 additions & 2 deletions lyrics_transcriber/correction/handlers/syllables_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,42 @@ class SyllablesMatchHandler(GapCorrectionHandler):
"""Handles gaps where number of syllables in reference text matches number of syllables in transcription."""

def __init__(self):
    """Set up logging and the syllable-counting backends used by this handler.

    Loads the spaCy ``en_core_web_sm`` model, attempting a one-off download
    if it is not installed, makes sure the ``syllables`` component is in the
    pipeline, and initialises the Pyphen ``en_US`` dictionary and NLTK's CMU
    pronouncing dictionary (downloading the corpus on first use).

    Raises:
        OSError: If the spaCy model is missing and the download command
            exits with a non-zero status.
    """
    # Initialize logger first so the fallback paths below can report progress
    self.logger = logging.getLogger(__name__)

    # Marking SpacySyllables as used to prevent unused import warning
    _ = SpacySyllables

    # Load spacy model with syllables pipeline
    try:
        self.nlp = spacy.load("en_core_web_sm")
    except OSError:
        self.logger.info("Language model 'en_core_web_sm' not found. Attempting to download...")
        import subprocess
        import sys

        # Run the download with the interpreter that is executing this code,
        # so the model is installed into the same environment that will load
        # it. A bare "python" may resolve to a different installation (or to
        # nothing at all) inside virtualenvs, containers, or on Windows.
        # sys.executable can be empty in embedded interpreters, hence the
        # fallback to the previous behaviour.
        python_cmd = sys.executable or "python"

        try:
            subprocess.check_call([python_cmd, "-m", "spacy", "download", "en_core_web_sm"])
        except subprocess.CalledProcessError as e:
            raise OSError(
                "Language model 'en_core_web_sm' could not be downloaded. "
                "Please install it manually with: python -m spacy download en_core_web_sm"
            ) from e

        # Retry the load now that the download command has succeeded
        self.nlp = spacy.load("en_core_web_sm")
        self.logger.info("Successfully downloaded and loaded en_core_web_sm")

    # Add syllables component to pipeline if not already present
    if "syllables" not in self.nlp.pipe_names:
        self.nlp.add_pipe("syllables", after="tagger")

    # Initialize Pyphen for English
    self.dic = pyphen.Pyphen(lang="en_US")

    # Initialize NLTK's CMU dictionary, fetching the corpus if it is missing
    try:
        self.cmudict = cmudict.dict()
    except LookupError:
        nltk.download("cmudict")
        self.cmudict = cmudict.dict()

def _count_syllables_spacy(self, words: List[str]) -> int:
"""Count syllables using spacy_syllables."""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "lyrics-transcriber"
version = "0.34.0"
version = "0.34.1"
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
authors = ["Andrew Beveridge <[email protected]>"]
license = "MIT"
Expand Down

0 comments on commit 3663aff

Please sign in to comment.