From 3663aff2664626642f6b0091f2bf615726e3a593 Mon Sep 17 00:00:00 2001
From: Andrew Beveridge
Date: Tue, 28 Jan 2025 15:38:31 -0500
Subject: [PATCH] Fixed model downloading

When spacy.load("en_core_web_sm") raises OSError because the model is not
installed, SyllablesMatchHandler now tries to download the model and load it
again. The download is run with the interpreter that is executing this
process (sys.executable), so the model is installed into the same
environment. If the download command fails, an OSError with manual install
instructions is raised.

The logger is now created at the start of __init__ so it can be used while
the model is being loaded. The package version is bumped to 0.34.1.

---
 .../correction/handlers/syllables_match.py    | 26 ++++++++++++++++++++++++--
 pyproject.toml                                |  2 +-
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/lyrics_transcriber/correction/handlers/syllables_match.py b/lyrics_transcriber/correction/handlers/syllables_match.py
index 04e2ee8..a4c611b 100644
--- a/lyrics_transcriber/correction/handlers/syllables_match.py
+++ b/lyrics_transcriber/correction/handlers/syllables_match.py
@@ -16,22 +16,44 @@ class SyllablesMatchHandler(GapCorrectionHandler):
     """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
 
     def __init__(self):
+        # Initialize logger first
+        self.logger = logging.getLogger(__name__)
+
         # Marking SpacySyllables as used to prevent unused import warning
         _ = SpacySyllables
+
         # Load spacy model with syllables pipeline
-        self.nlp = spacy.load("en_core_web_sm")
+        try:
+            self.nlp = spacy.load("en_core_web_sm")
+        except OSError:
+            self.logger.info("Language model 'en_core_web_sm' not found. Attempting to download...")
+            import subprocess
+            import sys
+
+            try:
+                # Use the running interpreter so the model installs into this environment
+                subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm"])
+                self.nlp = spacy.load("en_core_web_sm")
+                self.logger.info("Successfully downloaded and loaded en_core_web_sm")
+            except subprocess.CalledProcessError as e:
+                raise OSError(
+                    "Language model 'en_core_web_sm' could not be downloaded. "
+                    "Please install it manually with: python -m spacy download en_core_web_sm"
+                ) from e
+
         # Add syllables component to pipeline if not already present
         if "syllables" not in self.nlp.pipe_names:
             self.nlp.add_pipe("syllables", after="tagger")
+
         # Initialize Pyphen for English
         self.dic = pyphen.Pyphen(lang="en_US")
+
         # Initialize NLTK's CMU dictionary
         try:
             self.cmudict = cmudict.dict()
         except LookupError:
             nltk.download("cmudict")
             self.cmudict = cmudict.dict()
-        self.logger = logging.getLogger(__name__)
 
     def _count_syllables_spacy(self, words: List[str]) -> int:
         """Count syllables using spacy_syllables."""
diff --git a/pyproject.toml b/pyproject.toml
index b8da60f..169c687 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.34.0"
+version = "0.34.1"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge "]
 license = "MIT"