From f20b712d8baec587e9a38b8ba7d5c536e2ba1690 Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Sun, 26 Jan 2025 15:08:42 -0500 Subject: [PATCH] Auto-download spacy model --- .../correction/phrase_analyzer.py | 17 +++++++++++++---- pyproject.toml | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/lyrics_transcriber/correction/phrase_analyzer.py b/lyrics_transcriber/correction/phrase_analyzer.py index 0741422..b3e1eaa 100644 --- a/lyrics_transcriber/correction/phrase_analyzer.py +++ b/lyrics_transcriber/correction/phrase_analyzer.py @@ -21,10 +21,19 @@ def __init__(self, logger: logging.Logger, language_code: str = "en_core_web_sm" try: self.nlp = spacy.load(language_code) except OSError: - self.logger.error(f"Failed to load language model: {language_code}") - raise OSError( - f"Language model '{language_code}' not found. " f"Please install it with: python -m spacy download {language_code}" - ) + self.logger.info(f"Language model {language_code} not found. Attempting to download...") + import subprocess + + try: + subprocess.check_call(["python", "-m", "spacy", "download", language_code]) + self.nlp = spacy.load(language_code) + self.logger.info(f"Successfully downloaded and loaded {language_code}") + except subprocess.CalledProcessError as e: + self.logger.error(f"Failed to download language model: {language_code}") + raise OSError( + f"Language model '{language_code}' could not be downloaded. " + f"Please install it manually with: python -m spacy download {language_code}" + ) from e def score_phrase(self, words: List[str], context: str) -> PhraseScore: """Score a phrase based on grammatical completeness and natural breaks. diff --git a/pyproject.toml b/pyproject.toml index d0df6c5..bbd8293 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lyrics-transcriber" -version = "0.32.2" +version = "0.32.3" description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify" authors = ["Andrew Beveridge "] license = "MIT"