From 4c64e583a6ed5e30dccb22849b839999cd9a3257 Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Thu, 18 Jul 2024 00:18:39 -0400 Subject: [PATCH] Updated triton patch, added retry and backup mechanism for genius lyrics fetch --- .github/removetriton.patch | 22 ++++++------- lyrics_transcriber/transcriber.py | 36 +++++++++++++++------ poetry.lock | 53 +++++++++++++------------------ pyproject.toml | 5 +-- 4 files changed, 62 insertions(+), 54 deletions(-) diff --git a/.github/removetriton.patch b/.github/removetriton.patch index dd16d5d..fd5911a 100644 --- a/.github/removetriton.patch +++ b/.github/removetriton.patch @@ -1,20 +1,20 @@ -1164d1163 +1160d1159 < triton = ">=2.0.0,<3" -2067d2065 -< triton = {version = "2.3.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} -2163,2185d2160 +2081d2079 +< triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} +2177,2199d2174 < name = "triton" -< version = "2.3.0" +< version = "2.3.1" < description = "A language and compiler for custom Deep Learning operations" < optional = false < python-versions = "*" < files = [ -< {file = "triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8"}, -< {file = "triton-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd"}, -< {file = "triton-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0"}, -< {file = "triton-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c"}, -< {file = "triton-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440"}, -< {file = "triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65"}, +< {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, +< {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, +< {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"}, +< {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"}, +< {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, +< {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, < ] < < [package.dependencies] diff --git a/lyrics_transcriber/transcriber.py b/lyrics_transcriber/transcriber.py index c0d8a26..23a5568 100644 --- a/lyrics_transcriber/transcriber.py +++ b/lyrics_transcriber/transcriber.py @@ -14,6 +14,8 @@ from .utils import subtitles from typing import List, Optional from openai import OpenAI +from tenacity import retry, stop_after_delay, wait_exponential, retry_if_exception_type +import requests class LyricsTranscriber: @@ -536,6 +538,16 @@ def write_spotify_lyrics_plain_text(self): self.outputs["spotify_lyrics_text"] += line["words"].strip() + "\n" f.write(line["words"].strip() + "\n") + @retry( + stop=stop_after_delay(120), # Stop after 2 minutes + wait=wait_exponential(multiplier=1, min=4, max=60), # Exponential backoff starting at 4 seconds + retry=retry_if_exception_type(requests.exceptions.RequestException), # Retry on request exceptions + reraise=True, # Reraise the last exception if all retries fail + ) + def fetch_genius_lyrics(self, genius, title, artist): + self.logger.debug(f"fetch_genius_lyrics attempting to fetch lyrics from Genius for {title} by {artist}") + return genius.search_song(title, artist) + def write_genius_lyrics_file(self): if self.genius_api_token and self.song_known: self.logger.debug(f"attempting genius fetch as genius_api_token and song name was set") @@ -556,18 +568,22 @@ def write_genius_lyrics_file(self): self.logger.debug(f"no cached lyrics found at genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}, fetching from Genius") genius = lyricsgenius.Genius(self.genius_api_token, verbose=(self.log_level == logging.DEBUG)) - song = genius.search_song(self.title, self.artist) - if song is None: - self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}') - return - lyrics = self.clean_genius_lyrics(song.lyrics) + try: + song = self.fetch_genius_lyrics(genius, self.title, self.artist) + if song is None: + self.logger.warning(f'Could not find lyrics on Genius for "{self.title}" by {self.artist}') + return + lyrics = self.clean_genius_lyrics(song.lyrics) - self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}") - with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f: - f.write(lyrics) + self.logger.debug(f"writing clean lyrics to genius_lyrics_cache_filepath: {genius_lyrics_cache_filepath}") + with open(genius_lyrics_cache_filepath, "w", encoding="utf-8") as f: + f.write(lyrics) - self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath - self.outputs["genius_lyrics_text"] = lyrics + self.outputs["genius_lyrics_filepath"] = genius_lyrics_cache_filepath + self.outputs["genius_lyrics_text"] = lyrics + except requests.exceptions.RequestException as e: + self.logger.error(f"Failed to fetch lyrics from Genius after multiple retries: {e}") + raise def clean_genius_lyrics(self, lyrics): lyrics = lyrics.replace("\\n", "\n") diff --git a/poetry.lock b/poetry.lock index 5105924..08bed07 100644 --- a/poetry.lock +++ b/poetry.lock @@ -530,13 +530,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" -version = "0.23.4" +version = "0.23.5" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.23.4-py3-none-any.whl", hash = "sha256:3a0b957aa87150addf0cc7bd71b4d954b78e749850e1e7fb29ebbd2db64ca037"}, - {file = "huggingface_hub-0.23.4.tar.gz", hash = "sha256:35d99016433900e44ae7efe1c209164a5a81dbbcd53a52f99c281dcd7ce22431"}, + {file = "huggingface_hub-0.23.5-py3-none-any.whl", hash = "sha256:d7a7d337615e11a45cc14a0ce5a605db6b038dc24af42866f731684825226e90"}, + {file = "huggingface_hub-0.23.5.tar.gz", hash = "sha256:67a9caba79b71235be3752852ca27da86bd54311d2424ca8afdb8dda056edf98"}, ] [package.dependencies] @@ -1119,13 +1119,13 @@ sympy = "*" [[package]] name = "openai" -version = "1.35.13" +version = "1.35.14" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.35.13-py3-none-any.whl", hash = "sha256:36ec3e93e0d1f243f69be85c89b9221a471c3e450dfd9df16c9829e3cdf63e60"}, - {file = "openai-1.35.13.tar.gz", hash = "sha256:c684f3945608baf7d2dcc0ef3ee6f3e27e4c66f21076df0b47be45d57e6ae6e4"}, + {file = "openai-1.35.14-py3-none-any.whl", hash = "sha256:adadf8c176e0b8c47ad782ed45dc20ef46438ee1f02c7103c4155cff79c8f68b"}, + {file = "openai-1.35.14.tar.gz", hash = "sha256:394ba1dfd12ecec1d634c50e512d24ff1858bbc2674ffcce309b822785a058de"}, ] [package.dependencies] @@ -1810,6 +1810,21 @@ files = [ {file = "tbb-2021.13.0-py3-none-win_amd64.whl", hash = "sha256:3528a53e4bbe64b07a6112b4c5a00ff3c61924ee46c9c68e004a1ac7ad1f09c3"}, ] +[[package]] +name = "tenacity" +version = "8.5.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + [[package]] name = "text-unidecode" version = "1.3" @@ -2062,7 +2077,6 @@ nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \" nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} sympy = "*" -triton = {version = "2.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} typing-extensions = ">=4.8.0" [package.extras] @@ -2157,29 +2171,6 @@ torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", video = ["av (==9.2.0)", "decord (==0.6.0)"] vision = ["Pillow (>=10.0.1,<=15.0)"] -[[package]] -name = "triton" -version = "2.3.1" -description = "A language and compiler for custom Deep Learning operations" -optional = false -python-versions = "*" -files = [ - {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, - {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, - {file = "triton-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf80e8761a9e3498aa92e7bf83a085b31959c61f5e8ac14eedd018df6fccd10"}, - {file = "triton-2.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b13bf35a2b659af7159bf78e92798dc62d877aa991de723937329e2d382f1991"}, - {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, - {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, -] - -[package.dependencies] -filelock = "*" - -[package.extras] -build = ["cmake (>=3.20)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] -tutorials = ["matplotlib", "pandas", "tabulate", "torch"] - [[package]] name = "typing-extensions" version = "4.12.2" @@ -2233,4 +2224,4 @@ vad-silero = ["onnxruntime", "torchaudio"] [metadata] lock-version = "2.0" python-versions = ">=3.9" -content-hash = "2c47cb7475f6e6b7389ced28fd6559647e68f6b6a14ad94ce58b7b9975ed36ca" +content-hash = "d1dec31363a552640b4c4978caecec15f2589236025f472e9bb7416afc335320" diff --git a/pyproject.toml b/pyproject.toml index 443e7cb..4949b82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "lyrics-transcriber" -version = "0.16.3" +version = "0.16.4" description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify" authors = ["Andrew Beveridge "] license = "MIT" @@ -30,6 +30,7 @@ openai-whisper = ">=20231117" transformers = ">=4" auditok = ">=0.2" whisper-timestamped = ">=1" +tenacity = ">=8" # Note: after adding openai-whisper and whisper-timestamped with poetry lock, I then removed all traces of triton # from poetry.lock before running poetry install, as triton doesn't support macOS but isn't actually needed for whisper. # This was the only way I was able to get a working cross-platform build published to PyPI. @@ -47,4 +48,4 @@ lyrics-transcriber = 'lyrics_transcriber.utils.cli:main' [build-system] requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" +build-backend = "poetry.core.masonry.api" \ No newline at end of file