diff --git a/example-llm-chatcompletion-response.py b/example-llm-chatcompletion-response.py deleted file mode 100644 index cd5a2b8..0000000 --- a/example-llm-chatcompletion-response.py +++ /dev/null @@ -1,210 +0,0 @@ -ChatCompletion( - id='chatcmpl-8LzBzNRHRTo8eKK3OWu1CyGM8R1ag', - choices=[ - Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=' - { - "segments": [ - { - "id": 4, - "text": "I'm nobody's fool and yet it's clear to me", - "words": [ - { - "text": "I'm", - "start": 32.58, - "end": 32.7, - "confidence": 0.854 - }, - { - "text": "nobody's", - "start": 32.7, - "end": 33.4, - "confidence": 0.992 - }, - { - "text": "fool", - "start": 33.4, - "end": 33.66, - "confidence": 0.997 - }, - { - "text": "and", - "start": 33.66, - "end": 33.88, - "confidence": 0.445 - }, - { - "text": "yet", - "start": 33.88, - "end": 34.18, - "confidence": 0.952 - }, - { - "text": "it's", - "start": 34.18, - "end": 34.48, - "confidence": 0.956 - }, - { - "text": "clear", - "start": 34.48, - "end": 34.86, - "confidence": 0.9 - }, - { - "text": "to", - "start": 34.86, - "end": 35.16, - "confidence": 0.843 - }, - { - "text": "me", - "start": 35.16, - "end": 35.9, - "confidence": 0.992 - } - ] - }, - { - "id": 5, - "text": "I don't have a strategy", - "words": [ - { - "text": "I", - "start": 36.46, - "end": 36.7, - "confidence": 0.994 - }, - { - "text": "don't", - "start": 36.7, - "end": 37.06, - "confidence": 0.999 - }, - { - "text": "have", - "start": 37.06, - "end": 37.3, - "confidence": 0.999 - }, - { - "text": "a", - "start": 37.3, - "end": 37.48, - "confidence": 0.986 - }, - { - "text": "strategy", - "start": 37.48, - "end": 38.52, - "confidence": 0.999 - } - ] - }, - { - "id": 6, - "text": "It's just like taking candy from a baby", - "words": [ - { - "text": "It's", - "start": 39.3, - "end": 39.58, - "confidence": 0.986 - }, - { - "text": "just", - "start": 39.58, - "end": 39.8, - "confidence": 0.992 - }, - { - "text": "like", - "start": 39.8, - "end": 40.06, - "confidence": 0.999 - }, - { - "text": "taking", - "start": 40.06, - "end": 40.46, - "confidence": 0.986 - }, - { - "text": "candy", - "start": 40.46, - "end": 41.0, - "confidence": 0.997 - }, - { - "text": "from", - "start": 41.0, - "end": 41.38, - "confidence": 0.996 - }, - { - "text": "a", - "start": 41.38, - "end": 41.6, - "confidence": 0.839 - }, - { - "text": "baby", - "start": 41.6, - "end": 42.2, - "confidence": 0.998 - } - ] - }, - { - "id": 7, - "text": "And I think I must be", - "words": [ - { - "text": "And", - "start": 42.86, - "end": 43.18, - "confidence": 0.958 - }, - { - "text": "I", - "start": 43.18, - "end": 43.4, - "confidence": 0.982 - }, - { - "text": "think", - "start": 43.4, - "end": 43.88, - "confidence": 0.998 - }, - { - "text": "I", - "start": 43.88, - "end": 44.2, - "confidence": 0.984 - }, - { - "text": "must", - "start": 44.2, - "end": 44.44, - "confidence": 0.964 - }, - { - "text": "be", - "start": 44.44, - "end": 44.6, - "confidence": 0.993 - } - ] - } - ] - }', - role='assistant', - function_call=None, - tool_calls=None)) - ], - created=1700250803, - model='gpt-4-1106-preview', - object='chat.completion', - system_fingerprint='fp_a24b4d720c', - usage=CompletionUsage(completion_tokens=1210, prompt_tokens=2329, total_tokens=3539) -) diff --git a/lyrics_transcriber/example-llm-response.json b/lyrics_transcriber/example-llm-response.json deleted file mode 100644 index d8bfb29..0000000 --- a/lyrics_transcriber/example-llm-response.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "CorrectedLyricsRawResponse": { - "segments": [ - { - "id": 0, - "text": "Don't know how to take it, don't know where to go", - "words": [ - {"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, - {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, - {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, - {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, - {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, - {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, - {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, - {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, - {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, - {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, - {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982} - ] - }, - { - "id": 1, - "text": "My resistance running low", - "words": [ - {"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, - {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, - {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, - {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999} - ] - }, - { - "id": 2, - "text": "And every day the hold is getting tighter, and it troubles me so", - "words": [ - {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, - {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, - {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, - {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, - {"text": "hold", "start": 26.1, "end": 26.48, "confidence": 0.361}, - {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, - {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, - {"text": "tighter,", "start": 27.08, "end": 27.84, "confidence": 0.975}, - {"text": "and", "start": 28.42, "end": 28.8, "confidence": 0.347}, - {"text": "it", "start": 28.8, "end": 28.98, "confidence": 0.821}, - {"text": "troubles", "start": 28.98, "end": 29.72, "confidence": 0.519}, - {"text": "me", "start": 29.72, "end": 30.02, "confidence": 0.987}, - {"text": "so", "start": 30.02, "end": 30.48, "confidence": 0.843} - ] - }, - { - "id": 3, - "text": "You know that I'm nobody's fool", - "words": [ - {"text": "You", "start": 30.56, "end": 30.8, "confidence": 0.676}, - {"text": "know", "start": 30.8, "end": 31.1, "confidence": 0.987}, - {"text": "that", "start": 31.1, "end": 31.46, "confidence": 0.984}, - {"text": "I'm", "start": 31.46, "end": 32.4, "confidence": 0.954}, - {"text": "nobody's", "start": 32.4, "end": 32.58, "confidence": 0.569}, - {"text": "fool", "start": 32.58, "end": 33.66, "confidence": 0.854} - ] - }, - { - "id": 4, - "text": "I'm nobody's fool and yet it's clear to me", - "words": [ - {"text": "I'm", "start": 32.58, "end": 32.7, "confidence": 0.854}, - {"text": "nobody's", "start": 32.7, "end": 33.4, "confidence": 0.992}, - {"text": "fool", "start": 33.4, "end": 33.66, "confidence": 0.997}, - {"text": "and", "start": 33.66, "end": 33.88, "confidence": 0.445}, - {"text": "yet", "start": 33.88, "end": 34.18, "confidence": 0.952}, - {"text": "it's", "start": 34.18, "end": 34.48, "confidence": 0.956}, - {"text": "clear", "start": 34.48, "end": 34.86, "confidence": 0.298}, - {"text": "to", "start": 34.86, "end": 35.16, "confidence": 0.843}, - {"text": "me", "start": 35.16, "end": 35.9, "confidence": 0.992} - ] - }, - { - "id": 5, - "text": "I don't have a strategy", - "words": [ - {"text": "I", "start": 36.46, "end": 36.7, "confidence": 0.994}, - {"text": "don't", "start": 36.7, "end": 37.06, "confidence": 0.999}, - {"text": "have", "start": 37.06, "end": 37.3, "confidence": 0.999}, - {"text": "a", "start": 37.3, "end": 37.48, "confidence": 0.986}, - {"text": "strategy", "start": 37.48, "end": 38.52, "confidence": 0.999} - ] - }, - { - "id": 6, - "text": "It's just like taking candy from a baby", - "words": [ - {"text": "It's", "start": 39.3, "end": 39.58, "confidence": 0.986}, - {"text": "just", "start": 39.58, "end": 39.8, "confidence": 0.992}, - {"text": "like", "start": 39.8, "end": 40.06, "confidence": 0.999}, - {"text": "taking", "start": 40.06, "end": 40.46, "confidence": 0.986}, - {"text": "candy", "start": 40.46, "end": 41.0, "confidence": 0.997}, - {"text": "from", "start": 41.0, "end": 41.38, "confidence": 0.996}, - {"text": "a", "start": 41.38, "end": 41.6, "confidence": 0.839}, - {"text": "baby", "start": 41.6, "end": 42.2, "confidence": 0.998} - ] - } - ] - } -} \ No newline at end of file diff --git a/lyrics_transcriber/llm_correction_instructions_3.txt b/lyrics_transcriber/llm_correction_instructions_3.txt index 1fd5875..098f195 100644 --- a/lyrics_transcriber/llm_correction_instructions_3.txt +++ b/lyrics_transcriber/llm_correction_instructions_3.txt @@ -2,15 +2,15 @@ As a song lyric corrector for a karaoke video studio, your job involves processi You work with two data sets: a reference data set of published lyrics and a machine-transcribed segment of a song. Your primary task is to compare these datasets and correct the transcribed lyrics to match the reference data as closely as possible. -Your response should be formatted in JSON, to be sent to an API endpoint. The JSON output will include: +Your response should be formatted in JSON, to be sent to an API endpoint. The JSON output must include every field below: -id: The identifier of the segment from the first data input. -text: The corrected lyric text for the segment. -words: A list containing each word in the segment, with fields for: - - text: The correct word. - - start: The start timestamp for the word, estimated if necessary. - - end: The end timestamp for the word, estimated if necessary. - - confidence: A score (0 to 1) indicating the confidence in the accuracy of the word. Retain existing confidence values for unchanged words. +- id: The identifier of the segment from the first data input. +- text: The corrected lyric text for the segment. +- words: A list containing each word in the segment, with fields for: + - text: The correct word. + - start: The start timestamp for the word, estimated if necessary. + - end: The end timestamp for the word, estimated if necessary. + - confidence: A score (0 to 1) indicating the confidence in the accuracy of the word. Retain existing confidence values for unchanged words. The reference data is generally accurate but may have imperfections or missing sections. The transcribed data includes timestamps and confidence scores for each word, but the accuracy of the words is only about 70-90%.