From 0dc22fd01395d52dc5acdef60b8ddeea10bbe03e Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Fri, 17 Nov 2023 16:24:20 -0600 Subject: [PATCH] Deleted old example files, tweaked prompt 3 --- example-llm-chatcompletion-response.py | 210 ------------------ lyrics_transcriber/example-llm-response.json | 104 --------- .../llm_correction_instructions_3.txt | 16 +- 3 files changed, 8 insertions(+), 322 deletions(-) delete mode 100644 example-llm-chatcompletion-response.py delete mode 100644 lyrics_transcriber/example-llm-response.json diff --git a/example-llm-chatcompletion-response.py b/example-llm-chatcompletion-response.py deleted file mode 100644 index cd5a2b8..0000000 --- a/example-llm-chatcompletion-response.py +++ /dev/null @@ -1,210 +0,0 @@ -ChatCompletion( - id='chatcmpl-8LzBzNRHRTo8eKK3OWu1CyGM8R1ag', - choices=[ - Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=' - { - "segments": [ - { - "id": 4, - "text": "I'm nobody's fool and yet it's clear to me", - "words": [ - { - "text": "I'm", - "start": 32.58, - "end": 32.7, - "confidence": 0.854 - }, - { - "text": "nobody's", - "start": 32.7, - "end": 33.4, - "confidence": 0.992 - }, - { - "text": "fool", - "start": 33.4, - "end": 33.66, - "confidence": 0.997 - }, - { - "text": "and", - "start": 33.66, - "end": 33.88, - "confidence": 0.445 - }, - { - "text": "yet", - "start": 33.88, - "end": 34.18, - "confidence": 0.952 - }, - { - "text": "it's", - "start": 34.18, - "end": 34.48, - "confidence": 0.956 - }, - { - "text": "clear", - "start": 34.48, - "end": 34.86, - "confidence": 0.9 - }, - { - "text": "to", - "start": 34.86, - "end": 35.16, - "confidence": 0.843 - }, - { - "text": "me", - "start": 35.16, - "end": 35.9, - "confidence": 0.992 - } - ] - }, - { - "id": 5, - "text": "I don't have a strategy", - "words": [ - { - "text": "I", - "start": 36.46, - "end": 36.7, - "confidence": 0.994 - }, - { - "text": "don't", - "start": 36.7, - "end": 37.06, - "confidence": 0.999 - }, - { - "text": "have", - "start": 37.06, - "end": 37.3, - "confidence": 0.999 - }, - { - "text": "a", - "start": 37.3, - "end": 37.48, - "confidence": 0.986 - }, - { - "text": "strategy", - "start": 37.48, - "end": 38.52, - "confidence": 0.999 - } - ] - }, - { - "id": 6, - "text": "It's just like taking candy from a baby", - "words": [ - { - "text": "It's", - "start": 39.3, - "end": 39.58, - "confidence": 0.986 - }, - { - "text": "just", - "start": 39.58, - "end": 39.8, - "confidence": 0.992 - }, - { - "text": "like", - "start": 39.8, - "end": 40.06, - "confidence": 0.999 - }, - { - "text": "taking", - "start": 40.06, - "end": 40.46, - "confidence": 0.986 - }, - { - "text": "candy", - "start": 40.46, - "end": 41.0, - "confidence": 0.997 - }, - { - "text": "from", - "start": 41.0, - "end": 41.38, - "confidence": 0.996 - }, - { - "text": "a", - "start": 41.38, - "end": 41.6, - "confidence": 0.839 - }, - { - "text": "baby", - "start": 41.6, - "end": 42.2, - "confidence": 0.998 - } - ] - }, - { - "id": 7, - "text": "And I think I must be", - "words": [ - { - "text": "And", - "start": 42.86, - "end": 43.18, - "confidence": 0.958 - }, - { - "text": "I", - "start": 43.18, - "end": 43.4, - "confidence": 0.982 - }, - { - "text": "think", - "start": 43.4, - "end": 43.88, - "confidence": 0.998 - }, - { - "text": "I", - "start": 43.88, - "end": 44.2, - "confidence": 0.984 - }, - { - "text": "must", - "start": 44.2, - "end": 44.44, - "confidence": 0.964 - }, - { - "text": "be", - "start": 44.44, - "end": 44.6, - "confidence": 0.993 - } - ] - } - ] - }', - role='assistant', - function_call=None, - tool_calls=None)) - ], - created=1700250803, - model='gpt-4-1106-preview', - object='chat.completion', - system_fingerprint='fp_a24b4d720c', - usage=CompletionUsage(completion_tokens=1210, prompt_tokens=2329, total_tokens=3539) -) diff --git a/lyrics_transcriber/example-llm-response.json b/lyrics_transcriber/example-llm-response.json deleted file mode 100644 index d8bfb29..0000000 --- a/lyrics_transcriber/example-llm-response.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "CorrectedLyricsRawResponse": { - "segments": [ - { - "id": 0, - "text": "Don't know how to take it, don't know where to go", - "words": [ - {"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, - {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, - {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, - {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, - {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, - {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, - {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, - {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, - {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, - {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, - {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982} - ] - }, - { - "id": 1, - "text": "My resistance running low", - "words": [ - {"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, - {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, - {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, - {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999} - ] - }, - { - "id": 2, - "text": "And every day the hold is getting tighter, and it troubles me so", - "words": [ - {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, - {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, - {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, - {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, - {"text": "hold", "start": 26.1, "end": 26.48, "confidence": 0.361}, - {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, - {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, - {"text": "tighter,", "start": 27.08, "end": 27.84, "confidence": 0.975}, - {"text": "and", "start": 28.42, "end": 28.8, "confidence": 0.347}, - {"text": "it", "start": 28.8, "end": 28.98, "confidence": 0.821}, - {"text": "troubles", "start": 28.98, "end": 29.72, "confidence": 0.519}, - {"text": "me", "start": 29.72, "end": 30.02, "confidence": 0.987}, - {"text": "so", "start": 30.02, "end": 30.48, "confidence": 0.843} - ] - }, - { - "id": 3, - "text": "You know that I'm nobody's fool", - "words": [ - {"text": "You", "start": 30.56, "end": 30.8, "confidence": 0.676}, - {"text": "know", "start": 30.8, "end": 31.1, "confidence": 0.987}, - {"text": "that", "start": 31.1, "end": 31.46, "confidence": 0.984}, - {"text": "I'm", "start": 31.46, "end": 32.4, "confidence": 0.954}, - {"text": "nobody's", "start": 32.4, "end": 32.58, "confidence": 0.569}, - {"text": "fool", "start": 32.58, "end": 33.66, "confidence": 0.854} - ] - }, - { - "id": 4, - "text": "I'm nobody's fool and yet it's clear to me", - "words": [ - {"text": "I'm", "start": 32.58, "end": 32.7, "confidence": 0.854}, - {"text": "nobody's", "start": 32.7, "end": 33.4, "confidence": 0.992}, - {"text": "fool", "start": 33.4, "end": 33.66, "confidence": 0.997}, - {"text": "and", "start": 33.66, "end": 33.88, "confidence": 0.445}, - {"text": "yet", "start": 33.88, "end": 34.18, "confidence": 0.952}, - {"text": "it's", "start": 34.18, "end": 34.48, "confidence": 0.956}, - {"text": "clear", "start": 34.48, "end": 34.86, "confidence": 0.298}, - {"text": "to", "start": 34.86, "end": 35.16, "confidence": 0.843}, - {"text": "me", "start": 35.16, "end": 35.9, "confidence": 0.992} - ] - }, - { - "id": 5, - "text": "I don't have a strategy", - "words": [ - {"text": "I", "start": 36.46, "end": 36.7, "confidence": 0.994}, - {"text": "don't", "start": 36.7, "end": 37.06, "confidence": 0.999}, - {"text": "have", "start": 37.06, "end": 37.3, "confidence": 0.999}, - {"text": "a", "start": 37.3, "end": 37.48, "confidence": 0.986}, - {"text": "strategy", "start": 37.48, "end": 38.52, "confidence": 0.999} - ] - }, - { - "id": 6, - "text": "It's just like taking candy from a baby", - "words": [ - {"text": "It's", "start": 39.3, "end": 39.58, "confidence": 0.986}, - {"text": "just", "start": 39.58, "end": 39.8, "confidence": 0.992}, - {"text": "like", "start": 39.8, "end": 40.06, "confidence": 0.999}, - {"text": "taking", "start": 40.06, "end": 40.46, "confidence": 0.986}, - {"text": "candy", "start": 40.46, "end": 41.0, "confidence": 0.997}, - {"text": "from", "start": 41.0, "end": 41.38, "confidence": 0.996}, - {"text": "a", "start": 41.38, "end": 41.6, "confidence": 0.839}, - {"text": "baby", "start": 41.6, "end": 42.2, "confidence": 0.998} - ] - } - ] - } -} \ No newline at end of file diff --git a/lyrics_transcriber/llm_correction_instructions_3.txt b/lyrics_transcriber/llm_correction_instructions_3.txt index 1fd5875..098f195 100644 --- a/lyrics_transcriber/llm_correction_instructions_3.txt +++ b/lyrics_transcriber/llm_correction_instructions_3.txt @@ -2,15 +2,15 @@ As a song lyric corrector for a karaoke video studio, your job involves processi You work with two data sets: a reference data set of published lyrics and a machine-transcribed segment of a song. Your primary task is to compare these datasets and correct the transcribed lyrics to match the reference data as closely as possible. -Your response should be formatted in JSON, to be sent to an API endpoint. The JSON output will include: +Your response should be formatted in JSON, to be sent to an API endpoint. The JSON output must include every field below: -id: The identifier of the segment from the first data input. -text: The corrected lyric text for the segment. -words: A list containing each word in the segment, with fields for: - - text: The correct word. - - start: The start timestamp for the word, estimated if necessary. - - end: The end timestamp for the word, estimated if necessary. - - confidence: A score (0 to 1) indicating the confidence in the accuracy of the word. Retain existing confidence values for unchanged words. +- id: The identifier of the segment from the first data input. +- text: The corrected lyric text for the segment. +- words: A list containing each word in the segment, with fields for: + - text: The correct word. + - start: The start timestamp for the word, estimated if necessary. + - end: The end timestamp for the word, estimated if necessary. + - confidence: A score (0 to 1) indicating the confidence in the accuracy of the word. Retain existing confidence values for unchanged words. The reference data is generally accurate but may have imperfections or missing sections. The transcribed data includes timestamps and confidence scores for each word, but the accuracy of the words is only about 70-90%.