Added working test cases in promptfoo config

nomadkaraoke · Nov 22, 2023 · 60d691e · 60d691e
1 parent 3c21424
commit 60d691e
Show file tree

Hide file tree

Showing 4 changed files with 106 additions and 37 deletions.
diff --git a/lyrics_transcriber/llm_prompts/promptfooconfig.yaml b/lyrics_transcriber/llm_prompts/promptfooconfig.yaml
@@ -1,39 +1,61 @@
 # This configuration runs each prompt through a series of example inputs and checks whether the outputs meet the requirements.
 # Learn more: https://promptfoo.dev/docs/configuration/guide
 
+description: Song lyric corrector for a karaoke video studio, responsible for reading lyrics inputs, correcting them and generating JSON-based responses containing the corrected lyrics according to predefined criteria.
+providers:
+  - id: openai:gpt-3.5-turbo-1106
+    config:
+      temperature: 0
+  # - id: openai:gpt-4-1106-preview
+  #   config:
+  #     temperature: 0
 prompts:
-  - file://llm_prompt_lyrics_correction_*.txt
-providers: [openai:gpt-3.5-turbo-0613, openai:gpt-4-1106-preview]
+  - file://llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt
+
+defaultTest:
+  assert:
+    - type: is-json
+      value:
+        required: [id, text, words]
+        type: object
+        properties:
+          id:
+            type: number
+          text:
+            type: string
+          words:
+            type: array
+            items:
+              type: object
+              properties:
+                text:
+                  type: string
+                start:
+                  type: number
+                end:
+                  type: number
+                confidence:
+                  type: number
+
 tests:
-  - description: First test case - automatic review
+  - description: ABBA - Under Attack (segment 0)
     vars:
-      var1: first variable's value
-      var2: another value
-      var3: some other value
-    # For more information on assertions, see https://promptfoo.dev/docs/configuration/expected-outputs
+      reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
+      previous_two_corrected_lines: ""  # first segment has no preceding lines; explicit empty string (a bare key parses as null)
+      upcoming_two_uncorrected_lines: ""  # left empty for this test; explicit empty string (a bare key parses as null)
+      segment_input: |
+        {"id": 0, "start": 17.46, "end": 21.3, "confidence": 0.792, "text": " Don't know how to take it, don't know where to go", "words": [{"text": "Don't", "start": 17.46, "end": 18.2, "confidence": 0.278}, {"text": "know", "start": 18.2, "end": 18.42, "confidence": 0.965}, {"text": "how", "start": 18.42, "end": 18.66, "confidence": 0.865}, {"text": "to", "start": 18.66, "end": 18.88, "confidence": 0.994}, {"text": "take", "start": 18.88, "end": 19.2, "confidence": 0.992}, {"text": "it,", "start": 19.2, "end": 19.44, "confidence": 0.974}, {"text": "don't", "start": 19.56, "end": 19.8, "confidence": 0.917}, {"text": "know", "start": 19.8, "end": 20.02, "confidence": 0.989}, {"text": "where", "start": 20.02, "end": 20.46, "confidence": 0.963}, {"text": "to", "start": 20.46, "end": 20.76, "confidence": 0.983}, {"text": "go", "start": 20.76, "end": 21.3, "confidence": 0.982}]}
     assert:
-      - type: equals
-        value: expected LLM output goes here
       - type: contains
-        value: some text
-      - type: javascript
-        value: 1 / (output.length + 1)  # prefer shorter outputs
+        value: "Don't know how to take it, don't know where to go"
 
-  - description: Second test case - manual review
-    # Test cases don't need assertions if you prefer to manually review the output
+  - description: ABBA - Under Attack (segment 1)
     vars:
-      var1: new value
-      var2: another value
-      var3: third value
-
-  - description: Third test case - other types of automatic review
-    vars:
-      var1: yet another value
-      var2: and another
-      var3: dear llm, please output your response in json format
+      reference_lyrics: file://test_data/ABBA-UnderAttack-Genius.txt
+      previous_two_corrected_lines: ""  # left empty for this test; explicit empty string (a bare key parses as null)
+      upcoming_two_uncorrected_lines: ""  # left empty for this test; explicit empty string (a bare key parses as null)
+      segment_input: |
+        {"id": 1, "start": 22.04, "end": 27.84, "confidence": 0.763, "text": " My resistance running low And every day the hole is getting tighter", "words": [{"text": "My", "start": 22.04, "end": 22.32, "confidence": 0.535}, {"text": "resistance", "start": 22.32, "end": 22.94, "confidence": 0.936}, {"text": "running", "start": 22.94, "end": 23.66, "confidence": 0.89}, {"text": "low", "start": 23.66, "end": 24.36, "confidence": 0.999}, {"text": "And", "start": 24.36, "end": 25.14, "confidence": 0.485}, {"text": "every", "start": 25.14, "end": 25.56, "confidence": 0.568}, {"text": "day", "start": 25.56, "end": 25.88, "confidence": 0.997}, {"text": "the", "start": 25.88, "end": 26.1, "confidence": 0.959}, {"text": "hole", "start": 26.1, "end": 26.48, "confidence": 0.361}, {"text": "is", "start": 26.48, "end": 26.68, "confidence": 0.947}, {"text": "getting", "start": 26.68, "end": 27.08, "confidence": 0.996}, {"text": "tighter", "start": 27.08, "end": 27.84, "confidence": 0.975}]}
     assert:
-      - type: contains-json
-      - type: similar
-        value: ensures that output is semantically similar to this text
-      - type: model-graded-closedqa
-        value: ensure that output contains a reference to X
+      - type: contains
+        value: "My resistance running low And every day the hold is getting tighter"
diff --git a/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt b/lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt
@@ -0,0 +1,48 @@
+Don't know how to take it, don't know where to go
+My resistance running low
+And every day the hold is getting tighter and it troubles me so
+(You know that I'm nobody's fool)
+I'm nobody's fool and yet it's clear to me
+I don't have a strategy
+It's just like taking candy from a baby and I think I must be
+
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing can stop him now
+Should I want to, I'm not sure I would know how
+
+This is getting crazy, I should tell him so
+Really let my anger show
+Persuade him that the answer to his questions is a definite no
+(I'm kind of flattered I suppose)
+Guess I'm kind of flattered but I'm scared as well
+Something like a magic spell
+I hardly dare to think of what would happen, where I'd be if I fell
+
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing's gonna stop him now
+Should I want to, I'm not sure I won't know how
+
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody see and save a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
+He's on my track, my chasing lover
+Thinking nothing can stop him now
+Should I want to, I'm not sure I would know how
+
+Under attack, I'm being taken
+About to crack, defences breaking
+Won't somebody please have a heart
+Come and rescue me now 'cause I'm falling apart
+Under attack, I'm taking cover
diff --git a/lyrics_transcriber/transcriber.py b/lyrics_transcriber/transcriber.py
@@ -303,14 +303,13 @@ def write_corrected_lyrics_data_file(self):
                 previous_two_corrected_lines = ""
                 upcoming_two_uncorrected_lines = ""
 
-                if segment["id"] > 2:
-                    for previous_segment in corrected_lyrics_dict["segments"]:
-                        if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
-                            previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
-
-                    for next_segment in self.outputs["transcription_data_dict"]["segments"]:
-                        if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
-                            upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
+                for previous_segment in corrected_lyrics_dict["segments"]:
+                    if previous_segment["id"] in (segment["id"] - 2, segment["id"] - 1):
+                        previous_two_corrected_lines += previous_segment["text"].strip() + "\n"
+
+                for next_segment in self.outputs["transcription_data_dict"]["segments"]:
+                    if next_segment["id"] in (segment["id"] + 1, segment["id"] + 2):
+                        upcoming_two_uncorrected_lines += next_segment["text"].strip() + "\n"
 
                 llm_transcript_segment += f"--- Segment {segment['id']} / {total_segments} ---\n"
                 llm_transcript_segment += f"Previous two corrected lines:\n\n{previous_two_corrected_lines}\nUpcoming two uncorrected lines:\n\n{upcoming_two_uncorrected_lines}\nData input:\n\n{segment_input}\n"

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "lyrics-transcriber"
-version = "0.12.8"
+version = "0.12.9"
 description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
 authors = ["Andrew Beveridge <[email protected]>"]
 license = "MIT"