diff --git a/lyrics_transcriber/correction/corrector.py b/lyrics_transcriber/correction/corrector.py
index 38cbb2d..9df196b 100644
--- a/lyrics_transcriber/correction/corrector.py
+++ b/lyrics_transcriber/correction/corrector.py
@@ -6,6 +6,9 @@
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 from lyrics_transcriber.correction.handlers.word_count_match import WordCountMatchHandler
 from lyrics_transcriber.correction.handlers.extra_words import ExtraWordsHandler
+from lyrics_transcriber.correction.handlers.sound_alike import SoundAlikeHandler
+from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
+from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
 
 
 class LyricsCorrector:
@@ -26,12 +29,9 @@ def __init__(
         self.handlers = handlers or [
             WordCountMatchHandler(),
             ExtraWordsHandler(),
-            # AnchorWordsInGapHandler(), # "Correct" words which are in the gap but are identical in the reference
-            # CombinedHandler(),  # Try combined matching first
-            # MetaphoneHandler(),  # Fall back to individual matchers
-            # SemanticHandler(),
-            # MultiWordLevenshteinHandler(),
-            # LevenshteinSimilarityHandler(),  # Last resort
+            RepeatCorrectionHandler(),
+            SoundAlikeHandler(),
+            LevenshteinHandler(),  # Last resort
             # HumanHandler(),  # Open web UI for human to review and correct
         ]
 
@@ -143,6 +143,11 @@ def _process_gaps(self, gap_sequences: List[GapSequence]) -> List[WordCorrection
                     break
 
                 self.logger.debug(f"Trying handler {handler.__class__.__name__}")
+
+                # Pass previous corrections to RepeatCorrectionHandler
+                if isinstance(handler, RepeatCorrectionHandler):
+                    handler.set_previous_corrections(all_corrections)
+
                 if handler.can_handle(gap):
                     self.logger.debug(f"{handler.__class__.__name__} can handle gap")
                     corrections = handler.handle(gap)
diff --git a/lyrics_transcriber/correction/handlers/__init__.py b/lyrics_transcriber/correction/handlers/__init__.py
index 6dba361..13a20aa 100644
--- a/lyrics_transcriber/correction/handlers/__init__.py
+++ b/lyrics_transcriber/correction/handlers/__init__.py
@@ -1,17 +1,15 @@
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 from lyrics_transcriber.correction.handlers.word_count_match import WordCountMatchHandler
-from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinSimilarityHandler
-from lyrics_transcriber.correction.handlers.multi_levenshtein import MultiWordLevenshteinHandler
-from lyrics_transcriber.correction.handlers.metaphone import MetaphoneHandler
-from lyrics_transcriber.correction.handlers.semantic import SemanticHandler
-from lyrics_transcriber.correction.handlers.combined import CombinedHandler
+from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
+from lyrics_transcriber.correction.handlers.sound_alike import SoundAlikeHandler
+from lyrics_transcriber.correction.handlers.extra_words import ExtraWordsHandler
+from lyrics_transcriber.correction.handlers.human import HumanHandler
 
 __all__ = [
     "GapCorrectionHandler",
     "WordCountMatchHandler",
-    "LevenshteinSimilarityHandler",
-    "MultiWordLevenshteinHandler",
-    "MetaphoneHandler",
-    "SemanticHandler",
-    "CombinedHandler",
+    "LevenshteinHandler",
+    "SoundAlikeHandler",
+    "ExtraWordsHandler",
+    "HumanHandler",
 ]
diff --git a/lyrics_transcriber/correction/handlers/combined.py b/lyrics_transcriber/correction/handlers/combined.py
deleted file mode 100644
index 9c357a6..0000000
--- a/lyrics_transcriber/correction/handlers/combined.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from typing import Dict, List, Optional, Set, Tuple
-
-from lyrics_transcriber.types import GapSequence, Word, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-from lyrics_transcriber.correction.handlers.metaphone import MetaphoneHandler
-from lyrics_transcriber.correction.handlers.semantic import SemanticHandler
-
-
-class CombinedHandler(GapCorrectionHandler):
-    """Combines phonetic and semantic matching with weighted scoring."""
-
-    def __init__(
-        self,
-        phonetic_weight: float = 0.6,
-        semantic_weight: float = 0.4,
-        combined_threshold: float = 0.5,
-        phonetic_threshold: float = 0.4,
-        semantic_threshold: float = 0.3,
-    ):
-        self.phonetic_matcher = MetaphoneHandler()
-        self.semantic_matcher = SemanticHandler()
-        self.phonetic_weight = phonetic_weight
-        self.semantic_weight = semantic_weight
-        self.combined_threshold = combined_threshold
-        self.phonetic_threshold = phonetic_threshold
-        self.semantic_threshold = semantic_threshold
-
-    def _find_best_match(self, word: str, reference_words: Dict[str, List[str]]) -> Tuple[Optional[str], float, float, float, Set[str]]:
-        """Find the best matching reference word using combined scoring."""
-        best_match = None
-        best_combined_score = 0.0
-        best_phonetic_score = 0.0
-        best_semantic_score = 0.0
-        matching_sources = set()
-
-        # Get unique reference words
-        all_ref_words = {w for words in reference_words.values() for w in words}
-
-        for ref_word in all_ref_words:
-            # Get phonetic similarity
-            phonetic_score = self.phonetic_matcher._get_phonetic_similarity(word, ref_word)
-
-            # Get semantic similarity
-            semantic_score = self.semantic_matcher._get_semantic_similarity(word, ref_word)
-
-            # Calculate combined score
-            combined_score = (phonetic_score * self.phonetic_weight) + (semantic_score * self.semantic_weight)
-
-            # Check if this is a better match
-            if (
-                combined_score > best_combined_score
-                and phonetic_score >= self.phonetic_threshold
-                and semantic_score >= self.semantic_threshold
-            ):
-                best_combined_score = combined_score
-                best_phonetic_score = phonetic_score
-                best_semantic_score = semantic_score
-                best_match = ref_word
-                matching_sources = {source for source, words in reference_words.items() if ref_word in words}
-
-        return best_match, best_phonetic_score, best_semantic_score, best_combined_score, matching_sources
-
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
-        """Check if we can handle this gap."""
-        return bool(gap.reference_words)
-
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Try to correct word using combined matching."""
-        if not word.text.strip():
-            return None
-
-        best_match, phonetic_score, semantic_score, combined_score, matching_sources = self._find_best_match(word.text, gap.reference_words)
-
-        if best_match and combined_score >= self.combined_threshold and best_match.lower() != word.text.lower():
-            return WordCorrection(
-                original_word=word.text,
-                corrected_word=best_match,
-                segment_index=segment_idx,
-                word_index=current_word_idx,
-                confidence=combined_score,
-                source=", ".join(matching_sources),
-                reason=f"Combined matching (phonetic: {phonetic_score:.2f}, semantic: {semantic_score:.2f})",
-                alternatives={},
-            )
-
-        return None
diff --git a/lyrics_transcriber/correction/handlers/human.py b/lyrics_transcriber/correction/handlers/human.py
index ca654dd..11d929b 100644
--- a/lyrics_transcriber/correction/handlers/human.py
+++ b/lyrics_transcriber/correction/handlers/human.py
@@ -7,9 +7,9 @@
 class HumanHandler(GapCorrectionHandler):
     """Handles gaps by opening a web UI for human to review the corrections made and manually fix any last gaps."""
 
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
+    def can_handle(self, gap: GapSequence) -> bool:
         return True
 
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
+    def handle(self, gap: GapSequence) -> Optional[WordCorrection]:
         # TODO: Open web UI for human to review the corrections made and manually fix any last gaps
         return None
diff --git a/lyrics_transcriber/correction/handlers/levenshtein.py b/lyrics_transcriber/correction/handlers/levenshtein.py
index 925e460..1209ab4 100644
--- a/lyrics_transcriber/correction/handlers/levenshtein.py
+++ b/lyrics_transcriber/correction/handlers/levenshtein.py
@@ -1,23 +1,126 @@
+from typing import List, Optional
 import string
-from typing import Dict, List, Optional, Set, Tuple
 import Levenshtein
+import logging
 
-from lyrics_transcriber.types import GapSequence, Word, WordCorrection
+from lyrics_transcriber.types import GapSequence, WordCorrection
 from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
 
 
-class LevenshteinSimilarityHandler(GapCorrectionHandler):
-    """Handles corrections based on Levenshtein (edit distance) similarity between words."""
-
-    def __init__(self, similarity_threshold: float = 0.65):
+class LevenshteinHandler(GapCorrectionHandler):
+    """Handles corrections based on Levenshtein (edit distance) similarity between words.
+
+    This handler looks for words that are similar in spelling to reference words in the same position.
+    The similarity calculation includes:
+    1. Basic Levenshtein ratio
+    2. Bonus for words starting with the same letter
+    3. Penalty for words starting with different letters
+    4. Bonus for similar length words
+
+    Examples:
+        Gap: "wold" (misspelling)
+        References:
+            genius: ["world"]
+            spotify: ["world"]
+        Result:
+            - Correct "wold" to "world" (high confidence due to small edit distance)
+
+        Gap: "worde" (misspelling)
+        References:
+            genius: ["world"]
+            spotify: ["words"]
+        Result:
+            - Correct "worde" to "world" (lower confidence due to disagreeing sources)
+    """
+
+    def __init__(self, similarity_threshold: float = 0.65, logger: Optional[logging.Logger] = None):
         self.similarity_threshold = similarity_threshold
+        self.logger = logger or logging.getLogger(__name__)
+
+    def can_handle(self, gap: GapSequence) -> bool:
+        """Report whether some gap word is similar enough to a reference word at the same position."""
+        if not gap.reference_words:
+            self.logger.debug("No reference words available")
+            return False
+        if not gap.words:
+            self.logger.debug("No gap words available")
+            return False
+
+        # Walk the gap position by position; a source with no word at that position is skipped
+        for position, candidate in enumerate(gap.words):
+            for source_words in gap.reference_words.values():
+                if position >= len(source_words):
+                    continue
+                score = self._get_string_similarity(candidate, source_words[position])
+                if score >= self.similarity_threshold:
+                    self.logger.debug(f"Found similar word: '{candidate}' -> '{source_words[position]}' ({score:.2f})")
+                    return True
+
+        self.logger.debug("No words meet similarity threshold")
+        return False
+
+    def handle(self, gap: GapSequence) -> List[WordCorrection]:
+        """Return corrections for gap words that closely resemble the reference word at the same index.
+
+        Whitespace-only words, and words equal (ignoring case) to any source's word at that index, are
+        skipped. Confidence is the similarity scaled by the fraction of sources backing the chosen word.
+        """
+        corrections = []
+
+        for idx, gap_word in enumerate(gap.words):
+            if not gap_word.strip():
+                continue
+
+            # A case-insensitive hit in any source at this index means there is nothing to fix
+            lowered = gap_word.lower()
+            if any(idx < len(refs) and lowered == refs[idx].lower() for refs in gap.reference_words.values()):
+                self.logger.debug(f"Skipping exact match: '{gap_word}'")
+                continue
+
+            # Maps each candidate word to (sources proposing it, its similarity to the gap word)
+            candidates = {}
+            for source, refs in gap.reference_words.items():
+                if idx >= len(refs):
+                    continue
+                candidate = refs[idx]
+                score = self._get_string_similarity(gap_word, candidate)
+                if score >= self.similarity_threshold:
+                    self.logger.debug(f"Found match: '{gap_word}' -> '{candidate}' ({score:.2f})")
+                    candidates.setdefault(candidate, ([], score))[0].append(source)
+
+            if not candidates:
+                continue
+
+            # The candidate backed by the most sources wins; similarity breaks ties
+            best_match, (sources, similarity) = max(
+                candidates.items(), key=lambda item: (len(item[1][0]), item[1][1])
+            )
+            source_share = len(sources) / len(gap.reference_words)
+            final_confidence = similarity * source_share
+
+            self.logger.debug(f"Creating correction: {gap_word} -> {best_match} (confidence: {final_confidence})")
+            corrections.append(
+                WordCorrection(
+                    original_word=gap_word,
+                    corrected_word=best_match,
+                    segment_index=0,
+                    word_index=gap.transcription_position + idx,
+                    confidence=final_confidence,
+                    source=", ".join(sources),
+                    reason=f"LevenshteinHandler: String similarity ({final_confidence:.2f})",
+                    alternatives={alt: len(backers) for alt, (backers, _) in candidates.items()},
+                    is_deletion=False,
+                )
+            )
+
+        return corrections
 
     def _clean_word(self, word: str) -> str:
         """Remove punctuation and standardize for comparison."""
         return word.strip().lower().strip(string.punctuation)
 
     def _get_string_similarity(self, word1: str, word2: str) -> float:
-        """Calculate string similarity using Levenshtein ratio."""
+        """Calculate string similarity using Levenshtein ratio with adjustments."""
         # Clean words
         w1, w2 = self._clean_word(word1), self._clean_word(word2)
         if not w1 or not w2:
@@ -38,50 +141,3 @@ def _get_string_similarity(self, word1: str, word2: str) -> float:
         similarity = (similarity + length_ratio) / 2
 
         return similarity
-
-    def _find_best_match(self, word: str, reference_words: Dict[str, List[str]]) -> Tuple[Optional[str], float, Set[str]]:
-        """Find the best matching reference word across all sources."""
-        best_match = None
-        best_similarity = 0.0
-        matching_sources = set()
-
-        # Get unique reference words
-        all_ref_words = {w for words in reference_words.values() for w in words}
-
-        for ref_word in all_ref_words:
-            similarity = self._get_string_similarity(word, ref_word)
-
-            if similarity > best_similarity:
-                best_similarity = similarity
-                best_match = ref_word
-                matching_sources = {source for source, words in reference_words.items() if ref_word in words}
-
-        return best_match, best_similarity, matching_sources
-
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
-        """Check if we can handle this gap - we'll try if there are reference words."""
-        return bool(gap.reference_words)
-
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Try to correct word based on string similarity."""
-        # Skip if word is empty or just punctuation
-        if not word.text.strip():
-            return None
-
-        # Find best matching reference word
-        best_match, similarity, matching_sources = self._find_best_match(word.text, gap.reference_words)
-
-        # Return correction if we found a good match
-        if best_match and similarity >= self.similarity_threshold and best_match.lower() != word.text.lower():
-            return WordCorrection(
-                original_word=word.text,
-                corrected_word=best_match,
-                segment_index=segment_idx,
-                word_index=current_word_idx,
-                confidence=similarity,
-                source=", ".join(matching_sources),
-                reason=f"String similarity ({similarity:.2f})",
-                alternatives={},
-            )
-
-        return None
diff --git a/lyrics_transcriber/correction/handlers/metaphone.py b/lyrics_transcriber/correction/handlers/metaphone.py
deleted file mode 100644
index adfe4a2..0000000
--- a/lyrics_transcriber/correction/handlers/metaphone.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from typing import Dict, List, Optional, Set, Tuple
-from metaphone import doublemetaphone
-from nltk.metrics import edit_distance
-
-from lyrics_transcriber.types import GapSequence, Word, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-
-
-class MetaphoneHandler(GapCorrectionHandler):
-    """Handles corrections using Double Metaphone phonetic algorithm."""
-
-    def __init__(self, similarity_threshold: float = 0.7):
-        self.similarity_threshold = similarity_threshold
-
-    def _get_phonetic_similarity(self, word1: str, word2: str) -> float:
-        """Calculate phonetic similarity between two words using Double Metaphone."""
-        # Get phonetic codes
-        code1_primary, code1_secondary = doublemetaphone(word1)
-        code2_primary, code2_secondary = doublemetaphone(word2)
-
-        # Handle empty codes
-        if not code1_primary or not code2_primary:
-            return 0.0
-
-        # Compare primary codes
-        primary_similarity = 1 - (edit_distance(code1_primary, code2_primary) / max(len(code1_primary), len(code2_primary)))
-
-        # Compare secondary codes if available
-        if code1_secondary and code2_secondary:
-            secondary_similarity = 1 - (edit_distance(code1_secondary, code2_secondary) / max(len(code1_secondary), len(code2_secondary)))
-            return max(primary_similarity, secondary_similarity)
-
-        return primary_similarity
-
-    def _find_best_match(self, word: str, reference_words: Dict[str, List[str]]) -> Tuple[Optional[str], float, Set[str]]:
-        """Find the best matching reference word across all sources."""
-        best_match = None
-        best_similarity = 0.0
-        matching_sources = set()
-
-        # Get unique reference words
-        all_ref_words = {w for words in reference_words.values() for w in words}
-
-        for ref_word in all_ref_words:
-            similarity = self._get_phonetic_similarity(word, ref_word)
-
-            if similarity > best_similarity:
-                best_similarity = similarity
-                best_match = ref_word
-                matching_sources = {source for source, words in reference_words.items() if ref_word in words}
-
-        return best_match, best_similarity, matching_sources
-
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
-        """Check if we can handle this gap."""
-        return bool(gap.reference_words)
-
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Try to correct word based on phonetic similarity."""
-        if not word.text.strip():
-            return None
-
-        best_match, similarity, matching_sources = self._find_best_match(word.text, gap.reference_words)
-
-        if best_match and similarity >= self.similarity_threshold and best_match.lower() != word.text.lower():
-            return WordCorrection(
-                original_word=word.text,
-                corrected_word=best_match,
-                segment_index=segment_idx,
-                word_index=current_word_idx,
-                confidence=similarity,
-                source=", ".join(matching_sources),
-                reason=f"Metaphone phonetic similarity ({similarity:.2f})",
-                alternatives={},
-            )
-
-        return None
diff --git a/lyrics_transcriber/correction/handlers/multi_levenshtein.py b/lyrics_transcriber/correction/handlers/multi_levenshtein.py
deleted file mode 100644
index 7f2d20d..0000000
--- a/lyrics_transcriber/correction/handlers/multi_levenshtein.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from typing import List, Optional, Tuple
-
-from lyrics_transcriber.types import GapSequence, Word, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinSimilarityHandler
-
-
-class MultiWordLevenshteinHandler(GapCorrectionHandler):
-    """Handles corrections by matching sequences of words."""
-
-    def __init__(self, similarity_threshold: float = 0.65):
-        self.similarity_threshold = similarity_threshold
-        self.levenshtein_matcher = LevenshteinSimilarityHandler(similarity_threshold)
-
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
-        """Check if we can handle this gap."""
-        if not gap.reference_words:
-            return False
-
-        # Don't handle cases where sources disagree
-        ref_words_lists = list(gap.reference_words.values())
-        if not all(words == ref_words_lists[0] for words in ref_words_lists[1:]):
-            return False
-
-        # Don't handle cases where reference has different length than gap
-        if any(len(words) != len(gap.words) for words in gap.reference_words.values()):
-            return False
-
-        return True
-
-    def _align_sequences(self, gap_words: List[str], ref_words: List[str]) -> List[Tuple[Optional[str], Optional[str], float]]:
-        """Align two sequences of words and return matches with confidence scores."""
-        alignments = []
-
-        # For each gap word, try to find the best match in the reference words
-        for i, gap_word in enumerate(gap_words):
-            best_match = None
-            best_score = 0.0
-
-            # First, try exact position match if available
-            if i < len(ref_words):
-                ref_word = ref_words[i]
-                # Use a base position confidence even if words aren't similar
-                position_score = 0.7  # Base confidence for position match
-
-                # If words are similar, boost the confidence
-                similarity = self.levenshtein_matcher._get_string_similarity(gap_word, ref_word)
-                score = max(position_score, similarity)
-
-                if score >= self.similarity_threshold:
-                    best_match = ref_word
-                    best_score = score
-
-            alignments.append((gap_word, best_match, best_score))
-
-        return alignments
-
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Try to correct word based on sequence alignment."""
-        if not word.text.strip():
-            return None
-
-        gap_pos = current_word_idx - gap.transcription_position
-
-        best_alignment = None
-        best_confidence = 0.0
-        best_sources = set()
-
-        for source, ref_words in gap.reference_words.items():
-            alignments = self._align_sequences(gap.words, ref_words)
-
-            if gap_pos < len(alignments):
-                gap_word, correction, confidence = alignments[gap_pos]
-
-                if correction and correction.lower() == word.text.lower():
-                    return None
-
-                if correction and confidence > best_confidence:
-                    best_alignment = correction
-                    best_confidence = confidence
-                    best_sources = {source}
-                elif correction and confidence == best_confidence:
-                    best_sources.add(source)
-
-        if best_alignment and best_confidence >= self.similarity_threshold:
-            return WordCorrection(
-                original_word=word.text,
-                corrected_word=best_alignment,
-                segment_index=segment_idx,
-                word_index=current_word_idx,
-                confidence=best_confidence,
-                source=", ".join(best_sources),
-                reason=f"Sequence alignment ({best_confidence:.2f})",
-                alternatives={},
-            )
-
-        return None
diff --git a/lyrics_transcriber/correction/handlers/repeat.py b/lyrics_transcriber/correction/handlers/repeat.py
new file mode 100644
index 0000000..a8c5de3
--- /dev/null
+++ b/lyrics_transcriber/correction/handlers/repeat.py
@@ -0,0 +1,62 @@
+from typing import List, Dict, Optional
+from lyrics_transcriber.types import GapSequence, WordCorrection
+import logging
+
+
+class RepeatCorrectionHandler:
+    """Handler that re-applies confident corrections made earlier to the same words in later gaps."""
+
+    def __init__(self, logger: Optional[logging.Logger] = None, confidence_threshold: float = 0.7):
+        self.logger = logger or logging.getLogger(__name__)
+        self.confidence_threshold = confidence_threshold  # minimum confidence for a correction to be repeated
+        self.previous_corrections: List[WordCorrection] = []
+
+    def can_handle(self, gap: GapSequence) -> bool:
+        """Return True once any previous corrections are stored; the gap's words are only matched in handle()."""
+        return bool(self.previous_corrections)
+
+    def set_previous_corrections(self, corrections: List[WordCorrection]) -> None:
+        """Store the given list (by reference, not copied) as the corrections handle() may repeat."""
+        self.previous_corrections = corrections
+
+    def handle(self, gap: GapSequence) -> List[WordCorrection]:
+        """Return one correction per gap word that matches a sufficiently confident previous correction.
+
+        Words match case-insensitively. The most frequent earlier replacement wins (ties: higher confidence);
+        its source and deletion flag are kept and its confidence is scaled by 0.9.
+        """
+        corrections = []
+        # Group previous corrections that meet the confidence threshold by lower-cased original word
+        correction_map: Dict[str, List[WordCorrection]] = {}
+        for corr in self.previous_corrections:
+            if corr.confidence >= self.confidence_threshold:
+                correction_map.setdefault(corr.original_word.lower(), []).append(corr)
+
+        for i, word in enumerate(gap.words):
+            prev_corrections = correction_map.get(word.lower())
+            if not prev_corrections:
+                continue
+            # Tally each replacement once instead of rescanning the list for every candidate
+            tally: Dict[str, int] = {}
+            for pc in prev_corrections:
+                tally[pc.corrected_word] = tally.get(pc.corrected_word, 0) + 1
+            best_correction = max(prev_corrections, key=lambda c: (tally[c.corrected_word], c.confidence))
+            self.logger.debug(
+                f"Applying previous correction: {word} -> {best_correction.corrected_word} "
+                f"(confidence: {best_correction.confidence:.2f})"
+            )
+            corrections.append(
+                WordCorrection(
+                    original_word=word,
+                    corrected_word=best_correction.corrected_word,
+                    segment_index=0,
+                    word_index=gap.transcription_position + i,
+                    confidence=best_correction.confidence * 0.9,  # Slightly lower confidence for repeats
+                    source=best_correction.source,
+                    reason="RepeatCorrectionHandler: Matches previous correction",
+                    alternatives={best_correction.corrected_word: 1},
+                    is_deletion=best_correction.is_deletion,  # a repeated deletion must still be flagged as one
+                )
+            )
+
+        return corrections
diff --git a/lyrics_transcriber/correction/handlers/semantic.py b/lyrics_transcriber/correction/handlers/semantic.py
deleted file mode 100644
index def463d..0000000
--- a/lyrics_transcriber/correction/handlers/semantic.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import torch
-from transformers import AutoTokenizer, AutoModel
-from typing import Dict, List, Optional, Set, Tuple
-
-from lyrics_transcriber.types import GapSequence, Word, WordCorrection
-from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
-
-
-class SemanticHandler(GapCorrectionHandler):
-    """Handles corrections using transformer-based semantic similarity."""
-
-    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2", similarity_threshold: float = 0.3):
-        self.similarity_threshold = similarity_threshold
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModel.from_pretrained(model_name)
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model.to(self.device)
-
-    def _get_embedding(self, text: str) -> torch.Tensor:
-        """Get embedding for a piece of text."""
-        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-        inputs = {k: v.to(self.device) for k, v in inputs.items()}
-
-        with torch.no_grad():
-            outputs = self.model(**inputs)
-            embedding = outputs.last_hidden_state.mean(dim=1)
-
-        return embedding
-
-    def _get_semantic_similarity(self, text1: str, text2: str) -> float:
-        """Calculate semantic similarity between two pieces of text."""
-        emb1 = self._get_embedding(text1)
-        emb2 = self._get_embedding(text2)
-
-        similarity = torch.nn.functional.cosine_similarity(emb1, emb2, dim=1)
-        return similarity.item()
-
-    def _find_best_match(self, word: str, reference_words: Dict[str, List[str]]) -> Tuple[Optional[str], float, Set[str]]:
-        """Find the best matching reference word across all sources."""
-        best_match = None
-        best_similarity = 0.0
-        matching_sources = set()
-
-        # Get unique reference words
-        all_ref_words = {w for words in reference_words.values() for w in words}
-
-        for ref_word in all_ref_words:
-            similarity = self._get_semantic_similarity(word, ref_word)
-
-            if similarity > best_similarity:
-                best_similarity = similarity
-                best_match = ref_word
-                matching_sources = {source for source, words in reference_words.items() if ref_word in words}
-
-        return best_match, best_similarity, matching_sources
-
-    def can_handle(self, gap: GapSequence, current_word_idx: int) -> bool:
-        """Check if we can handle this gap."""
-        return bool(gap.reference_words)
-
-    def handle(self, gap: GapSequence, word: Word, current_word_idx: int, segment_idx: int) -> Optional[WordCorrection]:
-        """Try to correct word based on semantic similarity."""
-        if not word.text.strip():
-            return None
-
-        best_match, similarity, matching_sources = self._find_best_match(word.text, gap.reference_words)
-
-        if best_match and similarity >= self.similarity_threshold and best_match.lower() != word.text.lower():
-            return WordCorrection(
-                original_word=word.text,
-                corrected_word=best_match,
-                segment_index=segment_idx,
-                word_index=current_word_idx,
-                confidence=similarity,
-                source=", ".join(matching_sources),
-                reason=f"Semantic similarity ({similarity:.2f})",
-                alternatives={},
-            )
-
-        return None
diff --git a/lyrics_transcriber/correction/handlers/sound_alike.py b/lyrics_transcriber/correction/handlers/sound_alike.py
new file mode 100644
index 0000000..c16725f
--- /dev/null
+++ b/lyrics_transcriber/correction/handlers/sound_alike.py
@@ -0,0 +1,216 @@
+from typing import List, Dict, Tuple, Optional
+import logging
+from metaphone import doublemetaphone
+from lyrics_transcriber.types import GapSequence, WordCorrection
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+
+
+class SoundAlikeHandler(GapCorrectionHandler):
+    """Handles gaps where words sound similar to reference words but are spelled differently.
+
+    Uses Double Metaphone algorithm to detect sound-alike words. For each word in the gap,
+    it compares the word's phonetic codes against the codes of every reference word.
+
+    The confidence of corrections is based on:
+    1. The phonetic match quality (1.0 for identical codes, lower for partial matches),
+       multiplied by 0.8 when the reference word is at a different position in the gap
+    2. The ratio of reference sources agreeing on the correction
+
+    Examples:
+        Gap: "shush look deep"
+        References:
+            genius: ["search", "look", "deep"]
+            spotify: ["search", "look", "deep"]
+        Result:
+            - Correct "shush" to "search" (confidence based on phonetic match quality)
+            - Leave "look" and "deep" alone (exact matches are skipped, not corrected)
+    """
+
+    def __init__(self, logger: Optional[logging.Logger] = None, similarity_threshold: float = 0.6):
+        """Initialize the handler.
+
+        Args:
+            logger: Optional logger instance
+            similarity_threshold: Minimum confidence threshold for matches (default: 0.6)
+        """
+        self.logger = logger or logging.getLogger(__name__)
+        self.similarity_threshold = similarity_threshold
+
+    def can_handle(self, gap: GapSequence) -> bool:
+        """Return True if any gap word shares some phonetic similarity with a reference word.
+
+        This is only a pre-check: any non-zero match quality qualifies, the
+        similarity threshold itself is applied in handle().
+        """
+        # Must have reference words
+        if not gap.reference_words:
+            self.logger.debug("No reference words available")
+            return False
+
+        # Gap must have words
+        if not gap.words:
+            self.logger.debug("No gap words available")
+            return False
+
+        # Check if any gap word has a metaphone match with any reference word
+        for word in gap.words:
+            word_codes = doublemetaphone(word)
+            self.logger.debug(f"Gap word '{word}' has metaphone codes: {word_codes}")
+            for ref_words in gap.reference_words.values():
+                for ref_word in ref_words:
+                    ref_codes = doublemetaphone(ref_word)
+                    self.logger.debug(f"Reference word '{ref_word}' has metaphone codes: {ref_codes}")
+                    if self._codes_match(word_codes, ref_codes) > 0:
+                        self.logger.debug(f"Found metaphone match between '{word}' and '{ref_word}'")
+                        return True
+        self.logger.debug("No metaphone matches found")
+        return False
+
+    def handle(self, gap: GapSequence) -> List[WordCorrection]:
+        """Create a correction for every gap word that has a sound-alike reference word.
+
+        Words equal (ignoring case) to the reference word at the same position in any
+        source are skipped. Each source is counted at most once per candidate word, so
+        a word repeated within one reference cannot push the confidence above 1.0.
+        """
+        corrections = []
+
+        # Encode each reference word once instead of once per gap word
+        ref_codes_by_source = {source: [doublemetaphone(w) for w in ref_words] for source, ref_words in gap.reference_words.items()}
+
+        # For each word in the gap
+        for i, word in enumerate(gap.words):
+            word_codes = doublemetaphone(word)
+            self.logger.debug(f"Processing '{word}' (codes: {word_codes})")
+
+            # Skip if word exactly matches the same-position word of any reference
+            exact_match = any(i < len(ref_words) and word.lower() == ref_words[i].lower() for ref_words in gap.reference_words.values())
+            if exact_match:
+                continue
+
+            # Sound-alike candidates: unique agreeing sources and best confidence per word
+            sources_by_word: Dict[str, List[str]] = {}
+            confidence_by_word: Dict[str, float] = {}
+            word_is_short = any(len(c) <= 2 for c in word_codes if c)
+
+            for source, ref_words in gap.reference_words.items():
+                for j, ref_word in enumerate(ref_words):
+                    ref_codes = ref_codes_by_source[source][j]
+                    match_confidence = self._get_match_confidence(word_codes, ref_codes)
+                    if match_confidence < self.similarity_threshold:
+                        continue
+
+                    # Special handling for short codes - don't apply position penalty
+                    is_short_code = word_is_short or any(len(c) <= 2 for c in ref_codes if c)
+                    position_multiplier = 1.0 if is_short_code or i == j else 0.8
+                    adjusted_confidence = match_confidence * position_multiplier
+                    if adjusted_confidence < self.similarity_threshold:
+                        continue
+                    # Count a source once even if it contains the word several times
+                    sources = sources_by_word.setdefault(ref_word, [])
+                    if source not in sources:
+                        sources.append(source)
+                    # Keep the best-positioned occurrence, independent of source order
+                    confidence_by_word[ref_word] = max(confidence_by_word.get(ref_word, 0.0), adjusted_confidence)
+
+            if not sources_by_word:
+                continue
+
+            # Best match: most agreeing sources first, then highest confidence
+            best_match = max(sources_by_word, key=lambda w: (len(sources_by_word[w]), confidence_by_word[w]))
+            sources = sources_by_word[best_match]
+            source_confidence = len(sources) / len(gap.reference_words)
+            final_confidence = confidence_by_word[best_match] * source_confidence
+
+            self.logger.debug(f"Found match: {word} -> {best_match} (confidence: {final_confidence:.2f}, sources: {sources})")
+            corrections.append(
+                WordCorrection(
+                    original_word=word,
+                    corrected_word=best_match,
+                    segment_index=0,
+                    word_index=gap.transcription_position + i,
+                    confidence=final_confidence,
+                    source=", ".join(sources),
+                    reason=f"SoundAlikeHandler: Phonetic match ({final_confidence:.2f} confidence)",
+                    alternatives={k: len(v) for k, v in sources_by_word.items()},
+                    is_deletion=False,
+                )
+            )
+
+        return corrections
+
+    def _codes_match(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
+        """Check if two sets of metaphone codes match and return match quality.
+
+        Every non-empty code of one word is compared with every non-empty code of the
+        other; the best pairwise quality wins. Returns 0.0 when nothing matches.
+        """
+        # Get all non-empty codes
+        codes1_set = {c for c in codes1 if c}
+        codes2_set = {c for c in codes2 if c}
+
+        if not codes1_set or not codes2_set:
+            return 0.0
+
+        return max(self._code_pair_quality(code1, code2) for code1 in codes1_set for code2 in codes2_set)
+
+    def _code_pair_quality(self, code1: str, code2: str) -> float:
+        """Return the match quality (0.0 - 1.0) of two single, non-empty metaphone codes."""
+        # Special case for very short codes (like 'A' for 'you')
+        if len(code1) <= 2 or len(code2) <= 2:
+            if code1 == code2:
+                return 1.0
+            if code1 in code2 or code2 in code1:
+                return 0.8
+            if code1[0] == code2[0]:  # Match first character
+                return 0.7
+            return 0.0
+
+        # Both codes have 3+ characters from here on; skip if too different in length
+        if abs(len(code1) - len(code2)) > 3:
+            return 0.0
+
+        # Exact match
+        if code1 == code2:
+            return 1.0
+
+        # Check for shared characters in any position
+        shared_chars = sum(1 for c in code1 if c in code2)
+        if shared_chars >= 2:
+            return 0.7 + (0.1 * shared_chars / max(len(code1), len(code2)))
+
+        # Compare aligned characters (zip stops at the shorter code)
+        differences = sum(1 for a, b in zip(code1, code2) if a != b)
+        if differences <= 2:
+            return 0.85 - (differences * 0.1)
+
+        # Common prefix/suffix match; a shared 2+ character substring cannot occur
+        # here because it would already have satisfied the shared-character check
+        common_prefix_len = 0
+        for a, b in zip(code1, code2):
+            if a != b:
+                break
+            common_prefix_len += 1
+
+        common_suffix_len = 0
+        for a, b in zip(code1[::-1], code2[::-1]):
+            if a != b:
+                break
+            common_suffix_len += 1
+
+        if common_prefix_len >= 1 or common_suffix_len >= 1:
+            return 0.7 + (0.1 * max(common_prefix_len, common_suffix_len))
+        return 0.0
+
+    def _get_match_confidence(self, codes1: Tuple[str, str], codes2: Tuple[str, str]) -> float:
+        """Calculate confidence score for a metaphone code match."""
+        match_quality = self._codes_match(codes1, codes2)
+        if match_quality == 0:
+            return 0.0
+
+        # Boost confidence when the primary codes share their first two characters
+        code1, code2 = codes1[0], codes2[0]
+        if code1 and code2 and len(code1) >= 2 and len(code2) >= 2 and code1[:2] == code2[:2]:
+            match_quality = min(1.0, match_quality + 0.1)
+
+        return match_quality
diff --git a/tests/correction/handlers/test_levenshtein.py b/tests/correction/handlers/test_levenshtein.py
new file mode 100644
index 0000000..2f900da
--- /dev/null
+++ b/tests/correction/handlers/test_levenshtein.py
@@ -0,0 +1,119 @@
+import pytest
+import logging
+from lyrics_transcriber.correction.handlers.levenshtein import LevenshteinHandler
+from lyrics_transcriber.types import GapSequence
+
+
+@pytest.fixture
+def logger():
+    debug_logger = logging.getLogger("test_levenshtein")  # named logger for this test module
+    debug_logger.setLevel(logging.DEBUG)
+    return debug_logger
+
+
+def test_handle_basic_example(logger):
+    """Near-miss spellings are corrected to the words both references agree on."""
+    reference_words = {"genius": ["world", "words"], "spotify": ["world", "words"]}
+    gap = GapSequence(
+        words=("wold", "worde"),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words=reference_words,
+    )
+    corrections = LevenshteinHandler(logger=logger).handle(gap)
+
+    assert len(corrections) == 2
+    first, second = corrections
+
+    # Small edit distance, so confidence is high
+    assert (first.original_word, first.corrected_word) == ("wold", "world")
+    assert first.confidence > 0.8
+    assert first.source == "genius, spotify"
+
+    assert (second.original_word, second.corrected_word) == ("worde", "words")
+    assert second.confidence > 0.7
+
+
+def test_handle_sound_alike_example(logger):
+    """A sound-alike but textually distant word yields at most one correction."""
+    references = {"genius": ["search", "look", "deep"], "spotify": ["search", "look", "deep"]}
+    gap = GapSequence(
+        words=("shush", "look", "deep"),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words=references,
+    )
+    handler = LevenshteinHandler(logger=logger)
+    # Log (but do not assert on) whether the handler claims this gap
+    can_handle = handler.can_handle(gap)
+    logger.debug(f"Can handle 'shush' -> 'search': {can_handle}")
+
+    corrections = handler.handle(gap)
+    logger.debug(f"Corrections for sound-alike example: {corrections}")
+
+    # "shush" and "search" are far apart in edit distance, so finding no match is fine
+    assert len(corrections) < 2
+
+
+def test_handle_disagreeing_references(logger):
+    """One correction is still produced when sources disagree, with reduced confidence."""
+    disagreeing = {"genius": ["world"], "spotify": ["words"]}
+    gap = GapSequence(
+        words=("worde",),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words=disagreeing,
+    )
+    corrections = LevenshteinHandler(logger=logger).handle(gap)
+
+    assert len(corrections) == 1
+    assert corrections[0].confidence < 0.8  # Sources disagree, so confidence is lower
+
+
+def test_preserves_exact_matches(logger):
+    """Only the misspelled word is corrected; words already matching are left alone."""
+    lyrics = ["world", "words", "test"]
+    gap = GapSequence(
+        words=("wold", "words", "test"),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": list(lyrics), "spotify": list(lyrics)},
+    )
+
+    corrections = LevenshteinHandler(logger=logger).handle(gap)
+
+    assert len(corrections) == 1
+    only = corrections[0]
+    assert (only.original_word, only.corrected_word) == ("wold", "world")
+
+
+def test_similarity_thresholds(logger):
+    """Dissimilar words are rejected at both thresholds; a near match is accepted."""
+    handler = LevenshteinHandler(similarity_threshold=0.8, logger=logger)
+    unrelated_gap = GapSequence(
+        words=("completely",),  # More different from reference
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["different"], "spotify": ["different"]},
+    )
+    near_miss_gap = GapSequence(
+        words=("worde",),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["words"], "spotify": ["words"]},
+    )
+
+    # Strict threshold: the unrelated word must not match
+    assert handler.can_handle(unrelated_gap) is False
+    # Relaxed threshold: still too different to match
+    handler.similarity_threshold = 0.6
+    assert handler.can_handle(unrelated_gap) is False
+
+    # A one-letter slip does match (threshold is still 0.6 at this point)
+    assert handler.can_handle(near_miss_gap) is True
diff --git a/tests/correction/handlers/test_repeat.py b/tests/correction/handlers/test_repeat.py
new file mode 100644
index 0000000..099bde9
--- /dev/null
+++ b/tests/correction/handlers/test_repeat.py
@@ -0,0 +1,181 @@
+import pytest
+import logging
+from lyrics_transcriber.correction.handlers.repeat import RepeatCorrectionHandler
+from lyrics_transcriber.types import GapSequence, WordCorrection
+
+
+@pytest.fixture
+def logger():
+    debug_logger = logging.getLogger("test_repeat")  # named logger for this test module
+    debug_logger.setLevel(logging.DEBUG)
+    return debug_logger
+
+
+def test_cannot_handle_without_previous_corrections(logger):
+    """A handler that was never given previous corrections declines the gap."""
+    gap = GapSequence(
+        words=("test", "words"),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["some", "words"], "spotify": ["some", "words"]},
+    )
+    can_handle = RepeatCorrectionHandler(logger).can_handle(gap)
+    assert not can_handle
+
+
+def test_handle_repeat_correction(logger):
+    """A word corrected earlier is corrected the same way when it appears in a later gap."""
+    handler = RepeatCorrectionHandler(logger)
+    # Set up previous corrections
+    previous_corrections = [
+        WordCorrection(
+            original_word="war",
+            corrected_word="waterloo",
+            segment_index=0,
+            word_index=0,
+            confidence=0.9,
+            source="genius",
+            reason="Previous handler correction",
+            alternatives={},
+            is_deletion=False,
+        )
+    ]
+    handler.set_previous_corrections(previous_corrections)
+
+    # Create gap with same word
+    gap = GapSequence(
+        words=("war", "again"),
+        transcription_position=5,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["some", "words"], "spotify": ["some", "words"]},
+    )
+
+    corrections = handler.handle(gap)
+
+    assert len(corrections) == 1
+    assert corrections[0].original_word == "war"
+    assert corrections[0].corrected_word == "waterloo"
+    assert corrections[0].word_index == 5  # Should use gap's transcription_position
+    assert corrections[0].confidence == pytest.approx(0.81)  # 0.9 * 0.9; tolerant of float rounding
+    assert "previous correction" in corrections[0].reason.lower()
+
+
+def test_handle_multiple_previous_corrections(logger):
+    """When a word was corrected in different ways, the most common correction is reused."""
+    # Same word corrected three times: once to "correction1", twice to "correction2"
+    first = WordCorrection(
+        original_word="word",
+        corrected_word="correction1",
+        segment_index=0,
+        word_index=0,
+        confidence=0.8,
+        source="genius",
+        reason="First correction",
+        alternatives={},
+        is_deletion=False,
+    )
+    second = WordCorrection(
+        original_word="word",
+        corrected_word="correction2",
+        segment_index=0,
+        word_index=1,
+        confidence=0.9,
+        source="spotify",
+        reason="Second correction",
+        alternatives={},
+        is_deletion=False,
+    )
+    third = WordCorrection(
+        original_word="word",
+        corrected_word="correction2",
+        segment_index=0,
+        word_index=2,
+        confidence=0.85,
+        source="genius",
+        reason="Third correction",
+        alternatives={},
+        is_deletion=False,
+    )
+
+    handler = RepeatCorrectionHandler(logger)
+    handler.set_previous_corrections([first, second, third])
+
+    gap = GapSequence(
+        words=("word",),
+        transcription_position=10,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["some"], "spotify": ["some"]},
+    )
+
+    corrections = handler.handle(gap)
+
+    assert len(corrections) == 1
+    repeated = corrections[0]
+    assert repeated.original_word == "word"
+    assert repeated.corrected_word == "correction2"  # Most common earlier correction
+    assert repeated.word_index == 10
+
+
+def test_ignore_low_confidence_corrections(logger):
+    """Earlier corrections below the confidence threshold are not reapplied."""
+    weak_correction = WordCorrection(
+        original_word="test",
+        corrected_word="low_confidence",
+        segment_index=0,
+        word_index=0,
+        confidence=0.6,  # Below the 0.8 threshold configured below
+        source="genius",
+        reason="Low confidence correction",
+        alternatives={},
+        is_deletion=False,
+    )
+
+    gap = GapSequence(
+        words=("test",),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["some"], "spotify": ["some"]},
+    )
+
+    handler = RepeatCorrectionHandler(logger, confidence_threshold=0.8)
+    handler.set_previous_corrections([weak_correction])
+
+    corrections = handler.handle(gap)
+
+    # Should not apply low confidence corrections
+    assert len(corrections) == 0
+
+
+def test_case_insensitive_matching(logger):
+    """An earlier correction is reapplied regardless of the gap word's letter case."""
+    earlier_fix = WordCorrection(
+        original_word="Word",
+        corrected_word="Correction",
+        segment_index=0,
+        word_index=0,
+        confidence=0.9,
+        source="genius",
+        reason="Previous correction",
+        alternatives={},
+        is_deletion=False,
+    )
+    handler = RepeatCorrectionHandler(logger)
+    handler.set_previous_corrections([earlier_fix])
+
+    gap = GapSequence(
+        words=("word", "WORD", "Word"),  # Different cases
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["some", "words"], "spotify": ["some", "words"]},
+    )
+
+    corrections = handler.handle(gap)
+
+    # Should correct all variations
+    assert len(corrections) == 3
+    assert [c.corrected_word for c in corrections] == ["Correction"] * 3
diff --git a/tests/correction/handlers/test_sound_alike.py b/tests/correction/handlers/test_sound_alike.py
new file mode 100644
index 0000000..0616003
--- /dev/null
+++ b/tests/correction/handlers/test_sound_alike.py
@@ -0,0 +1,137 @@
+import pytest
+import logging
+from lyrics_transcriber.correction.handlers.sound_alike import SoundAlikeHandler
+from lyrics_transcriber.types import GapSequence
+
+
+@pytest.fixture
+def logger():
+    debug_logger = logging.getLogger("test_sound_alike")  # named logger for this test module
+    debug_logger.setLevel(logging.DEBUG)
+    return debug_logger
+
+
+def test_handle_phonetic_example(logger):
+    """Common phonetic misspellings are each corrected to the same-position reference word."""
+    handler = SoundAlikeHandler(logger, similarity_threshold=0.7)
+    # (transcribed word, expected correction) in gap order
+    pairs = [
+        ("fone", "phone"),
+        ("lite", "light"),
+        ("nite", "night"),
+    ]
+    expected_words = [fixed for _, fixed in pairs]
+    gap = GapSequence(
+        words=tuple(heard for heard, _ in pairs),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": list(expected_words), "spotify": list(expected_words)},
+    )
+
+    corrections = handler.handle(gap)
+
+    assert len(corrections) == 3  # All words need correction
+    for index, (heard, fixed) in enumerate(pairs):
+        assert corrections[index].original_word == heard
+        assert corrections[index].corrected_word == fixed
+        assert corrections[index].word_index == index
+    assert corrections[0].confidence >= 0.7
+
+
+def test_handle_disagreeing_references(logger):
+    """One correction is still produced when sources disagree, with reduced confidence."""
+    disagreeing = {"genius": ["phone"], "spotify": ["foam"]}
+    gap = GapSequence(
+        words=("fone",),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words=disagreeing,
+    )
+    corrections = SoundAlikeHandler(logger, similarity_threshold=0.7).handle(gap)
+
+    assert len(corrections) == 1
+    assert corrections[0].confidence < 0.7  # Sources disagree, so confidence is lower
+
+
+def test_cannot_handle_no_sound_alike_matches(logger):
+    """handle() yields no corrections when nothing passes the 0.8 threshold."""
+    gap = GapSequence(
+        words=("xyz", "abc", "def"),  # Chosen to sound nothing like the reference words
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["one", "two", "three"], "spotify": ["one", "two", "three"]},
+    )
+    strict_handler = SoundAlikeHandler(logger, similarity_threshold=0.8)
+    corrections = strict_handler.handle(gap)
+    assert len(corrections) == 0  # Should find no matches above threshold
+
+
+def test_handle_preserves_exact_matches(logger):
+    """Only the misspelled word is corrected; words already matching are left alone."""
+    lyrics = ["phone", "light", "night"]
+    gap = GapSequence(
+        words=("fone", "light", "night"),  # middle and last words exact matches
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": list(lyrics), "spotify": list(lyrics)},
+    )
+
+    corrections = SoundAlikeHandler(logger, similarity_threshold=0.7).handle(gap)
+
+    assert len(corrections) == 1
+    only = corrections[0]
+    assert (only.original_word, only.corrected_word) == ("fone", "phone")
+
+
+def test_handle_complex_sound_alike_example(logger):
+    """Expects exactly two corrections, for 'relax' and 'conscience'; 'your' gets none."""
+    references = ["you", "relapse", "unconscious"]
+    gap = GapSequence(
+        words=("relax", "your", "conscience"),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": list(references), "spotify": list(references)},
+    )
+
+    handler = SoundAlikeHandler(logger, similarity_threshold=0.65)
+    corrections = handler.handle(gap)
+
+    # We expect corrections for two words
+    assert len(corrections) == 2
+
+    # Order by word_index so the expectations below line up
+    relax_fix, conscience_fix = sorted(corrections, key=lambda c: c.word_index)
+
+    # First word: "relax" -> "relapse"
+    assert (relax_fix.original_word, relax_fix.corrected_word) == ("relax", "relapse")
+    assert relax_fix.word_index == 0
+    assert relax_fix.confidence >= 0.65
+
+    # Third word: "conscience" -> "unconscious"
+    assert (conscience_fix.original_word, conscience_fix.corrected_word) == ("conscience", "unconscious")
+    assert conscience_fix.word_index == 2
+    assert conscience_fix.confidence >= 0.65
+
+
+def test_handle_substring_code_match(logger):
+    """A lone 'conscience' is corrected to 'unconscious' at the 0.65 threshold."""
+    gap = GapSequence(
+        words=("conscience",),
+        transcription_position=0,
+        preceding_anchor=None,
+        following_anchor=None,
+        reference_words={"genius": ["unconscious"], "spotify": ["unconscious"]},
+    )
+
+    corrections = SoundAlikeHandler(logger, similarity_threshold=0.65).handle(gap)
+
+    assert len(corrections) == 1
+    correction = corrections[0]
+    assert correction.original_word == "conscience"
+    assert correction.corrected_word == "unconscious"
+    assert correction.confidence >= 0.65