From 7e4910cd459ec631e0371429046209c9feb61cf3 Mon Sep 17 00:00:00 2001
From: psong
Date: Wed, 14 Aug 2024 02:22:22 -0700
Subject: [PATCH] Fix string UTF-8 misformatting PANIC

---
Review notes (this section sits after the `---` separator and before the
diff, so it is not part of the commit message or of the applied change):

* Scope: a single hunk in LeanCopilot/Models/ByT5.lean that rewrites the body
  of `detokenize (tokens : Array String) : String`.
* Before: the body was `String.fromUTF8! ⟨tokens.map tokenToByte!⟩`.
* After: the same byte array is passed to `String.fromUTF8?` and the result is
  matched: `some s` yields `s`, `none` yields the empty string `""`.
* Consequence for callers: `detokenize` keeps its signature, but may now
  return `""` where it previously went through the `!` variant. Presumably
  `none` corresponds to byte sequences that are not valid UTF-8, as the
  subject line suggests -- confirm against the Lean `String.fromUTF8?` docs.
* Not changed here: `tokenToByte!` is still called on every token, so whatever
  that helper does on an unknown token is unaffected by this patch.
* NOTE(review): the trailing context line reads `def eosToken := ""`. If the
  target file defines a non-empty literal there (e.g. an angle-bracketed
  marker that was stripped as markup when this patch was extracted), the hunk
  context will not match -- verify before applying.

 LeanCopilot/Models/ByT5.lean | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/LeanCopilot/Models/ByT5.lean b/LeanCopilot/Models/ByT5.lean
index f1cb513..84117ce 100644
--- a/LeanCopilot/Models/ByT5.lean
+++ b/LeanCopilot/Models/ByT5.lean
@@ -281,7 +281,9 @@ def tokenize (text : String) : Array String :=
 
 
 def detokenize (tokens : Array String) : String :=
-  String.fromUTF8! ⟨tokens.map tokenToByte!⟩
+  match (String.fromUTF8? ⟨tokens.map tokenToByte!⟩) with
+  | some s => s
+  | none => ""
 
 
 def eosToken := ""