Skip to content

Commit

Permalink
feat: Added Claude 3 Opus and Claude 3 Sonnet support, with multimoda…
Browse files Browse the repository at this point in the history
…l capabilities! resolve #1062
  • Loading branch information
GaiZhenbiao committed Mar 5, 2024
1 parent 6469bb1 commit 94991b8
Show file tree
Hide file tree
Showing 16 changed files with 239 additions and 131 deletions.
3 changes: 2 additions & 1 deletion ChuanhuChatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import colorama
from modules.gradio_patch import reg_patch

reg_patch()
if not hfspaceflag:
reg_patch()

logging.getLogger("httpx").setLevel(logging.WARNING)

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
| [MiniMax](https://api.minimax.chat/) |
| [XMChat](https://github.com/MILVLG/xmchat) | 不支持流式传输
| [Midjourney](https://www.midjourney.com/) | 不支持流式传输
| [Claude](https://www.anthropic.com/) |
| [Claude](https://www.anthropic.com/) | ✨ 现已支持Claude 3 Opus、Sonnet,Haiku将会在推出后的第一时间支持
| DALL·E 3 |

## 使用技巧
Expand Down
2 changes: 2 additions & 0 deletions locale/en_US.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@
"输入的不是数字,将使用默认值。": "The input is not a number, the default value will be used.",
"额外模型列表": "Extra model list",
"默认模型": "Default model",
"获取资源错误": "Error retrieving resources.",
"该模型不支持多模态输入": "This model does not support multi-modal input.",
" 中。": ".",
" 为: ": " as: ",
" 吗?": " ?",
Expand Down
2 changes: 2 additions & 0 deletions locale/ja_JP.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{
"获取资源错误": "リソースの取得エラー",
"该模型不支持多模态输入": "このモデルはマルチモーダル入力に対応していません。",
" 中。": "中。",
" 为: ": "対:",
" 吗?": " を削除してもよろしいですか?",
Expand Down
2 changes: 2 additions & 0 deletions locale/ko_KR.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{
"获取资源错误": "리소스를 가져오는 중 오류가 발생했습니다",
"该模型不支持多模态输入": "이 모델은 다중 모달 입력을 지원하지 않습니다.",
" 中。": "가운데입니다.",
" 为: ": "되다",
" 吗?": " 을(를) 삭제하시겠습니까?",
Expand Down
94 changes: 48 additions & 46 deletions locale/ru_RU.json

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions locale/sv_SE.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
{
"获取资源错误": "Fel vid hämtning av resurser",
"该模型不支持多模态输入": "Den här modellen stöder inte multimodal inmatning.",
" 中。": "Mitten.",
" 为: ": "För:",
" 吗?": " ?",
Expand Down
94 changes: 48 additions & 46 deletions locale/vi_VN.json

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions modules/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"show_api_billing",
"chat_name_method_index",
"HIDE_MY_KEY",
"hfspaceflag",
]

# 添加一个统一的config文件,避免文件过多造成的疑惑(优先级最低)
Expand Down Expand Up @@ -85,6 +86,8 @@ def load_config_to_environ(key_list):
if os.environ.get("dockerrun") == "yes":
dockerflag = True

hfspaceflag = os.environ.get("HF_SPACE", "false") == "true"

# 处理 api-key 以及 允许的用户列表
my_api_key = config.get("openai_api_key", "")
my_api_key = os.environ.get("OPENAI_API_KEY", my_api_key)
Expand Down
87 changes: 63 additions & 24 deletions modules/models/Claude.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
from ..presets import *
from ..utils import *
Expand All @@ -14,42 +13,82 @@ def __init__(self, model_name, api_secret) -> None:
raise Exception("请在配置文件或者环境变量中设置Claude的API Secret")
self.claude_client = Anthropic(api_key=self.api_secret)

def _get_claude_style_history(self):
    """Convert self.history into Anthropic Messages-API format.

    self.history may contain pseudo-messages with role "image" whose
    content is an image file path. Those are buffered and attached to
    the NEXT user message as base64 image content blocks; when several
    images are buffered, each block is preceded by an "Image N:" text
    label (N is the running count across the whole conversation).
    Assistant messages pass through unchanged. Trailing buffered images
    with no following user message are dropped.
    """
    history = []
    image_buffer = []   # image paths waiting to be attached to the next user turn
    image_count = 0     # running total of images seen, used for "Image N:" labels
    for message in self.history:
        if message["role"] == "user":
            content = []
            if image_buffer:
                if image_count == 1:
                    # Single image in the whole conversation: no label needed.
                    content.append(
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": f"image/{self.get_image_type(image_buffer[0])}",
                                "data": self.get_base64_image(image_buffer[0]),
                            },
                        },
                    )
                else:
                    # Multiple images: label each one so the model can
                    # reference them by number.
                    image_buffer_length = len(image_buffer)
                    for idx, image in enumerate(image_buffer):
                        content.append(
                            {"type": "text", "text": f"Image {image_count - image_buffer_length + idx + 1}:"},
                        )
                        content.append(
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": f"image/{self.get_image_type(image)}",
                                    "data": self.get_base64_image(image),
                                },
                            },
                        )
            if content:
                # Images were attached: append the user's text after them and
                # emit a structured (list-content) user message.
                content.append({"type": "text", "text": message["content"]})
                history.append(construct_user(content))
                image_buffer = []
            else:
                # Plain text turn: forward unchanged.
                history.append(message)
        elif message["role"] == "assistant":
            history.append(message)
        elif message["role"] == "image":
            image_buffer.append(message["content"])
            image_count += 1
    return history

def get_answer_stream_iter(self):
    """Stream a reply from the Anthropic Messages API.

    Yields the accumulated reply text after each received chunk, so the
    UI can re-render the growing message.
    """
    system_prompt = self.system_prompt
    history = self._get_claude_style_history()

    # anthropic 0.18.x expects `system` to be a str or omitted entirely;
    # passing system=None raises a validation error, so only forward it
    # when a system prompt is actually configured.
    extra_kwargs = {} if system_prompt is None else {"system": system_prompt}
    with self.claude_client.messages.stream(
        model=self.model_name,
        max_tokens=self.max_generation_token,
        messages=history,
        **extra_kwargs,
    ) as stream:
        partial_text = ""
        for text in stream.text_stream:
            partial_text += text
            yield partial_text


def get_answer_at_once(self):
    """Request a complete (non-streaming) reply.

    Returns a tuple of (reply_text, output_token_count); on a missing
    response returns a localized error string and 0 tokens.
    """
    system_prompt = self.system_prompt
    history = self._get_claude_style_history()

    # See get_answer_stream_iter: `system` must be omitted, not None.
    extra_kwargs = {} if system_prompt is None else {"system": system_prompt}
    response = self.claude_client.messages.create(
        model=self.model_name,
        max_tokens=self.max_generation_token,
        messages=history,
        **extra_kwargs,
    )
    if response is not None:
        # messages.create returns a pydantic `Message` object, which is
        # not subscriptable — response["content"] raised TypeError.
        # Use attribute access instead.
        return response.content[0].text, response.usage.output_tokens
    return i18n("获取资源错误"), 0
54 changes: 45 additions & 9 deletions modules/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import os
import pathlib
import base64
import shutil
import sys
import traceback
Expand Down Expand Up @@ -254,11 +255,20 @@ def __init__(
) -> None:
self.history = []
self.all_token_counts = []
self.model_type = ModelType.get_type(model_name)
try:
self.model_name = MODEL_METADATA[model_name]["model_name"]
except:
self.model_name = model_name
self.model_type = ModelType.get_type(model_name)
try:
self.multimodal = MODEL_METADATA[model_name]["multimodal"]
except:
self.multimodal = False
if max_generation_token is None:
try:
max_generation_token = MODEL_METADATA[model_name]["max_generation"]
except:
pass
try:
self.token_upper_limit = MODEL_METADATA[model_name]["token_limit"]
except KeyError:
Expand Down Expand Up @@ -389,15 +399,31 @@ def next_chatbot_at_once(self, inputs, chatbot, fake_input=None, display_append=
def handle_file_upload(self, files, chatbot, language):
    """Route uploaded files by type.

    Image files are appended to the chat history as multimodal input
    (when the model supports it); all other files are indexed for
    retrieval. Returns (file-component update, chatbot, status update).
    """
    status = gr.Markdown.update()
    image_files = []
    other_files = []
    if files:
        for f in files:
            # Classify by extension; anything that is not a known image
            # format is treated as a document to index.
            if f.name.endswith((".jpg", ".png", ".jpeg", ".gif", ".webp")):
                image_files.append(f)
            else:
                other_files.append(f)
        if self.multimodal:
            if image_files:
                chatbot.extend([((image.name, None), None) for image in image_files])
                self.history.extend([construct_image(image.name) for image in image_files])
        elif image_files:
            # Only warn when images were actually uploaded; previously the
            # warning fired on every upload to a non-multimodal model.
            gr.Warning(i18n("该模型不支持多模态输入"))
        if other_files:
            try:
                # NOTE(review): file_src=files indexes *all* uploads,
                # including images — confirm whether this should be
                # other_files only.
                construct_index(self.api_key, file_src=files)
                status = i18n("索引构建完成")
            except Exception as e:
                import traceback
                traceback.print_exc()
                status = i18n("索引构建失败!") + str(e)
    if not other_files:
        # Gradio's File component expects None, not an empty list.
        other_files = None
    return gr.File.update(value=other_files), chatbot, status

def summarize_index(self, files, chatbot, language):
status = gr.Markdown.update()
Expand Down Expand Up @@ -1096,6 +1122,16 @@ def clear_cuda_cache(self):
gc.collect()
torch.cuda.empty_cache()

def get_base64_image(self, image_path):
    """Return the file at *image_path* encoded as a base64 UTF-8 string."""
    with open(image_path, "rb") as image_file:
        raw = image_file.read()
    return base64.b64encode(raw).decode("utf-8")

def get_image_type(self, image_path):
    """Return the image's media subtype (for "image/<subtype>") from its extension.

    Lowercases the extension so paths like "PHOTO.JPG" no longer produce
    invalid media types such as "image/JPG", and maps "jpg" to the
    official subtype "jpeg".
    """
    extension = os.path.splitext(image_path)[1][1:].lower()
    if extension == "jpg":
        extension = "jpeg"
    return extension


class Base_Chat_Langchain_Client(BaseLLMModel):
def __init__(self, model_name, user_name=""):
Expand Down
2 changes: 1 addition & 1 deletion modules/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def get_model(
"SPARK_API_KEY"), os.getenv("SPARK_API_SECRET"), user_name=user_name)
elif model_type == ModelType.Claude:
from .Claude import Claude_Client
model = Claude_Client(model_name="claude-2", api_secret=os.getenv("CLAUDE_API_SECRET"))
model = Claude_Client(model_name=model_name, api_secret=os.getenv("CLAUDE_API_SECRET"))
elif model_type == ModelType.Qwen:
from .Qwen import Qwen_Client
model = Qwen_Client(model_name, user_name=user_name)
Expand Down
15 changes: 14 additions & 1 deletion modules/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@
"讯飞星火大模型V3.0",
"讯飞星火大模型V2.0",
"讯飞星火大模型V1.5",
"Claude",
"Claude 3 Sonnet",
"Claude 3 Opus",
"ERNIE-Bot-turbo",
"ERNIE-Bot",
"ERNIE-Bot-4",
Expand Down Expand Up @@ -164,6 +165,18 @@
"model_name": "Claude",
"token_limit": 4096,
},
"Claude 3 Sonnet": {
"model_name": "claude-3-sonnet-20240229",
"token_limit": 200000,
"max_generation": 4096,
"multimodal": True
},
"Claude 3 Opus": {
"model_name": "claude-3-opus-20240229",
"token_limit": 200000,
"max_generation": 4096,
"multimodal": True
},
"ERNIE-Bot-turbo": {
"model_name": "ERNIE-Bot-turbo",
"token_limit": 1024,
Expand Down
3 changes: 3 additions & 0 deletions modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,9 @@ def construct_text(role, text):
def construct_user(text):
return construct_text("user", text)

def construct_image(path):
    """Build a pseudo chat message with role "image" whose content is the image's file path."""
    return construct_text("image", path)


def construct_system(text):
    """Build a chat message with role "system" carrying *text*."""
    return construct_text("system", text)
Expand Down
3 changes: 2 additions & 1 deletion readme/README_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@
| [MiniMax](https://api.minimax.chat/) |
| [XMChat](https://github.com/MILVLG/xmchat) | Not support streaming
| [Midjourney](https://www.midjourney.com/) | Not support streaming
| [Claude](https://www.anthropic.com/) |
| [Claude](https://www.anthropic.com/) | ✨ Now supports Claude 3 Opus and Sonnet; Haiku will be supported as soon as it is released
| DALL·E 3 |

## Usage Tips

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ python-docx
websocket_client
pydantic==2.5.2
google-search-results
anthropic==0.3.11
anthropic==0.18.1
Pillow>=10.1.0
protobuf==3.20.3
ollama>=0.1.6

0 comments on commit 94991b8

Please sign in to comment.