diff --git a/modules/models/OpenAIVision.py b/modules/models/OpenAIVision.py index c600353d..0a834730 100644 --- a/modules/models/OpenAIVision.py +++ b/modules/models/OpenAIVision.py @@ -133,7 +133,9 @@ def _get_gpt4v_style_history(self): content.append( { "type": "image_url", - "image_url": f"data:image/{self.get_image_type(image)};base64,{self.get_base64_image(image)}" + "image_url": { + "url": f"data:image/{self.get_image_type(image)};base64,{self.get_base64_image(image)}", + } }, ) if content: diff --git a/modules/models/base_model.py b/modules/models/base_model.py index 9525a36c..68728f26 100644 --- a/modules/models/base_model.py +++ b/modules/models/base_model.py @@ -161,12 +161,16 @@ def get_type(cls, model_name: str): model_type = None model_name_lower = model_name.lower() if "gpt" in model_name_lower: - if "instruct" in model_name_lower: - model_type = ModelType.OpenAIInstruct - elif "vision" in model_name_lower: + try: + assert MODEL_METADATA[model_name]["multimodal"] == True model_type = ModelType.OpenAIVision - else: - model_type = ModelType.OpenAI + except: + if "instruct" in model_name_lower: + model_type = ModelType.OpenAIInstruct + elif "vision" in model_name_lower: + model_type = ModelType.OpenAIVision + else: + model_type = ModelType.OpenAI elif "chatglm" in model_name_lower: model_type = ModelType.ChatGLM elif "ollama" in model_name_lower: diff --git a/modules/presets.py b/modules/presets.py index d98079b4..96c8f7fe 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -62,7 +62,6 @@ "GPT4", "GPT4 32K", "GPT4 Turbo", - "GPT4 Vision", "Claude 3 Haiku", "Claude 3 Sonnet", "Claude 3 Opus", @@ -155,11 +154,12 @@ "token_limit": 32768, }, "GPT4 Turbo": { - "model_name": "gpt-4-turbo-preview", + "model_name": "gpt-4-turbo", "token_limit": 128000, + "multimodal": True }, "GPT4 Vision": { - "model_name": "gpt-4-vision-preview", + "model_name": "gpt-4-turbo", "token_limit": 128000, "multimodal": True },