pot-app · omegaduncan · Jan 18, 2025 · Jan 18, 2025
diff --git a/README.md b/README.md
@@ -1 +1,28 @@
 # Pot-App OpenAI 文字识别插件
+
+中文 | [English](README_EN.md)
+
+本插件提供了与 OpenAI 和 Google Gemini API 集成的图像识别功能。
+
+## API 密钥设置
+
+### OpenAI API
+1. 访问 [OpenAI API Keys](https://platform.openai.com/account/api-keys)
+2. 创建新的 API 密钥
+3. 复制密钥以供后续使用
+
+### Google Gemini API
+1. 访问 [Google AI Studio](https://aistudio.google.com/app/apikey)
+2. 生成新的 API 密钥
+3. 复制密钥以供后续使用
+
+## API 端点配置
+
+### OpenAI
+- 默认端点：`https://api.openai.com`
+- 如果使用官方 OpenAI API，无需指定 URL
+- 对于第三方 OpenAI 兼容 API，可以自定义端点 URL
+
+### Google Gemini
+- 手动输入此端点：`https://generativelanguage.googleapis.com`
+- 使用 Gemini API 时必须指定此确切 URL
diff --git a/README_EN.md b/README_EN.md
@@ -0,0 +1,28 @@
+# Pot-App OpenAI Text Recognition Plugin
+
+[中文](README.md) | English
+
+This plugin provides integration with OpenAI and Google Gemini APIs for image recognition tasks.
+
+## API Key Setup
+
+### OpenAI API
+1. Visit [OpenAI API Keys](https://platform.openai.com/account/api-keys)
+2. Create a new API key
+3. Copy the key for later use
+
+### Google Gemini API
+1. Visit [Google AI Studio](https://aistudio.google.com/app/apikey)
+2. Generate a new API key
+3. Copy the key for later use
+
+## API Endpoint Configuration
+
+### OpenAI
+- Default endpoint: `https://api.openai.com`
+- You don't need to specify the URL if using the official OpenAI API
+- For third-party OpenAI-compatible APIs, you can customize the endpoint URL
+
+### Google Gemini
+- Manually enter this endpoint: `https://generativelanguage.googleapis.com`
+- You must specify this exact URL when using the Gemini API
diff --git a/info.json b/info.json
@@ -8,12 +8,7 @@
     {
       "key": "model",
       "display": "模型",
-      "type": "select",
-      "options": {
-        "gpt-4o": "GPT-4o",
-        "gpt-4o-mini": "GPT-4o-Mini",
-        "gpt-4-vision-preview": "GPT-4 Vision Preview"
-      }
+      "type": "input"
     },
     {
       "key": "requestPath",

diff --git a/main.js b/main.js
@@ -3,69 +3,195 @@ async function recognize(base64, lang, options) {
     const { tauriFetch: fetch } = utils;
     let { model = "gpt-4o", apiKey, requestPath, customPrompt } = config;
 
-    if (!requestPath) {
-        requestPath = "https://api.openai.com";
-    }
-    if (!/https?:\/\/.+/.test(requestPath)) {
-        requestPath = `https://${requestPath}`;
-    }
-    if (requestPath.endsWith('/')) {
-        requestPath = requestPath.slice(0, -1);
-    }
-    if (!requestPath.endsWith('/chat/completions')) {
-        requestPath += '/v1/chat/completions';
-    }
-    if (!customPrompt) {
-        customPrompt = "Just recognize the text in the image. Do not offer unnecessary explanations.";
-    }else{
-        customPrompt = customPrompt.replaceAll("$lang", lang);
-    }
+    // 檢查是否是 Google API
+    const isGoogleAPI = requestPath?.includes('generativelanguage.googleapis.com');
+
+    if (isGoogleAPI) {
+        // Google Gemini API 格式
+        if (!model || model === "gpt-4o") {
+            model = "gemini-1.5-flash";
+        }
+
+        const headers = {
+            'Content-Type': 'application/json'
+        }
 
-    const headers = {
-        'Content-Type': 'application/json',
-        'Authorization': `Bearer ${apiKey}`
-    }
+        // 使用正確的 Google API endpoint
+        requestPath = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
 
-    const body = {
-        model,
-        messages: [
-            {
-                "role": "system",
-                "content": [
+        const body = {
+            contents: [{
+                role: "user",
+                parts: [
                     {
-                        "type": "text",
-                        "text": customPrompt
+                        text: customPrompt || "Just recognize the text in the image. Do not offer unnecessary explanations."
+                    },
+                    {
+                        inlineData: {
+                            mimeType: "image/png",
+                            data: base64
+                        }
                     }
-                ],
+                ]
+            }]
+        };
+
+        // 添加 API key 到 URL
+        requestPath += `?key=${apiKey}`;
+
+        let res = await fetch(requestPath, {
+            method: 'POST',
+            url: requestPath,
+            headers: headers,
+            body: {
+                type: "Json",
+                payload: body
             },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": `data:image/png;base64,${base64}`,
-                            "detail": "high"
-                        },
-                    },
-                ],
+            responseType: 1
+        });
+
+        // 處理 Google API 的返回格式
+        if (res.ok) {
+            let result = res.data;
+            // 檢查返回格式並提供詳細錯誤信息
+            if (!result) {
+                throw `Empty response from Google API`;
             }
-        ],
-    }
-    let res = await fetch(requestPath, {
-        method: 'POST',
-        url: requestPath,
-        headers: headers,
-        body: {
-            type: "Json",
-            payload: body
+            if (!result.candidates) {
+                throw `No candidates in response: ${JSON.stringify(result)}`;
+            }
+            if (!result.candidates[0]) {
+                throw `Empty candidates array: ${JSON.stringify(result)}`;
+            }
+            if (!result.candidates[0].content) {
+                throw `No content in candidate: ${JSON.stringify(result.candidates[0])}`;
+            }
+            if (!result.candidates[0].content.parts) {
+                throw `No parts in content: ${JSON.stringify(result.candidates[0].content)}`;
+            }
+            if (!result.candidates[0].content.parts[0]) {
+                throw `Empty parts array: ${JSON.stringify(result.candidates[0].content.parts)}`;
+            }
+            if (!result.candidates[0].content.parts[0].text) {
+                throw `No text in part: ${JSON.stringify(result.candidates[0].content.parts[0])}`;
+            }
+            return result.candidates[0].content.parts[0].text;
+        } else {
+            throw `Request failed with status ${res.status}: ${JSON.stringify(res.data)}`;
         }
-    });
 
-    if (res.ok) {
-        let result = res.data;
-        return result.choices[0].message.content;
     } else {
-        throw `Http Request Error\nHttp Status: ${res.status}\n${JSON.stringify(res.data)}`;
+        // OpenAI API 處理邏輯
+        if (!requestPath) {
+            requestPath = "https://api.openai.com";
+        }
+        if (!/https?:\/\/.+/.test(requestPath)) {
+            requestPath = `https://${requestPath}`;
+        }
+        if (requestPath.endsWith('/')) {
+            requestPath = requestPath.slice(0, -1);
+        }
+        if (!requestPath.endsWith('/chat/completions')) {
+            requestPath += '/v1/chat/completions';
+        }
+        if (!customPrompt) {
+            customPrompt = "Just recognize the text in the image. Do not offer unnecessary explanations.";
+        }else{
+            customPrompt = customPrompt.replaceAll("$lang", lang);
+        }
+
+        const headers = {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${apiKey}`
+        }
+
+        const body = {
+            model,
+            messages: [
+                {
+                    "role": "user", 
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": customPrompt
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": `data:image/png;base64,${base64}`,
+                                "detail": "high"
+                            }
+                        }
+                    ]
+                }
+            ]
+        };
+
+        let res = await fetch(requestPath, {
+            method: 'POST',
+            url: requestPath,
+            headers: headers,
+            body: {
+                type: "Json",
+                payload: body
+            },
+            responseType: 1
+        });
+
+        /* Debug 相關代碼
+        const debugBody = { ...body };
+        if (debugBody.messages?.[0]?.content) {
+            debugBody.messages[0].content = debugBody.messages[0].content.map(item => {
+                if (item.type === 'image_url') {
+                    return { 
+                        type: 'image_url', 
+                        image_url: { 
+                            url: '***[BASE64]***',
+                            detail: item.image_url.detail 
+                        } 
+                    };
+                }
+                return item;
+            });
+        }
+        */
+
+        if (res.ok) {
+            let result = res.data;
+            if (!result || !result.choices || !result.choices[0]) {
+                throw `Invalid API Response: ${JSON.stringify(result)}`;
+            }
+
+            const choice = result.choices[0];
+            let content = '';
+            if (choice.message && choice.message.content) {
+                content = choice.message.content;
+            } else if (choice.content) {
+                content = choice.content;
+            } else {
+                content = JSON.stringify(choice);
+            }
+
+            /* Debug 相關代碼
+            return `🔍 Debug Info:
+Request URL: ${requestPath}
+Request Headers: ${JSON.stringify(headers, null, 2)}
+Request Body: ${JSON.stringify(debugBody, null, 2)}
+
+📝 Response:
+${content}`;
+            */
+            return content;
+        } else {
+            /* Debug 相關代碼
+            throw `❌ Http Request Error
+URL: ${requestPath}
+Status: ${res.status}
+Headers: ${JSON.stringify(headers, null, 2)}
+Request Body: ${JSON.stringify(debugBody, null, 2)}
+Response: ${JSON.stringify(res.data, null, 2)}`;
+            */
+            throw `Request failed with status ${res.status}`;
+        }
     }
 }