From 334141472035610991d734cefbf466b56ec114c4 Mon Sep 17 00:00:00 2001 From: ToGe3688 Date: Fri, 27 Dec 2024 00:10:56 +0100 Subject: [PATCH] Added support for local files for image analysis --- README.md | 20 ++++++++-- io-package.json | 5 ++- main.js | 94 ++++++++++++++++++++++++++++++++++------------- package-lock.json | 9 ++--- package.json | 11 +++--- 5 files changed, 99 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index dce740b..869d828 100644 --- a/README.md +++ b/README.md @@ -120,8 +120,8 @@ Each tool appears in the ioBroker object tree. Use `Tools.$YourToolName.text_req #### Vision/Image Requests -If you have enabled vision/image requests, you can use `Tools.$YourToolName.image_url` to set an image URL for the tool to analyze. The image will be send when you set the state of the `Tools.$YourToolName.text_request` datapoint. -Note: As the image gets converted to a base64 string you can also use a local file path that ioBroker can access. +If you have enabled vision/image requests, you can use `Tools.$YourToolName.image_url` to set an image URL or local file path for the tool to analyze. The image will be sent when you set the state of the `Tools.$YourToolName.text_request` datapoint. +Note: As the image gets converted to a base64 string you can also use a local URL or file path that ioBroker can access. 
### Script Integration (`sendTo`) @@ -136,7 +136,7 @@ sendTo('ai-toolbox.0', 'tool_request', { console.info(result); // Outputs the tool's response as text string }); ``` -#### Vision/Image Requests +#### Vision/Image Requests with URL ```javascript sendTo('ai-toolbox.0', 'tool_request', { 'tool': 'YOUR-TOOL-NAME', @@ -147,6 +147,17 @@ sendTo('ai-toolbox.0', 'tool_request', { }); ``` +#### Vision/Image Requests with Local File +```javascript +sendTo('ai-toolbox.0', 'tool_request', { + 'tool': 'YOUR-TOOL-NAME', + 'text': 'The message for the tool to respond to', + 'image_url': '/opt/iobroker/iobroker-data/telegram_0/photo/2024-12-26_22-20-20_high.jpg', +}, async (result) => { + console.info(result); // Outputs the tool's response as text string +}); +``` + ## Using Models without Tools ### Object Interaction @@ -528,6 +539,9 @@ Set the log level to `debug` in the ioBroker admin interface for detailed logs. This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details. 
## Changelog +**0.0.6** - 2024-26-12 (ToGe3688) +* Added support for local files for image analysis + **0.0.4** - 2024-26-12 (ToGe3688) * Added vision capabilities for tools * Improved admin ui diff --git a/io-package.json b/io-package.json index e7d37ed..638cdcc 100644 --- a/io-package.json +++ b/io-package.json @@ -1,8 +1,11 @@ { "common": { "name": "ai-toolbox", - "version": "0.0.4", + "version": "0.0.6", "news": { + "0.0.6": { + "en": "Added local file import for vision / image request" + }, "0.0.4": { "en": "Added vision/image capability for tools, improved admin UI for tools" }, diff --git a/main.js b/main.js index 7350191..aa563c0 100644 --- a/main.js +++ b/main.js @@ -4,6 +4,8 @@ * Created with @iobroker/create-adapter v2.6.5 */ const utils = require("@iobroker/adapter-core"); +const fs = require("fs"); +const mime = require("mime-types"); const AnthropicAiProvider = require("./lib/anthropic-ai-provider"); const OpenAiProvider = require("./lib/openai-ai-provider"); const PerplexityAiProvider = require("./lib/perplexity-ai-provider"); @@ -1020,32 +1022,72 @@ class AiToolbox extends utils.Adapter { this.log.warn("Empty or invalid URL for image fetch"); return responseObject; } - const response = await fetch(url); - if (!response.ok) { - this.log.warn("Failed to fetch image from " + url + " with status: " + response.status); - return responseObject; - } - const mimeType = response.headers.get("content-type"); - if (!mimeType || !mimeType.includes("image")) { - this.log.warn("Response from " + url + " is not an image, mimeType: " + mimeType); - return responseObject; - } - const buffer = await response.arrayBuffer(); - if (!buffer) { - this.log.warn("Failed to fetch image from " + url + " as array buffer"); - return responseObject; - } - const base64 = btoa(String.fromCharCode(...new Uint8Array(buffer))); - if (!base64) { - this.log.warn("Failed to fetch image from " + url + " as base64"); - return responseObject; - } - if (mimeType && base64) { - 
this.log.info("Fetched image from " + url + " as base64, mimeType: " + mimeType); - responseObject.mimeType = mimeType; - responseObject.base64 = base64; - responseObject.base64withMime = `data:${mimeType};base64,${base64}`; - responseObject.success = true; + + // Check if url or local file + if (url.startsWith("https://") || url.startsWith("http://")) { + this.log.debug("Fetching image from URL: " + url); + try { + const response = await fetch(url); + if (!response.ok) { + this.log.warn("Failed to fetch image from " + url + " with status: " + response.status); + return responseObject; + } + const mimeType = response.headers.get("content-type"); + if (!mimeType || !mimeType.includes("image")) { + this.log.warn("Response from " + url + " is not an image, mimeType: " + mimeType); + return responseObject; + } + const buffer = await response.arrayBuffer(); + if (!buffer) { + this.log.warn("Failed to fetch image from " + url + " as array buffer"); + return responseObject; + } + const base64 = btoa(String.fromCharCode(...new Uint8Array(buffer))); + if (!base64) { + this.log.warn("Failed to fetch image from " + url + " as base64"); + return responseObject; + } + if (mimeType && base64) { + this.log.info("Fetched image from " + url + " as base64, mimeType: " + mimeType); + responseObject.mimeType = mimeType; + responseObject.base64 = base64; + responseObject.base64withMime = `data:${mimeType};base64,${base64}`; + responseObject.success = true; + } + } catch (e) { + this.log.error("Failed to fetch image from " + url + " with error: " + e); + } + + } else { + + // Read file from filesystem + try { + this.log.debug("Reading image file: " + url); + const file = fs.readFileSync(url); + if (!file) { + this.log.warn("Failed to read image file from " + url); + return responseObject; + } + const mimeType = mime.lookup(url); + if (!mimeType || !mimeType.includes("image")) { + this.log.warn("Response from " + url + " is not an image, mimeType: " + mimeType); + return responseObject; 
+ } + const base64 = Buffer.from(file).toString("base64"); + if (!base64) { + this.log.warn("Failed to read image file from " + url + " as base64"); + return responseObject; + } + if (mimeType && base64) { + this.log.info("Read image file from " + url + " as base64, mimeType: " + mimeType); + responseObject.mimeType = mimeType; + responseObject.base64 = base64; + responseObject.base64withMime = `data:${mimeType};base64,${base64}`; + responseObject.success = true; + } + } catch (e) { + this.log.error("Failed to read image file from " + url + " with error: " + e); + } } return responseObject; } diff --git a/package-lock.json b/package-lock.json index d072483..1762b3d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "iobroker.ai-toolbox", - "version": "0.0.1", + "version": "0.0.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "iobroker.ai-toolbox", - "version": "0.0.1", + "version": "0.0.6", "license": "MIT", "dependencies": { - "@iobroker/adapter-core": "^3.2.3" + "@iobroker/adapter-core": "^3.2.3", + "mime-types": "^2.1.35" }, "devDependencies": { "@alcalzone/release-script": "^3.8.0", @@ -3570,7 +3571,6 @@ "version": "1.52.0", "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "dev": true, "engines": { "node": ">= 0.6" } @@ -3579,7 +3579,6 @@ "version": "2.1.35", "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "dev": true, "dependencies": { "mime-db": "1.52.0" }, diff --git a/package.json b/package.json index d2bca9c..05f16fd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "iobroker.ai-toolbox", - "version": "0.0.4", + "version": "0.0.6", "description": "The ioBroker AI Toolbox Adapter is a powerful integration 
that enables users to create and manage custom AI tools within their ioBroker smart home environment. This versatile adapter supports multiple Large Language Models (LLMs) and provides a flexible framework for AI-based automation and interaction.", "author": { "name": "ToGe3688", @@ -34,7 +34,8 @@ "node": ">= 16" }, "dependencies": { - "@iobroker/adapter-core": "^3.2.3" + "@iobroker/adapter-core": "^3.2.3", + "mime-types": "^2.1.35" }, "devDependencies": { "@alcalzone/release-script": "^3.8.0", @@ -44,15 +45,15 @@ "@iobroker/adapter-dev": "^1.3.0", "@iobroker/testing": "^5.0.0", "@tsconfig/node20": "^20.1.4", - "@types/chai-as-promised": "^7.1.8", "@types/chai": "^4.3.20", + "@types/chai-as-promised": "^7.1.8", "@types/mocha": "^10.0.10", "@types/node": "^20.17.9", "@types/proxyquire": "^1.3.31", "@types/sinon": "^17.0.3", "@types/sinon-chai": "^3.2.12", - "chai-as-promised": "^7.1.2", "chai": "^4.5.0", + "chai-as-promised": "^7.1.2", "eslint": "^8.57.1", "mocha": "^11.0.1", "proxyquire": "^2.1.3", @@ -84,4 +85,4 @@ "url": "https://github.com/ToGe3688/ioBroker.ai-toolbox/issues" }, "readmeFilename": "README.md" -} \ No newline at end of file +}