diff --git a/OpenVINO/.gitignore b/OpenVINO/.gitignore new file mode 100644 index 00000000..a9f70133 --- /dev/null +++ b/OpenVINO/.gitignore @@ -0,0 +1,14 @@ +.vscode/ +__pycache__/ +models/llm/ +temp/ +test/ +dist/ +build/ +cache/ +test/ +env/ + +!tools/*.exe +llm_cache/ +TinyLlama-* \ No newline at end of file diff --git a/OpenVINO/model_config.py b/OpenVINO/model_config.py new file mode 100644 index 00000000..958c323f --- /dev/null +++ b/OpenVINO/model_config.py @@ -0,0 +1,4 @@ +openVINOConfig = { + "openvino": "../service/models/llm/openvino", +} + diff --git a/OpenVINO/openvino_adapter.py b/OpenVINO/openvino_adapter.py new file mode 100644 index 00000000..ce2d3733 --- /dev/null +++ b/OpenVINO/openvino_adapter.py @@ -0,0 +1,171 @@ +import threading +from queue import Empty, Queue +import json +import traceback +from typing import Dict, List, Callable +#from model_downloader import NotEnoughDiskSpaceException, DownloadException +#from psutil._common import bytes2human +from openvino_interface import LLMInterface +from openvino_params import LLMParams + + +RAG_PROMPT_FORMAT = "Answer the questions based on the information below. \n{context}\n\nQuestion: {prompt}" + +class LLM_SSE_Adapter: + msg_queue: Queue + finish: bool + singal: threading.Event + llm_interface: LLMInterface + should_stop: bool + + def __init__(self, llm_interface: LLMInterface): + self.msg_queue = Queue(-1) + self.finish = False + self.singal = threading.Event() + self.llm_interface = llm_interface + self.should_stop = False + + def put_msg(self, data): + self.msg_queue.put_nowait(data) + self.singal.set() + + def load_model_callback(self, event: str): + data = {"type": "load_model", "event": event} + self.put_msg(data) + + def text_in_callback(self, msg: str): + data = {"type": "text_in", "value": msg} + self.put_msg(data) + + def text_out_callback(self, msg: str, type=1): + data = {"type": "text_out", "value": msg, "dtype": type} + self.put_msg(data) + + def first_latency_callback(self, first_latency: str): + data = {"type": "first_token_latency", "value": first_latency} + self.put_msg(data) + + def after_latency_callback(self, after_latency: str): + data = {"type": "after_token_latency", "value": after_latency} + self.put_msg(data) + + def sr_latency_callback(self, sr_latency: str): + data = {"type": "sr_latency", "value": sr_latency} + self.put_msg(data) + + def error_callback(self, ex: Exception): + if ( + isinstance(ex, NotImplementedError) + and ex.__str__() == "Access to repositories lists is not implemented." 
+ ): + self.put_msg( + { + "type": "error", + "err_type": "repositories_not_found", + } + ) + # elif isinstance(ex, NotEnoughDiskSpaceException): + # self.put_msg( + # { + # "type": "error", + # "err_type": "not_enough_disk_space", + # "need": bytes2human(ex.requires_space), + # "free": bytes2human(ex.free_space), + # } + # ) + # elif isinstance(ex, DownloadException): + # self.put_msg({"type": "error", "err_type": "download_exception"}) + # # elif isinstance(ex, llm_biz.StopGenerateException): + # # pass + elif isinstance(ex, RuntimeError): + self.put_msg({"type": "error", "err_type": "runtime_error"}) + else: + self.put_msg({"type": "error", "err_type": "unknow_exception"}) + print(f"exception:{str(ex)}") + + def text_conversation(self, params: LLMParams): + thread = threading.Thread( + target=self.text_conversation_run, + args=[params], + ) + thread.start() + return self.generator() + + + def stream_function(self, stream): + for output in stream: + if self.llm_interface.stop_generate: + self.llm_interface.stop_generate = False + break + + self.text_out_callback(output) + self.put_msg({"type": "finish"}) + + def text_conversation_run( + self, + params: LLMParams, + ): + try: + self.llm_interface.load_model(params, callback=self.load_model_callback) + + prompt = params.prompt + full_prompt = convert_prompt(prompt) + self.llm_interface.create_chat_completion(full_prompt, self.text_out_callback) + + except Exception as ex: + traceback.print_exc() + self.error_callback(ex) + finally: + self.finish = True + self.singal.set() + + def generator(self): + while True: + while not self.msg_queue.empty(): + try: + data = self.msg_queue.get_nowait() + msg = f"data:{json.dumps(data)}\0" + print(msg) + yield msg + except Empty(Exception): + break + if not self.finish: + self.singal.clear() + self.singal.wait() + else: + break + + +_default_prompt = { + "role": "system", + "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. 
Please keep the output text language the same as the user input.", + } + +def convert_prompt(prompt: List[Dict[str, str]]): + chat_history = [_default_prompt] + prompt_len = prompt.__len__() + i = 0 + while i < prompt_len: + chat_history.append({"role": "user", "content": prompt[i].get("question")}) + if i < prompt_len - 1: + chat_history.append( + {"role": "assistant", "content": prompt[i].get("answer")} + ) + i = i + 1 + return chat_history + + +def process_rag( + prompt: str, + device: str, + text_out_callback: Callable[[str, int], None] = None, + ): + import rag + rag.to(device) + query_success, context, rag_source = rag.query(prompt) + if query_success: + print("rag query input\r\n{}output:\r\n{}".format(prompt, context)) + prompt = RAG_PROMPT_FORMAT.format(prompt=prompt, context=context) + if text_out_callback is not None: + text_out_callback(rag_source, 2) + return prompt \ No newline at end of file diff --git a/OpenVINO/openvino_backend.py b/OpenVINO/openvino_backend.py new file mode 100644 index 00000000..1ca211b8 --- /dev/null +++ b/OpenVINO/openvino_backend.py @@ -0,0 +1,57 @@ +from typing import Dict, List, Callable +from os import path +from openvino_interface import LLMInterface +import sys +import openvino_genai +from openvino_params import LLMParams +import model_config +import gc + +class OpenVino(LLMInterface): + def __init__(self): + self._model = None + self.stop_generate = False + self._last_repo_id = None + + def load_model(self, params: LLMParams, callback: Callable[[str], None] = None): + model_repo_id = params.model_repo_id + if self._model is None or self._last_repo_id != model_repo_id: + if callback is not None: + callback("start") + self.unload_model() + + #model_base_path = model_config.openVINOConfig.get("openvino") + #namespace, repo, *model = model_repo_id.split("/") + #model_path = path.abspath(path.join(model_base_path,"---".join([namespace, repo]), "---".join(model))) + model_path = r".\TinyLlama-1.1B-Chat-v1.0" + + print(params.model_repo_id) + enable_compile_cache = dict() + enable_compile_cache["CACHE_DIR"] = "llm_cache" + self._model = openvino_genai.LLMPipeline(model_path, "GPU", **enable_compile_cache) + + self._tokenizer = self._model.get_tokenizer() + + self._last_repo_id = model_repo_id + if callback is not None: + callback("finish") + + self.config = openvino_genai.GenerationConfig() + self.config.max_new_tokens = 100 + print("Model loaded") + + def create_chat_completion(self, messages: List[Dict[str, str]], streamer: Callable[[str], None]): + tokenized_input = self._tokenizer.apply_chat_template(messages, add_generation_prompt=True) + print(tokenized_input) + return self._model.generate(tokenized_input, self.config, streamer) + + + def unload_model(self): + if self._model is not None: + #self._model.close() + del self._model + gc.collect() + self._model = None + + def get_backend_type(self): + return "openvino" \ No newline at end of file diff --git a/OpenVINO/openvino_interface.py b/OpenVINO/openvino_interface.py new file mode 100644 index 00000000..1743fe75 --- /dev/null +++ b/OpenVINO/openvino_interface.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Optional +from openvino_params import LLMParams + +class LLMInterface(ABC): + stop_generate: bool + _model: Optional[object] + + @abstractmethod + def load_model(self, params: LLMParams, **kwargs): + pass + + @abstractmethod + def unload_model(self): + pass + + @abstractmethod + def create_chat_completion(self, messages: List[Dict[str, str]]): + pass + + 
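+    # Identifies the concrete backend; the OpenVino implementation in
+    # openvino_backend.py returns the string "openvino".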
@abstractmethod + def get_backend_type(self): + pass + diff --git a/OpenVINO/openvino_params.py b/OpenVINO/openvino_params.py new file mode 100644 index 00000000..e06e1fe8 --- /dev/null +++ b/OpenVINO/openvino_params.py @@ -0,0 +1,15 @@ +from typing import Dict, List + +class LLMParams: + prompt: List[Dict[str, str]] + device: int + enable_rag: bool + model_repo_id: str + + def __init__( + self, prompt: list, device: int, enable_rag: bool, model_repo_id: str + ) -> None: + self.prompt = prompt + self.device = device + self.enable_rag = enable_rag + self.model_repo_id = model_repo_id \ No newline at end of file diff --git a/OpenVINO/openvino_rag.py b/OpenVINO/openvino_rag.py new file mode 100644 index 00000000..8e5918c5 --- /dev/null +++ b/OpenVINO/openvino_rag.py @@ -0,0 +1,190 @@ +import gc +import json +import os +import time +from typing import Any, List, Dict + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.embeddings import LlamaCppEmbeddings +from langchain_community.document_loaders.markdown import UnstructuredMarkdownLoader +from langchain_community.document_loaders.pdf import PyPDFLoader +from langchain_community.document_loaders.text import TextLoader +from langchain_community.document_loaders.word_document import ( + UnstructuredWordDocumentLoader, + Docx2txtLoader, +) +from langchain_community.vectorstores.faiss import FAISS, Document + +#### CONFIGURATIONS ------------------------------------------------------------------------------------------------------------------------ +INDEX_DATABASE_PATH = "./db/" # Faiss database folder +CHUNK_SIZE = 1600 # Chunk size for text splitter +CHUNK_OVERLAP = 400 # Chunk overlap for text splitter +INDEX_NUM = 2 # Number of content pieces to retrieve +MAX_NEW_TOKENS = 320 # Max length of LLM output + + +# Embedding model class - create a wrapper for embedding model +class EmbeddingWrapper: + def __init__(self, model_path: str): + start = time.time() + print(f"******* loading {model_path} start ") + self.model = LlamaCppEmbeddings(model_path=model_path) + print( + "******* loading {} finish. 
cost {:3f}s".format( + model_path, time.time() - start + ) + ) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + t0 = time.time() + embeddings = self.model.embed_documents(texts) + t1 = time.time() + print("-----------LlamaCpp--embedding cost time(s): ", t1 - t0) + return embeddings + + def embed_query(self, text: str) -> List[float]: + return self.model.embed_query(text) + + +# Faiss database - manage embeddings and file indexing +class EmbeddingDatabase: + db: FAISS + embeddings: EmbeddingWrapper + text_splitter: RecursiveCharacterTextSplitter + index_list: List[Dict[str, Any]] + + def __init__(self, embeddings: EmbeddingWrapper): + self.embeddings = embeddings + index_cache = os.path.join(INDEX_DATABASE_PATH, "index.faiss") + self.db = ( + FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings) + if os.path.exists(index_cache) + else None + ) + index_json = os.path.join(INDEX_DATABASE_PATH, "index.json") + self.index_list = ( + self.__load_exists_index(index_json) + if os.path.exists(index_json) + else list() + ) + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, length_function=len + ) + + def __load_exists_index(self, index_json: str): + try: + with open(index_json, "r") as f: + return json.load(f) + except Exception as e: + print(f"load index.json error: {e}") + return list() + + def __save_index(self, file_base_name: str, md5: str, doc_ids: str): + self.index_list.append({"name": file_base_name, "md5": md5, "doc_ids": doc_ids}) + if not os.path.exists(INDEX_DATABASE_PATH): + os.makedirs(INDEX_DATABASE_PATH) + index_json = os.path.join(INDEX_DATABASE_PATH, "index.json") + with open(index_json, "w") as f: + json.dump(self.index_list, f) + self.db.save_local(INDEX_DATABASE_PATH) + + def __add_documents(self, file_base_name: str, docs: List[Document], md5: str): + if self.db is None: + self.db = FAISS.from_documents(docs, self.embeddings) + else: + self.db.add_documents(docs) + print(docs[0].metadata) + self.__save_index(file_base_name, md5, [doc.metadata["doc_id"] for doc in docs]) + + def __analyze_file_to_db(self, file: str, md5: str): + file_base_name = os.path.basename(file) + file_ext = os.path.splitext(file_base_name)[1].lower() + + if file_ext == ".txt": + raw_documents = TextLoader(file, encoding="utf-8").load() + elif file_ext == ".pdf": + raw_documents = PyPDFLoader(file).load() + elif file_ext == ".doc": + raw_documents = UnstructuredWordDocumentLoader(file).load() + elif file_ext == ".docx": + raw_documents = Docx2txtLoader(file).load() + elif file_ext == ".md": + raw_documents = UnstructuredMarkdownLoader(file).load() + else: + raise Exception(f"Unsupported file extension {file_ext}") + + docs = self.text_splitter.split_documents(raw_documents) + if docs: + print("Analyze {} got {} index files.".format(file_base_name, len(docs))) + self.__add_documents(file_base_name, docs, md5) + else: + raise Exception(f"Cannot analyze {file_base_name}") + + def add_index_file(self, file: str): + md5 = self.__calculate_md5(file) + for item in self.index_list: + if item["md5"] == md5: + print(f"{os.path.basename(file)} already indexed.") + return 1, md5 + + self.__analyze_file_to_db(file, md5) + return 0, md5 + + def query_database(self, query: str): + if not query: + raise Exception("Query cannot be None or empty") + + print("******* Querying database...") + if self.db is None: + return False, None, None + + docs = self.db.similarity_search_with_relevance_scores( + query, k=INDEX_NUM, score_threshold=0.4 + ) + 
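+        # similarity_search_with_relevance_scores returns (Document, score) pairs;
+        # INDEX_NUM caps retrieval at 2 chunks and score_threshold=0.4 discards
+        # weak matches, so an empty result is expected for unrelated queries.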
if not docs: + return False, None, None + + doc_contents = [doc.page_content for doc, _ in docs] + source_files = {doc.metadata["source"] for doc, _ in docs} + return True, "\n\n".join(doc_contents), "\n".join(source_files) + + def __calculate_md5(self, file_path: str) -> str: + import hashlib + + hasher = hashlib.md5() + with open(file_path, "rb") as f: + buf = f.read() + hasher.update(buf) + return hasher.hexdigest() + + +def init(model_path: str): + global embedding_database, embedding_wrapper + embedding_wrapper = EmbeddingWrapper(model_path=model_path) + embedding_database = EmbeddingDatabase(embedding_wrapper) + + +def add_index_file(file: str): + return embedding_database.add_index_file(file) + + +def query(query: str): + return embedding_database.query_database(query) + + +def dispose(): + global embedding_database, embedding_wrapper + embedding_database = None + embedding_wrapper = None + gc.collect() + + +if __name__ == "__main__": + # Example Usage + init(model_path="/Users/daniel/silicon/AI-Playground/LlamaCPP/models/llm/gguf/bge-large-en-v1.5-q8_0.gguf") + add_index_file("/Users/daniel/silicon/AI-Playground/hello.txt") + success, context, source = query("What is the content about?") + print("Query success:", success) + print("Context:", context) + print("Source Files:", source) + dispose() diff --git a/OpenVINO/openvino_test.py b/OpenVINO/openvino_test.py new file mode 100644 index 00000000..1c5db2ce --- /dev/null +++ b/OpenVINO/openvino_test.py @@ -0,0 +1,26 @@ +import requests + + +url = "http://127.0.0.1:29000/api/llm/chat" +params = { + "prompt": [{"question": "Your name is Luca", "answer": "My name is Luca."}, {"question": "What is your name?"}], + "device": "", + "enable_rag": False, + "model_repo_id": "meta-llama-3.1-8b-instruct-q5_k_m.gguf", +} +response = requests.post(url, json=params, stream=True) +# Check if the response status code is 200 (OK) +response.raise_for_status() +e = 1 +# Iterate over the response lines +for line in response.iter_lines(): + e += 1 + if line: + # Decode the line (assuming UTF-8 encoding) + decoded_line = line.decode('utf-8') + + # SSE events typically start with "data: " + if decoded_line.startswith("data:"): + # Extract the data part + data = decoded_line[len("data:"):] + print(data) # Process the data as needed \ No newline at end of file diff --git a/OpenVINO/openvino_web_api.py b/OpenVINO/openvino_web_api.py new file mode 100644 index 00000000..ff9df685 --- /dev/null +++ b/OpenVINO/openvino_web_api.py @@ -0,0 +1,49 @@ +import os +os.environ['PATH'] = os.path.abspath('../openvino-env/Library/bin') + os.pathsep + os.environ['PATH'] +import sys +current_folder = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(current_folder) +from apiflask import APIFlask +from flask import jsonify, request, Response, stream_with_context +from openvino_adapter import LLM_SSE_Adapter +from openvino_backend import OpenVino +from openvino_params import LLMParams + +app = APIFlask(__name__) +llm_backend = OpenVino() + + +@app.get("/health") +def health(): + return jsonify({"code": 0, "message": "success"}) + + +@app.post("/api/llm/chat") +def llm_chat(): + params = request.get_json() + params.pop("print_metrics", None) + llm_params = LLMParams(**params) + sse_invoker = LLM_SSE_Adapter(llm_backend) + it = sse_invoker.text_conversation(llm_params) + return Response(stream_with_context(it), content_type="text/event-stream") + + +@app.post("/api/free") +def free(): + llm_backend.unload_model() + return jsonify({"code": 0, "message": "success"}) + + 
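+# Sets the backend's stop_generate flag; LLM_SSE_Adapter.stream_function polls it
+# between streamed tokens to break out of an active generation.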
+@app.get("/api/llm/stopGenerate") +def stop_llm_generate(): + llm_backend.stop_generate = True + return jsonify({"code": 0, "message": "success"}) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="AI Playground Web service") + parser.add_argument("--port", type=int, default=59997, help="Service listen port") + args = parser.parse_args() + app.run(host="127.0.0.1", port=args.port, use_reloader=False) diff --git a/OpenVINO/requirements.txt b/OpenVINO/requirements.txt new file mode 100644 index 00000000..031646de --- /dev/null +++ b/OpenVINO/requirements.txt @@ -0,0 +1,4 @@ +Flask==3.0.3 +apiflask==2.3.0 +optimum-intel[openvino] +openvino-genai \ No newline at end of file diff --git a/WebUI/.gitignore b/WebUI/.gitignore index 00fddc80..ef23ebb0 100644 --- a/WebUI/.gitignore +++ b/WebUI/.gitignore @@ -14,6 +14,7 @@ dist-ssr release/ ComfyUI/ LlamaCPP/ +OpenVino/ # Editor directories and files .vscode/* diff --git a/WebUI/electron/subprocesses/apiServiceRegistry.ts b/WebUI/electron/subprocesses/apiServiceRegistry.ts index d3802ec6..3bee778a 100644 --- a/WebUI/electron/subprocesses/apiServiceRegistry.ts +++ b/WebUI/electron/subprocesses/apiServiceRegistry.ts @@ -5,6 +5,7 @@ import { BrowserWindow } from "electron"; import { appLoggerInstance } from "../logging/logger.ts"; import getPort, {portNumbers} from "get-port"; import { LlamaCppBackendService} from "./llamaCppBackendService.ts"; +import { OpenVINOBackendService } from "./openVINOBackendService.ts"; export type backend = 'ai-backend' | 'comfyui-backend' @@ -76,6 +77,7 @@ export async function aiplaygroundApiServiceRegistry(win: BrowserWindow, setting instance.register(new AiBackendService('ai-backend', await getPort({port: portNumbers(59000, 59999)}), win, settings)) instance.register(new ComfyUiBackendService('comfyui-backend', await getPort({port: portNumbers(49000, 49999)}), win, settings)) instance.register(new LlamaCppBackendService('llamacpp-backend', await getPort({port: portNumbers(39000, 39999)}), win, settings)) + instance.register(new OpenVINOBackendService('openvino-backend', await getPort({port: portNumbers(29000, 29999)}), win, settings)) } return instance } diff --git a/WebUI/electron/subprocesses/openVINOBackendService.ts b/WebUI/electron/subprocesses/openVINOBackendService.ts new file mode 100644 index 00000000..83da284e --- /dev/null +++ b/WebUI/electron/subprocesses/openVINOBackendService.ts @@ -0,0 +1,87 @@ +import {app} from "electron"; +import {ChildProcess, spawn} from "node:child_process"; +import path from "node:path"; +import * as filesystem from 'fs-extra' +import {existingFileOrError} from './osProcessHelper.ts' +import { LsLevelZeroService, UvPipService, LongLivedPythonApiService } from "./service.ts"; + +export class OpenVINOBackendService extends LongLivedPythonApiService { + readonly serviceDir = path.resolve(path.join(this.baseDir, "OpenVINO")); + readonly pythonEnvDir = path.resolve(path.join(this.baseDir, `openvino-env`)); + // using ls_level_zero from default ai-backend env to avoid oneAPI dep conflicts + readonly lsLevelZeroDir = path.resolve(path.join(this.baseDir, "ai-backend-env")); + readonly isRequired = false; + + healthEndpointUrl = `${this.baseUrl}/health` + + readonly lsLevelZero = new LsLevelZeroService(this.lsLevelZeroDir); + readonly uvPip = new UvPipService(this.pythonEnvDir); + readonly python = this.uvPip.python; + + serviceIsSetUp(): boolean { + return filesystem.existsSync(this.python.getExePath()); + } + + isSetUp = 
this.serviceIsSetUp(); + + async *set_up(): AsyncIterable { + this.setStatus('installing') + this.appLogger.info("setting up service", this.name) + const self = this + + try { + yield {serviceName: self.name, step: "start", status: "executing", debugMessage: "starting to set up python environment"}; + await this.lsLevelZero.ensureInstalled(); + await this.uvPip.ensureInstalled(); + + const deviceArch = await self.lsLevelZero.detectDevice(); + yield {serviceName: self.name, step: `Detecting intel device`, status: "executing", debugMessage: `detected intel hardware ${deviceArch}`}; + + yield {serviceName: self.name, step: `install dependencies`, status: "executing", debugMessage: `installing dependencies`}; + const commonRequirements = existingFileOrError(path.join(self.serviceDir, 'requirements.txt')) + await this.uvPip.run(["install", "-r", commonRequirements]); + yield {serviceName: self.name, step: `install dependencies`, status: "executing", debugMessage: `dependencies installed`}; + + this.setStatus('notYetStarted') + yield {serviceName: self.name, step: "end", status: "success", debugMessage: `service set up completely`}; + } catch (e) { + self.appLogger.warn(`Set up of service failed due to ${e}`, self.name, true) + self.appLogger.warn(`Aborting set up of ${self.name} service environment`, self.name, true) + this.setStatus('installationFailed') + yield {serviceName: self.name, step: "end", status: "failed", debugMessage: `Failed to setup python environment due to ${e}`}; + } + } + + async spawnAPIProcess(): Promise<{ process: ChildProcess; didProcessExitEarlyTracker: Promise; }> { + const additionalEnvVariables = { + "SYCL_ENABLE_DEFAULT_CONTEXTS": "1", + "SYCL_CACHE_PERSISTENT": "1", + "PYTHONIOENCODING": "utf-8", + ...await this.lsLevelZero.getDeviceSelectorEnv(), + }; + + const apiProcess = spawn(this.python.getExePath(), ["openvino_web_api.py", "--port", this.port.toString()], { + cwd: this.serviceDir, + windowsHide: true, + env: Object.assign(process.env, additionalEnvVariables) + }); + + //must be at the same tick as the spawn function call + //otherwise we cannot really track errors given the nature of spawn() with a longlived process + const didProcessExitEarlyTracker = new Promise((resolve, reject) => { + apiProcess.on('error', (error) => { + this.appLogger.error(`encountered error of process in ${this.name} : ${error}`, this.name) + resolve(true); + }); + apiProcess.on('exit', () => { + this.appLogger.error(`encountered unexpected exit in ${this.name}.`, this.name) + resolve(true); + }); + }); + + return { + process: apiProcess, + didProcessExitEarlyTracker: didProcessExitEarlyTracker, + } + } +} diff --git a/WebUI/src/assets/js/const.ts b/WebUI/src/assets/js/const.ts index 2d7b5e90..486a166c 100644 --- a/WebUI/src/assets/js/const.ts +++ b/WebUI/src/assets/js/const.ts @@ -8,6 +8,7 @@ export module Const { export const MODEL_TYPE_INPAINT = 6; export const MODEL_TYPE_PREVIEW = 7; export const MODEL_TYPE_LLAMA_CPP = 8; + export const MODEL_TYPE_OPENVINO = 9; export const MODEL_TYPE_COMFY_UNET = 100; export const MODEL_TYPE_COMFY_CLIP = 101; export const MODEL_TYPE_COMFY_VAE = 102; diff --git a/WebUI/src/assets/js/store/backendServices.ts b/WebUI/src/assets/js/store/backendServices.ts index 122844d7..d86ae35d 100644 --- a/WebUI/src/assets/js/store/backendServices.ts +++ b/WebUI/src/assets/js/store/backendServices.ts @@ -7,6 +7,7 @@ export const useBackendServices = defineStore("backendServices", () => { ["ai-backend", new BackendServiceSetupProgressListener("ai-backend")], 
["comfyui-backend", new BackendServiceSetupProgressListener("comfyui-backend")], ["llamacpp-backend", new BackendServiceSetupProgressListener("llamacpp-backend")], + ["openvino-backend", new BackendServiceSetupProgressListener("openvino-backend")], ]); window.electronAPI.getServices().catch(async (reason: any) => { diff --git a/WebUI/src/assets/js/store/globalSetup.ts b/WebUI/src/assets/js/store/globalSetup.ts index 33946931..990ef657 100644 --- a/WebUI/src/assets/js/store/globalSetup.ts +++ b/WebUI/src/assets/js/store/globalSetup.ts @@ -34,6 +34,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => { enableRag: false, llm_model: "microsoft/Phi-3-mini-4k-instruct", ggufLLM_model: "meta-llama-3.1-8b-instruct.Q5_K_M.gguf", + openvino_model: "TinyLlama-1.1B-Chat-v1.0", sd_model: "Lykon/dreamshaper-8", inpaint_model: "Lykon/dreamshaper-8-inpainting", negativePrompt: "bad hands, nsfw", @@ -83,6 +84,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => { async function initSetup() { const setupData = await window.electronAPI.getInitSetting(); const apiServiceInformation = await window.electronAPI.getServices() + console.log("apiServiceInformation", apiServiceInformation) paths.value = setupData.modelPaths; models.value = setupData.modelLists; models.value.inpaint.push(useI18N().state.ENHANCE_INPAINT_USE_IMAGE_MODEL); diff --git a/WebUI/src/assets/js/store/textInference.ts b/WebUI/src/assets/js/store/textInference.ts index 9afc2aba..28214d61 100644 --- a/WebUI/src/assets/js/store/textInference.ts +++ b/WebUI/src/assets/js/store/textInference.ts @@ -3,13 +3,14 @@ import { useGlobalSetup } from "./globalSetup"; import { z } from "zod"; import { useBackendServices } from "./backendServices"; -export const backendTypes = ['IPEX-LLM', 'LLAMA.CPP'] as const; +export const backendTypes = ['IPEX-LLM', 'LLAMA.CPP', 'OpenVINO'] as const; const backend = z.enum(backendTypes); export type Backend = z.infer; const backendModelKey = { 'IPEX-LLM': 'llm_model', 'LLAMA.CPP': 'ggufLLM_model', + 'OpenVINO': 'openvino_model', } export const useTextInference = defineStore("textInference", () => { @@ -19,14 +20,27 @@ export const useTextInference = defineStore("textInference", () => { const activeModel = ref(null); const llamaBackendUrl = computed(() => { const url = backendServices.info.find(item => item.serviceName === "llamacpp-backend")?.baseUrl; - console.log('url', url); + console.log('llama url', url); return url; }); + const openVINOBackendUrl = computed(() => { + const url = backendServices.info.find(item => item.serviceName === "openvino-backend")?.baseUrl; + console.log('openvino url', backendServices.info.find(item => item.serviceName === "openvino-backend")?.baseUrl); + return url; + }); + + watch([llamaBackendUrl], () => { console.log('llamaBackendUrl changed', llamaBackendUrl.value); } ); + + watch([openVINOBackendUrl], () => { + console.log('openVINOBackendUrl changed', openVINOBackendUrl.value); + }, + { immediate: true } + ); watch([activeModel], () => { console.log('activeModel changed', activeModel.value); @@ -37,6 +51,7 @@ export const useTextInference = defineStore("textInference", () => { backend, activeModel, llamaBackendUrl, + openVINOBackendUrl, } }, { persist: { diff --git a/WebUI/src/components/SettingsBasic.vue b/WebUI/src/components/SettingsBasic.vue index a858eece..511d3e12 100644 --- a/WebUI/src/components/SettingsBasic.vue +++ b/WebUI/src/components/SettingsBasic.vue @@ -117,7 +117,8 @@ const theme = useTheme(); const textInferenceBackendDisplayName: Record 
= { "IPEX-LLM": "IPEX-LLM", - "LLAMA.CPP": "Llama.cpp - GGUF" + "LLAMA.CPP": "Llama.cpp - GGUF", + "OpenVINO": "OpenVINO" } const themeToDisplayName = (theme: Theme) => { @@ -157,6 +158,8 @@ function mapBackendNames(name : Backend) : BackendServiceName | undefined { return "ai-backend" as BackendServiceName } else if(name === "LLAMA.CPP") { return 'llamacpp-backend' as BackendServiceName + } else if (name === "OpenVINO") { + return 'openvino-backend' as BackendServiceName } else { return undefined } diff --git a/WebUI/src/env.d.ts b/WebUI/src/env.d.ts index a5f93d8a..5d43c8c2 100644 --- a/WebUI/src/env.d.ts +++ b/WebUI/src/env.d.ts @@ -317,6 +317,6 @@ type CheckModelAlreadyLoadedResult = { type SDGenerateState = "no_start" | "input_image" | "load_model" | "load_model_components" | "generating" | "image_out" | "error" -type BackendServiceName = "ai-backend" | "comfyui-backend" | "llamacpp-backend" +type BackendServiceName = "ai-backend" | "comfyui-backend" | "llamacpp-backend" | "openvino-backend" type ApiServiceInformation = { serviceName: BackendServiceName, status: BackendStatus , baseUrl: string, port: number, isSetUp: boolean, isRequired: boolean } diff --git a/WebUI/src/lib/utils.ts b/WebUI/src/lib/utils.ts index 9f5db491..526f138f 100644 --- a/WebUI/src/lib/utils.ts +++ b/WebUI/src/lib/utils.ts @@ -57,6 +57,8 @@ export function mapServiceNameToDisplayName(serviceName: string) { return "AI Playground" case "llamacpp-backend": return "Llama.cpp - GGUF" + case "openvino-backend": + return "OpenVINO" default: return serviceName } diff --git a/WebUI/src/views/Answer.vue b/WebUI/src/views/Answer.vue index ff2b8543..1c28b73f 100644 --- a/WebUI/src/views/Answer.vue +++ b/WebUI/src/views/Answer.vue @@ -118,6 +118,15 @@ + + + + @@ -260,7 +269,25 @@ const iconSizeClass = computed(() => iconSizes[fontSizeIndex.value]); const isMaxSize = computed(() => fontSizeIndex.value >= fontSizes.length - 1); const isMinSize = computed(() => fontSizeIndex.value <= 0); const isHistoryVisible = ref(false); -const currentBackendAPI = computed(() => textInference.backend === 'LLAMA.CPP' ? textInference.llamaBackendUrl : globalSetup.apiHost); +const backendMapping = { + 'IPEX-LLM': { service: 'ai-backend', api: globalSetup.apiHost }, + 'LLAMA.CPP': { service: 'llamacpp-backend', api: textInference.llamaBackendUrl }, + 'OpenVINO': { service: 'openvino-backend', api: textInference.openVINOBackendUrl } + }; +const currentBackendAPI = computed(() => { + const backendKey = textInference.backend; + + switch (backendKey) { + case 'IPEX-LLM': + return globalSetup.apiHost; + case 'LLAMA.CPP': + return textInference.llamaBackendUrl; + case 'OpenVINO': + return textInference.openVINOBackendUrl; + default: + throw new Error(`Unknown backend: ${backendKey}`); + } +}); // Keep track of which conversation is receiving the in-progress text const currentlyGeneratingKey = ref(null); @@ -475,13 +502,11 @@ async function checkModel() { }); } - - async function generate(chatContext: ChatItem[]) { if (processing.value || chatContext.length == 0) { return; } try { - const inferenceBackendService: BackendServiceName = textInference.backend === 'IPEX-LLM' ? "ai-backend" : "llamacpp-backend" + const inferenceBackendService: BackendServiceName = backendMapping[textInference.backend].service as BackendServiceName; await globalSetup.resetLastUsedInferenceBackend(inferenceBackendService) globalSetup.updateLastUsedBackend(inferenceBackendService)
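# Reference sketch (not part of the patch): shows how the chat payload posted by
# openvino_test.py is reshaped by convert_prompt before OpenVino.create_chat_completion
# hands it to the tokenizer's chat template. Run from the OpenVINO/ directory; it only
# imports modules introduced above and does not load a model.
from openvino_adapter import convert_prompt

history = [
    {"question": "Your name is Luca", "answer": "My name is Luca."},
    {"question": "What is your name?"},
]

messages = convert_prompt(history)
# Expected shape: the system prompt from _default_prompt, then alternating
# user/assistant turns, ending with the unanswered user question:
#   {"role": "system",    "content": "You are a helpful digital assistant. ..."}
#   {"role": "user",      "content": "Your name is Luca"}
#   {"role": "assistant", "content": "My name is Luca."}
#   {"role": "user",      "content": "What is your name?"}
for message in messages:
    print(message["role"], "->", message["content"])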