diff --git a/OpenVINO/.gitignore b/OpenVINO/.gitignore new file mode 100644 index 00000000..a9f70133 --- /dev/null +++ b/OpenVINO/.gitignore @@ -0,0 +1,14 @@ +.vscode/ +__pycache__/ +models/llm/ +temp/ +test/ +dist/ +build/ +cache/ +test/ +env/ + +!tools/*.exe +llm_cache/ +TinyLlama-* \ No newline at end of file diff --git a/OpenVINO/model_config.py b/OpenVINO/model_config.py new file mode 100644 index 00000000..958c323f --- /dev/null +++ b/OpenVINO/model_config.py @@ -0,0 +1,4 @@ +openVINOConfig = { + "openvino": "../service/models/llm/openvino", +} + diff --git a/OpenVINO/openvino_adapter.py b/OpenVINO/openvino_adapter.py new file mode 100644 index 00000000..ce2d3733 --- /dev/null +++ b/OpenVINO/openvino_adapter.py @@ -0,0 +1,171 @@ +import threading +from queue import Empty, Queue +import json +import traceback +from typing import Dict, List, Callable +#from model_downloader import NotEnoughDiskSpaceException, DownloadException +#from psutil._common import bytes2human +from openvino_interface import LLMInterface +from openvino_params import LLMParams + + +RAG_PROMPT_FORMAT = "Answer the questions based on the information below. \n{context}\n\nQuestion: {prompt}" + +class LLM_SSE_Adapter: + msg_queue: Queue + finish: bool + singal: threading.Event + llm_interface: LLMInterface + should_stop: bool + + def __init__(self, llm_interface: LLMInterface): + self.msg_queue = Queue(-1) + self.finish = False + self.singal = threading.Event() + self.llm_interface = llm_interface + self.should_stop = False + + def put_msg(self, data): + self.msg_queue.put_nowait(data) + self.singal.set() + + def load_model_callback(self, event: str): + data = {"type": "load_model", "event": event} + self.put_msg(data) + + def text_in_callback(self, msg: str): + data = {"type": "text_in", "value": msg} + self.put_msg(data) + + def text_out_callback(self, msg: str, type=1): + data = {"type": "text_out", "value": msg, "dtype": type} + self.put_msg(data) + + def first_latency_callback(self, first_latency: str): + data = {"type": "first_token_latency", "value": first_latency} + self.put_msg(data) + + def after_latency_callback(self, after_latency: str): + data = {"type": "after_token_latency", "value": after_latency} + self.put_msg(data) + + def sr_latency_callback(self, sr_latency: str): + data = {"type": "sr_latency", "value": sr_latency} + self.put_msg(data) + + def error_callback(self, ex: Exception): + if ( + isinstance(ex, NotImplementedError) + and ex.__str__() == "Access to repositories lists is not implemented." 
+ ): + self.put_msg( + { + "type": "error", + "err_type": "repositories_not_found", + } + ) + # elif isinstance(ex, NotEnoughDiskSpaceException): + # self.put_msg( + # { + # "type": "error", + # "err_type": "not_enough_disk_space", + # "need": bytes2human(ex.requires_space), + # "free": bytes2human(ex.free_space), + # } + # ) + # elif isinstance(ex, DownloadException): + # self.put_msg({"type": "error", "err_type": "download_exception"}) + # # elif isinstance(ex, llm_biz.StopGenerateException): + # # pass + elif isinstance(ex, RuntimeError): + self.put_msg({"type": "error", "err_type": "runtime_error"}) + else: + self.put_msg({"type": "error", "err_type": "unknow_exception"}) + print(f"exception:{str(ex)}") + + def text_conversation(self, params: LLMParams): + thread = threading.Thread( + target=self.text_conversation_run, + args=[params], + ) + thread.start() + return self.generator() + + + def stream_function(self, stream): + for output in stream: + if self.llm_interface.stop_generate: + self.llm_interface.stop_generate = False + break + + self.text_out_callback(output) + self.put_msg({"type": "finish"}) + + def text_conversation_run( + self, + params: LLMParams, + ): + try: + self.llm_interface.load_model(params, callback=self.load_model_callback) + + prompt = params.prompt + full_prompt = convert_prompt(prompt) + self.llm_interface.create_chat_completion(full_prompt, self.text_out_callback) + + except Exception as ex: + traceback.print_exc() + self.error_callback(ex) + finally: + self.finish = True + self.singal.set() + + def generator(self): + while True: + while not self.msg_queue.empty(): + try: + data = self.msg_queue.get_nowait() + msg = f"data:{json.dumps(data)}\0" + print(msg) + yield msg + except Empty(Exception): + break + if not self.finish: + self.singal.clear() + self.singal.wait() + else: + break + + +_default_prompt = { + "role": "system", + "content": "You are a helpful digital assistant. Please provide safe, ethical and accurate information to the user. 
Please keep the output text language the same as the user input.", + } + +def convert_prompt(prompt: List[Dict[str, str]]): + chat_history = [_default_prompt] + prompt_len = prompt.__len__() + i = 0 + while i < prompt_len: + chat_history.append({"role": "user", "content": prompt[i].get("question")}) + if i < prompt_len - 1: + chat_history.append( + {"role": "assistant", "content": prompt[i].get("answer")} + ) + i = i + 1 + return chat_history + + +def process_rag( + prompt: str, + device: str, + text_out_callback: Callable[[str, int], None] = None, + ): + import rag + rag.to(device) + query_success, context, rag_source = rag.query(prompt) + if query_success: + print("rag query input\r\n{}output:\r\n{}".format(prompt, context)) + prompt = RAG_PROMPT_FORMAT.format(prompt=prompt, context=context) + if text_out_callback is not None: + text_out_callback(rag_source, 2) + return prompt \ No newline at end of file diff --git a/OpenVINO/openvino_backend.py b/OpenVINO/openvino_backend.py new file mode 100644 index 00000000..1ca211b8 --- /dev/null +++ b/OpenVINO/openvino_backend.py @@ -0,0 +1,57 @@ +from typing import Dict, List, Callable +from os import path +from openvino_interface import LLMInterface +import sys +import openvino_genai +from openvino_params import LLMParams +import model_config +import gc + +class OpenVino(LLMInterface): + def __init__(self): + self._model = None + self.stop_generate = False + self._last_repo_id = None + + def load_model(self, params: LLMParams, callback: Callable[[str], None] = None): + model_repo_id = params.model_repo_id + if self._model is None or self._last_repo_id != model_repo_id: + if callback is not None: + callback("start") + self.unload_model() + + #model_base_path = model_config.openVINOConfig.get("openvino") + #namespace, repo, *model = model_repo_id.split("/") + #model_path = path.abspath(path.join(model_base_path,"---".join([namespace, repo]), "---".join(model))) + model_path = r".\TinyLlama-1.1B-Chat-v1.0" + + print(params.model_repo_id) + enable_compile_cache = dict() + enable_compile_cache["CACHE_DIR"] = "llm_cache" + self._model = openvino_genai.LLMPipeline(model_path, "GPU", **enable_compile_cache) + + self._tokenizer = self._model.get_tokenizer() + + self._last_repo_id = model_repo_id + if callback is not None: + callback("finish") + + self.config = openvino_genai.GenerationConfig() + self.config.max_new_tokens = 100 + print("Model loaded") + + def create_chat_completion(self, messages: List[Dict[str, str]], streamer: Callable[[str], None]): + tokenized_input = self._tokenizer.apply_chat_template(messages, add_generation_prompt=True) + print(tokenized_input) + return self._model.generate(tokenized_input, self.config, streamer) + + + def unload_model(self): + if self._model is not None: + #self._model.close() + del self._model + gc.collect() + self._model = None + + def get_backend_type(self): + return "openvino" \ No newline at end of file diff --git a/OpenVINO/openvino_interface.py b/OpenVINO/openvino_interface.py new file mode 100644 index 00000000..1743fe75 --- /dev/null +++ b/OpenVINO/openvino_interface.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Optional +from openvino_params import LLMParams + +class LLMInterface(ABC): + stop_generate: bool + _model: Optional[object] + + @abstractmethod + def load_model(self, params: LLMParams, **kwargs): + pass + + @abstractmethod + def unload_model(self): + pass + + @abstractmethod + def create_chat_completion(self, messages: List[Dict[str, str]]): + pass + + 
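+    # Identifies the concrete backend; the OpenVino implementation in
+    # openvino_backend.py returns the string "openvino".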
@abstractmethod + def get_backend_type(self): + pass + diff --git a/OpenVINO/openvino_params.py b/OpenVINO/openvino_params.py new file mode 100644 index 00000000..e06e1fe8 --- /dev/null +++ b/OpenVINO/openvino_params.py @@ -0,0 +1,15 @@ +from typing import Dict, List + +class LLMParams: + prompt: List[Dict[str, str]] + device: int + enable_rag: bool + model_repo_id: str + + def __init__( + self, prompt: list, device: int, enable_rag: bool, model_repo_id: str + ) -> None: + self.prompt = prompt + self.device = device + self.enable_rag = enable_rag + self.model_repo_id = model_repo_id \ No newline at end of file diff --git a/OpenVINO/openvino_rag.py b/OpenVINO/openvino_rag.py new file mode 100644 index 00000000..8e5918c5 --- /dev/null +++ b/OpenVINO/openvino_rag.py @@ -0,0 +1,190 @@ +import gc +import json +import os +import time +from typing import Any, List, Dict + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.embeddings import LlamaCppEmbeddings +from langchain_community.document_loaders.markdown import UnstructuredMarkdownLoader +from langchain_community.document_loaders.pdf import PyPDFLoader +from langchain_community.document_loaders.text import TextLoader +from langchain_community.document_loaders.word_document import ( + UnstructuredWordDocumentLoader, + Docx2txtLoader, +) +from langchain_community.vectorstores.faiss import FAISS, Document + +#### CONFIGURATIONS ------------------------------------------------------------------------------------------------------------------------ +INDEX_DATABASE_PATH = "./db/" # Faiss database folder +CHUNK_SIZE = 1600 # Chunk size for text splitter +CHUNK_OVERLAP = 400 # Chunk overlap for text splitter +INDEX_NUM = 2 # Number of content pieces to retrieve +MAX_NEW_TOKENS = 320 # Max length of LLM output + + +# Embedding model class - create a wrapper for embedding model +class EmbeddingWrapper: + def __init__(self, model_path: str): + start = time.time() + print(f"******* loading {model_path} start ") + self.model = LlamaCppEmbeddings(model_path=model_path) + print( + "******* loading {} finish. 
cost {:3f}s".format( + model_path, time.time() - start + ) + ) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + t0 = time.time() + embeddings = self.model.embed_documents(texts) + t1 = time.time() + print("-----------LlamaCpp--embedding cost time(s): ", t1 - t0) + return embeddings + + def embed_query(self, text: str) -> List[float]: + return self.model.embed_query(text) + + +# Faiss database - manage embeddings and file indexing +class EmbeddingDatabase: + db: FAISS + embeddings: EmbeddingWrapper + text_splitter: RecursiveCharacterTextSplitter + index_list: List[Dict[str, Any]] + + def __init__(self, embeddings: EmbeddingWrapper): + self.embeddings = embeddings + index_cache = os.path.join(INDEX_DATABASE_PATH, "index.faiss") + self.db = ( + FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings) + if os.path.exists(index_cache) + else None + ) + index_json = os.path.join(INDEX_DATABASE_PATH, "index.json") + self.index_list = ( + self.__load_exists_index(index_json) + if os.path.exists(index_json) + else list() + ) + self.text_splitter = RecursiveCharacterTextSplitter( + chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP, length_function=len + ) + + def __load_exists_index(self, index_json: str): + try: + with open(index_json, "r") as f: + return json.load(f) + except Exception as e: + print(f"load index.json error: {e}") + return list() + + def __save_index(self, file_base_name: str, md5: str, doc_ids: str): + self.index_list.append({"name": file_base_name, "md5": md5, "doc_ids": doc_ids}) + if not os.path.exists(INDEX_DATABASE_PATH): + os.makedirs(INDEX_DATABASE_PATH) + index_json = os.path.join(INDEX_DATABASE_PATH, "index.json") + with open(index_json, "w") as f: + json.dump(self.index_list, f) + self.db.save_local(INDEX_DATABASE_PATH) + + def __add_documents(self, file_base_name: str, docs: List[Document], md5: str): + if self.db is None: + self.db = FAISS.from_documents(docs, self.embeddings) + else: + self.db.add_documents(docs) + print(docs[0].metadata) + self.__save_index(file_base_name, md5, [doc.metadata["doc_id"] for doc in docs]) + + def __analyze_file_to_db(self, file: str, md5: str): + file_base_name = os.path.basename(file) + file_ext = os.path.splitext(file_base_name)[1].lower() + + if file_ext == ".txt": + raw_documents = TextLoader(file, encoding="utf-8").load() + elif file_ext == ".pdf": + raw_documents = PyPDFLoader(file).load() + elif file_ext == ".doc": + raw_documents = UnstructuredWordDocumentLoader(file).load() + elif file_ext == ".docx": + raw_documents = Docx2txtLoader(file).load() + elif file_ext == ".md": + raw_documents = UnstructuredMarkdownLoader(file).load() + else: + raise Exception(f"Unsupported file extension {file_ext}") + + docs = self.text_splitter.split_documents(raw_documents) + if docs: + print("Analyze {} got {} index files.".format(file_base_name, len(docs))) + self.__add_documents(file_base_name, docs, md5) + else: + raise Exception(f"Cannot analyze {file_base_name}") + + def add_index_file(self, file: str): + md5 = self.__calculate_md5(file) + for item in self.index_list: + if item["md5"] == md5: + print(f"{os.path.basename(file)} already indexed.") + return 1, md5 + + self.__analyze_file_to_db(file, md5) + return 0, md5 + + def query_database(self, query: str): + if not query: + raise Exception("Query cannot be None or empty") + + print("******* Querying database...") + if self.db is None: + return False, None, None + + docs = self.db.similarity_search_with_relevance_scores( + query, k=INDEX_NUM, score_threshold=0.4 + ) + 
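+        # similarity_search_with_relevance_scores returns (Document, score) pairs;
+        # INDEX_NUM caps retrieval at 2 chunks and score_threshold=0.4 discards
+        # weak matches, so an empty result is expected for unrelated queries.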
if not docs: + return False, None, None + + doc_contents = [doc.page_content for doc, _ in docs] + source_files = {doc.metadata["source"] for doc, _ in docs} + return True, "\n\n".join(doc_contents), "\n".join(source_files) + + def __calculate_md5(self, file_path: str) -> str: + import hashlib + + hasher = hashlib.md5() + with open(file_path, "rb") as f: + buf = f.read() + hasher.update(buf) + return hasher.hexdigest() + + +def init(model_path: str): + global embedding_database, embedding_wrapper + embedding_wrapper = EmbeddingWrapper(model_path=model_path) + embedding_database = EmbeddingDatabase(embedding_wrapper) + + +def add_index_file(file: str): + return embedding_database.add_index_file(file) + + +def query(query: str): + return embedding_database.query_database(query) + + +def dispose(): + global embedding_database, embedding_wrapper + embedding_database = None + embedding_wrapper = None + gc.collect() + + +if __name__ == "__main__": + # Example Usage + init(model_path="/Users/daniel/silicon/AI-Playground/LlamaCPP/models/llm/gguf/bge-large-en-v1.5-q8_0.gguf") + add_index_file("/Users/daniel/silicon/AI-Playground/hello.txt") + success, context, source = query("What is the content about?") + print("Query success:", success) + print("Context:", context) + print("Source Files:", source) + dispose() diff --git a/OpenVINO/openvino_test.py b/OpenVINO/openvino_test.py new file mode 100644 index 00000000..1c5db2ce --- /dev/null +++ b/OpenVINO/openvino_test.py @@ -0,0 +1,26 @@ +import requests + + +url = "http://127.0.0.1:29000/api/llm/chat" +params = { + "prompt": [{"question": "Your name is Luca", "answer": "My name is Luca."}, {"question": "What is your name?"}], + "device": "", + "enable_rag": False, + "model_repo_id": "meta-llama-3.1-8b-instruct-q5_k_m.gguf", +} +response = requests.post(url, json=params, stream=True) +# Check if the response status code is 200 (OK) +response.raise_for_status() +e = 1 +# Iterate over the response lines +for line in response.iter_lines(): + e += 1 + if line: + # Decode the line (assuming UTF-8 encoding) + decoded_line = line.decode('utf-8') + + # SSE events typically start with "data: " + if decoded_line.startswith("data:"): + # Extract the data part + data = decoded_line[len("data:"):] + print(data) # Process the data as needed \ No newline at end of file diff --git a/OpenVINO/openvino_web_api.py b/OpenVINO/openvino_web_api.py new file mode 100644 index 00000000..ff9df685 --- /dev/null +++ b/OpenVINO/openvino_web_api.py @@ -0,0 +1,49 @@ +import os +os.environ['PATH'] = os.path.abspath('../openvino-env/Library/bin') + os.pathsep + os.environ['PATH'] +import sys +current_folder = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(current_folder) +from apiflask import APIFlask +from flask import jsonify, request, Response, stream_with_context +from openvino_adapter import LLM_SSE_Adapter +from openvino_backend import OpenVino +from openvino_params import LLMParams + +app = APIFlask(__name__) +llm_backend = OpenVino() + + +@app.get("/health") +def health(): + return jsonify({"code": 0, "message": "success"}) + + +@app.post("/api/llm/chat") +def llm_chat(): + params = request.get_json() + params.pop("print_metrics", None) + llm_params = LLMParams(**params) + sse_invoker = LLM_SSE_Adapter(llm_backend) + it = sse_invoker.text_conversation(llm_params) + return Response(stream_with_context(it), content_type="text/event-stream") + + +@app.post("/api/free") +def free(): + llm_backend.unload_model() + return jsonify({"code": 0, "message": "success"}) + + 
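+# Sets the backend's stop_generate flag; LLM_SSE_Adapter.stream_function polls it
+# between streamed tokens to break out of an active generation.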
+@app.get("/api/llm/stopGenerate") +def stop_llm_generate(): + llm_backend.stop_generate = True + return jsonify({"code": 0, "message": "success"}) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="AI Playground Web service") + parser.add_argument("--port", type=int, default=59997, help="Service listen port") + args = parser.parse_args() + app.run(host="127.0.0.1", port=args.port, use_reloader=False) diff --git a/OpenVINO/requirements.txt b/OpenVINO/requirements.txt new file mode 100644 index 00000000..031646de --- /dev/null +++ b/OpenVINO/requirements.txt @@ -0,0 +1,4 @@ +Flask==3.0.3 +apiflask==2.3.0 +optimum-intel[openvino] +openvino-genai \ No newline at end of file diff --git a/WebUI/.gitignore b/WebUI/.gitignore index 00fddc80..ef23ebb0 100644 --- a/WebUI/.gitignore +++ b/WebUI/.gitignore @@ -14,6 +14,7 @@ dist-ssr release/ ComfyUI/ LlamaCPP/ +OpenVino/ # Editor directories and files .vscode/* diff --git a/WebUI/electron/subprocesses/apiServiceRegistry.ts b/WebUI/electron/subprocesses/apiServiceRegistry.ts index d3802ec6..3bee778a 100644 --- a/WebUI/electron/subprocesses/apiServiceRegistry.ts +++ b/WebUI/electron/subprocesses/apiServiceRegistry.ts @@ -5,6 +5,7 @@ import { BrowserWindow } from "electron"; import { appLoggerInstance } from "../logging/logger.ts"; import getPort, {portNumbers} from "get-port"; import { LlamaCppBackendService} from "./llamaCppBackendService.ts"; +import { OpenVINOBackendService } from "./openVINOBackendService.ts"; export type backend = 'ai-backend' | 'comfyui-backend' @@ -76,6 +77,7 @@ export async function aiplaygroundApiServiceRegistry(win: BrowserWindow, setting instance.register(new AiBackendService('ai-backend', await getPort({port: portNumbers(59000, 59999)}), win, settings)) instance.register(new ComfyUiBackendService('comfyui-backend', await getPort({port: portNumbers(49000, 49999)}), win, settings)) instance.register(new LlamaCppBackendService('llamacpp-backend', await getPort({port: portNumbers(39000, 39999)}), win, settings)) + instance.register(new OpenVINOBackendService('openvino-backend', await getPort({port: portNumbers(29000, 29999)}), win, settings)) } return instance } diff --git a/WebUI/electron/subprocesses/openVINOBackendService.ts b/WebUI/electron/subprocesses/openVINOBackendService.ts new file mode 100644 index 00000000..83da284e --- /dev/null +++ b/WebUI/electron/subprocesses/openVINOBackendService.ts @@ -0,0 +1,87 @@ +import {app} from "electron"; +import {ChildProcess, spawn} from "node:child_process"; +import path from "node:path"; +import * as filesystem from 'fs-extra' +import {existingFileOrError} from './osProcessHelper.ts' +import { LsLevelZeroService, UvPipService, LongLivedPythonApiService } from "./service.ts"; + +export class OpenVINOBackendService extends LongLivedPythonApiService { + readonly serviceDir = path.resolve(path.join(this.baseDir, "OpenVINO")); + readonly pythonEnvDir = path.resolve(path.join(this.baseDir, `openvino-env`)); + // using ls_level_zero from default ai-backend env to avoid oneAPI dep conflicts + readonly lsLevelZeroDir = path.resolve(path.join(this.baseDir, "ai-backend-env")); + readonly isRequired = false; + + healthEndpointUrl = `${this.baseUrl}/health` + + readonly lsLevelZero = new LsLevelZeroService(this.lsLevelZeroDir); + readonly uvPip = new UvPipService(this.pythonEnvDir); + readonly python = this.uvPip.python; + + serviceIsSetUp(): boolean { + return filesystem.existsSync(this.python.getExePath()); + } + + isSetUp = 
this.serviceIsSetUp(); + + async *set_up(): AsyncIterable { + this.setStatus('installing') + this.appLogger.info("setting up service", this.name) + const self = this + + try { + yield {serviceName: self.name, step: "start", status: "executing", debugMessage: "starting to set up python environment"}; + await this.lsLevelZero.ensureInstalled(); + await this.uvPip.ensureInstalled(); + + const deviceArch = await self.lsLevelZero.detectDevice(); + yield {serviceName: self.name, step: `Detecting intel device`, status: "executing", debugMessage: `detected intel hardware ${deviceArch}`}; + + yield {serviceName: self.name, step: `install dependencies`, status: "executing", debugMessage: `installing dependencies`}; + const commonRequirements = existingFileOrError(path.join(self.serviceDir, 'requirements.txt')) + await this.uvPip.run(["install", "-r", commonRequirements]); + yield {serviceName: self.name, step: `install dependencies`, status: "executing", debugMessage: `dependencies installed`}; + + this.setStatus('notYetStarted') + yield {serviceName: self.name, step: "end", status: "success", debugMessage: `service set up completely`}; + } catch (e) { + self.appLogger.warn(`Set up of service failed due to ${e}`, self.name, true) + self.appLogger.warn(`Aborting set up of ${self.name} service environment`, self.name, true) + this.setStatus('installationFailed') + yield {serviceName: self.name, step: "end", status: "failed", debugMessage: `Failed to setup python environment due to ${e}`}; + } + } + + async spawnAPIProcess(): Promise<{ process: ChildProcess; didProcessExitEarlyTracker: Promise; }> { + const additionalEnvVariables = { + "SYCL_ENABLE_DEFAULT_CONTEXTS": "1", + "SYCL_CACHE_PERSISTENT": "1", + "PYTHONIOENCODING": "utf-8", + ...await this.lsLevelZero.getDeviceSelectorEnv(), + }; + + const apiProcess = spawn(this.python.getExePath(), ["openvino_web_api.py", "--port", this.port.toString()], { + cwd: this.serviceDir, + windowsHide: true, + env: Object.assign(process.env, additionalEnvVariables) + }); + + //must be at the same tick as the spawn function call + //otherwise we cannot really track errors given the nature of spawn() with a longlived process + const didProcessExitEarlyTracker = new Promise((resolve, reject) => { + apiProcess.on('error', (error) => { + this.appLogger.error(`encountered error of process in ${this.name} : ${error}`, this.name) + resolve(true); + }); + apiProcess.on('exit', () => { + this.appLogger.error(`encountered unexpected exit in ${this.name}.`, this.name) + resolve(true); + }); + }); + + return { + process: apiProcess, + didProcessExitEarlyTracker: didProcessExitEarlyTracker, + } + } +} diff --git a/WebUI/src/assets/js/const.ts b/WebUI/src/assets/js/const.ts index 2d7b5e90..486a166c 100644 --- a/WebUI/src/assets/js/const.ts +++ b/WebUI/src/assets/js/const.ts @@ -8,6 +8,7 @@ export module Const { export const MODEL_TYPE_INPAINT = 6; export const MODEL_TYPE_PREVIEW = 7; export const MODEL_TYPE_LLAMA_CPP = 8; + export const MODEL_TYPE_OPENVINO = 9; export const MODEL_TYPE_COMFY_UNET = 100; export const MODEL_TYPE_COMFY_CLIP = 101; export const MODEL_TYPE_COMFY_VAE = 102; diff --git a/WebUI/src/assets/js/store/backendServices.ts b/WebUI/src/assets/js/store/backendServices.ts index 122844d7..d86ae35d 100644 --- a/WebUI/src/assets/js/store/backendServices.ts +++ b/WebUI/src/assets/js/store/backendServices.ts @@ -7,6 +7,7 @@ export const useBackendServices = defineStore("backendServices", () => { ["ai-backend", new BackendServiceSetupProgressListener("ai-backend")], 
["comfyui-backend", new BackendServiceSetupProgressListener("comfyui-backend")], ["llamacpp-backend", new BackendServiceSetupProgressListener("llamacpp-backend")], + ["openvino-backend", new BackendServiceSetupProgressListener("openvino-backend")], ]); window.electronAPI.getServices().catch(async (reason: any) => { diff --git a/WebUI/src/assets/js/store/globalSetup.ts b/WebUI/src/assets/js/store/globalSetup.ts index 33946931..990ef657 100644 --- a/WebUI/src/assets/js/store/globalSetup.ts +++ b/WebUI/src/assets/js/store/globalSetup.ts @@ -34,6 +34,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => { enableRag: false, llm_model: "microsoft/Phi-3-mini-4k-instruct", ggufLLM_model: "meta-llama-3.1-8b-instruct.Q5_K_M.gguf", + openvino_model: "TinyLlama-1.1B-Chat-v1.0", sd_model: "Lykon/dreamshaper-8", inpaint_model: "Lykon/dreamshaper-8-inpainting", negativePrompt: "bad hands, nsfw", @@ -83,6 +84,7 @@ export const useGlobalSetup = defineStore("globalSetup", () => { async function initSetup() { const setupData = await window.electronAPI.getInitSetting(); const apiServiceInformation = await window.electronAPI.getServices() + console.log("apiServiceInformation", apiServiceInformation) paths.value = setupData.modelPaths; models.value = setupData.modelLists; models.value.inpaint.push(useI18N().state.ENHANCE_INPAINT_USE_IMAGE_MODEL); diff --git a/WebUI/src/assets/js/store/textInference.ts b/WebUI/src/assets/js/store/textInference.ts index 9afc2aba..28214d61 100644 --- a/WebUI/src/assets/js/store/textInference.ts +++ b/WebUI/src/assets/js/store/textInference.ts @@ -3,13 +3,14 @@ import { useGlobalSetup } from "./globalSetup"; import { z } from "zod"; import { useBackendServices } from "./backendServices"; -export const backendTypes = ['IPEX-LLM', 'LLAMA.CPP'] as const; +export const backendTypes = ['IPEX-LLM', 'LLAMA.CPP', 'OpenVINO'] as const; const backend = z.enum(backendTypes); export type Backend = z.infer; const backendModelKey = { 'IPEX-LLM': 'llm_model', 'LLAMA.CPP': 'ggufLLM_model', + 'OpenVINO': 'openvino_model', } export const useTextInference = defineStore("textInference", () => { @@ -19,14 +20,27 @@ export const useTextInference = defineStore("textInference", () => { const activeModel = ref(null); const llamaBackendUrl = computed(() => { const url = backendServices.info.find(item => item.serviceName === "llamacpp-backend")?.baseUrl; - console.log('url', url); + console.log('llama url', url); return url; }); + const openVINOBackendUrl = computed(() => { + const url = backendServices.info.find(item => item.serviceName === "openvino-backend")?.baseUrl; + console.log('openvino url', backendServices.info.find(item => item.serviceName === "openvino-backend")?.baseUrl); + return url; + }); + + watch([llamaBackendUrl], () => { console.log('llamaBackendUrl changed', llamaBackendUrl.value); } ); + + watch([openVINOBackendUrl], () => { + console.log('openVINOBackendUrl changed', openVINOBackendUrl.value); + }, + { immediate: true } + ); watch([activeModel], () => { console.log('activeModel changed', activeModel.value); @@ -37,6 +51,7 @@ export const useTextInference = defineStore("textInference", () => { backend, activeModel, llamaBackendUrl, + openVINOBackendUrl, } }, { persist: { diff --git a/WebUI/src/components/SettingsBasic.vue b/WebUI/src/components/SettingsBasic.vue index a858eece..511d3e12 100644 --- a/WebUI/src/components/SettingsBasic.vue +++ b/WebUI/src/components/SettingsBasic.vue @@ -117,7 +117,8 @@ const theme = useTheme(); const textInferenceBackendDisplayName: Record 
= { "IPEX-LLM": "IPEX-LLM", - "LLAMA.CPP": "Llama.cpp - GGUF" + "LLAMA.CPP": "Llama.cpp - GGUF", + "OpenVINO": "OpenVINO" } const themeToDisplayName = (theme: Theme) => { @@ -157,6 +158,8 @@ function mapBackendNames(name : Backend) : BackendServiceName | undefined { return "ai-backend" as BackendServiceName } else if(name === "LLAMA.CPP") { return 'llamacpp-backend' as BackendServiceName + } else if (name === "OpenVINO") { + return 'openvino-backend' as BackendServiceName } else { return undefined } diff --git a/WebUI/src/env.d.ts b/WebUI/src/env.d.ts index a5f93d8a..5d43c8c2 100644 --- a/WebUI/src/env.d.ts +++ b/WebUI/src/env.d.ts @@ -317,6 +317,6 @@ type CheckModelAlreadyLoadedResult = { type SDGenerateState = "no_start" | "input_image" | "load_model" | "load_model_components" | "generating" | "image_out" | "error" -type BackendServiceName = "ai-backend" | "comfyui-backend" | "llamacpp-backend" +type BackendServiceName = "ai-backend" | "comfyui-backend" | "llamacpp-backend" | "openvino-backend" type ApiServiceInformation = { serviceName: BackendServiceName, status: BackendStatus , baseUrl: string, port: number, isSetUp: boolean, isRequired: boolean } diff --git a/WebUI/src/lib/utils.ts b/WebUI/src/lib/utils.ts index 9f5db491..526f138f 100644 --- a/WebUI/src/lib/utils.ts +++ b/WebUI/src/lib/utils.ts @@ -57,6 +57,8 @@ export function mapServiceNameToDisplayName(serviceName: string) { return "AI Playground" case "llamacpp-backend": return "Llama.cpp - GGUF" + case "openvino-backend": + return "OpenVINO" default: return serviceName } diff --git a/WebUI/src/views/Answer.vue b/WebUI/src/views/Answer.vue index ff2b8543..1c28b73f 100644 --- a/WebUI/src/views/Answer.vue +++ b/WebUI/src/views/Answer.vue @@ -118,6 +118,15 @@ + + + + @@ -260,7 +269,25 @@ const iconSizeClass = computed(() => iconSizes[fontSizeIndex.value]); const isMaxSize = computed(() => fontSizeIndex.value >= fontSizes.length - 1); const isMinSize = computed(() => fontSizeIndex.value <= 0); const isHistoryVisible = ref(false); -const currentBackendAPI = computed(() => textInference.backend === 'LLAMA.CPP' ? textInference.llamaBackendUrl : globalSetup.apiHost); +const backendMapping = { + 'IPEX-LLM': { service: 'ai-backend', api: globalSetup.apiHost }, + 'LLAMA.CPP': { service: 'llamacpp-backend', api: textInference.llamaBackendUrl }, + 'OpenVINO': { service: 'openvino-backend', api: textInference.openVINOBackendUrl } + }; +const currentBackendAPI = computed(() => { + const backendKey = textInference.backend; + + switch (backendKey) { + case 'IPEX-LLM': + return globalSetup.apiHost; + case 'LLAMA.CPP': + return textInference.llamaBackendUrl; + case 'OpenVINO': + return textInference.openVINOBackendUrl; + default: + throw new Error(`Unknown backend: ${backendKey}`); + } +}); // Keep track of which conversation is receiving the in-progress text const currentlyGeneratingKey = ref(null); @@ -475,13 +502,11 @@ async function checkModel() { }); } - - async function generate(chatContext: ChatItem[]) { if (processing.value || chatContext.length == 0) { return; } try { - const inferenceBackendService: BackendServiceName = textInference.backend === 'IPEX-LLM' ? "ai-backend" : "llamacpp-backend" + const inferenceBackendService: BackendServiceName = backendMapping[textInference.backend].service as BackendServiceName; await globalSetup.resetLastUsedInferenceBackend(inferenceBackendService) globalSetup.updateLastUsedBackend(inferenceBackendService)
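# Reference sketch (not part of the patch): shows how the chat payload posted by
# openvino_test.py is reshaped by convert_prompt before OpenVino.create_chat_completion
# hands it to the tokenizer's chat template. Run from the OpenVINO/ directory; it only
# imports modules introduced above and does not load a model.
from openvino_adapter import convert_prompt

history = [
    {"question": "Your name is Luca", "answer": "My name is Luca."},
    {"question": "What is your name?"},
]

messages = convert_prompt(history)
# Expected shape: the system prompt from _default_prompt, then alternating
# user/assistant turns, ending with the unanswered user question:
#   {"role": "system",    "content": "You are a helpful digital assistant. ..."}
#   {"role": "user",      "content": "Your name is Luca"}
#   {"role": "assistant", "content": "My name is Luca."}
#   {"role": "user",      "content": "What is your name?"}
for message in messages:
    print(message["role"], "->", message["content"])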