Commit
Add OpenVINO model support and related configurations
DanielHirschTNG committed Jan 31, 2025
1 parent 007fb89 commit 9988e5f
Showing 16 changed files with 125 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -20,3 +20,4 @@ llama-cpp-env/
*-env/
build-envs/
portable-git/
llm_cache/*
20 changes: 11 additions & 9 deletions OpenVINO/openvino_adapter.py
@@ -92,14 +92,16 @@ def text_conversation(self, params: LLMParams):
        return self.generator()


    def stream_function(self, stream):
        for output in stream:
            if self.llm_interface.stop_generate:
                self.llm_interface.stop_generate = False
                break

            self.text_out_callback(output)
        self.put_msg({"type": "finish"})

    def stream_function(self, output):
        self.text_out_callback(output)

        if self.llm_interface.stop_generate:
            self.put_msg("Stopping generation.")
            self.llm_interface.stop_generate = False
            return True  # Stop generation

        return False


    def text_conversation_run(
        self,
@@ -110,7 +112,7 @@ def text_conversation_run(

            prompt = params.prompt
            full_prompt = convert_prompt(prompt)
            self.llm_interface.create_chat_completion(full_prompt, self.text_out_callback)
            self.llm_interface.create_chat_completion(full_prompt, self.stream_function)

        except Exception as ex:
            traceback.print_exc()
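Note on the adapter change: the new stream_function follows the openvino_genai streamer contract, in which the pipeline calls the callable once per decoded subword and stops generating as soon as it returns True. A minimal, self-contained sketch of that contract is below; the model path and prompt are placeholders for illustration, not values from this repository.

# Sketch of the per-token streamer contract used by the adapter above.
# Assumption: the local model path and prompt are illustrative only.
import openvino_genai

stop_generate = False  # in the adapter, this flag lives on the LLM interface and is set by a stop request

def streamer(subword: str) -> bool:
    print(subword, end="", flush=True)  # forward each token (text_out_callback in the adapter)
    return stop_generate                # returning True aborts generation early

pipe = openvino_genai.LLMPipeline("./models/TinyLlama-1.1B-Chat-v1.0", "GPU")
config = openvino_genai.GenerationConfig()
config.max_new_tokens = 256
pipe.generate("What is OpenVINO?", config, streamer)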
22 changes: 10 additions & 12 deletions OpenVINO/openvino_backend.py
@@ -4,7 +4,7 @@
import sys

Check failure on line 4 (GitHub Actions / ruff): OpenVINO/openvino_backend.py:4:8: F401 `sys` imported but unused
import openvino_genai
from openvino_params import LLMParams
import model_config
import openvino_model_config as model_config
import gc

class OpenVino(LLMInterface):
@@ -19,31 +19,29 @@ def load_model(self, params: LLMParams, callback: Callable[[str], None] = None):
        if callback is not None:
            callback("start")
        self.unload_model()
        callback(params.model_repo_id)

        #model_base_path = model_config.openVINOConfig.get("openvino")
        #namespace, repo, *model = model_repo_id.split("/")
        #model_path = path.abspath(path.join(model_base_path,"---".join([namespace, repo]), "---".join(model)))
        model_path = r".\TinyLlama-1.1B-Chat-v1.0"
        model_base_path = model_config.openVINOConfig.get("openvino")
        model_name = model_repo_id.replace("/", "---")
        model_path = path.abspath(path.join(model_base_path, model_name))
        callback("Model Path " + model_path)

        print(params.model_repo_id)
        enable_compile_cache = dict()
        enable_compile_cache["CACHE_DIR"] = "llm_cache"
        self._model = openvino_genai.LLMPipeline(model_path, "GPU", **enable_compile_cache)

        self._tokenizer = self._model.get_tokenizer()

        self._last_repo_id = model_repo_id
        if callback is not None:
            callback("finish")

        self.config = openvino_genai.GenerationConfig()
        self.config.max_new_tokens = 100
        print("Model loaded")
        self.config.max_new_tokens = 1024
        callback("Model loaded")

    def create_chat_completion(self, messages: List[Dict[str, str]], streamer: Callable[[str], None]):
        tokenized_input = self._tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        print(tokenized_input)
        return self._model.generate(tokenized_input, self.config, streamer)
        full_prompt = self._tokenizer.apply_chat_template(messages, add_generation_prompt=True)
        return self._model.generate(full_prompt, self.config, streamer)


def unload_model(self):
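For reference, a condensed, standalone sketch of the load-and-generate flow that load_model and create_chat_completion now implement. The base path is taken from model_config.dev.json in this commit, and the "namespace---repo" folder naming is an assumption mirrored from scanOpenVINOModels; this is not the backend's actual wiring.

# Hedged sketch of the OpenVINO GenAI flow added above (paths are assumptions).
from os import path
import openvino_genai

model_repo_id = "OpenVINO/TinyLlama-1.1B-Chat-v1.0-int4-ov"
model_base_path = "../service/models/llm/openvino"
model_path = path.abspath(path.join(model_base_path, model_repo_id.replace("/", "---")))

# CACHE_DIR enables the compiled-model cache, so later loads skip recompilation.
pipe = openvino_genai.LLMPipeline(model_path, "GPU", CACHE_DIR="llm_cache")
tokenizer = pipe.get_tokenizer()

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 1024

messages = [{"role": "user", "content": "Hello, what can you do?"}]
full_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
print(pipe.generate(full_prompt, config))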
File renamed without changes.
4 changes: 4 additions & 0 deletions WebUI/electron/main.ts
@@ -428,6 +428,10 @@ function initEventHandle() {
return pathsManager.scanGGUFLLMModels();
});

ipcMain.handle("getDownloadedOpenVINOModels", (event) => {
return pathsManager.scanOpenVINOModels();
});

ipcMain.handle("getDownloadedEmbeddingModels", (event) => {
return pathsManager.scanEmbedding(false);
});
16 changes: 16 additions & 0 deletions WebUI/electron/pathsManager.ts
@@ -5,6 +5,7 @@ export class PathsManager {
modelPaths: ModelPaths = {
llm: "",
ggufLLM: "",
openvino: "",
embedding: "",
stableDiffusion: "",
inpaint: "",
@@ -123,6 +124,21 @@ export class PathsManager {

return [...modelsSet]
}
scanOpenVINOModels() {
const dir = this.modelPaths.openvino;
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true });
}
console.log('getting models', dir);
const modelsSet = fs.readdirSync(dir)
.filter(subDir => {
const fullpath = path.join(dir, subDir);
return fs.statSync(fullpath).isDirectory() && fs.existsSync(path.join(fullpath))})
.map(subDir => subDir.replace("---", "/"))
.reduce((set, modelName) => set.add(modelName), new Set<string>());

return [...modelsSet]
}
scanLora(returnDefaults = true) {
const models = returnDefaults ? [
"None",
1 change: 1 addition & 0 deletions WebUI/electron/preload.ts
@@ -64,6 +64,7 @@ contextBridge.exposeInMainWorld("electronAPI", {
getDownloadedLoras: () => ipcRenderer.invoke("getDownloadedLoras"),
getDownloadedLLMs: () => ipcRenderer.invoke("getDownloadedLLMs"),
getDownloadedGGUFLLMs: () => ipcRenderer.invoke("getDownloadedGGUFLLMs"),
getDownloadedOpenVINOModels: () => ipcRenderer.invoke("getDownloadedOpenVINOModels"),
getDownloadedEmbeddingModels: () => ipcRenderer.invoke("getDownloadedEmbeddingModels"),
openImageWithSystem: (url: string) => ipcRenderer.send("openImageWithSystem", url),
selecteImage: (url: string) => ipcRenderer.send("selecteImage", url),
1 change: 1 addition & 0 deletions WebUI/external/model_config.dev.json
@@ -1,6 +1,7 @@
{
"llm": "../service/models/llm/checkpoints",
"ggufLLM": "../service/models/llm/ggufLLM",
"openvino": "../service/models/llm/openvino",
"embedding": "../service/models/llm/embedding",
"stableDiffusion": "../service/models/stable_diffusion/checkpoints",
"inpaint": "../service/models/stable_diffusion/inpaint",
1 change: 1 addition & 0 deletions WebUI/external/model_config.json
@@ -1,6 +1,7 @@
{
"llm": "./resources/service/models/llm/checkpoints",
"ggufLLM": "./resources/service/models/llm/ggufLLM",
"openvino": "./resources/service/models/llm/openvino",
"embedding": "./resources/service/models/llm/embedding",
"stableDiffusion": "./resources/service/models/stable_diffusion/checkpoints",
"inpaint": "./resources/service/models/stable_diffusion/inpaint",
15 changes: 14 additions & 1 deletion WebUI/src/assets/js/store/models.ts
@@ -1,6 +1,6 @@
import { acceptHMRUpdate, defineStore } from "pinia";

export type ModelType = "llm" | "embedding" | "stableDiffusion" | "inpaint" | "lora" | "vae" | "undefined" | "ggufLLM";
export type ModelType = "llm" | "embedding" | "stableDiffusion" | "inpaint" | "lora" | "vae" | "undefined" | "ggufLLM" | "openvino";

export type Model = {
name: string;
@@ -19,6 +19,13 @@ const predefinedModels: Model[] = [
{ name: 'bartowski/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q8_0.gguf', type: 'ggufLLM', downloaded: false },
{ name: 'bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/Meta-Llama-3.1-8B-Instruct-Q5_K_S.gguf', type: 'ggufLLM', downloaded: false },
{ name: 'HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf', type: 'ggufLLM', downloaded: false },
// OpenVINO models
{ name: 'OpenVINO/Phi-3-medium-4k-instruct-int4-ov', type: 'openvino', downloaded: false },
{ name: 'OpenVINO/mixtral-8x7b-instruct-v0.1-int4-ov', type: 'openvino', downloaded: false },
{ name: 'OpenVINO/Mistral-7B-Instruct-v0.2-fp16-ov', type: 'openvino', downloaded: false },
{ name: 'OpenVINO/TinyLlama-1.1B-Chat-v1.0-int4-ov', type: 'openvino', downloaded: false },
{ name: 'OpenVINO/Phi-3.5-mini-instruct-fp16-ov', type: 'openvino', downloaded: false },

]

export const userModels: Model[] = [
@@ -32,18 +39,23 @@ export const useModels = defineStore("models", () => {
const downloadList = ref<DownloadModelParam[]>([]);
const ggufLLMs = computed(() => models.value.filter(m => m.type === 'ggufLLM'));

const openVINOModels = computed(() => models.value.filter(m => m.type === 'openvino'));

async function refreshModels() {
const sdModels = await window.electronAPI.getDownloadedDiffusionModels();
const llmModels = await window.electronAPI.getDownloadedLLMs();
const ggufModels = await window.electronAPI.getDownloadedGGUFLLMs();
const openVINOModels = await window.electronAPI.getDownloadedOpenVINOModels();
const loraModels = await window.electronAPI.getDownloadedLoras();
const inpaintModels = await window.electronAPI.getDownloadedInpaintModels();
const embeddingModels = await window.electronAPI.getDownloadedEmbeddingModels();


const downloadedModels = [
...sdModels.map<Model>(name => ({ name, type: 'stableDiffusion', downloaded: true })),
...llmModels.map<Model>(name => ({ name, type: 'llm', downloaded: true })),
...ggufModels.map<Model>(name => ({ name, type: 'ggufLLM', downloaded: true })),
...openVINOModels.map<Model>(name => ({ name, type: 'openvino', downloaded: true })),
...loraModels.map<Model>(name => ({ name, type: 'lora', downloaded: true })),
...inpaintModels.map<Model>(name => ({ name, type: 'inpaint', downloaded: true })),
...embeddingModels.map<Model>(name => ({ name, type: 'embedding', downloaded: true })),
@@ -63,6 +75,7 @@ export const useModels = defineStore("models", () => {
models,
llms,
ggufLLMs,
openVINOModels,
hfToken,
hfTokenIsValid: computed(() => hfToken.value?.startsWith('hf_')),
downloadList,
37 changes: 30 additions & 7 deletions WebUI/src/components/AddLLMDialog.vue
@@ -33,7 +33,7 @@
import { Input } from '@/components/ui/input'
import { useGlobalSetup } from '@/assets/js/store/globalSetup';
import { useI18N } from '@/assets/js/store/i18n';
import { useModels, userModels } from '@/assets/js/store/models';
import { useModels, userModels, ModelType } from '@/assets/js/store/models';
import { useTextInference } from '@/assets/js/store/textInference';
@@ -112,15 +112,38 @@ async function addModel() {
}
async function registerModel() {
userModels.push({ name: modelRequest.value, type: textInference.backend === 'IPEX-LLM' ? 'llm' : 'ggufLLM', downloaded: false })
await models.refreshModels()
if (textInference.backend === 'IPEX-LLM') {
globalSetup.modelSettings.llm_model = modelRequest.value;
} else {
globalSetup.modelSettings.ggufLLM_model = modelRequest.value;
let modelType: ModelType;
switch (textInference.backend) {
case 'IPEX-LLM':
modelType = 'llm';
break;
case 'LLAMA.CPP':
modelType = 'ggufLLM';
break;
case 'OpenVINO':
modelType = 'openvino';
break;
default:
modelType = 'llm';
}
userModels.push({ name: modelRequest.value, type: modelType, downloaded: false });
await models.refreshModels();
switch (textInference.backend) {
case 'IPEX-LLM':
globalSetup.modelSettings.llm_model = modelRequest.value;
break;
case 'LLAMA.CPP':
globalSetup.modelSettings.ggufLLM_model = modelRequest.value;
break;
case 'OpenVINO':
globalSetup.modelSettings.openvino_model = modelRequest.value;
break;
}
}
async function isLLM(repo_id: string) {
const response = await fetch(`${globalSetup.apiHost}/api/isLLM?repo_id=${repo_id}`)
const data = await response.json()
3 changes: 2 additions & 1 deletion WebUI/src/env.d.ts
@@ -55,6 +55,7 @@ type electronAPI = {
getDownloadedLoras(): Promise<string[]>,
getDownloadedLLMs(): Promise<string[]>,
getDownloadedGGUFLLMs(): Promise<string[]>,
getDownloadedOpenVINOModels(): Promise<string[]>,
getDownloadedEmbeddingModels(): Promise<string[]>,
openImageWithSystem(url: string): void,
selecteImage(url: string): void,
@@ -298,7 +299,7 @@ type CheckModelAlreadyLoadedParameters = {
additionalLicenseLink?: string;
}

type BackendType = "comfyui" | "default" | "llama_cpp"
type BackendType = "comfyui" | "default" | "llama_cpp" | "openvino"

type DownloadModelParam = CheckModelAlreadyLoadedParameters

26 changes: 17 additions & 9 deletions WebUI/src/views/Answer.vue
@@ -118,10 +118,10 @@
<model-drop-down-item :model="slotItem.item"></model-drop-down-item>
</template>
</drop-selector>
<drop-selector v-if="textInference.backend === 'OpenVINO'" :array="models.ggufLLMs" @change="(i) => textInference.activeModel = i.name" class="w-96">
<drop-selector v-if="textInference.backend === 'OpenVINO'" :array="models.openVINOModels" @change="(i) => textInference.activeModel = i.name" class="w-96">
<template #selected>
<model-drop-down-item
:model="models.ggufLLMs.find((m) => m.name === globalSetup.modelSettings.openvino_model)"></model-drop-down-item>
:model="models.openVINOModels.find((m) => m.name === globalSetup.modelSettings.openvino_model)"></model-drop-down-item>
</template>
<template #list="slotItem">
<model-drop-down-item :model="slotItem.item"></model-drop-down-item>
@@ -269,11 +269,7 @@ const iconSizeClass = computed(() => iconSizes[fontSizeIndex.value]);
const isMaxSize = computed(() => fontSizeIndex.value >= fontSizes.length - 1);
const isMinSize = computed(() => fontSizeIndex.value <= 0);
const isHistoryVisible = ref(false);
const backendMapping = {
'IPEX-LLM': { service: 'ai-backend', api: globalSetup.apiHost },
'LLAMA.CPP': { service: 'llamacpp-backend', api: textInference.llamaBackendUrl },
'OpenVINO': { service: 'openvino-backend', api: textInference.openVINOBackendUrl }
};
const currentBackendAPI = computed(() => {
const backendKey = textInference.backend;
@@ -373,6 +369,11 @@ function scrollToBottom(smooth = true) {
}
async function updateTitle(conversation: ChatItem[]) {
const backendMapping = {
'IPEX-LLM': { service: 'ai-backend', api: globalSetup.apiHost, model: globalSetup.modelSettings.llm_model },
'LLAMA.CPP': { service: 'llamacpp-backend', api: textInference.llamaBackendUrl, model: globalSetup.modelSettings.ggufLLM_model },
'OpenVINO': { service: 'openvino-backend', api: textInference.openVINOBackendUrl, model: globalSetup.modelSettings.openvino_model }
};
const instruction = `Create me a short descriptive title for the following conversation in a maximum of 20 characters. Don't use unnecessary words like 'Conversation about': `;
const prompt = `${instruction}\n\n\`\`\`${JSON.stringify(conversation.slice(0, 3).map((item) => ({ question: item.question, answer: item.answer })))}\`\`\``;
console.log("prompt", prompt);
@@ -381,7 +382,7 @@ async function updateTitle(conversation: ChatItem[]) {
device: globalSetup.modelSettings.graphics,
prompt: chatContext,
enable_rag: false,
model_repo_id: textInference.backend === 'IPEX-LLM' ? globalSetup.modelSettings.llm_model : globalSetup.modelSettings.ggufLLM_model,
model_repo_id: backendMapping[textInference.backend].model,
print_metrics: false
};
const response = await fetch(`${ currentBackendAPI.value }/api/llm/chat`, {
@@ -486,6 +487,8 @@ async function checkModel() {
let checkList: CheckModelAlreadyLoadedParameters[];
if (textInference.backend === "LLAMA.CPP") {
checkList = [{ repo_id: globalSetup.modelSettings.ggufLLM_model, type: Const.MODEL_TYPE_LLAMA_CPP, backend: "llama_cpp" }];
} else if (textInference.backend === "OpenVINO") {
checkList = [{ repo_id: globalSetup.modelSettings.openvino_model, type: Const.MODEL_TYPE_OPENVINO, backend: "openvino" }];
} else {
checkList = [{ repo_id: globalSetup.modelSettings.llm_model, type: Const.MODEL_TYPE_LLM, backend: "default" }];
}
@@ -503,6 +506,11 @@
}
async function generate(chatContext: ChatItem[]) {
const backendMapping = {
'IPEX-LLM': { service: 'ai-backend', api: globalSetup.apiHost, model: globalSetup.modelSettings.llm_model },
'LLAMA.CPP': { service: 'llamacpp-backend', api: textInference.llamaBackendUrl, model: globalSetup.modelSettings.ggufLLM_model },
'OpenVINO': { service: 'openvino-backend', api: textInference.openVINOBackendUrl, model: globalSetup.modelSettings.openvino_model }
};
if (processing.value || chatContext.length == 0) { return; }
try {
@@ -525,7 +533,7 @@ async function generate(chatContext: ChatItem[]) {
device: globalSetup.modelSettings.graphics,
prompt: chatContext,
enable_rag: ragData.enable && textInference.backend !== 'LLAMA.CPP',
model_repo_id: textInference.backend === 'IPEX-LLM' ? globalSetup.modelSettings.llm_model : globalSetup.modelSettings.ggufLLM_model,
model_repo_id: backendMapping[textInference.backend].model,
};
const response = await fetch(`${currentBackendAPI.value}/api/llm/chat`, {
method: "POST", headers: {
12 changes: 12 additions & 0 deletions service/aipg_utils.py
@@ -46,12 +46,20 @@ def check_mmodel_exist(type: int, repo_id: str, backend: str) -> bool:
    match(backend):
        case "default":
            return check_defaultbackend_mmodel_exist(type, repo_id)
        case "openvino":
            return check_openvino_model_exists(type, repo_id)
        case "comfyui":
            return check_comfyui_model_exists(type, repo_id)
        case "llama_cpp":
            return check_llama_cpp_model_exists(type, repo_id)
        case _:
            raise NameError("Unknown Backend")

def check_openvino_model_exists(type, repo_id) -> bool:
    folder_name = repo_local_root_dir_name(repo_id)
    dir = service_config.openvino_model_paths.get("openvino")
    return os.path.exists(os.path.join(dir, folder_name))


def check_llama_cpp_model_exists(type, repo_id) -> bool:
    model_dir = service_config.llama_cpp_model_paths.get(convert_model_type(type))
@@ -152,6 +160,8 @@ def convert_model_type(type: int):
        return "preview"
    elif type == 8:
        return "ggufLLM"
    elif type == 9:
        return "openvino"
    elif type == 100:
        return "unet"
    elif type == 101:
@@ -178,6 +188,8 @@ def get_model_path(type: int, backend: str):
            return service_config.service_model_paths.get(convert_model_type(type))
        case "llama_cpp":
            return service_config.llama_cpp_model_paths.get(convert_model_type(type))
        case "openvino":
            return service_config.openvino_model_paths.get(convert_model_type(type))
        case "comfyui":
            return service_config.comfy_ui_model_paths.get(convert_model_type(type))

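The new existence check relies on the same repo-id-to-folder mapping the WebUI uses in scanOpenVINOModels, where slashes become "---". A hedged sketch of that check follows; the directory and the behaviour of repo_local_root_dir_name are assumptions inferred from this commit's configs and pathsManager.ts, not taken from service_config itself.

# Sketch of check_openvino_model_exists; path and "---" naming are assumptions.
import os

OPENVINO_MODEL_DIR = "./resources/service/models/llm/openvino"

def repo_local_root_dir_name(repo_id: str) -> str:
    # "OpenVINO/Phi-3.5-mini-instruct-fp16-ov" -> "OpenVINO---Phi-3.5-mini-instruct-fp16-ov"
    return repo_id.replace("/", "---")

def check_openvino_model_exists(repo_id: str) -> bool:
    folder_name = repo_local_root_dir_name(repo_id)
    return os.path.exists(os.path.join(OPENVINO_MODEL_DIR, folder_name))

print(check_openvino_model_exists("OpenVINO/Phi-3.5-mini-instruct-fp16-ov"))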