Starting point for MacOS development
-- Do not include in feature merging! --

Signed-off-by: julianbollig <[email protected]> <[email protected]>

wip: make llamacpp rag work locally

Changing for Llama-CPP to work

Signed-off-by: julianbollig <[email protected]>

Fixed more stuff

Signed-off-by: julianbollig <[email protected]>

run linter and formatter

Signed-off-by: julianbollig <[email protected]>
marijnvg-tng authored and julianbollig committed Feb 5, 2025
1 parent 22438db commit 52753ce
Showing 21 changed files with 131 additions and 77 deletions.
30 changes: 22 additions & 8 deletions LlamaCPP/llama_rag.py
@@ -3,6 +3,8 @@
import os
import time
from typing import Any, List, Dict
from uuid import uuid4


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import LlamaCppEmbeddings
@@ -13,7 +15,11 @@
UnstructuredWordDocumentLoader,
Docx2txtLoader,
)

import nltk

[GitHub Actions / ruff — check failure: Ruff (F401) at LlamaCPP/llama_rag.py:19:8 — `nltk` imported but unused]
import faiss
from langchain_community.vectorstores.faiss import FAISS, Document
from langchain_community.docstore.in_memory import InMemoryDocstore

#### CONFIGURATIONS ------------------------------------------------------------------------------------------------------------------------
INDEX_DATABASE_PATH = "./db/" # Faiss database folder
@@ -57,7 +63,7 @@ def __init__(self, embeddings: EmbeddingWrapper):
self.embeddings = embeddings
index_cache = os.path.join(INDEX_DATABASE_PATH, "index.faiss")
self.db = (
FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings)
FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings.model, allow_dangerous_deserialization=True)
if os.path.exists(index_cache)
else None
)
@@ -90,11 +96,19 @@ def __save_index(self, file_base_name: str, md5: str, doc_ids: str):

def __add_documents(self, file_base_name: str, docs: List[Document], md5: str):
if self.db is None:
self.db = FAISS.from_documents(docs, self.embeddings)
else:
self.db.add_documents(docs)
print(docs[0].metadata)
self.__save_index(file_base_name, md5, [doc.metadata["doc_id"] for doc in docs])
index = faiss.IndexFlatL2(len(self.embeddings.embed_query("hello world")))

self.db = FAISS(
embedding_function=self.embeddings.model,
index=index,
docstore=InMemoryDocstore(),
index_to_docstore_id={},
)

uuids = [str(uuid4()) for _ in range(len(docs))]
self.db.add_documents(documents=docs, ids=uuids)
print(docs[0])
self.__save_index(file_base_name, md5, uuids)

def __analyze_file_to_db(self, file: str, md5: str):
file_base_name = os.path.basename(file)
@@ -181,8 +195,8 @@ def dispose():

if __name__ == "__main__":
# Example Usage
init(model_path="/Users/daniel/silicon/AI-Playground/LlamaCPP/models/llm/gguf/bge-large-en-v1.5-q8_0.gguf")
add_index_file("/Users/daniel/silicon/AI-Playground/hello.txt")
init(model_path="/Users/julianbollig/Documents/Projects/AI-Playground/service/models/llm/ggufLLM/bge-large-en-v1.5-q8_0.gguf")
add_index_file("//Users/julianbollig/Documents/Projects/AI-Playground/SECURITY.md")
success, context, source = query("What is the content about?")
print("Query success:", success)
print("Context:", context)
5 changes: 5 additions & 0 deletions LlamaCPP/requirements.txt
@@ -4,3 +4,8 @@ dpcpp-cpp-rt==2025.0
mkl-dpcpp==2025.0
intel-sycl-rt==2025.0.0
onednn-devel==2025.0.0
langchain_community==0.3.0
faiss-cpu==1.8.0
docx2txt==0.8
pypdf==4.2.0
unstructured==0.14.6
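
The five new pins back the RAG file ingestion: langchain_community supplies the FAISS vector store and the document loaders, faiss-cpu the index itself, and docx2txt, pypdf, and unstructured back the Word/PDF loaders imported in llama_rag.py. A rough sketch of how such loaders are typically dispatched by extension follows; the exact mapping is an assumption, not taken from this diff.

# Sketch (not from the commit): dispatching langchain_community loaders by file extension.
import os

from langchain_community.document_loaders import (
    Docx2txtLoader,                  # needs docx2txt
    PyPDFLoader,                     # needs pypdf
    TextLoader,
    UnstructuredWordDocumentLoader,  # needs unstructured
)

LOADERS = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".doc": UnstructuredWordDocumentLoader,
}

def load_documents(path: str):
    ext = os.path.splitext(path)[1].lower()
    loader_cls = LOADERS.get(ext)
    if loader_cls is None:
        raise ValueError(f"Unsupported file type: {ext}")
    return loader_cls(path).load()  # returns a list of Document objects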
2 changes: 1 addition & 1 deletion WebUI/build/scripts/install-full-python-env.js
@@ -74,7 +74,7 @@ function main() {
prepareTargetDir(targetDir)
copyToTargetDir(envDir, targetDir)

const pythonExe = existingFileOrExit(path.join(targetDir, 'python.exe'))
const pythonExe = existingFileOrExit(path.join(targetDir, 'python'))
const getPipFile = existingFileOrExit(path.join(targetDir, 'get-pip.py'))

const platformSpecificRequirementsTxt = existingFileOrExit(
15 changes: 8 additions & 7 deletions WebUI/electron/main.ts
@@ -602,13 +602,14 @@ function needAdminPermission() {
}

function isAdmin(): boolean {
const lib = koffi.load('Shell32.dll')
try {
const IsUserAnAdmin = lib.func('IsUserAnAdmin', 'bool', [])
return IsUserAnAdmin()
} finally {
lib.unload()
}
// const lib = koffi.load("Shell32.dll");
// try {
// const IsUserAnAdmin = lib.func("IsUserAnAdmin", "bool", []);
// return IsUserAnAdmin();
// } finally {
// lib.unload();
// }
return true
}

app.whenReady().then(async () => {
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/aiBackendService.ts
@@ -33,7 +33,7 @@ export class AiBackendService extends LongLivedPythonApiService {
// lsLevelZero will ensure uv and pip are installed
await this.lsLevelZero.ensureInstalled()

const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/comfyUIBackendService.ts
@@ -118,7 +118,7 @@ export class ComfyUiBackendService extends LongLivedPythonApiService {
status: 'executing',
debugMessage: `Trying to identify intel hardware`,
}
const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/llamaCppBackendService.ts
@@ -39,7 +39,7 @@ export class LlamaCppBackendService extends LongLivedPythonApiService {
await this.lsLevelZero.ensureInstalled()
await this.uvPip.ensureInstalled()

const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
31 changes: 16 additions & 15 deletions WebUI/electron/subprocesses/service.ts
@@ -106,7 +106,7 @@ export class PythonService extends ExecutableService {
}

getExePath(): string {
return path.resolve(path.join(this.dir, 'python.exe'))
return path.resolve(path.join(this.dir, 'bin', 'python'))
}

async check(): Promise<void> {
@@ -131,7 +131,7 @@

readonly prototypicalEnvDir = app.isPackaged
? path.join(this.baseDir, 'prototype-python-env')
: path.join(this.baseDir, 'build-envs/online/prototype-python-env')
: path.join(this.baseDir, 'env')
private async clonePythonEnv(): Promise<void> {
existingFileOrError(this.prototypicalEnvDir)
if (filesystem.existsSync(this.dir)) {
@@ -368,17 +368,18 @@ export class LsLevelZeroService extends ExecutableService {
return this.selectedDeviceIdx
}

async getDeviceSelectorEnv(): Promise<{ ONEAPI_DEVICE_SELECTOR: string }> {
if (this.selectedDeviceIdx < 0 || this.selectedDeviceIdx >= this.allLevelZeroDevices.length) {
await this.detectDevice()
}

if (this.selectedDeviceIdx < 0) {
this.logError('No supported device')
return { ONEAPI_DEVICE_SELECTOR: 'level_zero:*' }
}

return { ONEAPI_DEVICE_SELECTOR: `level_zero:${this.selectedDeviceIdx}` }
async getDeviceSelectorEnv(): Promise<{}> {
// if (this.selectedDeviceIdx < 0 || this.selectedDeviceIdx >= this.allLevelZeroDevices.length) {
// await this.detectDevice();
// }
//
// if (this.selectedDeviceIdx < 0) {
// this.logError("No supported device");
// return {ONEAPI_DEVICE_SELECTOR: "level_zero:*"};
// }
//
// return {ONEAPI_DEVICE_SELECTOR: `level_zero:${this.selectedDeviceIdx}`};
return {}
}
}

@@ -389,7 +390,7 @@ export class GitService extends ExecutableService {
}

getExePath(): string {
return path.resolve(path.join(this.dir, 'cmd/git.exe'))
return path.resolve('/usr/bin/git')
}

async run(args: string[] = [], extraEnv?: object, workDir?: string): Promise<string> {
@@ -511,7 +512,7 @@ export abstract class LongLivedPythonApiService implements ApiService {
readonly baseDir = app.isPackaged ? process.resourcesPath : path.join(__dirname, '../../../')
readonly prototypicalPythonEnv = app.isPackaged
? path.join(this.baseDir, 'prototype-python-env')
: path.join(this.baseDir, 'build-envs/online/prototype-python-env')
: path.join(this.baseDir, 'env')
readonly customIntelExtensionForPytorch = path.join(
app.isPackaged ? this.baseDir : path.join(__dirname, '../../external/'),
ipexWheel,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/updateIntelWorkflows.ts
@@ -16,7 +16,7 @@ const externalRes = path.resolve(
app.isPackaged ? process.resourcesPath : path.join(__dirname, '../../external/'),
)

const gitExePath = Path.join(resourcesBaseDir, 'portable-git', 'cmd', 'git.exe')
const gitExePath = '/usr/bin/git'
const workflowDirTargetPath = Path.join(externalRes, 'workflows')
const workflowDirSpareGitRepoPath = Path.join(externalRes, 'workflows_intel')
const intelWorkflowDirPath = Path.join(
32 changes: 21 additions & 11 deletions WebUI/src/assets/js/store/globalSetup.ts
@@ -101,20 +101,27 @@ export const useGlobalSetup = defineStore('globalSetup', () => {
models.value.scheduler.push(...(await initWebSettings(postJson)))
models.value.scheduler.unshift('None')
break
} catch (_error: unknown) {
} catch (error) {

[GitHub Actions / lint-ts — check failure at WebUI/src/assets/js/store/globalSetup.ts:104 — 'error' is defined but never used. Allowed unused caught errors must match /^_/u]
await util.delay(delay)
}
}
await reloadGraphics()
if (graphicsList.value.length == 0) {
await window.electronAPI.showMessageBoxSync({
message: useI18N().state.ERROR_UNFOUND_GRAPHICS,
title: 'error',
icon: 'error',
})
window.electronAPI.exitApp()
}
loadUserSettings()
// if (graphicsList.value.length == 0) {
// await window.electronAPI.showMessageBoxSync({ message: useI18N().state.ERROR_UNFOUND_GRAPHICS, title: "error", icon: "error" });
// window.electronAPI.exitApp();
// }
await loadUserSettings()

// isComfyUiInstalled.value = await isComfyUIDownloaded()
// if (isComfyUiInstalled.value) {
// window.electronAPI.wakeupComfyUIService()
// setTimeout(() => {
// //requires proper feedback on server startup...
// useComfyUi().updateComfyState()
// loadingState.value = "running";
// }, 10000);
// } else {
// loadingState.value = "running";
}

async function initWebSettings(postJson: string) {
@@ -259,7 +266,10 @@ export const useGlobalSetup = defineStore('globalSetup', () => {
modelSettings.lora = models.value.lora[0]
changeUserSetup = true
}
if (!graphicsList.value.find((item) => item.index == modelSettings.graphics)) {
if (
!graphicsList.value.find((item) => item.index == modelSettings.graphics) &&
graphicsList.value.length != 0
) {
modelSettings.graphics = graphicsList.value[0].index
}
if (changeUserSetup) {
12 changes: 11 additions & 1 deletion service/aipg_utils.py
@@ -238,11 +238,21 @@ def get_ESRGAN_size():
return int(response.headers.get("Content-Length"))


def get_support_graphics():
def get_support_graphics(env_type: str):

device_count = torch.xpu.device_count()
graphics = list()
for i in range(device_count):
device_name = torch.xpu.get_device_name(i)
print('device_name', device_name)
if device_name == "Intel(R) Arc(TM) Graphics" or re.search("Intel\(R\) Arc\(TM\)", device_name) is not None:

[GitHub Actions / ruff — check failure: Ruff (F821) at service/aipg_utils.py:248:58 — Undefined name `re`]
graphics.append({"index": i, "name": device_name})
device_count = torch.cuda.device_count()
print('cuda device_count:', device_count)
service_config.env_type = env_type
for i in range(device_count):
device_name = torch.cuda.get_device_name(i)
print('device_name', device_name)
graphics.append({"index": i, "name": device_name})
return graphics

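The Ruff F821 above points at the new `re.search` call being added without an import. A minimal sketch — not the commit's code — of the same enumeration with the import in place and availability guards (torch.xpu requires an XPU-enabled torch build; torch.cuda a CUDA-enabled one):

# Sketch (not from the commit): device enumeration with the missing `import re` added.
import re

import torch

def get_support_graphics(env_type: str):
    # env_type is stored on service_config in the real code; omitted here to stay self-contained.
    graphics = []
    # Intel XPU devices (Arc) — only present on an XPU-enabled torch build.
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        for i in range(torch.xpu.device_count()):
            name = torch.xpu.get_device_name(i)
            if re.search(r"Intel\(R\) Arc\(TM\)", name):
                graphics.append({"index": i, "name": name})
    # NVIDIA CUDA devices — only present on a CUDA-enabled torch build.
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            graphics.append({"index": i, "name": torch.cuda.get_device_name(i)})
    return graphics
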
19 changes: 14 additions & 5 deletions service/llm_biz.py
@@ -17,7 +17,7 @@
PreTrainedTokenizer,
)

from ipex_llm.transformers import AutoModelForCausalLM
#from ipex_llm.transformers import AutoModelForCausalLM
from typing import Callable
from transformers.generation.stopping_criteria import (
StoppingCriteria,
@@ -27,6 +27,15 @@
import service_config


# import ipex_llm.transformers.models.mistral

# W/A for https://github.com/intel/AI-Playground/issues/94
# Disable decoding_fast_path to avoid calling forward_qkv() which is not supported by bigdl-core-xe-*-23
# ipex_llm.transformers.models.mistral.use_decoding_fast_path = (
# lambda *args, **kwargs: False
# )


class LLMParams:
prompt: List[Dict[str, str]]
device: int
@@ -174,8 +183,8 @@ def chat(
# if prev genera not finish, stop it
stop_generate()

torch.xpu.set_device(params.device)
service_config.device = f"xpu:{params.device}"
torch.cuda.set_device(params.device)
service_config.device = f"cuda:{params.device}"
prompt = params.prompt
enable_rag = params.enable_rag
model_repo_id = params.model_repo_id
@@ -190,7 +199,7 @@
if _model is not None:
del _model
gc.collect()
torch.xpu.empty_cache()
torch.cuda.empty_cache()

model_base_path = service_config.service_model_paths.get("llm")
model_name = model_repo_id.replace("/", "---")
@@ -309,7 +318,7 @@ def dispose():
del _model
_model = None
gc.collect()
torch.xpu.empty_cache()
torch.cuda.empty_cache()


class StopGenerateException(Exception):
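The llm_biz.py hunks above swap torch.xpu calls for torch.cuda one-for-one. A hedged sketch of a backend-agnostic variant that picks whichever accelerator is actually present — an assumption for illustration, not what this commit does:

# Sketch (not from the commit): choose cuda or xpu at runtime instead of hard-coding one backend.
import gc

import torch

def select_device(index: int) -> str:
    if torch.cuda.is_available():
        torch.cuda.set_device(index)
        return f"cuda:{index}"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.set_device(index)
        return f"xpu:{index}"
    return "cpu"

def empty_accelerator_cache() -> None:
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.empty_cache()
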
3 changes: 2 additions & 1 deletion service/main.py
@@ -3,8 +3,9 @@
import time
import traceback
import torch
from transformers import pipeline, PreTrainedModel, TextIteratorStreamer
import intel_extension_for_pytorch as ipex
from transformers import pipeline,PreTrainedModel,TextIteratorStreamer
# import intel_extension_for_pytorch as ipex


def stream_chat_generate(model: PreTrainedModel, args: dict):
(The remaining changed files were not loaded in this view.)