Starting point for MacOS development
-- Do not include in feature merging! --

Signed-off-by: julianbollig <[email protected]> <[email protected]>

wip: make llamacpp rag work locally

Changing for Llama-CPP to work

Signed-off-by: julianbollig <[email protected]>

Fixed more stuff

Signed-off-by: julianbollig <[email protected]>

run linter and formatter

Signed-off-by: julianbollig <[email protected]>
marijnvg-tng authored and julianbollig committed Feb 5, 2025
1 parent 22438db commit 52753ce
Showing 21 changed files with 131 additions and 77 deletions.
30 changes: 22 additions & 8 deletions LlamaCPP/llama_rag.py
@@ -3,6 +3,8 @@
import os
import time
from typing import Any, List, Dict
from uuid import uuid4


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import LlamaCppEmbeddings
@@ -13,7 +15,11 @@
UnstructuredWordDocumentLoader,
Docx2txtLoader,
)

import nltk

[GitHub Actions / ruff — check failure: Ruff (F401) at LlamaCPP/llama_rag.py:19:8 — `nltk` imported but unused]
import faiss
from langchain_community.vectorstores.faiss import FAISS, Document
from langchain_community.docstore.in_memory import InMemoryDocstore

#### CONFIGURATIONS ------------------------------------------------------------------------------------------------------------------------
INDEX_DATABASE_PATH = "./db/" # Faiss database folder
@@ -57,7 +63,7 @@ def __init__(self, embeddings: EmbeddingWrapper):
self.embeddings = embeddings
index_cache = os.path.join(INDEX_DATABASE_PATH, "index.faiss")
self.db = (
FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings)
FAISS.load_local(INDEX_DATABASE_PATH, self.embeddings.model, allow_dangerous_deserialization=True)
if os.path.exists(index_cache)
else None
)
@@ -90,11 +96,19 @@ def __save_index(self, file_base_name: str, md5: str, doc_ids: str):

def __add_documents(self, file_base_name: str, docs: List[Document], md5: str):
if self.db is None:
self.db = FAISS.from_documents(docs, self.embeddings)
else:
self.db.add_documents(docs)
print(docs[0].metadata)
self.__save_index(file_base_name, md5, [doc.metadata["doc_id"] for doc in docs])
index = faiss.IndexFlatL2(len(self.embeddings.embed_query("hello world")))

self.db = FAISS(
embedding_function=self.embeddings.model,
index=index,
docstore=InMemoryDocstore(),
index_to_docstore_id={},
)

uuids = [str(uuid4()) for _ in range(len(docs))]
self.db.add_documents(documents=docs, ids=uuids)
print(docs[0])
self.__save_index(file_base_name, md5, uuids)

def __analyze_file_to_db(self, file: str, md5: str):
file_base_name = os.path.basename(file)
@@ -181,8 +195,8 @@ def dispose():

if __name__ == "__main__":
# Example Usage
init(model_path="/Users/daniel/silicon/AI-Playground/LlamaCPP/models/llm/gguf/bge-large-en-v1.5-q8_0.gguf")
add_index_file("/Users/daniel/silicon/AI-Playground/hello.txt")
init(model_path="/Users/julianbollig/Documents/Projects/AI-Playground/service/models/llm/ggufLLM/bge-large-en-v1.5-q8_0.gguf")
add_index_file("//Users/julianbollig/Documents/Projects/AI-Playground/SECURITY.md")
success, context, source = query("What is the content about?")
print("Query success:", success)
print("Context:", context)
5 changes: 5 additions & 0 deletions LlamaCPP/requirements.txt
@@ -4,3 +4,8 @@ dpcpp-cpp-rt==2025.0
mkl-dpcpp==2025.0
intel-sycl-rt==2025.0.0
onednn-devel==2025.0.0
langchain_community==0.3.0
faiss-cpu==1.8.0
docx2txt==0.8
pypdf==4.2.0
unstructured==0.14.6
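
The five new pins back the RAG file ingestion: langchain_community supplies the FAISS vector store and the document loaders, faiss-cpu the index itself, and docx2txt, pypdf, and unstructured back the Word/PDF loaders imported in llama_rag.py. A rough sketch of how such loaders are typically dispatched by extension follows; the exact mapping is an assumption, not taken from this diff.

# Sketch (not from the commit): dispatching langchain_community loaders by file extension.
import os

from langchain_community.document_loaders import (
    Docx2txtLoader,                  # needs docx2txt
    PyPDFLoader,                     # needs pypdf
    TextLoader,
    UnstructuredWordDocumentLoader,  # needs unstructured
)

LOADERS = {
    ".txt": TextLoader,
    ".md": TextLoader,
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".doc": UnstructuredWordDocumentLoader,
}

def load_documents(path: str):
    ext = os.path.splitext(path)[1].lower()
    loader_cls = LOADERS.get(ext)
    if loader_cls is None:
        raise ValueError(f"Unsupported file type: {ext}")
    return loader_cls(path).load()  # returns a list of Document objects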
2 changes: 1 addition & 1 deletion WebUI/build/scripts/install-full-python-env.js
@@ -74,7 +74,7 @@ function main() {
prepareTargetDir(targetDir)
copyToTargetDir(envDir, targetDir)

const pythonExe = existingFileOrExit(path.join(targetDir, 'python.exe'))
const pythonExe = existingFileOrExit(path.join(targetDir, 'python'))
const getPipFile = existingFileOrExit(path.join(targetDir, 'get-pip.py'))

const platformSpecificRequirementsTxt = existingFileOrExit(
15 changes: 8 additions & 7 deletions WebUI/electron/main.ts
@@ -602,13 +602,14 @@ function needAdminPermission() {
}

function isAdmin(): boolean {
const lib = koffi.load('Shell32.dll')
try {
const IsUserAnAdmin = lib.func('IsUserAnAdmin', 'bool', [])
return IsUserAnAdmin()
} finally {
lib.unload()
}
// const lib = koffi.load("Shell32.dll");
// try {
// const IsUserAnAdmin = lib.func("IsUserAnAdmin", "bool", []);
// return IsUserAnAdmin();
// } finally {
// lib.unload();
// }
return true
}

app.whenReady().then(async () => {
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/aiBackendService.ts
@@ -33,7 +33,7 @@ export class AiBackendService extends LongLivedPythonApiService {
// lsLevelZero will ensure uv and pip are installed
await this.lsLevelZero.ensureInstalled()

const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/comfyUIBackendService.ts
@@ -118,7 +118,7 @@ export class ComfyUiBackendService extends LongLivedPythonApiService {
status: 'executing',
debugMessage: `Trying to identify intel hardware`,
}
const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/llamaCppBackendService.ts
@@ -39,7 +39,7 @@ export class LlamaCppBackendService extends LongLivedPythonApiService {
await this.lsLevelZero.ensureInstalled()
await this.uvPip.ensureInstalled()

const deviceArch = await self.lsLevelZero.detectDevice()
const deviceArch: string = 'mac'
yield {
serviceName: self.name,
step: `Detecting intel device`,
31 changes: 16 additions & 15 deletions WebUI/electron/subprocesses/service.ts
@@ -106,7 +106,7 @@ export class PythonService extends ExecutableService {
}

getExePath(): string {
return path.resolve(path.join(this.dir, 'python.exe'))
return path.resolve(path.join(this.dir, 'bin', 'python'))
}

async check(): Promise<void> {
@@ -131,7 +131,7 @@

readonly prototypicalEnvDir = app.isPackaged
? path.join(this.baseDir, 'prototype-python-env')
: path.join(this.baseDir, 'build-envs/online/prototype-python-env')
: path.join(this.baseDir, 'env')
private async clonePythonEnv(): Promise<void> {
existingFileOrError(this.prototypicalEnvDir)
if (filesystem.existsSync(this.dir)) {
@@ -368,17 +368,18 @@ export class LsLevelZeroService extends ExecutableService {
return this.selectedDeviceIdx
}

async getDeviceSelectorEnv(): Promise<{ ONEAPI_DEVICE_SELECTOR: string }> {
if (this.selectedDeviceIdx < 0 || this.selectedDeviceIdx >= this.allLevelZeroDevices.length) {
await this.detectDevice()
}

if (this.selectedDeviceIdx < 0) {
this.logError('No supported device')
return { ONEAPI_DEVICE_SELECTOR: 'level_zero:*' }
}

return { ONEAPI_DEVICE_SELECTOR: `level_zero:${this.selectedDeviceIdx}` }
async getDeviceSelectorEnv(): Promise<{}> {
// if (this.selectedDeviceIdx < 0 || this.selectedDeviceIdx >= this.allLevelZeroDevices.length) {
// await this.detectDevice();
// }
//
// if (this.selectedDeviceIdx < 0) {
// this.logError("No supported device");
// return {ONEAPI_DEVICE_SELECTOR: "level_zero:*"};
// }
//
// return {ONEAPI_DEVICE_SELECTOR: `level_zero:${this.selectedDeviceIdx}`};
return {}
}
}

@@ -389,7 +390,7 @@ export class GitService extends ExecutableService {
}

getExePath(): string {
return path.resolve(path.join(this.dir, 'cmd/git.exe'))
return path.resolve('/usr/bin/git')
}

async run(args: string[] = [], extraEnv?: object, workDir?: string): Promise<string> {
@@ -511,7 +512,7 @@ export abstract class LongLivedPythonApiService implements ApiService {
readonly baseDir = app.isPackaged ? process.resourcesPath : path.join(__dirname, '../../../')
readonly prototypicalPythonEnv = app.isPackaged
? path.join(this.baseDir, 'prototype-python-env')
: path.join(this.baseDir, 'build-envs/online/prototype-python-env')
: path.join(this.baseDir, 'env')
readonly customIntelExtensionForPytorch = path.join(
app.isPackaged ? this.baseDir : path.join(__dirname, '../../external/'),
ipexWheel,
2 changes: 1 addition & 1 deletion WebUI/electron/subprocesses/updateIntelWorkflows.ts
@@ -16,7 +16,7 @@ const externalRes = path.resolve(
app.isPackaged ? process.resourcesPath : path.join(__dirname, '../../external/'),
)

const gitExePath = Path.join(resourcesBaseDir, 'portable-git', 'cmd', 'git.exe')
const gitExePath = '/usr/bin/git'
const workflowDirTargetPath = Path.join(externalRes, 'workflows')
const workflowDirSpareGitRepoPath = Path.join(externalRes, 'workflows_intel')
const intelWorkflowDirPath = Path.join(
32 changes: 21 additions & 11 deletions WebUI/src/assets/js/store/globalSetup.ts
@@ -101,20 +101,27 @@ export const useGlobalSetup = defineStore('globalSetup', () => {
models.value.scheduler.push(...(await initWebSettings(postJson)))
models.value.scheduler.unshift('None')
break
} catch (_error: unknown) {
} catch (error) {

[GitHub Actions / lint-ts — check failure at WebUI/src/assets/js/store/globalSetup.ts:104 — 'error' is defined but never used. Allowed unused caught errors must match /^_/u]
await util.delay(delay)
}
}
await reloadGraphics()
if (graphicsList.value.length == 0) {
await window.electronAPI.showMessageBoxSync({
message: useI18N().state.ERROR_UNFOUND_GRAPHICS,
title: 'error',
icon: 'error',
})
window.electronAPI.exitApp()
}
loadUserSettings()
// if (graphicsList.value.length == 0) {
// await window.electronAPI.showMessageBoxSync({ message: useI18N().state.ERROR_UNFOUND_GRAPHICS, title: "error", icon: "error" });
// window.electronAPI.exitApp();
// }
await loadUserSettings()

// isComfyUiInstalled.value = await isComfyUIDownloaded()
// if (isComfyUiInstalled.value) {
// window.electronAPI.wakeupComfyUIService()
// setTimeout(() => {
// //requires proper feedback on server startup...
// useComfyUi().updateComfyState()
// loadingState.value = "running";
// }, 10000);
// } else {
// loadingState.value = "running";
}

async function initWebSettings(postJson: string) {
@@ -259,7 +266,10 @@ export const useGlobalSetup = defineStore('globalSetup', () => {
modelSettings.lora = models.value.lora[0]
changeUserSetup = true
}
if (!graphicsList.value.find((item) => item.index == modelSettings.graphics)) {
if (
!graphicsList.value.find((item) => item.index == modelSettings.graphics) &&
graphicsList.value.length != 0
) {
modelSettings.graphics = graphicsList.value[0].index
}
if (changeUserSetup) {
12 changes: 11 additions & 1 deletion service/aipg_utils.py
@@ -238,11 +238,21 @@ def get_ESRGAN_size():
return int(response.headers.get("Content-Length"))


def get_support_graphics():
def get_support_graphics(env_type: str):

device_count = torch.xpu.device_count()
graphics = list()
for i in range(device_count):
device_name = torch.xpu.get_device_name(i)
print('device_name', device_name)
if device_name == "Intel(R) Arc(TM) Graphics" or re.search("Intel\(R\) Arc\(TM\)", device_name) is not None:

[GitHub Actions / ruff — check failure: Ruff (F821) at service/aipg_utils.py:248:58 — Undefined name `re`]
graphics.append({"index": i, "name": device_name})
device_count = torch.cuda.device_count()
print('cuda device_count:', device_count)
service_config.env_type = env_type
for i in range(device_count):
device_name = torch.cuda.get_device_name(i)
print('device_name', device_name)
graphics.append({"index": i, "name": device_name})
return graphics

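The Ruff F821 above points at the new `re.search` call being added without an import. A minimal sketch — not the commit's code — of the same enumeration with the import in place and availability guards (torch.xpu requires an XPU-enabled torch build; torch.cuda a CUDA-enabled one):

# Sketch (not from the commit): device enumeration with the missing `import re` added.
import re

import torch

def get_support_graphics(env_type: str):
    # env_type is stored on service_config in the real code; omitted here to stay self-contained.
    graphics = []
    # Intel XPU devices (Arc) — only present on an XPU-enabled torch build.
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        for i in range(torch.xpu.device_count()):
            name = torch.xpu.get_device_name(i)
            if re.search(r"Intel\(R\) Arc\(TM\)", name):
                graphics.append({"index": i, "name": name})
    # NVIDIA CUDA devices — only present on a CUDA-enabled torch build.
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            graphics.append({"index": i, "name": torch.cuda.get_device_name(i)})
    return graphics
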
19 changes: 14 additions & 5 deletions service/llm_biz.py
@@ -17,7 +17,7 @@
PreTrainedTokenizer,
)

from ipex_llm.transformers import AutoModelForCausalLM
#from ipex_llm.transformers import AutoModelForCausalLM
from typing import Callable
from transformers.generation.stopping_criteria import (
StoppingCriteria,
@@ -27,6 +27,15 @@
import service_config


# import ipex_llm.transformers.models.mistral

# W/A for https://github.com/intel/AI-Playground/issues/94
# Disable decoding_fast_path to avoid calling forward_qkv() which is not supported by bigdl-core-xe-*-23
# ipex_llm.transformers.models.mistral.use_decoding_fast_path = (
# lambda *args, **kwargs: False
# )


class LLMParams:
prompt: List[Dict[str, str]]
device: int
@@ -174,8 +183,8 @@ def chat(
# if prev genera not finish, stop it
stop_generate()

torch.xpu.set_device(params.device)
service_config.device = f"xpu:{params.device}"
torch.cuda.set_device(params.device)
service_config.device = f"cuda:{params.device}"
prompt = params.prompt
enable_rag = params.enable_rag
model_repo_id = params.model_repo_id
@@ -190,7 +199,7 @@
if _model is not None:
del _model
gc.collect()
torch.xpu.empty_cache()
torch.cuda.empty_cache()

model_base_path = service_config.service_model_paths.get("llm")
model_name = model_repo_id.replace("/", "---")
@@ -309,7 +318,7 @@ def dispose():
del _model
_model = None
gc.collect()
torch.xpu.empty_cache()
torch.cuda.empty_cache()


class StopGenerateException(Exception):
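The llm_biz.py hunks above swap torch.xpu calls for torch.cuda one-for-one. A hedged sketch of a backend-agnostic variant that picks whichever accelerator is actually present — an assumption for illustration, not what this commit does:

# Sketch (not from the commit): choose cuda or xpu at runtime instead of hard-coding one backend.
import gc

import torch

def select_device(index: int) -> str:
    if torch.cuda.is_available():
        torch.cuda.set_device(index)
        return f"cuda:{index}"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.set_device(index)
        return f"xpu:{index}"
    return "cpu"

def empty_accelerator_cache() -> None:
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.empty_cache()
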
3 changes: 2 additions & 1 deletion service/main.py
@@ -3,8 +3,9 @@
import time
import traceback
import torch
from transformers import pipeline, PreTrainedModel, TextIteratorStreamer
import intel_extension_for_pytorch as ipex
from transformers import pipeline,PreTrainedModel,TextIteratorStreamer
# import intel_extension_for_pytorch as ipex


def stream_chat_generate(model: PreTrainedModel, args: dict):
(The remaining changed files were not loaded in this view.)