feat: Use service worker to run WebLLM Engine
Neet-Nestor committed May 17, 2024
1 parent 01b6716 commit d44dab2
Showing 50 changed files with 12,307 additions and 92 deletions.
2 changes: 1 addition & 1 deletion .eslintignore
@@ -1 +1 @@
public/serviceWorker.js
lib/**
8 changes: 7 additions & 1 deletion .gitignore
@@ -43,4 +43,10 @@ dev
.env

*.key
*.key.pub
*.key.pub

# service worker generated files
public/sw.js
public/workbox-*.js
public/workbox-*.js.map
public/worker-*.js
10 changes: 0 additions & 10 deletions app/client/webllm-sw.ts

This file was deleted.

39 changes: 16 additions & 23 deletions app/client/webllm.ts
@@ -1,13 +1,12 @@
import {
EngineInterface,
CreateWebWorkerEngine,
CreateWebServiceWorkerEngine,
InitProgressReport,
prebuiltAppConfig,
ChatCompletionMessageParam,
} from "@mlc-ai/web-llm";

import { ChatOptions, LLMApi, LLMConfig } from "./api";
import { ChatCompletionMessageParam } from "@mlc-ai/web-llm";
import { useAppConfig } from "../store";

export class WebLLMApi implements LLMApi {
private currentModel?: string;
@@ -22,27 +21,21 @@ export class WebLLMApi implements LLMApi {
onUpdate?: (message: string, chunk: string) => void,
) {
this.currentModel = config.model;
-    this.engine = await CreateWebWorkerEngine(
-      new Worker(new URL("./webllm-sw.ts", import.meta.url), {
-        type: "module",
-      }),
-      config.model,
-      {
-        chatOpts: {
-          temperature: config.temperature,
-          top_p: config.top_p,
-          presence_penalty: config.presence_penalty,
-          frequency_penalty: config.frequency_penalty,
-        },
-        appConfig: {
-          ...prebuiltAppConfig,
-          useIndexedDBCache: config.cache === "index_db",
-        },
-        initProgressCallback: (report: InitProgressReport) => {
-          onUpdate?.(report.text, report.text);
-        },
-      },
-    );
+    this.engine = await CreateWebServiceWorkerEngine(config.model, {
+      chatOpts: {
+        temperature: config.temperature,
+        top_p: config.top_p,
+        presence_penalty: config.presence_penalty,
+        frequency_penalty: config.frequency_penalty,
+      },
+      appConfig: {
+        ...prebuiltAppConfig,
+        useIndexedDBCache: config.cache === "index_db",
+      },
+      initProgressCallback: (report: InitProgressReport) => {
+        onUpdate?.(report.text, report.text);
+      },
+    });
}

async chat(options: ChatOptions): Promise<void> {
5 changes: 5 additions & 0 deletions app/components/chat.tsx
@@ -803,6 +803,11 @@ function _Chat() {
ChatControllerPool.stop(session.id, messageId);
};

// Reset session status on initial loading
useEffect(() => {
chatStore.resetGeneratingStatus();
}, []);

useEffect(() => {
chatStore.updateCurrentSession((session) => {
const stopTiming = Date.now() - REQUEST_TIMEOUT_MS;
2 changes: 0 additions & 2 deletions app/layout.tsx
@@ -61,8 +61,6 @@ export default function RootLayout({
<link rel="mask-icon" href="/safari-pinned-tab.svg" color="#062578" />
<meta name="msapplication-TileColor" content="#2b5797" />
<meta name="theme-color" content="#ffffff" />

<script src="/serviceWorkerRegister.js" defer></script>
</head>
<body>
{children}
47 changes: 47 additions & 0 deletions app/service-worker.ts
@@ -0,0 +1,47 @@
import {
WebServiceWorkerEngineHandler,
EngineInterface,
Engine,
} from "@mlc-ai/web-llm";
import { defaultCache } from "@serwist/next/worker";
import type { PrecacheEntry, SerwistGlobalConfig } from "serwist";
import { Serwist } from "serwist";

// This declares the value of `injectionPoint` to TypeScript.
// `injectionPoint` is the string that will be replaced by the
// actual precache manifest. By default, this string is set to
// `"self.__SW_MANIFEST"`.
declare global {
interface WorkerGlobalScope extends SerwistGlobalConfig {
__SW_MANIFEST: (PrecacheEntry | string)[] | undefined;
}
}

declare const self: ServiceWorkerGlobalScope;

const serwist = new Serwist({
precacheEntries: self.__SW_MANIFEST,
skipWaiting: true,
clientsClaim: true,
navigationPreload: true,
runtimeCaching: defaultCache,
});

const CHATGPT_NEXT_WEB_CACHE = "chatgpt-next-web-cache";
const engine: EngineInterface = new Engine();
let handler: WebServiceWorkerEngineHandler;

self.addEventListener("install", function (event) {
event.waitUntil(
caches.open(CHATGPT_NEXT_WEB_CACHE).then(function (cache) {
return cache.addAll([]);
}),
);
});

self.addEventListener("activate", function (event) {
handler = new WebServiceWorkerEngineHandler(engine);
console.log("Web-LLM Service Worker Activated");
});

serwist.addEventListeners();
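
The client-side counterpart to this handler is the change in app/client/webllm.ts above: CreateWebServiceWorkerEngine proxies chat requests to the Engine held by this service worker. A minimal sketch of that pairing, assuming only the API surface shown in this diff (the model ID and logging are hypothetical placeholders):

// Sketch only: pairs with the WebServiceWorkerEngineHandler registered above.
import {
  CreateWebServiceWorkerEngine,
  InitProgressReport,
  prebuiltAppConfig,
} from "@mlc-ai/web-llm";

async function initEngine() {
  // The service worker must be registered and activated before this call;
  // the handler it installs then services the messages the engine sends.
  return CreateWebServiceWorkerEngine("Llama-3-8B-Instruct-q4f32_1" /* placeholder ID */, {
    appConfig: { ...prebuiltAppConfig, useIndexedDBCache: false },
    initProgressCallback: (report: InitProgressReport) => {
      console.log(report.text);
    },
  });
}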
10 changes: 10 additions & 0 deletions app/store/chat.ts
@@ -275,6 +275,16 @@ export const useChatStore = createPersistStore(
return session;
},

resetGeneratingStatus() {
set((state) => ({
...state,
sessions: state.sessions.map((session) => ({
...session,
isGenerating: false,
})),
}));
},

onNewMessage(message: ChatMessage) {
get().updateCurrentSession((session) => {
session.messages = session.messages.concat();
10 changes: 6 additions & 4 deletions app/utils/merge.ts
@@ -1,13 +1,15 @@
export function merge(target: any, source: any) {
Object.keys(source).forEach(function (key) {
if (
source.hasOwnProperty(key) && // Check if the property is not inherited
source[key] &&
typeof source[key] === "object" || key === "__proto__" || key === "constructor"
(source.hasOwnProperty(key) && // Check if the property is not inherited
source[key] &&
typeof source[key] === "object") ||
key === "__proto__" ||
key === "constructor"
) {
merge((target[key] = target[key] || {}), source[key]);
return;
}
target[key] = source[key];
});
}
}
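
As a quick illustration (not part of the diff), the reparenthesized condition behaves as before: own, non-null object values are merged recursively, while everything else is assigned directly onto target. A small usage sketch, assuming the caller imports merge from app/utils/merge.ts:

import { merge } from "./merge"; // path assumes the caller sits next to app/utils/merge.ts

const target = { a: { b: 1 }, keep: true };
const source = { a: { c: 2 }, flag: false };
merge(target, source); // mutates target in place
// target is now { a: { b: 1, c: 2 }, keep: true, flag: false }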
7 changes: 7 additions & 0 deletions lib/@mlc-ai/web-llm/cache_util.d.ts
@@ -0,0 +1,7 @@
import { AppConfig } from "./config";
export declare function hasModelInCache(modelId: string, appConfig?: AppConfig): Promise<boolean>;
export declare function deleteModelAllInfoInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteChatConfigInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
export declare function deleteModelWasmInCache(modelId: string, appConfig?: AppConfig): Promise<void>;
//# sourceMappingURL=cache_util.d.ts.map
1 change: 1 addition & 0 deletions lib/@mlc-ai/web-llm/cache_util.d.ts.map


159 changes: 159 additions & 0 deletions lib/@mlc-ai/web-llm/config.d.ts
@@ -0,0 +1,159 @@
import { ResponseFormat } from "./openai_api_protocols";
import { LogitProcessor, InitProgressCallback } from "./types";
/**
* Conversation template config
*/
export interface ConvTemplateConfig {
system_template: string;
system_message: string;
roles: Record<Role, string>;
role_templates?: Partial<Record<Role, string>>;
seps: Array<string>;
role_content_sep?: string;
role_empty_sep?: string;
offset: number;
stop_str: Array<string>;
system_prefix_token_ids?: Array<number>;
stop_token_ids: Array<number>;
add_role_after_system_message?: boolean;
}
export declare enum Role {
user = "user",
assistant = "assistant"
}
/**
* Place holders that can be used in role templates.
* For example, a role template of
* `<<question>> ${MessagePlaceholders.USER} <<function>> ${MessagePlaceholders.FUNCTION}`
* will insert the user message to ${MessagePlaceholders.USER}
* and insert the function message to ${MessagePlaceholders.FUNCTION}
* at run time.
*/
export declare enum MessagePlaceholders {
system = "{system_message}",
user = "{user_message}",
assistant = "{assistant_message}",
tool = "{tool_message}",
function = "{function_string}"
}
/**
* Config of one chat model, a data structure representing `mlc-chat-config.json`.
* This only corresponds to the chat-related fields and `tokenizer_files` of `mlc-chat-config.json`.
* Only these fields affect the conversation in runtime.
* i.e. The third part in https://llm.mlc.ai/docs/get_started/mlc_chat_config.html.
*
* This is initialized in `ChatModule.reload()` with the model's `mlc-chat-config.json`.
*/
export interface ChatConfig {
tokenizer_files: Array<string>;
conv_config?: Partial<ConvTemplateConfig>;
conv_template: string | ConvTemplateConfig;
mean_gen_len: number;
max_gen_len: number;
shift_fill_factor: number;
repetition_penalty: number;
frequency_penalty: number;
presence_penalty: number;
top_p: number;
temperature: number;
bos_token_id?: number;
}
/**
* Custom options that can be used to override known config values.
*/
export interface ChatOptions extends Partial<ChatConfig> {
}
/**
* Optional configurations for `CreateEngine()` and `CreateWebWorkerEngine()`.
*
* chatOpts: To optionally override the `mlc-chat-config.json` of `modelId`.
* appConfig: Configure the app, including the list of models and whether to use IndexedDB cache.
* initProgressCallback: A callback for showing the progress of loading the model.
* logitProcessorRegistry: A register for stateful logit processors, see `webllm.LogitProcessor`.
*
* @note All fields are optional, and `logitProcessorRegistry` is only used for `CreateEngine()`
* not `CreateWebWorkerEngine()`.
*/
export interface EngineConfig {
chatOpts?: ChatOptions;
appConfig?: AppConfig;
initProgressCallback?: InitProgressCallback;
logitProcessorRegistry?: Map<string, LogitProcessor>;
}
/**
* Config for a single generation.
* Essentially `ChatConfig` without `tokenizer_files`, `conv_config`, or `conv_template`.
* We also support additional fields not present in `mlc-chat-config.json` due to OpenAI-like APIs.
*
* Note that all values are optional. If unspecified, we use whatever values in `ChatConfig`
* initialized during `ChatModule.reload()`.
*/
export interface GenerationConfig {
mean_gen_len?: number;
shift_fill_factor?: number;
repetition_penalty?: number;
top_p?: number | null;
temperature?: number | null;
max_gen_len?: number | null;
frequency_penalty?: number | null;
presence_penalty?: number | null;
stop?: string | null | Array<string>;
n?: number | null;
logit_bias?: Record<string, number> | null;
logprobs?: boolean | null;
top_logprobs?: number | null;
response_format?: ResponseFormat | null;
}
export declare function postInitAndCheckGenerationConfigValues(config: GenerationConfig): void;
/**
* Information for a model.
* @param model_url: the huggingface link to download the model weights.
* @param model_id: what we call the model.
* @param model_lib_url: link to the model library (wasm file) the model uses.
* @param vram_required_MB: amount of vram in MB required to run the model (can use
* `utils/vram_requirements` to calculate).
* @param low_resource_required: whether the model can run on limited devices (e.g. Android phone).
* @param buffer_size_required_bytes: required `maxStorageBufferBindingSize`, different for each device.
* @param required_features: feature needed to run this model (e.g. shader-f16).
*/
export interface ModelRecord {
model_url: string;
model_id: string;
model_lib_url: string;
vram_required_MB?: number;
low_resource_required?: boolean;
buffer_size_required_bytes?: number;
required_features?: Array<string>;
}
/**
* Extra configuration that can be
* passed to the load.
*
* @param model_list: models to be used.
* @param useIndexedDBCache: if true, will use IndexedDBCache to cache models and other artifacts.
* If false or unspecified, will use the Cache API. For more information of the two, see:
* https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser
*
* @note Note that the Cache API is more well-tested in WebLLM as of now.
*/
export interface AppConfig {
model_list: Array<ModelRecord>;
useIndexedDBCache?: boolean;
}
/**
* modelVersion: the prebuilt model libraries that the current npm is compatible with, affects the
* `model_lib_url`s in `prebuiltAppConfig`.
*
* @note The model version does not have to match the npm version, since not each npm update
* requires an update of the model libraries.
*/
export declare const modelVersion = "v0_2_34";
export declare const modelLibURLPrefix = "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/";
/**
* Default models and model library mapping to be used if unspecified.
*
* @note This is the only source of truth of which prebuilt model libraries are compatible with the
* current WebLLM npm version.
*/
export declare const prebuiltAppConfig: AppConfig;
//# sourceMappingURL=config.d.ts.map
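
These declarations are what app/client/webllm.ts (earlier in this commit) leans on when it spreads prebuiltAppConfig and toggles useIndexedDBCache. A hedged sketch of assembling the same options standalone; the numeric values are illustrative, and it assumes AppConfig and ChatOptions are re-exported from the package entry point just as prebuiltAppConfig is:

import { prebuiltAppConfig, type AppConfig, type ChatOptions } from "@mlc-ai/web-llm";

// Keep the prebuilt model records but store artifacts in IndexedDB.
const appConfig: AppConfig = {
  ...prebuiltAppConfig,
  useIndexedDBCache: true, // false or unspecified falls back to the Cache API
};

// Optional per-chat overrides of mlc-chat-config.json values.
const chatOpts: ChatOptions = {
  temperature: 0.7, // illustrative
  top_p: 0.95, // illustrative
};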
1 change: 1 addition & 0 deletions lib/@mlc-ai/web-llm/config.d.ts.map


