diff --git a/packages/core/src/helper.ts b/packages/core/src/helper.ts
index 7176680b81a..7e75b50fed7 100644
--- a/packages/core/src/helper.ts
+++ b/packages/core/src/helper.ts
@@ -1,4 +1,3 @@
-import { encodingForModel, type TiktokenModel } from "js-tiktoken";
 import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
 import logger from "./logger.ts";
 import { type IAgentRuntime, type ModelSettings } from "./types.ts";
@@ -11,61 +10,6 @@ export function logFunctionCall(functionName: string, runtime?: IAgentRuntime) {
     });
 }
 
-export async function trimTokens(
-    context: string,
-    maxTokens: number,
-    runtime: IAgentRuntime
-) {
-    logFunctionCall('trimTokens', runtime);
-    if (!context) return "";
-    if (maxTokens <= 0) throw new Error("maxTokens must be positive");
-
-    const tokenizerModel = runtime.getSetting("TOKENIZER_MODEL");
-    const tokenizerType = runtime.getSetting("TOKENIZER_TYPE");
-
-    if (!tokenizerModel || !tokenizerType) {
-        // Default to TikToken truncation using the "gpt-4o" model if tokenizer settings are not defined
-        return truncateTiktoken("gpt-4o", context, maxTokens);
-    }
-
-    return truncateTiktoken(
-        tokenizerModel as TiktokenModel,
-        context,
-        maxTokens
-    );
-
-    logger.warn(`Unsupported tokenizer type: ${tokenizerType}`);
-    return truncateTiktoken("gpt-4o", context, maxTokens);
-}
-
-async function truncateTiktoken(
-    model: TiktokenModel,
-    context: string,
-    maxTokens: number
-) {
-    try {
-        const encoding = encodingForModel(model);
-
-        // Encode the text into tokens
-        const tokens = encoding.encode(context);
-
-        // If already within limits, return unchanged
-        if (tokens.length <= maxTokens) {
-            return context;
-        }
-
-        // Keep the most recent tokens by slicing from the end
-        const truncatedTokens = tokens.slice(-maxTokens);
-
-        // Decode back to text - js-tiktoken decode() returns a string directly
-        return encoding.decode(truncatedTokens);
-    } catch (error) {
-        logger.error("Error in trimTokens:", error);
-        // Return truncated string if tokenization fails
-        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
-    }
-}
-
 export async function splitChunks(
     content: string,
     chunkSize = 512,
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index 145983b7a9d..44cbacfa504 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -9,7 +9,7 @@ export * from "./evaluators.ts";
 export * from "./generation.ts";
 export * from "./goals.ts";
 export * from "./helper.ts";
-export { default as knowledge } from "./knowledge.ts";
+export * from "./knowledge.ts";
 export * from "./logger.ts";
 export * from "./memory.ts";
 export * from "./messages.ts";
@@ -20,4 +20,4 @@ export * from "./relationships.ts";
 export * from "./runtime.ts";
 export * from "./settings.ts";
 export * from "./types.ts";
-export * from "./uuid.ts";
+export * from "./uuid.ts";
\ No newline at end of file
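Reviewer note: with `trimTokens` removed from core, token-aware truncation now has to go through the model provider instead of a core js-tiktoken dependency. Below is a minimal sketch of how a caller might rebuild the old trimming behavior on top of the new `TOKENIZE_TEXT`/`DETOKENIZE_TEXT` model types introduced later in this diff. The `useModel` dispatcher interface is an assumption for illustration; no such dispatch API appears in this diff.

```typescript
import { ModelType } from "@elizaos/core";

// Hypothetical dispatcher shape: something in the runtime routes a model type
// to the matching plugin handler (e.g. the plugin-openai handlers below).
interface ModelDispatcher {
    useModel<T>(type: ModelType, params: unknown): Promise<T>;
}

// Sketch only: trim a context to maxTokens using provider-side tokenization,
// mirroring what the removed core helper used to do.
export async function trimTokens(
    context: string,
    maxTokens: number,
    runtime: ModelDispatcher
): Promise<string> {
    if (!context) return "";
    if (maxTokens <= 0) throw new Error("maxTokens must be positive");

    const tokens = await runtime.useModel<number[]>(ModelType.TOKENIZE_TEXT, {
        context,
        modelType: ModelType.TEXT_LARGE,
    });
    if (tokens.length <= maxTokens) return context;

    // Keep the most recent tokens, matching the removed helper's behavior.
    return runtime.useModel<string>(ModelType.DETOKENIZE_TEXT, {
        tokens: tokens.slice(-maxTokens),
        modelType: ModelType.TEXT_LARGE,
    });
}
```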
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
index f004449420a..45798ddf0d2 100644
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -120,6 +120,8 @@ export enum ModelType {
     TEXT_SMALL = "text_small",
     TEXT_LARGE = "text_large",
     TEXT_EMBEDDING = "text_embedding",
+    TOKENIZE_TEXT = "tokenize_text",
+    DETOKENIZE_TEXT = "detokenize_text",
     IMAGE = "image",
     IMAGE_DESCRIPTION = "image_description",
     TRANSCRIPTION = "transcription",
@@ -1008,3 +1010,13 @@ export type GenerateTextParams = {
     modelType: ModelType;
     stopSequences?: string[];
 };
+
+export interface TokenizeTextParams {
+    context: string;
+    modelType: ModelType;
+}
+
+export interface DetokenizeTextParams {
+    tokens: number[];
+    modelType: ModelType;
+}
\ No newline at end of file
diff --git a/packages/plugin-openai/src/index.ts b/packages/plugin-openai/src/index.ts
index 0212d68dcd4..f6c9e55e218 100644
--- a/packages/plugin-openai/src/index.ts
+++ b/packages/plugin-openai/src/index.ts
@@ -1,7 +1,28 @@
 import { createOpenAI } from "@ai-sdk/openai";
 import type { Plugin } from "@elizaos/core";
 import { GenerateTextParams, ModelType } from "@elizaos/core";
+import { DetokenizeTextParams, TokenizeTextParams } from "@elizaos/core";
 import { generateText as aiGenerateText } from "ai";
+import { encodingForModel, type TiktokenModel } from "js-tiktoken";
+
+async function tokenizeText(
+    model: ModelType,
+    context: string,
+) {
+    const modelName = model === ModelType.TEXT_SMALL ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini" : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    const tokens = encoding.encode(context);
+    return tokens;
+}
+
+async function detokenizeText(
+    model: ModelType,
+    tokens: number[],
+) {
+    const modelName = model === ModelType.TEXT_SMALL ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini" : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    return encoding.decode(tokens);
+}
 
 export const openaiPlugin: Plugin = {
     name: "openai",
@@ -34,6 +55,20 @@
             console.log("data", data);
             return data.data[0].embedding;
         },
+        [ModelType.TOKENIZE_TEXT]: async ({
+            context,
+            modelType,
+        }: TokenizeTextParams
+        ) => {
+            return tokenizeText(modelType ?? ModelType.TEXT_LARGE, context);
+        },
+        [ModelType.DETOKENIZE_TEXT]: async ({
+            tokens,
+            modelType,
+        }: DetokenizeTextParams
+        ) => {
+            return detokenizeText(modelType ?? ModelType.TEXT_LARGE, tokens);
+        },
         [ModelType.TEXT_LARGE]: async ({
             runtime,
             context,
diff --git a/scripts/smokeTests.sh b/scripts/smokeTests.sh
index bfa41eaea7e..3bb668d6585 100755
--- a/scripts/smokeTests.sh
+++ b/scripts/smokeTests.sh
@@ -52,7 +52,7 @@ TIMER=0
 
 # Start the application and capture logs in the background
 # 27 includes success and that's what the level we're looking for is
-DEFAULT_LOG_LEVEL=success bun start --character=characters/trump.character.json > "$OUTFILE" 2>&1 &
+DEFAULT_LOG_LEVEL=success bun start > "$OUTFILE" 2>&1 &
 APP_PID=$!
 
 # Capture the PID of the background process
diff --git a/scripts/start.sh b/scripts/start.sh
index d5b46aba360..98c1c78996f 100755
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -245,7 +245,6 @@ create_character_template() {
 {
     "name": "$name",
     "clients": [],
-    "modelProvider": "anthropic",
     "settings": {
         "voice": {
             "model": "en_GB-alan-medium"
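Reviewer note: the new plugin handlers are a thin wrapper over js-tiktoken's lossless encode/decode round trip. A quick standalone check of that underlying behavior, using the same "gpt-4o" fallback model name as the code above:

```typescript
import { encodingForModel } from "js-tiktoken";

const enc = encodingForModel("gpt-4o");
const tokens = enc.encode("Hello, Eliza!");          // number[] of token ids
console.log(tokens.length);                          // token count, useful for budgeting
console.log(enc.decode(tokens) === "Hello, Eliza!"); // true: the round trip is lossless
```

One side effect of this move worth noting: token counting is now provider-specific, so any core code that previously relied on `trimTokens` must dispatch through `ModelType.TOKENIZE_TEXT`/`ModelType.DETOKENIZE_TEXT` rather than importing js-tiktoken directly.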