Commit 6788fcb
fix a buncha bullshit
lalalune committed Feb 8, 2025
1 parent c05c93c commit 6788fcb
Showing 6 changed files with 50 additions and 60 deletions.
56 changes: 0 additions & 56 deletions packages/core/src/helper.ts
@@ -1,4 +1,3 @@
-import { encodingForModel, type TiktokenModel } from "js-tiktoken";
 import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
 import logger from "./logger.ts";
 import { type IAgentRuntime, type ModelSettings } from "./types.ts";
@@ -11,61 +10,6 @@ export function logFunctionCall(functionName: string, runtime?: IAgentRuntime) {
     });
 }
 
-export async function trimTokens(
-    context: string,
-    maxTokens: number,
-    runtime: IAgentRuntime
-) {
-    logFunctionCall('trimTokens', runtime);
-    if (!context) return "";
-    if (maxTokens <= 0) throw new Error("maxTokens must be positive");
-
-    const tokenizerModel = runtime.getSetting("TOKENIZER_MODEL");
-    const tokenizerType = runtime.getSetting("TOKENIZER_TYPE");
-
-    if (!tokenizerModel || !tokenizerType) {
-        // Default to TikToken truncation using the "gpt-4o" model if tokenizer settings are not defined
-        return truncateTiktoken("gpt-4o", context, maxTokens);
-    }
-
-    return truncateTiktoken(
-        tokenizerModel as TiktokenModel,
-        context,
-        maxTokens
-    );
-
-    logger.warn(`Unsupported tokenizer type: ${tokenizerType}`);
-    return truncateTiktoken("gpt-4o", context, maxTokens);
-}
-
-async function truncateTiktoken(
-    model: TiktokenModel,
-    context: string,
-    maxTokens: number
-) {
-    try {
-        const encoding = encodingForModel(model);
-
-        // Encode the text into tokens
-        const tokens = encoding.encode(context);
-
-        // If already within limits, return unchanged
-        if (tokens.length <= maxTokens) {
-            return context;
-        }
-
-        // Keep the most recent tokens by slicing from the end
-        const truncatedTokens = tokens.slice(-maxTokens);
-
-        // Decode back to text - js-tiktoken decode() returns a string directly
-        return encoding.decode(truncatedTokens);
-    } catch (error) {
-        logger.error("Error in trimTokens:", error);
-        // Return truncated string if tokenization fails
-        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
-    }
-}
-
 export async function splitChunks(
     content: string,
     chunkSize = 512,
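The deleted `trimTokens` tied core directly to js-tiktoken, and it contained unreachable code (the `logger.warn` and second `return` after an unconditional `return`). With the `TOKENIZE_TEXT`/`DETOKENIZE_TEXT` model types introduced below, equivalent trimming can be rebuilt on top of provider-registered handlers. A minimal sketch — the `useModel` dispatch here is an assumption, not shown in this diff; the commit defines only the handler param shapes:

```ts
import { ModelType } from "@elizaos/core";

// Assumed dispatch surface; how handlers are invoked is not part of this diff.
interface TokenizingRuntime {
    useModel(type: ModelType, params: Record<string, unknown>): Promise<any>;
}

export async function trimTokens(
    context: string,
    maxTokens: number,
    runtime: TokenizingRuntime
): Promise<string> {
    if (!context) return "";
    if (maxTokens <= 0) throw new Error("maxTokens must be positive");

    // Tokenize through whichever provider plugin registered TOKENIZE_TEXT.
    const tokens: number[] = await runtime.useModel(ModelType.TOKENIZE_TEXT, {
        context,
        modelType: ModelType.TEXT_LARGE,
    });
    if (tokens.length <= maxTokens) return context;

    // Keep the most recent tokens, matching the old truncation behavior.
    return await runtime.useModel(ModelType.DETOKENIZE_TEXT, {
        tokens: tokens.slice(-maxTokens),
        modelType: ModelType.TEXT_LARGE,
    });
}
```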
4 changes: 2 additions & 2 deletions packages/core/src/index.ts
@@ -9,7 +9,7 @@ export * from "./evaluators.ts";
 export * from "./generation.ts";
 export * from "./goals.ts";
 export * from "./helper.ts";
-export { default as knowledge } from "./knowledge.ts";
+export * from "./knowledge.ts";
 export * from "./logger.ts";
 export * from "./memory.ts";
 export * from "./messages.ts";
@@ -20,4 +20,4 @@ export * from "./relationships.ts";
 export * from "./runtime.ts";
 export * from "./settings.ts";
 export * from "./types.ts";
-export * from "./uuid.ts";
\ No newline at end of file
+export * from "./uuid.ts";
12 changes: 12 additions & 0 deletions packages/core/src/types.ts
@@ -120,6 +120,8 @@ export enum ModelType {
     TEXT_SMALL = "text_small",
     TEXT_LARGE = "text_large",
     TEXT_EMBEDDING = "text_embedding",
+    TOKENIZE_TEXT = "tokenize_text",
+    DETOKENIZE_TEXT = "detokenize_text",
     IMAGE = "image",
     IMAGE_DESCRIPTION = "image_description",
     TRANSCRIPTION = "transcription",
@@ -1008,3 +1010,13 @@ export type GenerateTextParams = {
     modelType: ModelType;
     stopSequences?: string[];
 };
+
+export interface TokenizeTextParams {
+    context: string;
+    modelType: ModelType;
+}
+
+export interface DetokenizeTextParams {
+    tokens: number[];
+    modelType: ModelType;
+}
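Taken together, the new enum members and param interfaces imply a simple handler contract: a `TOKENIZE_TEXT` handler maps `TokenizeTextParams` to token ids, and a `DETOKENIZE_TEXT` handler maps `DetokenizeTextParams` back to a string. A sketch of the inferred shapes — the diff defines only the params, so these aliases are illustrative, not part of core:

```ts
import type {
    DetokenizeTextParams,
    TokenizeTextParams,
} from "@elizaos/core";

// Inferred from the plugin-openai handlers below; core defines only the params.
type TokenizeTextHandler = (params: TokenizeTextParams) => Promise<number[]>;
type DetokenizeTextHandler = (params: DetokenizeTextParams) => Promise<string>;
```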
35 changes: 35 additions & 0 deletions packages/plugin-openai/src/index.ts
@@ -1,7 +1,28 @@
 import { createOpenAI } from "@ai-sdk/openai";
 import type { Plugin } from "@elizaos/core";
 import { GenerateTextParams, ModelType } from "@elizaos/core";
+import { DetokenizeTextParams, TokenizeTextParams } from "@elizaos/core";
 import { generateText as aiGenerateText } from "ai";
+import { encodingForModel, type TiktokenModel } from "js-tiktoken";
+
+async function tokenizeText(model: ModelType, context: string) {
+    // Same env fallback chain in both directions, so encode/decode agree.
+    const modelName =
+        model === ModelType.TEXT_SMALL
+            ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini"
+            : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    return encoding.encode(context);
+}
+
+async function detokenizeText(model: ModelType, tokens: number[]) {
+    const modelName =
+        model === ModelType.TEXT_SMALL
+            ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini"
+            : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    return encoding.decode(tokens);
+}
 
 export const openaiPlugin: Plugin = {
     name: "openai",
@@ -34,6 +55,20 @@ export const openaiPlugin: Plugin = {
             console.log("data", data);
             return data.data[0].embedding;
         },
+        [ModelType.TOKENIZE_TEXT]: async ({
+            context,
+            modelType,
+        }: TokenizeTextParams) => {
+            return tokenizeText(modelType ?? ModelType.TEXT_LARGE, context);
+        },
+        [ModelType.DETOKENIZE_TEXT]: async ({
+            tokens,
+            modelType,
+        }: DetokenizeTextParams) => {
+            return detokenizeText(modelType ?? ModelType.TEXT_LARGE, tokens);
+        },
         [ModelType.TEXT_LARGE]: async ({
             runtime,
             context,
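A quick sanity check for the two handlers is a round trip: tokenizing and then detokenizing should reproduce the input. The sketch below assumes the handler map shown above is reachable on the plugin as `models` (the enclosing property is collapsed out of this view, so that name is a guess, as is the `@elizaos/plugin-openai` package name inferred from the repo layout):

```ts
import { ModelType } from "@elizaos/core";
import { openaiPlugin } from "@elizaos/plugin-openai";

// "models" is assumed; the property holding these handlers is collapsed above.
const handlers = (openaiPlugin as any).models;

const text = "tokenize me";
const tokens: number[] = await handlers[ModelType.TOKENIZE_TEXT]({
    context: text,
    modelType: ModelType.TEXT_SMALL,
});
const restored: string = await handlers[ModelType.DETOKENIZE_TEXT]({
    tokens,
    modelType: ModelType.TEXT_SMALL,
});
console.log(restored === text); // true: same model picks the same encoding both ways
```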
2 changes: 1 addition & 1 deletion scripts/smokeTests.sh
@@ -52,7 +52,7 @@ TIMER=0
 
 # Start the application and capture logs in the background
 # Log level 27 includes "success", which is the level we're looking for
-DEFAULT_LOG_LEVEL=success bun start --character=characters/trump.character.json > "$OUTFILE" 2>&1 &
+DEFAULT_LOG_LEVEL=success bun start > "$OUTFILE" 2>&1 &
 
 APP_PID=$! # Capture the PID of the background process
 
1 change: 0 additions & 1 deletion scripts/start.sh
@@ -245,7 +245,6 @@ create_character_template() {
 {
     "name": "$name",
     "clients": [],
-    "modelProvider": "anthropic",
     "settings": {
         "voice": {
             "model": "en_GB-alan-medium"