Commit 6788fcb
fix a buncha bullshit
lalalune committed Feb 8, 2025
1 parent c05c93c commit 6788fcb
Showing 6 changed files with 50 additions and 60 deletions.
56 changes: 0 additions & 56 deletions packages/core/src/helper.ts
@@ -1,4 +1,3 @@
-import { encodingForModel, type TiktokenModel } from "js-tiktoken";
 import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
 import logger from "./logger.ts";
 import { type IAgentRuntime, type ModelSettings } from "./types.ts";
@@ -11,61 +10,6 @@ export function logFunctionCall(functionName: string, runtime?: IAgentRuntime) {
     });
 }
 
-export async function trimTokens(
-    context: string,
-    maxTokens: number,
-    runtime: IAgentRuntime
-) {
-    logFunctionCall('trimTokens', runtime);
-    if (!context) return "";
-    if (maxTokens <= 0) throw new Error("maxTokens must be positive");
-
-    const tokenizerModel = runtime.getSetting("TOKENIZER_MODEL");
-    const tokenizerType = runtime.getSetting("TOKENIZER_TYPE");
-
-    if (!tokenizerModel || !tokenizerType) {
-        // Default to TikToken truncation using the "gpt-4o" model if tokenizer settings are not defined
-        return truncateTiktoken("gpt-4o", context, maxTokens);
-    }
-
-    return truncateTiktoken(
-        tokenizerModel as TiktokenModel,
-        context,
-        maxTokens
-    );
-
-    logger.warn(`Unsupported tokenizer type: ${tokenizerType}`);
-    return truncateTiktoken("gpt-4o", context, maxTokens);
-}
-
-async function truncateTiktoken(
-    model: TiktokenModel,
-    context: string,
-    maxTokens: number
-) {
-    try {
-        const encoding = encodingForModel(model);
-
-        // Encode the text into tokens
-        const tokens = encoding.encode(context);
-
-        // If already within limits, return unchanged
-        if (tokens.length <= maxTokens) {
-            return context;
-        }
-
-        // Keep the most recent tokens by slicing from the end
-        const truncatedTokens = tokens.slice(-maxTokens);
-
-        // Decode back to text - js-tiktoken decode() returns a string directly
-        return encoding.decode(truncatedTokens);
-    } catch (error) {
-        logger.error("Error in trimTokens:", error);
-        // Return truncated string if tokenization fails
-        return context.slice(-maxTokens * 4); // Rough estimate of 4 chars per token
-    }
-}
-
 export async function splitChunks(
     content: string,
     chunkSize = 512,
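The deleted `trimTokens` tied core directly to js-tiktoken, and it contained unreachable code (the `logger.warn` and second `return` after an unconditional `return`). With the `TOKENIZE_TEXT`/`DETOKENIZE_TEXT` model types introduced below, equivalent trimming can be rebuilt on top of provider-registered handlers. A minimal sketch — the `useModel` dispatch here is an assumption, not shown in this diff; the commit defines only the handler param shapes:

```ts
import { ModelType } from "@elizaos/core";

// Assumed dispatch surface; how handlers are invoked is not part of this diff.
interface TokenizingRuntime {
    useModel(type: ModelType, params: Record<string, unknown>): Promise<any>;
}

export async function trimTokens(
    context: string,
    maxTokens: number,
    runtime: TokenizingRuntime
): Promise<string> {
    if (!context) return "";
    if (maxTokens <= 0) throw new Error("maxTokens must be positive");

    // Tokenize through whichever provider plugin registered TOKENIZE_TEXT.
    const tokens: number[] = await runtime.useModel(ModelType.TOKENIZE_TEXT, {
        context,
        modelType: ModelType.TEXT_LARGE,
    });
    if (tokens.length <= maxTokens) return context;

    // Keep the most recent tokens, matching the old truncation behavior.
    return await runtime.useModel(ModelType.DETOKENIZE_TEXT, {
        tokens: tokens.slice(-maxTokens),
        modelType: ModelType.TEXT_LARGE,
    });
}
```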
4 changes: 2 additions & 2 deletions packages/core/src/index.ts
@@ -9,7 +9,7 @@ export * from "./evaluators.ts";
 export * from "./generation.ts";
 export * from "./goals.ts";
 export * from "./helper.ts";
-export { default as knowledge } from "./knowledge.ts";
+export * from "./knowledge.ts";
 export * from "./logger.ts";
 export * from "./memory.ts";
 export * from "./messages.ts";
@@ -20,4 +20,4 @@ export * from "./relationships.ts";
 export * from "./runtime.ts";
 export * from "./settings.ts";
 export * from "./types.ts";
-export * from "./uuid.ts";
\ No newline at end of file
+export * from "./uuid.ts";
12 changes: 12 additions & 0 deletions packages/core/src/types.ts
@@ -120,6 +120,8 @@ export enum ModelType {
     TEXT_SMALL = "text_small",
     TEXT_LARGE = "text_large",
     TEXT_EMBEDDING = "text_embedding",
+    TOKENIZE_TEXT = "tokenize_text",
+    DETOKENIZE_TEXT = "detokenize_text",
     IMAGE = "image",
     IMAGE_DESCRIPTION = "image_description",
     TRANSCRIPTION = "transcription",
@@ -1008,3 +1010,13 @@ export type GenerateTextParams = {
     modelType: ModelType;
     stopSequences?: string[];
 };
+
+export interface TokenizeTextParams {
+    context: string;
+    modelType: ModelType;
+}
+
+export interface DetokenizeTextParams {
+    tokens: number[];
+    modelType: ModelType;
+}
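Taken together, the new enum members and param interfaces imply a simple handler contract: a `TOKENIZE_TEXT` handler maps `TokenizeTextParams` to token ids, and a `DETOKENIZE_TEXT` handler maps `DetokenizeTextParams` back to a string. A sketch of the inferred shapes — the diff defines only the params, so these aliases are illustrative, not part of core:

```ts
import type {
    DetokenizeTextParams,
    TokenizeTextParams,
} from "@elizaos/core";

// Inferred from the plugin-openai handlers below; core defines only the params.
type TokenizeTextHandler = (params: TokenizeTextParams) => Promise<number[]>;
type DetokenizeTextHandler = (params: DetokenizeTextParams) => Promise<string>;
```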
35 changes: 35 additions & 0 deletions packages/plugin-openai/src/index.ts
@@ -1,7 +1,28 @@
 import { createOpenAI } from "@ai-sdk/openai";
 import type { Plugin } from "@elizaos/core";
 import { GenerateTextParams, ModelType } from "@elizaos/core";
+import { DetokenizeTextParams, TokenizeTextParams } from "@elizaos/core";
 import { generateText as aiGenerateText } from "ai";
+import { encodingForModel, type TiktokenModel } from "js-tiktoken";
+
+async function tokenizeText(model: ModelType, context: string) {
+    // Same env fallback chain in both directions, so encode/decode agree.
+    const modelName =
+        model === ModelType.TEXT_SMALL
+            ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini"
+            : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    return encoding.encode(context);
+}
+
+async function detokenizeText(model: ModelType, tokens: number[]) {
+    const modelName =
+        model === ModelType.TEXT_SMALL
+            ? process.env.OPENAI_SMALL_MODEL ?? process.env.SMALL_MODEL ?? "gpt-4o-mini"
+            : process.env.OPENAI_LARGE_MODEL ?? process.env.LARGE_MODEL ?? "gpt-4o";
+    const encoding = encodingForModel(modelName as TiktokenModel);
+    return encoding.decode(tokens);
+}
 
 export const openaiPlugin: Plugin = {
     name: "openai",
@@ -34,6 +55,20 @@ export const openaiPlugin: Plugin = {
             console.log("data", data);
             return data.data[0].embedding;
         },
+        [ModelType.TOKENIZE_TEXT]: async ({
+            context,
+            modelType,
+        }: TokenizeTextParams) => {
+            return tokenizeText(modelType ?? ModelType.TEXT_LARGE, context);
+        },
+        [ModelType.DETOKENIZE_TEXT]: async ({
+            tokens,
+            modelType,
+        }: DetokenizeTextParams) => {
+            return detokenizeText(modelType ?? ModelType.TEXT_LARGE, tokens);
+        },
         [ModelType.TEXT_LARGE]: async ({
             runtime,
             context,
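A quick sanity check for the two handlers is a round trip: tokenizing and then detokenizing should reproduce the input. The sketch below assumes the handler map shown above is reachable on the plugin as `models` (the enclosing property is collapsed out of this view, so that name is a guess, as is the `@elizaos/plugin-openai` package name inferred from the repo layout):

```ts
import { ModelType } from "@elizaos/core";
import { openaiPlugin } from "@elizaos/plugin-openai";

// "models" is assumed; the property holding these handlers is collapsed above.
const handlers = (openaiPlugin as any).models;

const text = "tokenize me";
const tokens: number[] = await handlers[ModelType.TOKENIZE_TEXT]({
    context: text,
    modelType: ModelType.TEXT_SMALL,
});
const restored: string = await handlers[ModelType.DETOKENIZE_TEXT]({
    tokens,
    modelType: ModelType.TEXT_SMALL,
});
console.log(restored === text); // true: same model picks the same encoding both ways
```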
2 changes: 1 addition & 1 deletion scripts/smokeTests.sh
@@ -52,7 +52,7 @@ TIMER=0
 
 # Start the application and capture logs in the background
 # Log level 27 includes "success", which is the level we're looking for
-DEFAULT_LOG_LEVEL=success bun start --character=characters/trump.character.json > "$OUTFILE" 2>&1 &
+DEFAULT_LOG_LEVEL=success bun start > "$OUTFILE" 2>&1 &
 
 APP_PID=$! # Capture the PID of the background process
 
1 change: 0 additions & 1 deletion scripts/start.sh
@@ -245,7 +245,6 @@ create_character_template() {
 {
     "name": "$name",
     "clients": [],
-    "modelProvider": "anthropic",
     "settings": {
         "voice": {
             "model": "en_GB-alan-medium"