Skip to content

Commit

Permalink
video handling and transcription working
Browse files Browse the repository at this point in the history
  • Loading branch information
lalalune committed Feb 10, 2025
1 parent 125d3b8 commit 97e69f8
Show file tree
Hide file tree
Showing 10 changed files with 84 additions and 629 deletions.
Binary file modified bun.lockb
Binary file not shown.
14 changes: 7 additions & 7 deletions packages/agent/src/api.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import {
type AgentRuntime,
type Character,
logger,
getEnvVariable,
IAgentRuntime,
logger,
type UUID,
validateCharacterConfig,
validateUuid,
validateUuid
} from "@elizaos/core";
import bodyParser from "body-parser";
import cors from "cors";
Expand Down Expand Up @@ -46,7 +46,7 @@ function validateUUIDParams(
}

export function createApiRouter(
agents: Map<string, AgentRuntime>,
agents: Map<string, IAgentRuntime>,
directClient: CharacterServer
): express.Router {
const router = express.Router();
Expand Down Expand Up @@ -117,7 +117,7 @@ export function createApiRouter(
};
if (!agentId) return;

const agent: AgentRuntime = agents.get(agentId);
const agent: IAgentRuntime = agents.get(agentId);

if (agent) {
agent.stop();
Expand All @@ -134,7 +134,7 @@ export function createApiRouter(
};
if (!agentId) return;

let agent: AgentRuntime = agents.get(agentId);
let agent: IAgentRuntime = agents.get(agentId);

// update character
if (agent) {
Expand Down Expand Up @@ -338,7 +338,7 @@ export function createApiRouter(
router.post("/agents/:agentId/stop", async (req, res) => {
const agentId = req.params.agentId;
console.log("agentId", agentId);
const agent: AgentRuntime = agents.get(agentId);
const agent: IAgentRuntime = agents.get(agentId);

// update character
if (agent) {
Expand Down
10 changes: 2 additions & 8 deletions packages/agent/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,7 @@ export class CharacterServer {
return;
}

const transcription = await runtime.call(AsyncHandlerType.TRANSCRIPTION, {
file: fs.createReadStream(audioFile.path),
model: "whisper-1",
});
const transcription = await runtime.call(AsyncHandlerType.TRANSCRIPTION, fs.createReadStream(audioFile.path));

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

res.json(transcription);
}

Check failure

Code scanning / CodeQL

Missing rate limiting High

This route handler performs
a file system access
, but is not rate-limited.
Expand Down Expand Up @@ -823,10 +820,7 @@ export class CharacterServer {
// Get the text to convert to speech
const textToSpeak = response.text;

const speechResponse = await runtime.call(AsyncHandlerType.TRANSCRIPTION, {
text: textToSpeak,
runtime,
});
const speechResponse = await runtime.call(AsyncHandlerType.TEXT_TO_SPEECH, textToSpeak);

if (!speechResponse.ok) {
throw new Error(
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import type { Readable } from "stream";

/**
* Represents a UUID string in the format "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
*/
Expand Down Expand Up @@ -1026,6 +1024,8 @@ export interface IAgentRuntime {
call<T = any>(handlerType: AsyncHandlerType, params: T): Promise<any>;
registerHandler(handlerType: AsyncHandlerType, handler: (params: any) => Promise<any>): void;
getHandler(handlerType: AsyncHandlerType): ((params: any) => Promise<any>) | undefined;

stop(): Promise<void>;
}

export enum LoggingLevel {
Expand Down
18 changes: 0 additions & 18 deletions packages/plugin-discord/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,7 @@ import {
type User,
} from "discord.js";
import { EventEmitter } from "events";
import chat_with_attachments from "./actions/chat_with_attachments.ts";
import download_media from "./actions/download_media.ts";
import joinvoice from "./actions/joinvoice.ts";
import leavevoice from "./actions/leavevoice.ts";
import summarize from "./actions/summarize_conversation.ts";
import transcribe_media from "./actions/transcribe_media.ts";
import { MessageManager } from "./messages.ts";
import channelStateProvider from "./providers/channelState.ts";
import voiceStateProvider from "./providers/voiceState.ts";
import { VoiceManager } from "./voice.ts";
import { IDiscordClient } from "./types.ts";

Expand Down Expand Up @@ -69,16 +61,6 @@ export class DiscordClient extends EventEmitter implements IDiscordClient {
this.client.login(this.apiToken);

this.setupEventListeners();

this.runtime.registerAction(joinvoice);
this.runtime.registerAction(leavevoice);
this.runtime.registerAction(summarize);
this.runtime.registerAction(chat_with_attachments);
this.runtime.registerAction(transcribe_media);
this.runtime.registerAction(download_media);

this.runtime.providers.push(channelStateProvider);
this.runtime.providers.push(voiceStateProvider);
}

private setupEventListeners() {
Expand Down
234 changes: 0 additions & 234 deletions packages/plugin-discord/src/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,6 @@ interface MessageContext {
timestamp: number;
}

/**
 * Settings that drive the Discord auto-post feature of MessageManager.
 *
 * Units are only stated where this file establishes them; the remaining
 * fields carry hedged notes that should be confirmed against the callers.
 */
interface AutoPostConfig {
// Master switch; the monitoring methods return early when this is false.
enabled: boolean;
// NOTE(review): presumably a duration in milliseconds — confirm against the code that reads it.
monitorTime: number;
// How long the main channel must be silent before an auto post is considered.
inactivityThreshold: number; // milliseconds
// ID of the channel that auto posts and announcement reactions are sent to.
mainChannelId: string;
// IDs of the channels watched for new announcements.
announcementChannelIds: string[];
// Timestamp of the most recent auto post; absent until the first post has been made.
lastAutoPost?: number;
minTimeBetweenPosts?: number; // minimum time between auto posts
}

export type InterestChannels = {
[key: string]: {
currentHandler: string | undefined;
Expand All @@ -63,7 +53,6 @@ export class MessageManager {
private discordClient: any;
private voiceManager: VoiceManager;
//Auto post
private autoPostConfig: AutoPostConfig;
private lastChannelActivity: { [channelId: string]: number } = {};
private autoPostInterval: NodeJS.Timeout;

Expand All @@ -73,19 +62,6 @@ export class MessageManager {
this.discordClient = discordClient;
this.runtime = discordClient.runtime;
this.attachmentManager = new AttachmentManager(this.runtime);

this.autoPostConfig = {
enabled: this.runtime.character.clientConfig?.discord?.autoPost?.enabled || false,
monitorTime: this.runtime.character.clientConfig?.discord?.autoPost?.monitorTime || 300000,
inactivityThreshold: this.runtime.character.clientConfig?.discord?.autoPost?.inactivityThreshold || 3600000, // 1 hour default
mainChannelId: this.runtime.character.clientConfig?.discord?.autoPost?.mainChannelId,
announcementChannelIds: this.runtime.character.clientConfig?.discord?.autoPost?.announcementChannelIds || [],
minTimeBetweenPosts: this.runtime.character.clientConfig?.discord?.autoPost?.minTimeBetweenPosts || 7200000, // 2 hours default
};

if (this.autoPostConfig.enabled) {
this._startAutoPostMonitoring();
}
}

async handleMessage(message: DiscordMessage) {
Expand Down Expand Up @@ -405,216 +381,6 @@ export class MessageManager {
}
}

/**
 * Starts auto-post monitoring as soon as the Discord client is ready.
 *
 * If the client already reports ready, initialization runs immediately;
 * otherwise it is deferred to a one-shot 'ready' listener.
 */
private _startAutoPostMonitoring(): void {
  if (this.client.isReady()) {
    logger.info('[AutoPost Discord] Client already ready, starting monitoring');
    this._initializeAutoPost();
    return;
  }

  logger.info('[AutoPost Discord] Client not ready, waiting for ready event');

  // Registered with `once`, so initialization happens at most one time per call.
  const onClientReady = (): void => {
    logger.info('[AutoPost Discord] Client ready, starting monitoring');
    this._initializeAutoPost();
  };
  this.client.once('ready', onClientReady);
}

/**
 * Schedules the periodic channel-activity check and attaches the
 * announcement-channel collectors after a short startup delay.
 */
private _initializeAutoPost(): void {
  // 5 second delay so the client has a moment to fully load its cache.
  const startupDelayMs = 5000;
  const hourMs = 60 * 60 * 1000;

  setTimeout(() => {
    // The period is drawn once, when the delay elapses, from the 2-6 hour
    // range; every later tick of the interval reuses that same period.
    const checkPeriodMs = Math.floor(Math.random() * (4 * hourMs) + 2 * hourMs);

    this.autoPostInterval = setInterval(() => {
      this._checkChannelActivity();
    }, checkPeriodMs);

    // Begin listening on the announcement channels.
    this._monitorAnnouncementChannels();
  }, startupDelayMs);
}

/**
 * Posts an engagement message to the main channel when it has been quiet
 * for long enough.
 *
 * Does nothing when auto posting is disabled, no main channel is configured,
 * or the main channel is not in the client's channel cache. Otherwise it
 * fetches the most recent message, and if both the (jittered) inactivity
 * threshold and the minimum time between auto posts have elapsed, it
 * generates a response, sends it in chunks, stores one memory per sent
 * message and records the time of the post.
 *
 * Failures inside the fetch/post path are logged with logger.warn and are
 * not rethrown.
 */
private async _checkChannelActivity(): Promise<void> {
if (!this.autoPostConfig.enabled || !this.autoPostConfig.mainChannelId) return;

const channel = this.client.channels.cache.get(this.autoPostConfig.mainChannelId) as TextChannel;
if (!channel) return;

try {
// Get last message time; an empty channel yields 0, so it counts as inactive
const messages = await channel.messages.fetch({ limit: 1 });
const lastMessage = messages.first();
const lastMessageTime = lastMessage ? lastMessage.createdTimestamp : 0;

const now = Date.now();
const timeSinceLastMessage = now - lastMessageTime;
const timeSinceLastAutoPost = now - (this.autoPostConfig.lastAutoPost || 0);

// Add some randomness to the inactivity threshold. The expression below
// spans [-900000, 900000) ms, i.e. ±15 minutes.
// NOTE(review): this comment previously said ±30 minutes — confirm which is intended.
const randomThreshold = this.autoPostConfig.inactivityThreshold +
(Math.random() * 1800000 - 900000);

// Check if we should post: channel quiet long enough AND last auto post old enough
if ((timeSinceLastMessage > randomThreshold) &&
timeSinceLastAutoPost > (this.autoPostConfig.minTimeBetweenPosts || 0)) {

try {
// Create memory and generate response
const roomId = stringToUuid(channel.id + "-" + this.runtime.agentId);

// Synthetic trigger memory authored by the agent itself
const memory = {
id: stringToUuid(`autopost-${Date.now()}`),
userId: this.runtime.agentId,
agentId: this.runtime.agentId,
roomId,
content: { text: "AUTO_POST_ENGAGEMENT", source: "discord" },
createdAt: Date.now()
};

let state = await this.runtime.composeState(memory, {
discordClient: this.client,
discordMessage: null,
agentName: this.runtime.character.name || this.client.user?.displayName
});

// Generate response using template (character override first, built-in default otherwise)
const context = composeContext({
state,
template: this.runtime.character.templates?.discordAutoPostTemplate || discordAutoPostTemplate
});

const responseContent = await this._generateResponse(memory, state, context);
// Nothing to post; lastAutoPost is left untouched in this case
if (!responseContent?.text) return;

// Send message and update memory
const messages = await sendMessageInChunks(channel, responseContent.text.trim(), null, []);

// Create and store memories, one per message that was actually sent
const memories = messages.map(m => ({
id: stringToUuid(m.id + "-" + this.runtime.agentId),
userId: this.runtime.agentId,
agentId: this.runtime.agentId,
content: {
...responseContent,
url: m.url,
},
roomId,
createdAt: m.createdTimestamp,
}));

for (const m of memories) {
await this.runtime.messageManager.createMemory(m);
}

// Update state and last post time
this.autoPostConfig.lastAutoPost = Date.now();
state = await this.runtime.updateRecentMessageState(state);
await this.runtime.evaluate(memory, state, true);
} catch (error) {
logger.warn("[AutoPost Discord] Error:", error);
}
} else {
// Also reached when the channel is quiet but minTimeBetweenPosts has not elapsed yet
logger.warn("[AutoPost Discord] Activity within threshold. Not posting.");
}
} catch (error) {
logger.warn("[AutoPost Discord] Error checking last message:", error);
}
}

/**
 * Attaches a message collector to every configured announcement channel so
 * that new announcements trigger a generated follow-up in the main channel.
 *
 * Returns early (with a warning) when auto posting is disabled or no
 * announcement channels are configured. Channels missing from the client's
 * cache, or of an unsupported type, are skipped with a warning.
 *
 * For each collected message that is neither from a bot nor older than
 * 300000 ms, the handler generates a response, sends it to the main channel
 * in chunks and stores one memory per sent message. Handler failures are
 * logged with logger.warn and are not rethrown.
 *
 * All awaits live inside the collector callback; the method body itself
 * completes synchronously once the collectors are registered.
 */
private async _monitorAnnouncementChannels(): Promise<void> {
if (!this.autoPostConfig.enabled || !this.autoPostConfig.announcementChannelIds.length) {
logger.warn('[AutoPost Discord] Auto post config disabled or no announcement channels')
return;
}

for (const announcementChannelId of this.autoPostConfig.announcementChannelIds) {
const channel = this.client.channels.cache.get(announcementChannelId);

if (channel) {
// Check if it's either a text channel or announcement channel
// ChannelType.GuildAnnouncement is 5
// ChannelType.GuildText is 0
if (channel instanceof TextChannel || channel.type === ChannelType.GuildAnnouncement) {
// Announcement channels are treated as TextChannel for the collector API
const newsChannel = channel as TextChannel;
try {
newsChannel.createMessageCollector().on('collect', async (message: DiscordMessage) => {
// Ignore bot authors and anything created more than 300000 ms (5 minutes) ago
if (message.author.bot || Date.now() - message.createdTimestamp > 300000) return;

// Responses go to the main channel, not the announcement channel itself
const mainChannel = this.client.channels.cache.get(this.autoPostConfig.mainChannelId) as TextChannel;
if (!mainChannel) return;

try {
// Create memory and generate response
const roomId = stringToUuid(mainChannel.id + "-" + this.runtime.agentId);
const memory = {
id: stringToUuid(`announcement-${Date.now()}`),
userId: this.runtime.agentId,
agentId: this.runtime.agentId,
roomId,
content: {
text: message.content,
source: "discord",
metadata: { announcementUrl: message.url }
},
createdAt: Date.now()
};

let state = await this.runtime.composeState(memory, {
discordClient: this.client,
discordMessage: message,
announcementContent: message?.content,
announcementChannelId: channel.id,
agentName: this.runtime.character.name || this.client.user?.displayName
});

// Generate response using template (character override first, built-in default otherwise)
const context = composeContext({
state,
template: this.runtime.character.templates?.discordAnnouncementHypeTemplate || discordAnnouncementHypeTemplate

});

const responseContent = await this._generateResponse(memory, state, context);
if (!responseContent?.text) return;

// Send message and update memory
const messages = await sendMessageInChunks(mainChannel, responseContent.text.trim(), null, []);

// Create and store memories, one per message that was actually sent
const memories = messages.map(m => ({
id: stringToUuid(m.id + "-" + this.runtime.agentId),
userId: this.runtime.agentId,
agentId: this.runtime.agentId,
content: {
...responseContent,
url: m.url,
},
roomId,
createdAt: m.createdTimestamp,
}));

for (const m of memories) {
await this.runtime.messageManager.createMemory(m);
}

// Update state
state = await this.runtime.updateRecentMessageState(state);
await this.runtime.evaluate(memory, state, true);
} catch (error) {
logger.warn("[AutoPost Discord] Announcement Error:", error);
}
});
logger.info(`[AutoPost Discord] Successfully set up collector for announcement channel: ${newsChannel.name}`);
} catch (error) {
logger.warn(`[AutoPost Discord] Error setting up announcement channel collector:`, error);
}
} else {
logger.warn(`[AutoPost Discord] Channel ${announcementChannelId} is not a valid announcement or text channel, type:`, channel.type);
}
} else {
logger.warn(`[AutoPost Discord] Could not find channel ${announcementChannelId} directly`);
}
}
}

async processMessageMedia(
message: DiscordMessage
): Promise<{ processedContent: string; attachments: Media[] }> {
Expand Down
2 changes: 1 addition & 1 deletion packages/plugin-node/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
"wav-encoder": "1.3.0",
"wavefile": "11.0.0",
"yargs": "17.7.2",
"youtube-dl-exec": "3.0.10",
"youtube-dl-exec": "3.0.15",
"cookie": "0.7.0"
},
"devDependencies": {
Expand Down
Loading

0 comments on commit 97e69f8

Please sign in to comment.