✨ feat: Support displaying OpenRouter's "reasoning" output by transforming
its "reasoning" field to be consistent with most other platforms: reasoning output wrapped in a <think> XML tag.
deephbz committed Feb 9, 2025
1 parent 7c7b5ba commit ca6d009
Showing 4 changed files with 129 additions and 2 deletions.
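For context, OpenRouter streams reasoning tokens in a separate "reasoning" field on each delta rather than inline with the content. A hypothetical before/after of the streamed text (the chunk shapes below are illustrative, not taken from the commit):

// Raw OpenRouter deltas (hypothetical):
//   { delta: { reasoning: 'Let me think. ' } }
//   { delta: { reasoning: 'OK.' } }
//   { delta: { content: 'Done.' } }
//
// Text emitted downstream after this commit:
//   '<think>Let me think. '
//   'OK.'
//   '</think>Done.'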
4 changes: 2 additions & 2 deletions src/libs/agent-runtime/openrouter/index.test.ts
@@ -79,14 +79,14 @@ describe('LobeOpenRouterAI', () => {

       // Assert
       expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
-        {
+        expect.objectContaining({
           max_tokens: 1024,
           messages: [{ content: 'Hello', role: 'user' }],
           stream: true,
           model: 'mistralai/mistral-7b-instruct:free',
           temperature: 0.7,
           top_p: 1,
-        },
+        }),
         { headers: { Accept: '*/*' } },
       );
       expect(result).toBeInstanceOf(Response);
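Note: the assertion moves from exact equality to expect.objectContaining because handlePayload now adds include_reasoning to the request body (see the next file), so a strict match on only the original fields would fail. A hypothetical stricter assertion (not in the commit) could pin the new flag down:

expect(instance['client'].chat.completions.create).toHaveBeenCalledWith(
  expect.objectContaining({ include_reasoning: true }),
  { headers: { Accept: '*/*' } },
);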
5 changes: 5 additions & 0 deletions src/libs/agent-runtime/openrouter/index.ts
@@ -2,17 +2,22 @@ import { LOBE_DEFAULT_MODEL_LIST } from '@/config/modelProviders';

 import { ModelProvider } from '../types';
 import { LobeOpenAICompatibleFactory } from '../utils/openaiCompatibleFactory';
+import { OpenRouterReasoningStream } from '../utils/streams';
 import { OpenRouterModelCard } from './type';
 
+// <— your new file
+
 export const LobeOpenRouterAI = LobeOpenAICompatibleFactory({
   baseURL: 'https://openrouter.ai/api/v1',
   chatCompletion: {
     handlePayload: (payload) => {
       return {
         ...payload,
+        include_reasoning: true,
         stream: payload.stream ?? true,
       } as any;
     },
+    handleStream: OpenRouterReasoningStream,
   },
   constructorOptions: {
     defaultHeaders: {
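Given the diff above, handlePayload now forces streaming and opts into OpenRouter's reasoning output. A sketch of the resulting request body for a minimal payload (field values are examples):

// Input payload (example):
//   { messages: [{ content: 'Hello', role: 'user' }], model: 'mistralai/mistral-7b-instruct:free' }
// Body sent to OpenRouter:
//   { messages: [...], model: '...', include_reasoning: true, stream: true }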
1 change: 1 addition & 0 deletions src/libs/agent-runtime/utils/streams/index.ts
@@ -4,6 +4,7 @@ export * from './bedrock';
 export * from './google-ai';
 export * from './ollama';
 export * from './openai';
+export * from './openrouter';
 export * from './protocol';
 export * from './qwen';
 export * from './spark';
121 changes: 121 additions & 0 deletions src/libs/agent-runtime/utils/streams/openrouter.ts
@@ -0,0 +1,121 @@
import OpenAI from 'openai';
import type { Stream } from 'openai/streaming';

import { ChatStreamCallbacks } from '../../types';
import {
StreamProtocolChunk,
convertIterableToStream,
createCallbacksTransformer,
createSSEProtocolTransformer,
} from './protocol';

/**
* Create a closure to track whether we’ve inserted `<think>` and/or closed it.
*/
function createOpenRouterReasoningTransformer() {
let reasoningStarted = false;
let contentStarted = false;
let insertedThink = false;

return function transformOpenRouterChunk(chunk: OpenAI.ChatCompletionChunk): StreamProtocolChunk {
const choice = chunk.choices?.[0];
if (!choice || !choice.delta) {
// No delta => just emit generic "data"
return {
data: chunk,
id: chunk.id,
type: 'data',
};
}

const { content, reasoning } = choice.delta as {
content?: string | null;
reasoning?: string | null;
};

// Convert empty string, null, or undefined to a simple “nothing” check:
const isContentNonEmpty = typeof content === 'string' && content.length > 0;
const isReasoningNonEmpty = typeof reasoning === 'string' && reasoning.length > 0;

// Prepare an output string that we will treat as the “transformed content” for this chunk
let transformed = '';

if (!contentStarted && isReasoningNonEmpty) {
// We are still in the “reasoning” phase
if (!reasoningStarted) {
reasoningStarted = true;
}
if (!insertedThink) {
// First piece of reasoning => prepend <think>
transformed = `<think>${reasoning}`;
insertedThink = true;
} else {
// Subsequent reasoning => just append text
transformed = reasoning;
}

return {
data: transformed,
id: chunk.id,
type: 'text', // SSE “event: text”
};
} else if (isContentNonEmpty) {
// We now have actual content
if (!contentStarted) {
contentStarted = true;
// If we had been doing reasoning, close it
if (reasoningStarted && insertedThink) {
transformed = `</think>${content}`;
} else {
transformed = content;
}
} else {
// Already started content => just append new chunk
transformed = content;
}

return {
data: transformed,
id: chunk.id,
type: 'text',
};
}

// If this chunk indicates finishing
if (choice.finish_reason) {
return {
data: choice.finish_reason,
id: chunk.id,
type: 'stop',
};
}

// Fallback: if we have no “content” or “reasoning,” or it’s empty
return {
data: choice.delta,
id: chunk.id,
type: 'data',
};
};
}
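// Illustrative walk-through of the closure above (not part of the commit; chunk
// literals are trimmed to the fields the transformer reads):
//   const t = createOpenRouterReasoningTransformer();
//   t({ id: '1', choices: [{ delta: { reasoning: 'a' } }] }) // => { data: '<think>a', id: '1', type: 'text' }
//   t({ id: '1', choices: [{ delta: { reasoning: 'b' } }] }) // => { data: 'b', id: '1', type: 'text' }
//   t({ id: '1', choices: [{ delta: { content: 'c' } }] })   // => { data: '</think>c', id: '1', type: 'text' }
//   t({ id: '1', choices: [{ delta: {}, finish_reason: 'stop' }] }) // => { data: 'stop', id: '1', type: 'stop' }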

/**
* The main stream entry point for OpenRouter, similar to Qwen’s “QwenAIStream.”
*/
export function OpenRouterReasoningStream(
stream: Stream<OpenAI.ChatCompletionChunk> | ReadableStream,
callbacks?: ChatStreamCallbacks,
) {
// Convert the stream if it’s an AsyncIterable
const readableStream =
stream instanceof ReadableStream ? stream : convertIterableToStream(stream);

// Create our chunk-by-chunk transformer
const transformFn = createOpenRouterReasoningTransformer();

// 1. Transform each chunk to a standard SSE protocol event
// 2. Pipe it through the user’s callback hooks
return readableStream
.pipeThrough(createSSEProtocolTransformer(transformFn))
.pipeThrough(createCallbacksTransformer(callbacks));
}
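
To sanity-check the whole pipeline, one can drive it with hand-built chunks (a sketch, not part of the commit; the cast is needed because the fake chunks are partial):

const chunks = [
  { choices: [{ delta: { reasoning: 'Let me think. ' }, index: 0 }], id: 'c1' },
  { choices: [{ delta: { content: 'Done.' }, index: 0 }], id: 'c1' },
] as unknown as OpenAI.ChatCompletionChunk[];

const source = new ReadableStream({
  start(controller) {
    for (const c of chunks) controller.enqueue(c);
    controller.close();
  },
});

// Expected 'text' events: '<think>Let me think. ' followed by '</think>Done.'
const sse = OpenRouterReasoningStream(source);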
