feat(#383): Streaming AI responses via Matrix message edits
Some checks failed
ci/woodpecker/push/api Pipeline failed
Some checks failed
ci/woodpecker/push/api Pipeline failed
- Add MatrixStreamingService with editMessage, setTypingIndicator, streamResponse - Rate-limited edits (500ms) for incremental streaming output - Typing indicator management during generation - Graceful error handling and fallback for non-streaming scenarios - Add optional editMessage to IChatProvider interface - Add getClient() accessor to MatrixService for streaming service - Register MatrixStreamingService in BridgeModule - Tests: 20 tests pass Refs #383 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
236
apps/api/src/bridge/matrix/matrix-streaming.service.ts
Normal file
236
apps/api/src/bridge/matrix/matrix-streaming.service.ts
Normal file
@@ -0,0 +1,236 @@
|
||||
import { Injectable, Logger } from "@nestjs/common";
|
||||
import type { MatrixClient } from "matrix-bot-sdk";
|
||||
import { MatrixService } from "./matrix.service";
|
||||
|
||||
/**
|
||||
* Options for the streamResponse method
|
||||
*/
|
||||
export interface StreamResponseOptions {
|
||||
/** Custom initial message (defaults to "Thinking...") */
|
||||
initialMessage?: string;
|
||||
/** Thread root event ID for threaded responses */
|
||||
threadId?: string;
|
||||
/** Whether to show token usage in the final message */
|
||||
showTokenUsage?: boolean;
|
||||
/** Token usage stats to display in the final message */
|
||||
tokenUsage?: { prompt: number; completion: number; total: number };
|
||||
}
|
||||
|
||||
/**
|
||||
* Matrix message content for m.room.message events
|
||||
*/
|
||||
interface MatrixMessageContent {
|
||||
msgtype: string;
|
||||
body: string;
|
||||
"m.new_content"?: {
|
||||
msgtype: string;
|
||||
body: string;
|
||||
};
|
||||
"m.relates_to"?: {
|
||||
rel_type: string;
|
||||
event_id: string;
|
||||
is_falling_back?: boolean;
|
||||
"m.in_reply_to"?: {
|
||||
event_id: string;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/** Minimum interval between message edits (milliseconds) */
|
||||
const EDIT_INTERVAL_MS = 500;
|
||||
|
||||
/** Typing indicator timeout (milliseconds) */
|
||||
const TYPING_TIMEOUT_MS = 30000;
|
||||
|
||||
/**
|
||||
* Matrix Streaming Service
|
||||
*
|
||||
* Provides streaming AI response capabilities for Matrix rooms using
|
||||
* incremental message edits. Tokens from an LLM are buffered and the
|
||||
* response message is edited at rate-limited intervals, providing a
|
||||
* smooth streaming experience without excessive API calls.
|
||||
*
|
||||
* Key features:
|
||||
* - Rate-limited edits (max every 500ms)
|
||||
* - Typing indicator management during generation
|
||||
* - Graceful error handling with user-visible error notices
|
||||
* - Thread support for contextual responses
|
||||
* - LLM-agnostic design via AsyncIterable<string> token stream
|
||||
*/
|
||||
@Injectable()
|
||||
export class MatrixStreamingService {
|
||||
private readonly logger = new Logger(MatrixStreamingService.name);
|
||||
|
||||
constructor(private readonly matrixService: MatrixService) {}
|
||||
|
||||
/**
|
||||
* Edit an existing Matrix message using the m.replace relation.
|
||||
*
|
||||
* Sends a new event that replaces the content of an existing message.
|
||||
* Includes fallback content for clients that don't support edits.
|
||||
*
|
||||
* @param roomId - The Matrix room ID
|
||||
* @param eventId - The original event ID to replace
|
||||
* @param newContent - The updated message text
|
||||
*/
|
||||
async editMessage(roomId: string, eventId: string, newContent: string): Promise<void> {
|
||||
const client = this.getClientOrThrow();
|
||||
|
||||
const editContent: MatrixMessageContent = {
|
||||
"m.new_content": {
|
||||
msgtype: "m.text",
|
||||
body: newContent,
|
||||
},
|
||||
"m.relates_to": {
|
||||
rel_type: "m.replace",
|
||||
event_id: eventId,
|
||||
},
|
||||
// Fallback for clients that don't support edits
|
||||
msgtype: "m.text",
|
||||
body: `* ${newContent}`,
|
||||
};
|
||||
|
||||
await client.sendEvent(roomId, "m.room.message", editContent);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the typing indicator for the bot in a room.
|
||||
*
|
||||
* @param roomId - The Matrix room ID
|
||||
* @param typing - Whether the bot is typing
|
||||
*/
|
||||
async setTypingIndicator(roomId: string, typing: boolean): Promise<void> {
|
||||
const client = this.getClientOrThrow();
|
||||
|
||||
await client.setTyping(roomId, typing, typing ? TYPING_TIMEOUT_MS : undefined);
|
||||
}
|
||||
|
||||
/**
|
||||
* Send an initial message for streaming, optionally in a thread.
|
||||
*
|
||||
* Returns the event ID of the sent message, which can be used for
|
||||
* subsequent edits via editMessage.
|
||||
*
|
||||
* @param roomId - The Matrix room ID
|
||||
* @param content - The initial message content
|
||||
* @param threadId - Optional thread root event ID
|
||||
* @returns The event ID of the sent message
|
||||
*/
|
||||
async sendStreamingMessage(roomId: string, content: string, threadId?: string): Promise<string> {
|
||||
const client = this.getClientOrThrow();
|
||||
|
||||
const messageContent: MatrixMessageContent = {
|
||||
msgtype: "m.text",
|
||||
body: content,
|
||||
};
|
||||
|
||||
if (threadId) {
|
||||
messageContent["m.relates_to"] = {
|
||||
rel_type: "m.thread",
|
||||
event_id: threadId,
|
||||
is_falling_back: true,
|
||||
"m.in_reply_to": {
|
||||
event_id: threadId,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
const eventId: string = await client.sendMessage(roomId, messageContent);
|
||||
return eventId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream an AI response to a Matrix room using incremental message edits.
|
||||
*
|
||||
* This is the main streaming method. It:
|
||||
* 1. Sends an initial "Thinking..." message
|
||||
* 2. Starts the typing indicator
|
||||
* 3. Buffers incoming tokens from the async iterable
|
||||
* 4. Edits the message every 500ms with accumulated text
|
||||
* 5. On completion: sends a final clean edit, clears typing
|
||||
* 6. On error: edits message with error notice, clears typing
|
||||
*
|
||||
* @param roomId - The Matrix room ID
|
||||
* @param tokenStream - AsyncIterable that yields string tokens
|
||||
* @param options - Optional configuration for the stream
|
||||
*/
|
||||
async streamResponse(
|
||||
roomId: string,
|
||||
tokenStream: AsyncIterable<string>,
|
||||
options?: StreamResponseOptions
|
||||
): Promise<void> {
|
||||
// Validate connection before starting
|
||||
this.getClientOrThrow();
|
||||
|
||||
const initialMessage = options?.initialMessage ?? "Thinking...";
|
||||
const threadId = options?.threadId;
|
||||
|
||||
// Step 1: Send initial message
|
||||
const eventId = await this.sendStreamingMessage(roomId, initialMessage, threadId);
|
||||
|
||||
// Step 2: Start typing indicator
|
||||
await this.setTypingIndicator(roomId, true);
|
||||
|
||||
// Step 3: Buffer and stream tokens
|
||||
let accumulatedText = "";
|
||||
let lastEditTime = 0;
|
||||
let hasError = false;
|
||||
|
||||
try {
|
||||
for await (const token of tokenStream) {
|
||||
accumulatedText += token;
|
||||
|
||||
const now = Date.now();
|
||||
const elapsed = now - lastEditTime;
|
||||
|
||||
if (elapsed >= EDIT_INTERVAL_MS && accumulatedText.length > 0) {
|
||||
await this.editMessage(roomId, eventId, accumulatedText);
|
||||
lastEditTime = now;
|
||||
}
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
hasError = true;
|
||||
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
|
||||
|
||||
this.logger.error(`Stream error in room ${roomId}: ${errorMessage}`);
|
||||
|
||||
// Edit message to show error
|
||||
const errorContent = accumulatedText
|
||||
? `${accumulatedText}\n\n[Streaming error: ${errorMessage}]`
|
||||
: `[Streaming error: ${errorMessage}]`;
|
||||
|
||||
await this.editMessage(roomId, eventId, errorContent);
|
||||
} finally {
|
||||
// Step 4: Clear typing indicator
|
||||
await this.setTypingIndicator(roomId, false);
|
||||
}
|
||||
|
||||
// Step 5: Final edit with clean output (if no error)
|
||||
if (!hasError) {
|
||||
let finalContent = accumulatedText || "(No response generated)";
|
||||
|
||||
if (options?.showTokenUsage && options.tokenUsage) {
|
||||
const { prompt, completion, total } = options.tokenUsage;
|
||||
finalContent += `\n\n---\nTokens: ${String(total)} (prompt: ${String(prompt)}, completion: ${String(completion)})`;
|
||||
}
|
||||
|
||||
await this.editMessage(roomId, eventId, finalContent);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Matrix client from the parent MatrixService, or throw if not connected.
|
||||
*/
|
||||
private getClientOrThrow(): MatrixClient {
|
||||
if (!this.matrixService.isConnected()) {
|
||||
throw new Error("Matrix client is not connected");
|
||||
}
|
||||
|
||||
const client = this.matrixService.getClient();
|
||||
if (!client) {
|
||||
throw new Error("Matrix client is not connected");
|
||||
}
|
||||
|
||||
return client;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user