feat(web): implement SSE chat streaming with real-time token rendering (#516)
Some checks failed: the ci/woodpecker/push/web pipeline failed for this commit.
Co-authored-by: Jason Woltje <jason@diversecanvas.com> Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #516.
This commit is contained in:
@@ -3,7 +3,8 @@
|
||||
* Handles LLM chat interactions via /api/llm/chat
|
||||
*/
|
||||
|
||||
import { apiPost } from "./client";
|
||||
import { apiPost, fetchCsrfToken, getCsrfToken } from "./client";
|
||||
import { API_BASE_URL } from "../config";
|
||||
|
||||
export interface ChatMessage {
|
||||
role: "system" | "user" | "assistant";
|
||||
@@ -31,6 +32,19 @@ export interface ChatResponse {
|
||||
evalCount?: number;
|
||||
}
|
||||
|
||||
/**
 * Parsed SSE data chunk from the LLM stream.
 *
 * Each `data:` line of the stream (other than the `[DONE]` sentinel) is
 * JSON-decoded into this shape. All fields are optional because a chunk
 * carries either an error, a token payload, or end-of-stream metadata.
 */
interface SseChunk {
  // Present when the backend reports a failure mid-stream.
  error?: string;
  // Incremental assistant message; `content` holds one streamed token.
  message?: {
    role: string;
    content: string;
  };
  // Name of the model producing the stream, when reported.
  model?: string;
  // True on the final chunk of the stream.
  done?: boolean;
}
|
||||
|
||||
/**
|
||||
* Send a chat message to the LLM
|
||||
*/
|
||||
@@ -39,19 +53,122 @@ export async function sendChatMessage(request: ChatRequest): Promise<ChatRespons
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream a chat message from the LLM (not implemented yet)
|
||||
* TODO: Implement streaming support
|
||||
* Get or refresh the CSRF token for streaming requests.
|
||||
*/
|
||||
async function ensureCsrfTokenForStream(): Promise<string> {
|
||||
const existing = getCsrfToken();
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
return fetchCsrfToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream a chat message from the LLM using SSE over fetch.
|
||||
*
|
||||
* The backend accepts stream: true in the request body and responds with
|
||||
* Server-Sent Events:
|
||||
* data: {"message":{"content":"token"},...}\n\n for each token
|
||||
* data: [DONE]\n\n when the stream is complete
|
||||
* data: {"error":"message"}\n\n on error
|
||||
*
|
||||
* @param request - Chat request (stream field will be forced to true)
|
||||
* @param onChunk - Called with each token string as it arrives
|
||||
* @param onComplete - Called when the stream finishes successfully
|
||||
* @param onError - Called if the stream encounters an error
|
||||
* @param signal - Optional AbortSignal for cancellation
|
||||
*/
|
||||
export function streamChatMessage(
|
||||
request: ChatRequest,
|
||||
onChunk: (chunk: string) => void,
|
||||
onComplete: () => void,
|
||||
onError: (error: Error) => void
|
||||
onError: (error: Error) => void,
|
||||
signal?: AbortSignal
|
||||
): void {
|
||||
// Streaming implementation would go here
|
||||
void request;
|
||||
void onChunk;
|
||||
void onComplete;
|
||||
void onError;
|
||||
throw new Error("Streaming not implemented yet");
|
||||
void (async (): Promise<void> => {
|
||||
try {
|
||||
const csrfToken = await ensureCsrfTokenForStream();
|
||||
|
||||
const response = await fetch(`${API_BASE_URL}/api/llm/chat`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"X-CSRF-Token": csrfToken,
|
||||
},
|
||||
credentials: "include",
|
||||
body: JSON.stringify({ ...request, stream: true }),
|
||||
signal: signal ?? null,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text().catch(() => response.statusText);
|
||||
throw new Error(`Stream request failed: ${errorText}`);
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error("Response body is not readable");
|
||||
}
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder("utf-8");
|
||||
let buffer = "";
|
||||
|
||||
let readerDone = false;
|
||||
while (!readerDone) {
|
||||
const { done, value } = await reader.read();
|
||||
readerDone = done;
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
// SSE messages are separated by double newlines
|
||||
const parts = buffer.split("\n\n");
|
||||
// Keep the last (potentially incomplete) part
|
||||
buffer = parts.pop() ?? "";
|
||||
|
||||
for (const part of parts) {
|
||||
const trimmed = part.trim();
|
||||
if (!trimmed) continue;
|
||||
|
||||
for (const line of trimmed.split("\n")) {
|
||||
if (!line.startsWith("data: ")) continue;
|
||||
|
||||
const data = line.slice("data: ".length).trim();
|
||||
|
||||
if (data === "[DONE]") {
|
||||
onComplete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data) as SseChunk;
|
||||
|
||||
if (parsed.error) {
|
||||
throw new Error(parsed.error);
|
||||
}
|
||||
|
||||
if (parsed.message?.content) {
|
||||
onChunk(parsed.message.content);
|
||||
}
|
||||
} catch (parseErr) {
|
||||
if (parseErr instanceof SyntaxError) {
|
||||
continue;
|
||||
}
|
||||
throw parseErr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Natural end of stream without [DONE]
|
||||
onComplete();
|
||||
} catch (err: unknown) {
|
||||
if (err instanceof DOMException && err.name === "AbortError") {
|
||||
return;
|
||||
}
|
||||
onError(err instanceof Error ? err : new Error(String(err)));
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user