feat(web): implement SSE chat streaming with real-time token rendering (#516)
Some checks failed: the ci/woodpecker/push/web pipeline failed for this commit.
Co-authored-by: Jason Woltje <jason@diversecanvas.com> Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #516.
This commit is contained in:
@@ -3,7 +3,8 @@
|
||||
* Handles LLM chat interactions via /api/llm/chat
|
||||
*/
|
||||
|
||||
import { apiPost } from "./client";
|
||||
import { apiPost, fetchCsrfToken, getCsrfToken } from "./client";
|
||||
import { API_BASE_URL } from "../config";
|
||||
|
||||
export interface ChatMessage {
|
||||
role: "system" | "user" | "assistant";
|
||||
@@ -31,6 +32,19 @@ export interface ChatResponse {
|
||||
evalCount?: number;
|
||||
}
|
||||
|
||||
/**
 * Parsed SSE data chunk from the LLM stream.
 *
 * Each `data:` line of the stream (other than the `[DONE]` sentinel) is
 * JSON-decoded into this shape. All fields are optional because a chunk
 * carries either an error, a token payload, or end-of-stream metadata.
 */
interface SseChunk {
  // Present when the backend reports a failure mid-stream.
  error?: string;
  // Incremental assistant message; `content` holds one streamed token.
  message?: {
    role: string;
    content: string;
  };
  // Name of the model producing the stream, when reported.
  model?: string;
  // True on the final chunk of the stream.
  done?: boolean;
}
|
||||
|
||||
/**
|
||||
* Send a chat message to the LLM
|
||||
*/
|
||||
@@ -39,19 +53,122 @@ export async function sendChatMessage(request: ChatRequest): Promise<ChatRespons
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream a chat message from the LLM (not implemented yet)
|
||||
* TODO: Implement streaming support
|
||||
* Get or refresh the CSRF token for streaming requests.
|
||||
*/
|
||||
async function ensureCsrfTokenForStream(): Promise<string> {
|
||||
const existing = getCsrfToken();
|
||||
if (existing) {
|
||||
return existing;
|
||||
}
|
||||
return fetchCsrfToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream a chat message from the LLM using SSE over fetch.
|
||||
*
|
||||
* The backend accepts stream: true in the request body and responds with
|
||||
* Server-Sent Events:
|
||||
* data: {"message":{"content":"token"},...}\n\n for each token
|
||||
* data: [DONE]\n\n when the stream is complete
|
||||
* data: {"error":"message"}\n\n on error
|
||||
*
|
||||
* @param request - Chat request (stream field will be forced to true)
|
||||
* @param onChunk - Called with each token string as it arrives
|
||||
* @param onComplete - Called when the stream finishes successfully
|
||||
* @param onError - Called if the stream encounters an error
|
||||
* @param signal - Optional AbortSignal for cancellation
|
||||
*/
|
||||
export function streamChatMessage(
|
||||
request: ChatRequest,
|
||||
onChunk: (chunk: string) => void,
|
||||
onComplete: () => void,
|
||||
onError: (error: Error) => void
|
||||
onError: (error: Error) => void,
|
||||
signal?: AbortSignal
|
||||
): void {
|
||||
// Streaming implementation would go here
|
||||
void request;
|
||||
void onChunk;
|
||||
void onComplete;
|
||||
void onError;
|
||||
throw new Error("Streaming not implemented yet");
|
||||
void (async (): Promise<void> => {
|
||||
try {
|
||||
const csrfToken = await ensureCsrfTokenForStream();
|
||||
|
||||
const response = await fetch(`${API_BASE_URL}/api/llm/chat`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
"X-CSRF-Token": csrfToken,
|
||||
},
|
||||
credentials: "include",
|
||||
body: JSON.stringify({ ...request, stream: true }),
|
||||
signal: signal ?? null,
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text().catch(() => response.statusText);
|
||||
throw new Error(`Stream request failed: ${errorText}`);
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error("Response body is not readable");
|
||||
}
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder("utf-8");
|
||||
let buffer = "";
|
||||
|
||||
let readerDone = false;
|
||||
while (!readerDone) {
|
||||
const { done, value } = await reader.read();
|
||||
readerDone = done;
|
||||
if (done) {
|
||||
break;
|
||||
}
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
// SSE messages are separated by double newlines
|
||||
const parts = buffer.split("\n\n");
|
||||
// Keep the last (potentially incomplete) part
|
||||
buffer = parts.pop() ?? "";
|
||||
|
||||
for (const part of parts) {
|
||||
const trimmed = part.trim();
|
||||
if (!trimmed) continue;
|
||||
|
||||
for (const line of trimmed.split("\n")) {
|
||||
if (!line.startsWith("data: ")) continue;
|
||||
|
||||
const data = line.slice("data: ".length).trim();
|
||||
|
||||
if (data === "[DONE]") {
|
||||
onComplete();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(data) as SseChunk;
|
||||
|
||||
if (parsed.error) {
|
||||
throw new Error(parsed.error);
|
||||
}
|
||||
|
||||
if (parsed.message?.content) {
|
||||
onChunk(parsed.message.content);
|
||||
}
|
||||
} catch (parseErr) {
|
||||
if (parseErr instanceof SyntaxError) {
|
||||
continue;
|
||||
}
|
||||
throw parseErr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Natural end of stream without [DONE]
|
||||
onComplete();
|
||||
} catch (err: unknown) {
|
||||
if (err instanceof DOMException && err.name === "AbortError") {
|
||||
return;
|
||||
}
|
||||
onError(err instanceof Error ? err : new Error(String(err)));
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user