stack/apps/web/src/lib/api/chat.ts

/**
 * Chat API client
 * Handles LLM chat interactions via /api/chat/stream (streaming) and /api/llm/chat (fallback)
 */
import { apiPost, fetchCsrfToken, getCsrfToken } from "./client";
import { API_BASE_URL } from "../config";

export interface ChatMessage {
  role: "system" | "user" | "assistant";
  content: string;
}

export interface ChatRequest {
  model: string;
  messages: ChatMessage[];
  stream?: boolean;
  temperature?: number;
  maxTokens?: number;
  systemPrompt?: string;
}

export interface ChatResponse {
  model: string;
  message: {
    role: "assistant";
    content: string;
  };
  done: boolean;
  totalDuration?: number;
  promptEvalCount?: number;
  evalCount?: number;
}
/**
 * Parsed SSE data chunk from OpenAI-compatible stream
 */
interface OpenAiSseChunk {
  id?: string;
  object?: string;
  created?: number;
  model?: string;
  choices?: {
    index: number;
    delta?: {
      role?: string;
      content?: string;
    };
    finish_reason?: string | null;
  }[];
  error?: string;
}

/**
 * Parsed SSE data chunk from legacy /api/llm/chat stream
 */
interface LegacySseChunk {
  error?: string;
  message?: {
    role: string;
    content: string;
  };
  model?: string;
  done?: boolean;
}

/**
 * Parsed SSE data chunk with simple token format
 */
interface SimpleTokenChunk {
  token?: string;
  done?: boolean;
  error?: string;
}
/**
 * Send a chat message to the LLM (non-streaming fallback)
 * Uses the /api/llm/chat endpoint, which supports both streaming and non-streaming requests
 */
export async function sendChatMessage(request: ChatRequest): Promise<ChatResponse> {
  return apiPost<ChatResponse>("/api/llm/chat", request);
}
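
/**
 * Usage sketch (illustrative, not part of the original module): a single
 * non-streaming round trip via sendChatMessage. The model id "llama3" is a
 * placeholder; use whatever model the backend actually exposes.
 */
export async function exampleAskOnce(prompt: string): Promise<string> {
  const response = await sendChatMessage({
    model: "llama3", // placeholder model id
    messages: [{ role: "user", content: prompt }],
    stream: false,
  });
  return response.message.content;
}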
/**
 * Get or refresh the CSRF token for streaming requests.
 */
async function ensureCsrfTokenForStream(): Promise<string> {
  const existing = getCsrfToken();
  if (existing) {
    return existing;
  }
  return fetchCsrfToken();
}
/**
 * Stream a chat message from the LLM using SSE over fetch.
 *
 * Uses the /api/chat/stream endpoint, which proxies to OpenClaw.
 * The backend responds with Server-Sent Events in one of these formats:
 *
 * OpenAI-compatible format:
 *   data: {"choices":[{"delta":{"content":"token"}}],...}\n\n
 *   data: [DONE]\n\n
 *
 * Legacy format (from /api/llm/chat):
 *   data: {"message":{"content":"token"},...}\n\n
 *   data: [DONE]\n\n
 *
 * Simple token format:
 *   data: {"token":"..."}\n\n
 *   data: {"done":true}\n\n
 *
 * @param request - Chat request (the stream field will be forced to true)
 * @param onChunk - Called with each token string as it arrives
 * @param onComplete - Called when the stream finishes successfully
 * @param onError - Called if the stream encounters an error
 * @param signal - Optional AbortSignal for cancellation
 */
export function streamChatMessage(
  request: ChatRequest,
  onChunk: (chunk: string) => void,
  onComplete: () => void,
  onError: (error: Error) => void,
  signal?: AbortSignal
): void {
  void (async (): Promise<void> => {
    try {
      const csrfToken = await ensureCsrfTokenForStream();
      const response = await fetch(`${API_BASE_URL}/api/chat/stream`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "X-CSRF-Token": csrfToken,
        },
        credentials: "include",
        body: JSON.stringify({ messages: request.messages, stream: true }),
        signal: signal ?? null,
      });

      if (!response.ok) {
        const errorText = await response.text().catch(() => response.statusText);
        throw new Error(`Stream request failed: ${errorText}`);
      }
      if (!response.body) {
        throw new Error("Response body is not readable");
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder("utf-8");
      let buffer = "";
      let readerDone = false;

      while (!readerDone) {
        const { done, value } = await reader.read();
        readerDone = done;
        if (done) {
          break;
        }
        buffer += decoder.decode(value, { stream: true });

        // SSE messages are separated by double newlines
        const parts = buffer.split("\n\n");
        // Keep the last (potentially incomplete) part in the buffer
        buffer = parts.pop() ?? "";

        for (const part of parts) {
          const trimmed = part.trim();
          if (!trimmed) continue;

          // Handle the "event: error" format
          const eventMatch = /^event:\s*(\S+)\n/i.exec(trimmed);
          const dataMatch = /^data:\s*(.+)$/im.exec(trimmed);
          if (eventMatch?.[1] === "error" && dataMatch?.[1]) {
            try {
              const errorData = JSON.parse(dataMatch[1].trim()) as {
                error?: string;
              };
              throw new Error(errorData.error ?? "Stream error occurred");
            } catch (parseErr) {
              if (parseErr instanceof SyntaxError) {
                throw new Error("Stream error occurred");
              }
              throw parseErr;
            }
          }

          // Standard SSE format: data: {...}
          for (const line of trimmed.split("\n")) {
            if (!line.startsWith("data: ")) continue;
            const data = line.slice("data: ".length).trim();
            if (data === "[DONE]") {
              onComplete();
              return;
            }
            try {
              const parsed: unknown = JSON.parse(data);

              // Handle OpenAI format (from /api/chat/stream via OpenClaw)
              const openAiChunk = parsed as OpenAiSseChunk;
              if (openAiChunk.choices?.[0]?.delta?.content) {
                onChunk(openAiChunk.choices[0].delta.content);
                continue;
              }

              // Handle legacy format (from /api/llm/chat)
              const legacyChunk = parsed as LegacySseChunk;
              if (legacyChunk.message?.content) {
                onChunk(legacyChunk.message.content);
                continue;
              }

              // Handle simple token format
              const simpleChunk = parsed as SimpleTokenChunk;
              if (simpleChunk.token) {
                onChunk(simpleChunk.token);
                continue;
              }

              // Handle done flag in simple format
              if (simpleChunk.done === true) {
                onComplete();
                return;
              }

              // Handle error in any format
              const error = openAiChunk.error ?? legacyChunk.error ?? simpleChunk.error;
              if (error) {
                throw new Error(error);
              }
            } catch (parseErr) {
              if (parseErr instanceof SyntaxError) {
                // Ignore malformed JSON chunks and keep reading
                continue;
              }
              throw parseErr;
            }
          }
        }
      }

      // Natural end of stream without [DONE] or a done flag
      onComplete();
    } catch (err: unknown) {
      if (err instanceof DOMException && err.name === "AbortError") {
        // Cancelled by the caller; not an error
        return;
      }
      onError(err instanceof Error ? err : new Error(String(err)));
    }
  })();
}
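
/**
 * Usage sketch (illustrative, not part of the original module): accumulate a
 * streamed reply and expose cancellation. The model id "llama3" is a
 * placeholder, and the callbacks here only log; a real caller would update
 * UI state instead.
 */
export function exampleStreamToConsole(prompt: string): AbortController {
  const controller = new AbortController();
  let reply = "";
  streamChatMessage(
    { model: "llama3", messages: [{ role: "user", content: prompt }] },
    (chunk) => {
      reply += chunk; // append each token as it arrives
    },
    () => {
      console.log("stream complete:", reply);
    },
    (err) => {
      console.error("stream failed:", err.message);
    },
    controller.signal // call controller.abort() to cancel mid-stream
  );
  return controller;
}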