feat(#21): implement ollama integration

- Add Ollama client library (ollama npm package)
- Create LlmService for chat completion and embeddings (sketched below)
- Support streaming responses via Server-Sent Events
- Add configuration via env vars (OLLAMA_HOST, OLLAMA_TIMEOUT)
- Create endpoints: GET /llm/health, GET /llm/models, POST /llm/chat, POST /llm/embed
- Replace old OllamaModule with new LlmModule
- Add comprehensive tests with >85% coverage

Closes #21
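
The LlmService wired into the controller below ships in this commit but is not part of the hunk shown. As a rough sketch of how such a service might wrap the ollama npm package — the method names come from the controller, while the internals, DTO shapes, and host fallback are assumptions, and OLLAMA_TIMEOUT handling is elided:

// Hypothetical sketch of llm.service.ts (not the file from this commit).
import { Injectable } from "@nestjs/common";
import { Ollama } from "ollama";

export interface OllamaHealthStatus {
  healthy: boolean;
  host: string;
}

@Injectable()
export class LlmService {
  // OLLAMA_HOST falls back to Ollama's default local endpoint.
  private readonly host = process.env.OLLAMA_HOST ?? "http://127.0.0.1:11434";
  private readonly ollama = new Ollama({ host: this.host });

  async checkHealth(): Promise<OllamaHealthStatus> {
    try {
      await this.ollama.list(); // any successful round trip means the server is up
      return { healthy: true, host: this.host };
    } catch {
      return { healthy: false, host: this.host };
    }
  }

  async listModels(): Promise<string[]> {
    const { models } = await this.ollama.list();
    return models.map((m) => m.name);
  }

  // Non-streaming chat: resolves once the full completion is ready.
  async chat(req: { model: string; messages: { role: string; content: string }[] }) {
    return this.ollama.chat({ model: req.model, messages: req.messages });
  }

  // Streaming chat: yields partial responses as the model produces them.
  async *chatStream(req: { model: string; messages: { role: string; content: string }[] }) {
    const stream = await this.ollama.chat({ model: req.model, messages: req.messages, stream: true });
    for await (const part of stream) yield part;
  }

  async embed(req: { model: string; input: string | string[] }) {
    return this.ollama.embed({ model: req.model, input: req.input });
  }
}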

@@ -0,0 +1,58 @@
import { Controller, Post, Get, Body, Res, HttpCode, HttpStatus } from "@nestjs/common";
import { Response } from "express";
import { LlmService, OllamaHealthStatus } from "./llm.service";
import { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";

@Controller("llm")
export class LlmController {
  constructor(private readonly llmService: LlmService) {}

  // Liveness check against the configured Ollama host.
  @Get("health")
  async health(): Promise<OllamaHealthStatus> {
    return this.llmService.checkHealth();
  }

  // Names of the models available on the Ollama host.
  @Get("models")
  async listModels(): Promise<{ models: string[] }> {
    return { models: await this.llmService.listModels() };
  }

  // Chat completion. With `stream: true` the response is written out as
  // Server-Sent Events; otherwise the returned JSON body is serialized by
  // Nest (passthrough mode keeps that default for the non-streaming branch).
  @Post("chat")
  @HttpCode(HttpStatus.OK)
  async chat(
    @Body() req: ChatRequestDto,
    @Res({ passthrough: true }) res: Response,
  ): Promise<ChatResponseDto | void> {
    if (req.stream === true) {
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");
      res.setHeader("X-Accel-Buffering", "no"); // stop nginx from buffering the stream
      try {
        for await (const chunk of this.llmService.chatStream(req)) {
          res.write(`data: ${JSON.stringify(chunk)}\n\n`);
        }
        res.write("data: [DONE]\n\n");
        res.end();
      } catch (e: unknown) {
        // Report the failure as a final SSE frame rather than dropping the connection.
        res.write(`data: ${JSON.stringify({ error: e instanceof Error ? e.message : String(e) })}\n\n`);
        res.end();
      }
      return;
    }
    return this.llmService.chat(req);
  }

  // Embedding generation via Ollama.
  @Post("embed")
  @HttpCode(HttpStatus.OK)
  async embed(@Body() req: EmbedRequestDto): Promise<EmbedResponseDto> {
    return this.llmService.embed(req);
  }
}
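
And one way a client might consume the streaming branch of POST /llm/chat — the data:/[DONE] framing mirrors the controller above, while the base URL, model name, and chunk shape (message.content, as Ollama's chat chunks expose it) are assumptions:

// Hypothetical client for the SSE endpoint; assumes Node 18+ (global fetch).
async function streamChat(baseUrl: string): Promise<void> {
  const res = await fetch(`${baseUrl}/llm/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "llama3", // placeholder model name
      stream: true,
      messages: [{ role: "user", content: "Hello!" }],
    }),
  });
  if (!res.ok || !res.body) throw new Error(`HTTP ${res.status}`);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    // SSE frames are separated by a blank line.
    let sep: number;
    while ((sep = buffer.indexOf("\n\n")) !== -1) {
      const frame = buffer.slice(0, sep);
      buffer = buffer.slice(sep + 2);
      if (!frame.startsWith("data: ")) continue;
      const payload = frame.slice("data: ".length);
      if (payload === "[DONE]") return; // end-of-stream sentinel from the controller
      const chunk = JSON.parse(payload);
      if (chunk.error) throw new Error(chunk.error); // error frame from the catch branch
      process.stdout.write(chunk.message?.content ?? "");
    }
  }
}

streamChat("http://localhost:3000").catch(console.error);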