- Add Ollama client library (ollama npm package)
- Create LlmService for chat completion and embeddings
- Support streaming responses via Server-Sent Events
- Add configuration via env vars (OLLAMA_HOST, OLLAMA_TIMEOUT)
- Create endpoints: GET /llm/health, GET /llm/models, POST /llm/chat, POST /llm/embed
- Replace old OllamaModule with new LlmModule
- Add comprehensive tests with >85% coverage

Closes #21
21 lines
3.8 KiB
TypeScript
21 lines
3.8 KiB
TypeScript
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
|
|
import { Ollama, Message } from "ollama";
|
|
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto, ChatStreamChunkDto } from "./dto";
|
|
/**
 * Connection settings used by `LlmService` to reach the Ollama backend.
 */
export interface OllamaConfig {
  /** Address of the Ollama server; this is the value handed to the Ollama client. */
  host: string;

  /**
   * Optional request timeout.
   * NOTE(review): the unit is not stated in this file — presumably milliseconds; confirm.
   */
  timeout?: number;
}
|
|
/**
 * Outcome of a health probe against the Ollama backend.
 */
export interface OllamaHealthStatus {
  /** `true` when the probe succeeded, `false` when it failed. */
  healthy: boolean;

  /** Address of the Ollama server that was probed. */
  host: string;

  /** Failure description; only set when the probe failed. */
  error?: string;

  /** Names of the models reported by the server; only set when the probe succeeded. */
  models?: string[];
}
|
|
/**
 * NestJS service that wraps the Ollama client for health checks, model
 * listing, chat completion (plain and streamed) and embeddings.
 *
 * Configuration is read once, at construction time, from the environment:
 * - `OLLAMA_HOST`    — server address, defaults to `http://localhost:11434`
 * - `OLLAMA_TIMEOUT` — integer timeout, defaults to `120000`
 *
 * NOTE(review): the timeout is stored in the config and exposed through
 * `getConfig()`, but it is not passed to the Ollama client anywhere in this
 * class, so it is not enforced on requests here.
 */
@Injectable()
export class LlmService implements OnModuleInit {
  /** Host used when `OLLAMA_HOST` is unset or blank. */
  private static readonly DEFAULT_HOST = "http://localhost:11434";

  /** Timeout used when `OLLAMA_TIMEOUT` is unset or not a valid non-negative integer. */
  private static readonly DEFAULT_TIMEOUT = 120000;

  private readonly logger = new Logger(LlmService.name);

  private client: Ollama;

  private readonly config: OllamaConfig;

  constructor() {
    // `||` is deliberate: a blank OLLAMA_HOST (e.g. `OLLAMA_HOST=` in a .env
    // file) is treated as unset so the configured host is never empty.
    const host = process.env["OLLAMA_HOST"]?.trim() || LlmService.DEFAULT_HOST;

    const rawTimeout = process.env["OLLAMA_TIMEOUT"];
    const parsedTimeout = LlmService.parseTimeout(rawTimeout);
    if (rawTimeout !== undefined && parsedTimeout === undefined) {
      // Previously an unparsable value silently became NaN in the config.
      this.logger.warn(
        `Ignoring invalid OLLAMA_TIMEOUT "${rawTimeout}"; using default ${LlmService.DEFAULT_TIMEOUT}`,
      );
    }

    this.config = { host, timeout: parsedTimeout ?? LlmService.DEFAULT_TIMEOUT };
    this.client = new Ollama({ host: this.config.host });
    this.logger.log("Ollama service initialized");
  }

  /**
   * Probes the backend once at module start-up and logs the outcome.
   * An unhealthy backend only produces a warning; it does not throw.
   */
  async onModuleInit(): Promise<void> {
    const status = await this.checkHealth();
    if (status.healthy) {
      this.logger.log("Ollama healthy");
    } else {
      this.logger.warn("Ollama unhealthy: " + (status.error ?? "unknown"));
    }
  }

  /**
   * Checks whether the backend answers a model-list request.
   *
   * @returns A status object; failures are reported through `healthy: false`
   *          and `error` rather than by throwing.
   */
  async checkHealth(): Promise<OllamaHealthStatus> {
    try {
      const listing = await this.client.list();
      return {
        healthy: true,
        host: this.config.host,
        models: listing.models.map((model) => model.name),
      };
    } catch (error: unknown) {
      return {
        healthy: false,
        host: this.config.host,
        error: LlmService.describeError(error),
      };
    }
  }

  /**
   * Lists the names of the models reported by the backend.
   *
   * @throws ServiceUnavailableException when the list request fails.
   */
  async listModels(): Promise<string[]> {
    try {
      const listing = await this.client.list();
      return listing.models.map((model) => model.name);
    } catch (error: unknown) {
      throw this.toUnavailable("Failed to list models", "Failed to list models", error);
    }
  }

  /**
   * Runs a non-streaming chat completion.
   *
   * @param request Model, messages and optional sampling settings.
   * @returns The assistant message together with the counters returned by the backend.
   * @throws ServiceUnavailableException when the chat request fails.
   */
  async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
    try {
      const response = await this.client.chat({
        model: request.model,
        messages: this.buildMessages(request),
        stream: false,
        options: this.buildOptions(request),
      });
      return {
        model: response.model,
        message: {
          role: response.message.role as "assistant",
          content: response.message.content,
        },
        done: response.done,
        totalDuration: response.total_duration,
        promptEvalCount: response.prompt_eval_count,
        evalCount: response.eval_count,
      };
    } catch (error: unknown) {
      throw this.toUnavailable("Chat failed", "Chat completion failed", error);
    }
  }

  /**
   * Runs a streaming chat completion, yielding one chunk per backend message.
   *
   * @param request Model, messages and optional sampling settings.
   * @throws ServiceUnavailableException when opening or reading the stream fails.
   */
  async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> {
    try {
      const stream = await this.client.chat({
        model: request.model,
        messages: this.buildMessages(request),
        stream: true,
        options: this.buildOptions(request),
      });
      for await (const chunk of stream) {
        yield {
          model: chunk.model,
          message: {
            role: chunk.message.role as "assistant",
            content: chunk.message.content,
          },
          done: chunk.done,
        };
      }
    } catch (error: unknown) {
      throw this.toUnavailable("Stream failed", "Streaming failed", error);
    }
  }

  /**
   * Computes embeddings for the given input.
   *
   * @param request Model, input and truncation mode; truncation is enabled
   *                unless `truncate` is exactly `"none"`.
   * @throws ServiceUnavailableException when the embed request fails.
   */
  async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
    try {
      const response = await this.client.embed({
        model: request.model,
        input: request.input,
        truncate: request.truncate !== "none",
      });
      return {
        model: response.model,
        embeddings: response.embeddings,
        totalDuration: response.total_duration,
      };
    } catch (error: unknown) {
      throw this.toUnavailable("Embed failed", "Embedding failed", error);
    }
  }

  /**
   * Converts the request into the message list sent to the backend.
   * `systemPrompt` is prepended as a system message only when it is set and
   * the request does not already contain a message with role `"system"`.
   */
  private buildMessages(req: ChatRequestDto): Message[] {
    const hasSystemMessage = req.messages.some((m) => m.role === "system");
    const result: Message[] = [];
    if (req.systemPrompt && !hasSystemMessage) {
      result.push({ role: "system", content: req.systemPrompt });
    }
    for (const m of req.messages) {
      result.push({ role: m.role, content: m.content });
    }
    return result;
  }

  /** Maps the request's sampling settings onto the backend's option names. */
  private buildOptions(req: ChatRequestDto): { temperature: ChatRequestDto["temperature"]; num_predict: ChatRequestDto["maxTokens"] } {
    return { temperature: req.temperature, num_predict: req.maxTokens };
  }

  /**
   * Logs a failure and builds the exception to throw for it.
   *
   * @param logPrefix    Prefix of the logged message.
   * @param clientPrefix Prefix of the exception message.
   * @param error        The caught value.
   */
  private toUnavailable(logPrefix: string, clientPrefix: string, error: unknown): ServiceUnavailableException {
    const detail = LlmService.describeError(error);
    this.logger.error(logPrefix + ": " + detail);
    return new ServiceUnavailableException(clientPrefix + ": " + detail);
  }

  /** Returns `error.message` for `Error` instances, otherwise the stringified value. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Parses the raw `OLLAMA_TIMEOUT` value.
   *
   * @returns The parsed non-negative integer, or `undefined` when the value is
   *          missing, not numeric, or negative.
   */
  private static parseTimeout(raw: string | undefined): number | undefined {
    if (raw === undefined) {
      return undefined;
    }
    const value = Number.parseInt(raw, 10);
    return Number.isFinite(value) && value >= 0 ? value : undefined;
  }

  /** Returns a shallow copy of the configuration so callers cannot mutate the service's own object. */
  getConfig(): OllamaConfig {
    return { ...this.config };
  }
}
|