Merge branch 'develop' into feature/know-link-resolution

2026-01-30 01:42:44 +00:00
18 changed files with 563 additions and 46 deletions

View File

@@ -15,7 +15,7 @@ import { LayoutsModule } from "./layouts/layouts.module";
import { KnowledgeModule } from "./knowledge/knowledge.module";
import { UsersModule } from "./users/users.module";
import { WebSocketModule } from "./websocket/websocket.module";
-import { OllamaModule } from "./ollama/ollama.module";
+import { LlmModule } from "./llm/llm.module";
@Module({
imports: [
@@ -33,7 +33,7 @@ import { OllamaModule } from "./ollama/ollama.module";
KnowledgeModule,
UsersModule,
WebSocketModule,
-OllamaModule,
+LlmModule,
],
controllers: [AppController],
providers: [AppService],

View File

@@ -0,0 +1,7 @@
import { IsArray, IsString, IsOptional, IsBoolean, IsNumber, ValidateNested, IsIn } from "class-validator";
import { Type } from "class-transformer";
export type ChatRole = "system" | "user" | "assistant";
export class ChatMessageDto { @IsString() @IsIn(["system", "user", "assistant"]) role!: ChatRole; @IsString() content!: string; }
export class ChatRequestDto { @IsString() model!: string; @IsArray() @ValidateNested({ each: true }) @Type(() => ChatMessageDto) messages!: ChatMessageDto[]; @IsOptional() @IsBoolean() stream?: boolean; @IsOptional() @IsNumber() temperature?: number; @IsOptional() @IsNumber() maxTokens?: number; @IsOptional() @IsString() systemPrompt?: string; }
export interface ChatResponseDto { model: string; message: { role: ChatRole; content: string }; done: boolean; totalDuration?: number; promptEvalCount?: number; evalCount?: number; }
export interface ChatStreamChunkDto { model: string; message: { role: ChatRole; content: string }; done: boolean; }
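For reference, a request body that these validators accept might look like the sketch below; the field values are placeholders, not part of this commit.

// Sketch of a payload accepted by ChatRequestDto validation; values are placeholders.
import type { ChatRequestDto } from "./chat.dto"; // path assumed
const exampleChatRequest: ChatRequestDto = {
  model: "llama3.2",
  messages: [{ role: "user", content: "Summarize this note." }],
  stream: false,          // optional; true switches the controller to SSE streaming
  temperature: 0.2,       // optional sampling temperature
  maxTokens: 512,         // optional generation cap (forwarded as Ollama's num_predict)
  systemPrompt: "You are a concise assistant.", // optional; prepended only if no system message exists
};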

View File

@@ -0,0 +1,3 @@
import { IsArray, IsString, IsOptional } from "class-validator";
export class EmbedRequestDto { @IsString() model!: string; @IsArray() @IsString({ each: true }) input!: string[]; @IsOptional() @IsString() truncate?: "start" | "end" | "none"; }
export interface EmbedResponseDto { model: string; embeddings: number[][]; totalDuration?: number; }
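Similarly, a minimal embedding request that passes this DTO's validation; the values are placeholders, not part of this diff.

// Sketch of an EmbedRequestDto payload; model name and inputs are placeholders.
import type { EmbedRequestDto } from "./embed.dto"; // path assumed
const exampleEmbedRequest: EmbedRequestDto = {
  model: "llama3.2",
  input: ["first passage to embed", "second passage to embed"],
  truncate: "end", // optional; the service maps anything other than "none" to Ollama's truncate: true
};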

View File

@@ -0,0 +1,2 @@
export * from "./chat.dto";
export * from "./embed.dto";

View File

@@ -0,0 +1,4 @@
export * from "./llm.module";
export * from "./llm.service";
export * from "./llm.controller";
export * from "./dto";

View File

@@ -0,0 +1,15 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { LlmController } from "./llm.controller";
import { LlmService } from "./llm.service";
import type { ChatRequestDto, EmbedRequestDto } from "./dto";
describe("LlmController", () => {
let controller: LlmController;
const mockService = { checkHealth: vi.fn(), listModels: vi.fn(), chat: vi.fn(), chatStream: vi.fn(), embed: vi.fn() };
beforeEach(async () => { vi.clearAllMocks(); controller = (await Test.createTestingModule({ controllers: [LlmController], providers: [{ provide: LlmService, useValue: mockService }] }).compile()).get(LlmController); });
it("should be defined", () => { expect(controller).toBeDefined(); });
describe("health", () => { it("should return status", async () => { const s = { healthy: true, host: "h" }; mockService.checkHealth.mockResolvedValue(s); expect(await controller.health()).toEqual(s); }); });
describe("listModels", () => { it("should return models", async () => { mockService.listModels.mockResolvedValue(["m1"]); expect(await controller.listModels()).toEqual({ models: ["m1"] }); }); });
describe("chat", () => { const req: ChatRequestDto = { model: "m", messages: [{ role: "user", content: "x" }] }; const res = { setHeader: vi.fn(), write: vi.fn(), end: vi.fn() }; it("should return response", async () => { const r = { model: "m", message: { role: "assistant", content: "y" }, done: true }; mockService.chat.mockResolvedValue(r); expect(await controller.chat(req, res as any)).toEqual(r); }); it("should stream", async () => { mockService.chatStream.mockReturnValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); await controller.chat({ ...req, stream: true }, res as any); expect(res.setHeader).toHaveBeenCalled(); expect(res.end).toHaveBeenCalled(); }); });
describe("embed", () => { it("should return embeddings", async () => { const r = { model: "m", embeddings: [[0.1]] }; mockService.embed.mockResolvedValue(r); expect(await controller.embed({ model: "m", input: ["x"] })).toEqual(r); }); });
});

View File

@@ -0,0 +1,12 @@
import { Controller, Post, Get, Body, Res, HttpCode, HttpStatus } from "@nestjs/common";
import { Response } from "express";
import { LlmService, OllamaHealthStatus } from "./llm.service";
import { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
@Controller("llm")
export class LlmController {
constructor(private readonly llmService: LlmService) {}
@Get("health") async health(): Promise<OllamaHealthStatus> { return this.llmService.checkHealth(); }
@Get("models") async listModels(): Promise<{ models: string[] }> { return { models: await this.llmService.listModels() }; }
@Post("chat") @HttpCode(HttpStatus.OK) async chat(@Body() req: ChatRequestDto, @Res({ passthrough: true }) res: Response): Promise<ChatResponseDto | void> { if (req.stream === true) { res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); try { for await (const c of this.llmService.chatStream(req)) res.write("data: " + JSON.stringify(c) + "\n\n"); res.write("data: [DONE]\n\n"); res.end(); } catch (e: unknown) { res.write("data: " + JSON.stringify({ error: e instanceof Error ? e.message : String(e) }) + "\n\n"); res.end(); } return; } return this.llmService.chat(req); }
@Post("embed") @HttpCode(HttpStatus.OK) async embed(@Body() req: EmbedRequestDto): Promise<EmbedResponseDto> { return this.llmService.embed(req); }
}
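The streaming branch emits server-sent events and terminates the stream with a data: [DONE] line. A minimal Node 18+ client sketch for that branch follows; the base URL and model name are assumptions, not part of this diff.

// Hypothetical SSE consumer for POST /llm/chat with stream: true; baseUrl and model are placeholders.
async function streamChat(baseUrl: string, prompt: string): Promise<string> {
  const res = await fetch(`${baseUrl}/llm/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: "llama3.2", stream: true, messages: [{ role: "user", content: prompt }] }),
  });
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const events = buffer.split("\n\n");
    buffer = events.pop() ?? ""; // keep any partial event for the next read
    for (const evt of events) {
      const payload = evt.replace(/^data: /, "");
      if (payload === "[DONE]") return text;
      text += JSON.parse(payload).message?.content ?? "";
    }
  }
  return text;
}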

View File

@@ -0,0 +1,5 @@
import { Module } from "@nestjs/common";
import { LlmController } from "./llm.controller";
import { LlmService } from "./llm.service";
@Module({ controllers: [LlmController], providers: [LlmService], exports: [LlmService] })
export class LlmModule {}
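Because LlmModule exports LlmService, any feature module can inject the service after adding LlmModule to its imports. A minimal sketch follows; the module name, paths, and model are illustrative, not part of this commit.

// Illustrative consumer module; names, paths, and the model are assumptions.
import { Injectable, Module } from "@nestjs/common";
import { LlmModule } from "../llm/llm.module";
import { LlmService } from "../llm/llm.service";

@Injectable()
export class LinkResolverService {
  constructor(private readonly llm: LlmService) {}

  // Embed a batch of snippets for similarity search; "llama3.2" is a placeholder model.
  async embedSnippets(snippets: string[]): Promise<number[][]> {
    const res = await this.llm.embed({ model: "llama3.2", input: snippets });
    return res.embeddings;
  }
}

@Module({ imports: [LlmModule], providers: [LinkResolverService], exports: [LinkResolverService] })
export class LinkResolverModule {}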

View File

@@ -0,0 +1,19 @@
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { ServiceUnavailableException } from "@nestjs/common";
import { LlmService } from "./llm.service";
import type { ChatRequestDto, EmbedRequestDto } from "./dto";
const mockList = vi.fn(); const mockChat = vi.fn(); const mockEmbed = vi.fn();
vi.mock("ollama", () => ({ Ollama: class { list = mockList; chat = mockChat; embed = mockEmbed; } }));
describe("LlmService", () => {
let service: LlmService;
const originalEnv = { ...process.env };
beforeEach(async () => { process.env = { ...originalEnv, OLLAMA_HOST: "http://test:11434", OLLAMA_TIMEOUT: "60000" }; vi.clearAllMocks(); service = (await Test.createTestingModule({ providers: [LlmService] }).compile()).get(LlmService); });
afterEach(() => { process.env = originalEnv; });
it("should be defined", () => { expect(service).toBeDefined(); });
describe("checkHealth", () => { it("should return healthy", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); const r = await service.checkHealth(); expect(r.healthy).toBe(true); }); it("should return unhealthy on error", async () => { mockList.mockRejectedValue(new Error("fail")); const r = await service.checkHealth(); expect(r.healthy).toBe(false); }); });
describe("listModels", () => { it("should return models", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); expect(await service.listModels()).toEqual(["llama3.2"]); }); it("should throw on error", async () => { mockList.mockRejectedValue(new Error("fail")); await expect(service.listModels()).rejects.toThrow(ServiceUnavailableException); }); });
describe("chat", () => { const req: ChatRequestDto = { model: "llama3.2", messages: [{ role: "user", content: "Hi" }] }; it("should return response", async () => { mockChat.mockResolvedValue({ model: "llama3.2", message: { role: "assistant", content: "Hello" }, done: true }); const r = await service.chat(req); expect(r.message.content).toBe("Hello"); }); it("should throw on error", async () => { mockChat.mockRejectedValue(new Error("fail")); await expect(service.chat(req)).rejects.toThrow(ServiceUnavailableException); }); });
describe("chatStream", () => { it("should yield chunks", async () => { mockChat.mockResolvedValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); const chunks = []; for await (const c of service.chatStream({ model: "m", messages: [{ role: "user", content: "x" }], stream: true })) chunks.push(c); expect(chunks.length).toBe(1); }); });
describe("embed", () => { it("should return embeddings", async () => { mockEmbed.mockResolvedValue({ model: "m", embeddings: [[0.1]] }); const r = await service.embed({ model: "m", input: ["x"] }); expect(r.embeddings).toEqual([[0.1]]); }); });
});
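One detail worth noting in this spec's setup: vi.mock factories are hoisted above the top-level const declarations, but the factory here only references mockList, mockChat, and mockEmbed inside class field initializers, which are not evaluated until the mocked Ollama class is instantiated in the service constructor during beforeEach, after the consts exist. An equivalent sketch that makes this ordering explicit with vi.hoisted (not part of this commit):

// Equivalent setup using vi.hoisted; illustrative only.
const mocks = vi.hoisted(() => ({ list: vi.fn(), chat: vi.fn(), embed: vi.fn() }));
vi.mock("ollama", () => ({
  Ollama: class {
    list = mocks.list;
    chat = mocks.chat;
    embed = mocks.embed;
  },
}));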

View File

@@ -0,0 +1,20 @@
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
import { Ollama, Message } from "ollama";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto, ChatStreamChunkDto } from "./dto";
export interface OllamaConfig { host: string; timeout?: number; }
export interface OllamaHealthStatus { healthy: boolean; host: string; error?: string; models?: string[]; }
@Injectable()
export class LlmService implements OnModuleInit {
private readonly logger = new Logger(LlmService.name);
private client: Ollama;
private readonly config: OllamaConfig;
constructor() { this.config = { host: process.env["OLLAMA_HOST"] ?? "http://localhost:11434", timeout: parseInt(process.env["OLLAMA_TIMEOUT"] ?? "120000", 10) }; this.client = new Ollama({ host: this.config.host }); this.logger.log("Ollama service initialized"); }
async onModuleInit(): Promise<void> { const h = await this.checkHealth(); if (h.healthy) this.logger.log("Ollama healthy"); else this.logger.warn("Ollama unhealthy: " + (h.error ?? "unknown")); }
async checkHealth(): Promise<OllamaHealthStatus> { try { const r = await this.client.list(); return { healthy: true, host: this.config.host, models: r.models.map(m => m.name) }; } catch (e: unknown) { return { healthy: false, host: this.config.host, error: e instanceof Error ? e.message : String(e) }; } }
async listModels(): Promise<string[]> { try { return (await this.client.list()).models.map(m => m.name); } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Failed to list models: " + msg); throw new ServiceUnavailableException("Failed to list models: " + msg); } }
async chat(request: ChatRequestDto): Promise<ChatResponseDto> { try { const msgs = this.buildMessages(request); const r = await this.client.chat({ model: request.model, messages: msgs, stream: false, options: { temperature: request.temperature, num_predict: request.maxTokens } }); return { model: r.model, message: { role: r.message.role as "assistant", content: r.message.content }, done: r.done, totalDuration: r.total_duration, promptEvalCount: r.prompt_eval_count, evalCount: r.eval_count }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Chat failed: " + msg); throw new ServiceUnavailableException("Chat completion failed: " + msg); } }
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> { try { const stream = await this.client.chat({ model: request.model, messages: this.buildMessages(request), stream: true, options: { temperature: request.temperature, num_predict: request.maxTokens } }); for await (const c of stream) yield { model: c.model, message: { role: c.message.role as "assistant", content: c.message.content }, done: c.done }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Stream failed: " + msg); throw new ServiceUnavailableException("Streaming failed: " + msg); } }
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> { try { const r = await this.client.embed({ model: request.model, input: request.input, truncate: request.truncate === "none" ? false : true }); return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Embed failed: " + msg); throw new ServiceUnavailableException("Embedding failed: " + msg); } }
private buildMessages(req: ChatRequestDto): Message[] { const msgs: Message[] = []; if (req.systemPrompt && !req.messages.some(m => m.role === "system")) msgs.push({ role: "system", content: req.systemPrompt }); for (const m of req.messages) msgs.push({ role: m.role, content: m.content }); return msgs; }
getConfig(): OllamaConfig { return { ...this.config }; }
}
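Outside the controller's SSE path, chatStream can also be consumed directly by another provider. A minimal sketch, assuming an injected LlmService instance and a placeholder model name; note the service falls back to http://localhost:11434 and a 120000 ms timeout when OLLAMA_HOST and OLLAMA_TIMEOUT are unset.

// Sketch of server-side consumption of chatStream; the model name is a placeholder.
import { LlmService } from "./llm.service";

export async function collectStreamedReply(llm: LlmService, prompt: string): Promise<string> {
  let text = "";
  for await (const chunk of llm.chatStream({
    model: "llama3.2",
    messages: [{ role: "user", content: prompt }],
    stream: true,
  })) {
    text += chunk.message.content; // concatenate incremental tokens as they arrive
  }
  return text;
}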