Merge branch 'develop' into feature/know-link-resolution

2026-01-30 01:42:44 +00:00
18 changed files with 563 additions and 46 deletions

View File

@@ -15,7 +15,7 @@ import { LayoutsModule } from "./layouts/layouts.module";
import { KnowledgeModule } from "./knowledge/knowledge.module";
import { UsersModule } from "./users/users.module";
import { WebSocketModule } from "./websocket/websocket.module";
-import { OllamaModule } from "./ollama/ollama.module";
+import { LlmModule } from "./llm/llm.module";
@Module({
imports: [
@@ -33,7 +33,7 @@ import { OllamaModule } from "./ollama/ollama.module";
KnowledgeModule,
UsersModule,
WebSocketModule,
-OllamaModule,
+LlmModule,
],
controllers: [AppController],
providers: [AppService],

View File

@@ -0,0 +1,7 @@
import { IsArray, IsString, IsOptional, IsBoolean, IsNumber, ValidateNested, IsIn } from "class-validator";
import { Type } from "class-transformer";
export type ChatRole = "system" | "user" | "assistant";
export class ChatMessageDto { @IsString() @IsIn(["system", "user", "assistant"]) role!: ChatRole; @IsString() content!: string; }
export class ChatRequestDto { @IsString() model!: string; @IsArray() @ValidateNested({ each: true }) @Type(() => ChatMessageDto) messages!: ChatMessageDto[]; @IsOptional() @IsBoolean() stream?: boolean; @IsOptional() @IsNumber() temperature?: number; @IsOptional() @IsNumber() maxTokens?: number; @IsOptional() @IsString() systemPrompt?: string; }
export interface ChatResponseDto { model: string; message: { role: ChatRole; content: string }; done: boolean; totalDuration?: number; promptEvalCount?: number; evalCount?: number; }
export interface ChatStreamChunkDto { model: string; message: { role: ChatRole; content: string }; done: boolean; }
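For reference, a request body that these validators accept might look like the sketch below; the field values are placeholders, not part of this commit.

// Sketch of a payload accepted by ChatRequestDto validation; values are placeholders.
import type { ChatRequestDto } from "./chat.dto"; // path assumed
const exampleChatRequest: ChatRequestDto = {
  model: "llama3.2",
  messages: [{ role: "user", content: "Summarize this note." }],
  stream: false,          // optional; true switches the controller to SSE streaming
  temperature: 0.2,       // optional sampling temperature
  maxTokens: 512,         // optional generation cap (forwarded as Ollama's num_predict)
  systemPrompt: "You are a concise assistant.", // optional; prepended only if no system message exists
};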

View File

@@ -0,0 +1,3 @@
import { IsArray, IsString, IsOptional } from "class-validator";
export class EmbedRequestDto { @IsString() model!: string; @IsArray() @IsString({ each: true }) input!: string[]; @IsOptional() @IsString() truncate?: "start" | "end" | "none"; }
export interface EmbedResponseDto { model: string; embeddings: number[][]; totalDuration?: number; }
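Similarly, a minimal embedding request that passes this DTO's validation; the values are placeholders, not part of this diff.

// Sketch of an EmbedRequestDto payload; model name and inputs are placeholders.
import type { EmbedRequestDto } from "./embed.dto"; // path assumed
const exampleEmbedRequest: EmbedRequestDto = {
  model: "llama3.2",
  input: ["first passage to embed", "second passage to embed"],
  truncate: "end", // optional; the service maps anything other than "none" to Ollama's truncate: true
};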

View File

@@ -0,0 +1,2 @@
export * from "./chat.dto";
export * from "./embed.dto";

View File

@@ -0,0 +1,4 @@
export * from "./llm.module";
export * from "./llm.service";
export * from "./llm.controller";
export * from "./dto";

View File

@@ -0,0 +1,15 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { LlmController } from "./llm.controller";
import { LlmService } from "./llm.service";
import type { ChatRequestDto, EmbedRequestDto } from "./dto";
describe("LlmController", () => {
let controller: LlmController;
const mockService = { checkHealth: vi.fn(), listModels: vi.fn(), chat: vi.fn(), chatStream: vi.fn(), embed: vi.fn() };
beforeEach(async () => { vi.clearAllMocks(); controller = (await Test.createTestingModule({ controllers: [LlmController], providers: [{ provide: LlmService, useValue: mockService }] }).compile()).get(LlmController); });
it("should be defined", () => { expect(controller).toBeDefined(); });
describe("health", () => { it("should return status", async () => { const s = { healthy: true, host: "h" }; mockService.checkHealth.mockResolvedValue(s); expect(await controller.health()).toEqual(s); }); });
describe("listModels", () => { it("should return models", async () => { mockService.listModels.mockResolvedValue(["m1"]); expect(await controller.listModels()).toEqual({ models: ["m1"] }); }); });
describe("chat", () => { const req: ChatRequestDto = { model: "m", messages: [{ role: "user", content: "x" }] }; const res = { setHeader: vi.fn(), write: vi.fn(), end: vi.fn() }; it("should return response", async () => { const r = { model: "m", message: { role: "assistant", content: "y" }, done: true }; mockService.chat.mockResolvedValue(r); expect(await controller.chat(req, res as any)).toEqual(r); }); it("should stream", async () => { mockService.chatStream.mockReturnValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); await controller.chat({ ...req, stream: true }, res as any); expect(res.setHeader).toHaveBeenCalled(); expect(res.end).toHaveBeenCalled(); }); });
describe("embed", () => { it("should return embeddings", async () => { const r = { model: "m", embeddings: [[0.1]] }; mockService.embed.mockResolvedValue(r); expect(await controller.embed({ model: "m", input: ["x"] })).toEqual(r); }); });
});

View File

@@ -0,0 +1,12 @@
import { Controller, Post, Get, Body, Res, HttpCode, HttpStatus } from "@nestjs/common";
import { Response } from "express";
import { LlmService, OllamaHealthStatus } from "./llm.service";
import { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
@Controller("llm")
export class LlmController {
constructor(private readonly llmService: LlmService) {}
@Get("health") async health(): Promise<OllamaHealthStatus> { return this.llmService.checkHealth(); }
@Get("models") async listModels(): Promise<{ models: string[] }> { return { models: await this.llmService.listModels() }; }
@Post("chat") @HttpCode(HttpStatus.OK) async chat(@Body() req: ChatRequestDto, @Res({ passthrough: true }) res: Response): Promise<ChatResponseDto | void> { if (req.stream === true) { res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); try { for await (const c of this.llmService.chatStream(req)) res.write("data: " + JSON.stringify(c) + "\n\n"); res.write("data: [DONE]\n\n"); res.end(); } catch (e: unknown) { res.write("data: " + JSON.stringify({ error: e instanceof Error ? e.message : String(e) }) + "\n\n"); res.end(); } return; } return this.llmService.chat(req); }
@Post("embed") @HttpCode(HttpStatus.OK) async embed(@Body() req: EmbedRequestDto): Promise<EmbedResponseDto> { return this.llmService.embed(req); }
}
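The streaming branch emits server-sent events and terminates the stream with a data: [DONE] line. A minimal Node 18+ client sketch for that branch follows; the base URL and model name are assumptions, not part of this diff.

// Hypothetical SSE consumer for POST /llm/chat with stream: true; baseUrl and model are placeholders.
async function streamChat(baseUrl: string, prompt: string): Promise<string> {
  const res = await fetch(`${baseUrl}/llm/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: "llama3.2", stream: true, messages: [{ role: "user", content: prompt }] }),
  });
  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let text = "";
  for (;;) {
    const { value, done } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const events = buffer.split("\n\n");
    buffer = events.pop() ?? ""; // keep any partial event for the next read
    for (const evt of events) {
      const payload = evt.replace(/^data: /, "");
      if (payload === "[DONE]") return text;
      text += JSON.parse(payload).message?.content ?? "";
    }
  }
  return text;
}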

View File

@@ -0,0 +1,5 @@
import { Module } from "@nestjs/common";
import { LlmController } from "./llm.controller";
import { LlmService } from "./llm.service";
@Module({ controllers: [LlmController], providers: [LlmService], exports: [LlmService] })
export class LlmModule {}
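Because LlmModule exports LlmService, any feature module can inject the service after adding LlmModule to its imports. A minimal sketch follows; the module name, paths, and model are illustrative, not part of this commit.

// Illustrative consumer module; names, paths, and the model are assumptions.
import { Injectable, Module } from "@nestjs/common";
import { LlmModule } from "../llm/llm.module";
import { LlmService } from "../llm/llm.service";

@Injectable()
export class LinkResolverService {
  constructor(private readonly llm: LlmService) {}

  // Embed a batch of snippets for similarity search; "llama3.2" is a placeholder model.
  async embedSnippets(snippets: string[]): Promise<number[][]> {
    const res = await this.llm.embed({ model: "llama3.2", input: snippets });
    return res.embeddings;
  }
}

@Module({ imports: [LlmModule], providers: [LinkResolverService], exports: [LinkResolverService] })
export class LinkResolverModule {}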

View File

@@ -0,0 +1,19 @@
import { describe, it, expect, beforeEach, vi, afterEach } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { ServiceUnavailableException } from "@nestjs/common";
import { LlmService } from "./llm.service";
import type { ChatRequestDto, EmbedRequestDto } from "./dto";
const mockList = vi.fn(); const mockChat = vi.fn(); const mockEmbed = vi.fn();
vi.mock("ollama", () => ({ Ollama: class { list = mockList; chat = mockChat; embed = mockEmbed; } }));
describe("LlmService", () => {
let service: LlmService;
const originalEnv = { ...process.env };
beforeEach(async () => { process.env = { ...originalEnv, OLLAMA_HOST: "http://test:11434", OLLAMA_TIMEOUT: "60000" }; vi.clearAllMocks(); service = (await Test.createTestingModule({ providers: [LlmService] }).compile()).get(LlmService); });
afterEach(() => { process.env = originalEnv; });
it("should be defined", () => { expect(service).toBeDefined(); });
describe("checkHealth", () => { it("should return healthy", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); const r = await service.checkHealth(); expect(r.healthy).toBe(true); }); it("should return unhealthy on error", async () => { mockList.mockRejectedValue(new Error("fail")); const r = await service.checkHealth(); expect(r.healthy).toBe(false); }); });
describe("listModels", () => { it("should return models", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); expect(await service.listModels()).toEqual(["llama3.2"]); }); it("should throw on error", async () => { mockList.mockRejectedValue(new Error("fail")); await expect(service.listModels()).rejects.toThrow(ServiceUnavailableException); }); });
describe("chat", () => { const req: ChatRequestDto = { model: "llama3.2", messages: [{ role: "user", content: "Hi" }] }; it("should return response", async () => { mockChat.mockResolvedValue({ model: "llama3.2", message: { role: "assistant", content: "Hello" }, done: true }); const r = await service.chat(req); expect(r.message.content).toBe("Hello"); }); it("should throw on error", async () => { mockChat.mockRejectedValue(new Error("fail")); await expect(service.chat(req)).rejects.toThrow(ServiceUnavailableException); }); });
describe("chatStream", () => { it("should yield chunks", async () => { mockChat.mockResolvedValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); const chunks = []; for await (const c of service.chatStream({ model: "m", messages: [{ role: "user", content: "x" }], stream: true })) chunks.push(c); expect(chunks.length).toBe(1); }); });
describe("embed", () => { it("should return embeddings", async () => { mockEmbed.mockResolvedValue({ model: "m", embeddings: [[0.1]] }); const r = await service.embed({ model: "m", input: ["x"] }); expect(r.embeddings).toEqual([[0.1]]); }); });
});
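One detail worth noting in this spec's setup: vi.mock factories are hoisted above the top-level const declarations, but the factory here only references mockList, mockChat, and mockEmbed inside class field initializers, which are not evaluated until the mocked Ollama class is instantiated in the service constructor during beforeEach, after the consts exist. An equivalent sketch that makes this ordering explicit with vi.hoisted (not part of this commit):

// Equivalent setup using vi.hoisted; illustrative only.
const mocks = vi.hoisted(() => ({ list: vi.fn(), chat: vi.fn(), embed: vi.fn() }));
vi.mock("ollama", () => ({
  Ollama: class {
    list = mocks.list;
    chat = mocks.chat;
    embed = mocks.embed;
  },
}));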

View File

@@ -0,0 +1,20 @@
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
import { Ollama, Message } from "ollama";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto, ChatStreamChunkDto } from "./dto";
export interface OllamaConfig { host: string; timeout?: number; }
export interface OllamaHealthStatus { healthy: boolean; host: string; error?: string; models?: string[]; }
@Injectable()
export class LlmService implements OnModuleInit {
private readonly logger = new Logger(LlmService.name);
private client: Ollama;
private readonly config: OllamaConfig;
constructor() { this.config = { host: process.env["OLLAMA_HOST"] ?? "http://localhost:11434", timeout: parseInt(process.env["OLLAMA_TIMEOUT"] ?? "120000", 10) }; this.client = new Ollama({ host: this.config.host }); this.logger.log("Ollama service initialized"); }
async onModuleInit(): Promise<void> { const h = await this.checkHealth(); if (h.healthy) this.logger.log("Ollama healthy"); else this.logger.warn("Ollama unhealthy: " + (h.error ?? "unknown")); }
async checkHealth(): Promise<OllamaHealthStatus> { try { const r = await this.client.list(); return { healthy: true, host: this.config.host, models: r.models.map(m => m.name) }; } catch (e: unknown) { return { healthy: false, host: this.config.host, error: e instanceof Error ? e.message : String(e) }; } }
async listModels(): Promise<string[]> { try { return (await this.client.list()).models.map(m => m.name); } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Failed to list models: " + msg); throw new ServiceUnavailableException("Failed to list models: " + msg); } }
async chat(request: ChatRequestDto): Promise<ChatResponseDto> { try { const msgs = this.buildMessages(request); const r = await this.client.chat({ model: request.model, messages: msgs, stream: false, options: { temperature: request.temperature, num_predict: request.maxTokens } }); return { model: r.model, message: { role: r.message.role as "assistant", content: r.message.content }, done: r.done, totalDuration: r.total_duration, promptEvalCount: r.prompt_eval_count, evalCount: r.eval_count }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Chat failed: " + msg); throw new ServiceUnavailableException("Chat completion failed: " + msg); } }
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> { try { const stream = await this.client.chat({ model: request.model, messages: this.buildMessages(request), stream: true, options: { temperature: request.temperature, num_predict: request.maxTokens } }); for await (const c of stream) yield { model: c.model, message: { role: c.message.role as "assistant", content: c.message.content }, done: c.done }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Stream failed: " + msg); throw new ServiceUnavailableException("Streaming failed: " + msg); } }
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> { try { const r = await this.client.embed({ model: request.model, input: request.input, truncate: request.truncate === "none" ? false : true }); return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Embed failed: " + msg); throw new ServiceUnavailableException("Embedding failed: " + msg); } }
private buildMessages(req: ChatRequestDto): Message[] { const msgs: Message[] = []; if (req.systemPrompt && !req.messages.some(m => m.role === "system")) msgs.push({ role: "system", content: req.systemPrompt }); for (const m of req.messages) msgs.push({ role: m.role, content: m.content }); return msgs; }
getConfig(): OllamaConfig { return { ...this.config }; }
}
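Outside the controller's SSE path, chatStream can also be consumed directly by another provider. A minimal sketch, assuming an injected LlmService instance and a placeholder model name; note the service falls back to http://localhost:11434 and a 120000 ms timeout when OLLAMA_HOST and OLLAMA_TIMEOUT are unset.

// Sketch of server-side consumption of chatStream; the model name is a placeholder.
import { LlmService } from "./llm.service";

export async function collectStreamedReply(llm: LlmService, prompt: string): Promise<string> {
  let text = "";
  for await (const chunk of llm.chatStream({
    model: "llama3.2",
    messages: [{ role: "user", content: prompt }],
    stream: true,
  })) {
    text += chunk.message.content; // concatenate incremental tokens as they arrive
  }
  return text;
}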