diff --git a/apps/api/src/app.module.ts b/apps/api/src/app.module.ts index f5e3d84..10e32b7 100644 --- a/apps/api/src/app.module.ts +++ b/apps/api/src/app.module.ts @@ -15,7 +15,7 @@ import { LayoutsModule } from "./layouts/layouts.module"; import { KnowledgeModule } from "./knowledge/knowledge.module"; import { UsersModule } from "./users/users.module"; import { WebSocketModule } from "./websocket/websocket.module"; -import { OllamaModule } from "./ollama/ollama.module"; +import { LlmModule } from "./llm/llm.module"; @Module({ imports: [ @@ -33,7 +33,7 @@ import { OllamaModule } from "./ollama/ollama.module"; KnowledgeModule, UsersModule, WebSocketModule, - OllamaModule, + LlmModule, ], controllers: [AppController], providers: [AppService], diff --git a/apps/api/src/llm/dto/chat.dto.ts b/apps/api/src/llm/dto/chat.dto.ts new file mode 100644 index 0000000..d2e5a80 --- /dev/null +++ b/apps/api/src/llm/dto/chat.dto.ts @@ -0,0 +1,7 @@ +import { IsArray, IsString, IsOptional, IsBoolean, IsNumber, ValidateNested, IsIn } from "class-validator"; +import { Type } from "class-transformer"; +export type ChatRole = "system" | "user" | "assistant"; +export class ChatMessageDto { @IsString() @IsIn(["system", "user", "assistant"]) role!: ChatRole; @IsString() content!: string; } +export class ChatRequestDto { @IsString() model!: string; @IsArray() @ValidateNested({ each: true }) @Type(() => ChatMessageDto) messages!: ChatMessageDto[]; @IsOptional() @IsBoolean() stream?: boolean; @IsOptional() @IsNumber() temperature?: number; @IsOptional() @IsNumber() maxTokens?: number; @IsOptional() @IsString() systemPrompt?: string; } +export interface ChatResponseDto { model: string; message: { role: ChatRole; content: string }; done: boolean; totalDuration?: number; promptEvalCount?: number; evalCount?: number; } +export interface ChatStreamChunkDto { model: string; message: { role: ChatRole; content: string }; done: boolean; } diff --git a/apps/api/src/llm/dto/embed.dto.ts 
b/apps/api/src/llm/dto/embed.dto.ts new file mode 100644 index 0000000..6f017c6 --- /dev/null +++ b/apps/api/src/llm/dto/embed.dto.ts @@ -0,0 +1,3 @@ +import { IsArray, IsString, IsOptional } from "class-validator"; +export class EmbedRequestDto { @IsString() model!: string; @IsArray() @IsString({ each: true }) input!: string[]; @IsOptional() @IsString() truncate?: "start" | "end" | "none"; } +export interface EmbedResponseDto { model: string; embeddings: number[][]; totalDuration?: number; } diff --git a/apps/api/src/llm/dto/index.ts b/apps/api/src/llm/dto/index.ts new file mode 100644 index 0000000..e0ed4eb --- /dev/null +++ b/apps/api/src/llm/dto/index.ts @@ -0,0 +1,2 @@ +export * from "./chat.dto"; +export * from "./embed.dto"; diff --git a/apps/api/src/llm/index.ts b/apps/api/src/llm/index.ts new file mode 100644 index 0000000..4a101fa --- /dev/null +++ b/apps/api/src/llm/index.ts @@ -0,0 +1,4 @@ +export * from "./llm.module"; +export * from "./llm.service"; +export * from "./llm.controller"; +export * from "./dto"; diff --git a/apps/api/src/llm/llm.controller.spec.ts b/apps/api/src/llm/llm.controller.spec.ts new file mode 100644 index 0000000..7822f38 --- /dev/null +++ b/apps/api/src/llm/llm.controller.spec.ts @@ -0,0 +1,15 @@ +import { describe, it, expect, beforeEach, vi } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { LlmController } from "./llm.controller"; +import { LlmService } from "./llm.service"; +import type { ChatRequestDto, EmbedRequestDto } from "./dto"; +describe("LlmController", () => { + let controller: LlmController; + const mockService = { checkHealth: vi.fn(), listModels: vi.fn(), chat: vi.fn(), chatStream: vi.fn(), embed: vi.fn() }; + beforeEach(async () => { vi.clearAllMocks(); controller = (await Test.createTestingModule({ controllers: [LlmController], providers: [{ provide: LlmService, useValue: mockService }] }).compile()).get(LlmController); }); + it("should be defined", () => { 
expect(controller).toBeDefined(); }); + describe("health", () => { it("should return status", async () => { const s = { healthy: true, host: "h" }; mockService.checkHealth.mockResolvedValue(s); expect(await controller.health()).toEqual(s); }); }); + describe("listModels", () => { it("should return models", async () => { mockService.listModels.mockResolvedValue(["m1"]); expect(await controller.listModels()).toEqual({ models: ["m1"] }); }); }); + describe("chat", () => { const req: ChatRequestDto = { model: "m", messages: [{ role: "user", content: "x" }] }; const res = { setHeader: vi.fn(), write: vi.fn(), end: vi.fn() }; it("should return response", async () => { const r = { model: "m", message: { role: "assistant", content: "y" }, done: true }; mockService.chat.mockResolvedValue(r); expect(await controller.chat(req, res as any)).toEqual(r); }); it("should stream", async () => { mockService.chatStream.mockReturnValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); await controller.chat({ ...req, stream: true }, res as any); expect(res.setHeader).toHaveBeenCalled(); expect(res.end).toHaveBeenCalled(); }); }); + describe("embed", () => { it("should return embeddings", async () => { const r = { model: "m", embeddings: [[0.1]] }; mockService.embed.mockResolvedValue(r); expect(await controller.embed({ model: "m", input: ["x"] })).toEqual(r); }); }); +}); diff --git a/apps/api/src/llm/llm.controller.ts b/apps/api/src/llm/llm.controller.ts new file mode 100644 index 0000000..b55c4ef --- /dev/null +++ b/apps/api/src/llm/llm.controller.ts @@ -0,0 +1,12 @@ +import { Controller, Post, Get, Body, Res, HttpCode, HttpStatus } from "@nestjs/common"; +import { Response } from "express"; +import { LlmService, OllamaHealthStatus } from "./llm.service"; +import { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto"; +@Controller("llm") +export class LlmController { + constructor(private readonly 
llmService: LlmService) {} + @Get("health") async health(): Promise<OllamaHealthStatus> { return this.llmService.checkHealth(); } + @Get("models") async listModels(): Promise<{ models: string[] }> { return { models: await this.llmService.listModels() }; } + @Post("chat") @HttpCode(HttpStatus.OK) async chat(@Body() req: ChatRequestDto, @Res({ passthrough: true }) res: Response): Promise<ChatResponseDto | undefined> { if (req.stream === true) { res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); try { for await (const c of this.llmService.chatStream(req)) res.write("data: " + JSON.stringify(c) + "\n\n"); res.write("data: [DONE]\n\n"); res.end(); } catch (e: unknown) { res.write("data: " + JSON.stringify({ error: e instanceof Error ? e.message : String(e) }) + "\n\n"); res.end(); } return; } return this.llmService.chat(req); } + @Post("embed") @HttpCode(HttpStatus.OK) async embed(@Body() req: EmbedRequestDto): Promise<EmbedResponseDto> { return this.llmService.embed(req); } +} diff --git a/apps/api/src/llm/llm.module.ts b/apps/api/src/llm/llm.module.ts new file mode 100644 index 0000000..3aab60e --- /dev/null +++ b/apps/api/src/llm/llm.module.ts @@ -0,0 +1,5 @@ +import { Module } from "@nestjs/common"; +import { LlmController } from "./llm.controller"; +import { LlmService } from "./llm.service"; +@Module({ controllers: [LlmController], providers: [LlmService], exports: [LlmService] }) +export class LlmModule {} diff --git a/apps/api/src/llm/llm.service.spec.ts b/apps/api/src/llm/llm.service.spec.ts new file mode 100644 index 0000000..4b262b7 --- /dev/null +++ b/apps/api/src/llm/llm.service.spec.ts @@ -0,0 +1,19 @@ +import { describe, it, expect, beforeEach, vi, afterEach } from "vitest"; +import { Test, TestingModule } from "@nestjs/testing"; +import { ServiceUnavailableException } from "@nestjs/common"; +import { LlmService } from "./llm.service"; +import type { ChatRequestDto, EmbedRequestDto } from
"./dto"; +const mockList = vi.fn(); const mockChat = vi.fn(); const mockEmbed = vi.fn(); +vi.mock("ollama", () => ({ Ollama: class { list = mockList; chat = mockChat; embed = mockEmbed; } })); +describe("LlmService", () => { + let service: LlmService; + const originalEnv = { ...process.env }; + beforeEach(async () => { process.env = { ...originalEnv, OLLAMA_HOST: "http://test:11434", OLLAMA_TIMEOUT: "60000" }; vi.clearAllMocks(); service = (await Test.createTestingModule({ providers: [LlmService] }).compile()).get(LlmService); }); + afterEach(() => { process.env = originalEnv; }); + it("should be defined", () => { expect(service).toBeDefined(); }); + describe("checkHealth", () => { it("should return healthy", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); const r = await service.checkHealth(); expect(r.healthy).toBe(true); }); it("should return unhealthy on error", async () => { mockList.mockRejectedValue(new Error("fail")); const r = await service.checkHealth(); expect(r.healthy).toBe(false); }); }); + describe("listModels", () => { it("should return models", async () => { mockList.mockResolvedValue({ models: [{ name: "llama3.2" }] }); expect(await service.listModels()).toEqual(["llama3.2"]); }); it("should throw on error", async () => { mockList.mockRejectedValue(new Error("fail")); await expect(service.listModels()).rejects.toThrow(ServiceUnavailableException); }); }); + describe("chat", () => { const req: ChatRequestDto = { model: "llama3.2", messages: [{ role: "user", content: "Hi" }] }; it("should return response", async () => { mockChat.mockResolvedValue({ model: "llama3.2", message: { role: "assistant", content: "Hello" }, done: true }); const r = await service.chat(req); expect(r.message.content).toBe("Hello"); }); it("should throw on error", async () => { mockChat.mockRejectedValue(new Error("fail")); await expect(service.chat(req)).rejects.toThrow(ServiceUnavailableException); }); }); + describe("chatStream", () => { 
it("should yield chunks", async () => { mockChat.mockResolvedValue((async function* () { yield { model: "m", message: { role: "a", content: "x" }, done: true }; })()); const chunks = []; for await (const c of service.chatStream({ model: "m", messages: [{ role: "user", content: "x" }], stream: true })) chunks.push(c); expect(chunks.length).toBe(1); }); }); + describe("embed", () => { it("should return embeddings", async () => { mockEmbed.mockResolvedValue({ model: "m", embeddings: [[0.1]] }); const r = await service.embed({ model: "m", input: ["x"] }); expect(r.embeddings).toEqual([[0.1]]); }); }); +}); diff --git a/apps/api/src/llm/llm.service.ts b/apps/api/src/llm/llm.service.ts new file mode 100644 index 0000000..10f32e4 --- /dev/null +++ b/apps/api/src/llm/llm.service.ts @@ -0,0 +1,20 @@ +import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common"; +import { Ollama, Message } from "ollama"; +import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto, ChatStreamChunkDto } from "./dto"; +export interface OllamaConfig { host: string; timeout?: number; } +export interface OllamaHealthStatus { healthy: boolean; host: string; error?: string; models?: string[]; } +@Injectable() +export class LlmService implements OnModuleInit { + private readonly logger = new Logger(LlmService.name); + private client: Ollama; + private readonly config: OllamaConfig; + constructor() { this.config = { host: process.env["OLLAMA_HOST"] ?? "http://localhost:11434", timeout: parseInt(process.env["OLLAMA_TIMEOUT"] ?? "120000", 10) }; this.client = new Ollama({ host: this.config.host }); this.logger.log("Ollama service initialized"); } + async onModuleInit(): Promise<void> { const h = await this.checkHealth(); if (h.healthy) this.logger.log("Ollama healthy"); else this.logger.warn("Ollama unhealthy: " + (h.error ??
"unknown")); } + async checkHealth(): Promise<OllamaHealthStatus> { try { const r = await this.client.list(); return { healthy: true, host: this.config.host, models: r.models.map(m => m.name) }; } catch (e: unknown) { return { healthy: false, host: this.config.host, error: e instanceof Error ? e.message : String(e) }; } } + async listModels(): Promise<string[]> { try { return (await this.client.list()).models.map(m => m.name); } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Failed to list models: " + msg); throw new ServiceUnavailableException("Failed to list models: " + msg); } } + async chat(request: ChatRequestDto): Promise<ChatResponseDto> { try { const msgs = this.buildMessages(request); const r = await this.client.chat({ model: request.model, messages: msgs, stream: false, options: { temperature: request.temperature, num_predict: request.maxTokens } }); return { model: r.model, message: { role: r.message.role as "assistant", content: r.message.content }, done: r.done, totalDuration: r.total_duration, promptEvalCount: r.prompt_eval_count, evalCount: r.eval_count }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Chat failed: " + msg); throw new ServiceUnavailableException("Chat completion failed: " + msg); } } + async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> { try { const stream = await this.client.chat({ model: request.model, messages: this.buildMessages(request), stream: true, options: { temperature: request.temperature, num_predict: request.maxTokens } }); for await (const c of stream) yield { model: c.model, message: { role: c.message.role as "assistant", content: c.message.content }, done: c.done }; } catch (e: unknown) { const msg = e instanceof Error ?
e.message : String(e); this.logger.error("Stream failed: " + msg); throw new ServiceUnavailableException("Streaming failed: " + msg); } } + async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> { try { const r = await this.client.embed({ model: request.model, input: request.input, truncate: request.truncate === "none" ? false : true }); return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration }; } catch (e: unknown) { const msg = e instanceof Error ? e.message : String(e); this.logger.error("Embed failed: " + msg); throw new ServiceUnavailableException("Embedding failed: " + msg); } } + private buildMessages(req: ChatRequestDto): Message[] { const msgs: Message[] = []; if (req.systemPrompt && !req.messages.some(m => m.role === "system")) msgs.push({ role: "system", content: req.systemPrompt }); for (const m of req.messages) msgs.push({ role: m.role, content: m.content }); return msgs; } + getConfig(): OllamaConfig { return { ...this.config }; } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3553c9e..c3df81d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -147,6 +147,15 @@ importers: apps/web: dependencies: + '@dnd-kit/core': + specifier: ^6.3.1 + version: 6.3.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + '@dnd-kit/sortable': + specifier: ^10.0.0 + version: 10.0.0(@dnd-kit/core@6.3.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react@19.2.4) + '@dnd-kit/utilities': + specifier: ^3.2.2 + version: 3.2.2(react@19.2.4) '@mosaic/shared': specifier: workspace:* version: link:../../packages/shared @@ -586,6 +595,28 @@ packages: resolution: {integrity: sha512-Vd/9EVDiu6PPJt9yAh6roZP6El1xHrdvIVGjyBsHR0RYwNHgL7FJPyIIW4fANJNG6FtyZfvlRPpFI4ZM/lubvw==} engines: {node: '>=18'} + '@dnd-kit/accessibility@3.1.1': + resolution: {integrity: sha512-2P+YgaXF+gRsIihwwY1gCsQSYnu9Zyj2py8kY5fFvUM1qm2WA2u639R6YNVfU4GWr+ZM5mqEsfHZZLoRONbemw==} + peerDependencies: + react: '>=16.8.0' + + '@dnd-kit/core@6.3.1': + resolution: {integrity:
sha512-xkGBRQQab4RLwgXxoqETICr6S5JlogafbhNsidmrkVv2YRs5MLwpjoF2qpiGjQt8S9AoxtIV603s0GIUpY5eYQ==} + peerDependencies: + react: '>=16.8.0' + react-dom: '>=16.8.0' + + '@dnd-kit/sortable@10.0.0': + resolution: {integrity: sha512-+xqhmIIzvAYMGfBYYnbKuNicfSsk4RksY2XdmJhT+HAC01nix6fHCztU68jooFiMUB01Ky3F0FyOvhG/BZrWkg==} + peerDependencies: + '@dnd-kit/core': ^6.3.0 + react: '>=16.8.0' + + '@dnd-kit/utilities@3.2.2': + resolution: {integrity: sha512-+MKAJEOfaBe5SmV6t34p80MMKhjvUz0vRrvVJbPT0WElzaOJ/1xs+D+KDv+tD/NE5ujfrChEcshd4fLn0wpiqg==} + peerDependencies: + react: '>=16.8.0' + '@emnapi/runtime@1.8.1': resolution: {integrity: sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==} @@ -4760,6 +4791,31 @@ snapshots: '@csstools/css-tokenizer@3.0.4': {} + '@dnd-kit/accessibility@3.1.1(react@19.2.4)': + dependencies: + react: 19.2.4 + tslib: 2.8.1 + + '@dnd-kit/core@6.3.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4)': + dependencies: + '@dnd-kit/accessibility': 3.1.1(react@19.2.4) + '@dnd-kit/utilities': 3.2.2(react@19.2.4) + react: 19.2.4 + react-dom: 19.2.4(react@19.2.4) + tslib: 2.8.1 + + '@dnd-kit/sortable@10.0.0(@dnd-kit/core@6.3.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react@19.2.4)': + dependencies: + '@dnd-kit/core': 6.3.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4) + '@dnd-kit/utilities': 3.2.2(react@19.2.4) + react: 19.2.4 + tslib: 2.8.1 + + '@dnd-kit/utilities@3.2.2(react@19.2.4)': + dependencies: + react: 19.2.4 + tslib: 2.8.1 + '@emnapi/runtime@1.8.1': dependencies: tslib: 2.8.1