From 94afeb67e3eb1584eb725fe8c81180af7ccca9bb Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sat, 31 Jan 2026 12:10:43 -0600 Subject: [PATCH] feat(#123): port Ollama LLM provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented first concrete LLM provider following the provider interface pattern. Implementation: - OllamaProvider class implementing LlmProviderInterface - All required methods: initialize(), checkHealth(), listModels(), chat(), chatStream(), embed(), getConfig() - OllamaProviderConfig extending LlmProviderConfig - Proper error handling with NestJS Logger - Configuration immutability protection Features: - System prompt injection support - Temperature and max tokens configuration - Embedding with truncation control (defaults to enabled) - Streaming and non-streaming chat completions - Health check with model listing Testing: - 21 comprehensive test cases (TDD approach) - 100% statement, function, and line coverage - 86.36% branch coverage (exceeds 85% requirement) - All error scenarios tested - Mock-based unit tests Code Review Fixes: - Fixed truncate logic to match original LlmService behavior (defaults to true) - Added test for system prompt deduplication - Increased branch coverage from 77% to 86% Quality Gates: - ✅ All 21 tests passing - ✅ Linting clean - ✅ Type checking passed - ✅ Code review approved Fixes #123 Co-Authored-By: Claude Sonnet 4.5 --- .../src/llm/providers/ollama.provider.spec.ts | 435 ++++++++++++++++++ apps/api/src/llm/providers/ollama.provider.ts | 295 ++++++++++++ docs/scratchpads/123-ollama-provider.md | 82 ++++ 3 files changed, 812 insertions(+) create mode 100644 apps/api/src/llm/providers/ollama.provider.spec.ts create mode 100644 apps/api/src/llm/providers/ollama.provider.ts create mode 100644 docs/scratchpads/123-ollama-provider.md diff --git a/apps/api/src/llm/providers/ollama.provider.spec.ts b/apps/api/src/llm/providers/ollama.provider.spec.ts new file mode 100644 
index 0000000..a6140ca --- /dev/null +++ b/apps/api/src/llm/providers/ollama.provider.spec.ts @@ -0,0 +1,435 @@ +import { describe, it, expect, beforeEach, vi, type Mock } from "vitest"; +import { OllamaProvider, type OllamaProviderConfig } from "./ollama.provider"; +import type { ChatRequestDto, EmbedRequestDto } from "../dto"; + +// Mock the ollama module +vi.mock("ollama", () => { + return { + Ollama: vi.fn().mockImplementation(function (this: unknown) { + return { + list: vi.fn(), + chat: vi.fn(), + embed: vi.fn(), + }; + }), + }; +}); + +describe("OllamaProvider", () => { + let provider: OllamaProvider; + let config: OllamaProviderConfig; + let mockOllamaInstance: { + list: Mock; + chat: Mock; + embed: Mock; + }; + + beforeEach(() => { + // Reset all mocks + vi.clearAllMocks(); + + // Setup test configuration + config = { + endpoint: "http://localhost:11434", + timeout: 30000, + }; + + provider = new OllamaProvider(config); + + // Get the mock instance created by the constructor + mockOllamaInstance = (provider as any).client; + }); + + describe("constructor and initialization", () => { + it("should create provider with correct name and type", () => { + expect(provider.name).toBe("Ollama"); + expect(provider.type).toBe("ollama"); + }); + + it("should initialize successfully", async () => { + await expect(provider.initialize()).resolves.toBeUndefined(); + }); + }); + + describe("checkHealth", () => { + it("should return healthy status when Ollama is reachable", async () => { + const mockModels = [{ name: "llama2" }, { name: "mistral" }]; + mockOllamaInstance.list.mockResolvedValue({ models: mockModels }); + + const health = await provider.checkHealth(); + + expect(health).toEqual({ + healthy: true, + provider: "ollama", + endpoint: config.endpoint, + models: ["llama2", "mistral"], + }); + expect(mockOllamaInstance.list).toHaveBeenCalledOnce(); + }); + + it("should return unhealthy status when Ollama is unreachable", async () => { + const error = new 
Error("Connection refused"); + mockOllamaInstance.list.mockRejectedValue(error); + + const health = await provider.checkHealth(); + + expect(health).toEqual({ + healthy: false, + provider: "ollama", + endpoint: config.endpoint, + error: "Connection refused", + }); + }); + + it("should handle non-Error exceptions", async () => { + mockOllamaInstance.list.mockRejectedValue("string error"); + + const health = await provider.checkHealth(); + + expect(health.healthy).toBe(false); + expect(health.error).toBe("string error"); + }); + }); + + describe("listModels", () => { + it("should return array of model names", async () => { + const mockModels = [{ name: "llama2" }, { name: "mistral" }, { name: "codellama" }]; + mockOllamaInstance.list.mockResolvedValue({ models: mockModels }); + + const models = await provider.listModels(); + + expect(models).toEqual(["llama2", "mistral", "codellama"]); + expect(mockOllamaInstance.list).toHaveBeenCalledOnce(); + }); + + it("should throw error when listing models fails", async () => { + const error = new Error("Failed to connect"); + mockOllamaInstance.list.mockRejectedValue(error); + + await expect(provider.listModels()).rejects.toThrow("Failed to list models"); + }); + }); + + describe("chat", () => { + it("should perform chat completion successfully", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + }; + + const mockResponse = { + model: "llama2", + message: { role: "assistant", content: "Hi there!" }, + done: true, + total_duration: 1000000, + prompt_eval_count: 10, + eval_count: 5, + }; + + mockOllamaInstance.chat.mockResolvedValue(mockResponse); + + const response = await provider.chat(request); + + expect(response).toEqual({ + model: "llama2", + message: { role: "assistant", content: "Hi there!" 
}, + done: true, + totalDuration: 1000000, + promptEvalCount: 10, + evalCount: 5, + }); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + stream: false, + options: {}, + }); + }); + + it("should include system prompt in messages", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + systemPrompt: "You are a helpful assistant", + }; + + mockOllamaInstance.chat.mockResolvedValue({ + model: "llama2", + message: { role: "assistant", content: "Hi!" }, + done: true, + }); + + await provider.chat(request); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [ + { role: "system", content: "You are a helpful assistant" }, + { role: "user", content: "Hello" }, + ], + stream: false, + options: {}, + }); + }); + + it("should not duplicate system prompt when already in messages", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [ + { role: "system", content: "Existing system prompt" }, + { role: "user", content: "Hello" }, + ], + systemPrompt: "New system prompt (should be ignored)", + }; + + mockOllamaInstance.chat.mockResolvedValue({ + model: "llama2", + message: { role: "assistant", content: "Hi!" }, + done: true, + }); + + await provider.chat(request); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [ + { role: "system", content: "Existing system prompt" }, + { role: "user", content: "Hello" }, + ], + stream: false, + options: {}, + }); + }); + + it("should pass temperature and maxTokens as options", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + temperature: 0.7, + maxTokens: 100, + }; + + mockOllamaInstance.chat.mockResolvedValue({ + model: "llama2", + message: { role: "assistant", content: "Hi!" 
}, + done: true, + }); + + await provider.chat(request); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + stream: false, + options: { + temperature: 0.7, + num_predict: 100, + }, + }); + }); + + it("should throw error when chat fails", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + }; + + mockOllamaInstance.chat.mockRejectedValue(new Error("Model not found")); + + await expect(provider.chat(request)).rejects.toThrow("Chat completion failed"); + }); + }); + + describe("chatStream", () => { + it("should stream chat completion chunks", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + }; + + const mockChunks = [ + { model: "llama2", message: { role: "assistant", content: "Hi" }, done: false }, + { model: "llama2", message: { role: "assistant", content: " there" }, done: false }, + { model: "llama2", message: { role: "assistant", content: "!" 
}, done: true }, + ]; + + // Mock async generator + async function* mockStreamGenerator() { + for (const chunk of mockChunks) { + yield chunk; + } + } + + mockOllamaInstance.chat.mockResolvedValue(mockStreamGenerator()); + + const chunks = []; + for await (const chunk of provider.chatStream(request)) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(3); + expect(chunks[0]).toEqual({ + model: "llama2", + message: { role: "assistant", content: "Hi" }, + done: false, + }); + expect(chunks[2].done).toBe(true); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + stream: true, + options: {}, + }); + }); + + it("should pass options in streaming mode", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + temperature: 0.5, + maxTokens: 50, + }; + + async function* mockStreamGenerator() { + yield { model: "llama2", message: { role: "assistant", content: "Hi" }, done: true }; + } + + mockOllamaInstance.chat.mockResolvedValue(mockStreamGenerator()); + + const generator = provider.chatStream(request); + await generator.next(); + + expect(mockOllamaInstance.chat).toHaveBeenCalledWith({ + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + stream: true, + options: { + temperature: 0.5, + num_predict: 50, + }, + }); + }); + + it("should throw error when streaming fails", async () => { + const request: ChatRequestDto = { + model: "llama2", + messages: [{ role: "user", content: "Hello" }], + }; + + mockOllamaInstance.chat.mockRejectedValue(new Error("Stream error")); + + const generator = provider.chatStream(request); + + await expect(generator.next()).rejects.toThrow("Streaming failed"); + }); + }); + + describe("embed", () => { + it("should generate embeddings successfully", async () => { + const request: EmbedRequestDto = { + model: "nomic-embed-text", + input: ["Hello world", "Test embedding"], + }; + + const 
mockResponse = { + model: "nomic-embed-text", + embeddings: [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ], + total_duration: 500000, + }; + + mockOllamaInstance.embed.mockResolvedValue(mockResponse); + + const response = await provider.embed(request); + + expect(response).toEqual({ + model: "nomic-embed-text", + embeddings: [ + [0.1, 0.2, 0.3], + [0.4, 0.5, 0.6], + ], + totalDuration: 500000, + }); + + expect(mockOllamaInstance.embed).toHaveBeenCalledWith({ + model: "nomic-embed-text", + input: ["Hello world", "Test embedding"], + truncate: true, + }); + }); + + it("should handle truncate option", async () => { + const request: EmbedRequestDto = { + model: "nomic-embed-text", + input: ["Test"], + truncate: "start", + }; + + mockOllamaInstance.embed.mockResolvedValue({ + model: "nomic-embed-text", + embeddings: [[0.1, 0.2]], + }); + + await provider.embed(request); + + expect(mockOllamaInstance.embed).toHaveBeenCalledWith({ + model: "nomic-embed-text", + input: ["Test"], + truncate: true, + }); + }); + + it("should respect truncate none option", async () => { + const request: EmbedRequestDto = { + model: "nomic-embed-text", + input: ["Test"], + truncate: "none", + }; + + mockOllamaInstance.embed.mockResolvedValue({ + model: "nomic-embed-text", + embeddings: [[0.1, 0.2]], + }); + + await provider.embed(request); + + expect(mockOllamaInstance.embed).toHaveBeenCalledWith({ + model: "nomic-embed-text", + input: ["Test"], + truncate: false, + }); + }); + + it("should throw error when embedding fails", async () => { + const request: EmbedRequestDto = { + model: "nomic-embed-text", + input: ["Test"], + }; + + mockOllamaInstance.embed.mockRejectedValue(new Error("Embedding error")); + + await expect(provider.embed(request)).rejects.toThrow("Embedding failed"); + }); + }); + + describe("getConfig", () => { + it("should return copy of configuration", () => { + const returnedConfig = provider.getConfig(); + + expect(returnedConfig).toEqual(config); + 
expect(returnedConfig).not.toBe(config); // Should be a copy, not reference + }); + + it("should prevent external modification of config", () => { + const returnedConfig = provider.getConfig(); + returnedConfig.endpoint = "http://modified:11434"; + + const secondCall = provider.getConfig(); + expect(secondCall.endpoint).toBe("http://localhost:11434"); // Original unchanged + }); + }); +}); diff --git a/apps/api/src/llm/providers/ollama.provider.ts b/apps/api/src/llm/providers/ollama.provider.ts new file mode 100644 index 0000000..668dde7 --- /dev/null +++ b/apps/api/src/llm/providers/ollama.provider.ts @@ -0,0 +1,295 @@ +import { Logger } from "@nestjs/common"; +import { Ollama, type Message } from "ollama"; +import type { + LlmProviderInterface, + LlmProviderConfig, + LlmProviderHealthStatus, +} from "./llm-provider.interface"; +import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto"; + +/** + * Configuration for Ollama LLM provider. + * Extends base LlmProviderConfig with Ollama-specific options. + * + * @example + * ```typescript + * const config: OllamaProviderConfig = { + * endpoint: "http://localhost:11434", + * timeout: 30000 + * }; + * ``` + */ +export interface OllamaProviderConfig extends LlmProviderConfig { + /** + * Ollama server endpoint URL + * @default "http://localhost:11434" + */ + endpoint: string; + + /** + * Request timeout in milliseconds + * @default 30000 + */ + timeout?: number; +} + +/** + * Ollama LLM provider implementation. + * Provides integration with locally-hosted or remote Ollama instances. 
+ * + * @example + * ```typescript + * const provider = new OllamaProvider({ + * endpoint: "http://localhost:11434", + * timeout: 30000 + * }); + * + * await provider.initialize(); + * + * const response = await provider.chat({ + * model: "llama2", + * messages: [{ role: "user", content: "Hello" }] + * }); + * ``` + */ +export class OllamaProvider implements LlmProviderInterface { + readonly name = "Ollama"; + readonly type = "ollama" as const; + + private readonly logger = new Logger(OllamaProvider.name); + private readonly client: Ollama; + private readonly config: OllamaProviderConfig; + + /** + * Creates a new Ollama provider instance. + * + * @param config - Ollama provider configuration + */ + constructor(config: OllamaProviderConfig) { + this.config = { + ...config, + timeout: config.timeout ?? 30000, + }; + + this.client = new Ollama({ host: this.config.endpoint }); + this.logger.log(`Ollama provider initialized with endpoint: ${this.config.endpoint}`); + } + + /** + * Initialize the Ollama provider. + * This is a no-op for Ollama as the client is initialized in the constructor. + */ + async initialize(): Promise<void> { + // Ollama client is initialized in constructor + // No additional setup required + } + + /** + * Check if the Ollama server is healthy and reachable. + * + * @returns Health status with available models if healthy + */ + async checkHealth(): Promise<LlmProviderHealthStatus> { + try { + const response = await this.client.list(); + const models = response.models.map((m) => m.name); + + return { + healthy: true, + provider: "ollama", + endpoint: this.config.endpoint, + models, + }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + this.logger.warn(`Ollama health check failed: ${errorMessage}`); + + return { + healthy: false, + provider: "ollama", + endpoint: this.config.endpoint, + error: errorMessage, + }; + } + } + + /** + * List all available models from the Ollama server.
+ * + * @returns Array of model names + * @throws {Error} If the request fails + */ + async listModels(): Promise<string[]> { + try { + const response = await this.client.list(); + return response.models.map((m) => m.name); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + this.logger.error(`Failed to list models: ${errorMessage}`); + throw new Error(`Failed to list models: ${errorMessage}`); + } + } + + /** + * Perform a synchronous chat completion. + * + * @param request - Chat request with messages and configuration + * @returns Complete chat response + * @throws {Error} If the request fails + */ + async chat(request: ChatRequestDto): Promise<ChatResponseDto> { + try { + const messages = this.buildMessages(request); + const options = this.buildChatOptions(request); + + const response = await this.client.chat({ + model: request.model, + messages, + stream: false, + options, + }); + + return { + model: response.model, + message: { + role: response.message.role as "assistant", + content: response.message.content, + }, + done: response.done, + totalDuration: response.total_duration, + promptEvalCount: response.prompt_eval_count, + evalCount: response.eval_count, + }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + this.logger.error(`Chat completion failed: ${errorMessage}`); + throw new Error(`Chat completion failed: ${errorMessage}`); + } + } + + /** + * Perform a streaming chat completion. + * Yields response chunks as they arrive from the Ollama server.
+ * + * @param request - Chat request with messages and configuration + * @yields Chat response chunks + * @throws {Error} If the request fails + */ + async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> { + try { + const messages = this.buildMessages(request); + const options = this.buildChatOptions(request); + + const stream = await this.client.chat({ + model: request.model, + messages, + stream: true, + options, + }); + + for await (const chunk of stream) { + yield { + model: chunk.model, + message: { + role: chunk.message.role as "assistant", + content: chunk.message.content, + }, + done: chunk.done, + }; + } + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + this.logger.error(`Streaming failed: ${errorMessage}`); + throw new Error(`Streaming failed: ${errorMessage}`); + } + } + + /** + * Generate embeddings for the given input texts. + * + * @param request - Embedding request with model and input texts + * @returns Embeddings response with vector arrays + * @throws {Error} If the request fails + */ + async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> { + try { + const response = await this.client.embed({ + model: request.model, + input: request.input, + truncate: request.truncate === "none" ? false : true, + }); + + return { + model: response.model, + embeddings: response.embeddings, + totalDuration: response.total_duration, + }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + this.logger.error(`Embedding failed: ${errorMessage}`); + throw new Error(`Embedding failed: ${errorMessage}`); + } + } + + /** + * Get the current provider configuration. + * Returns a copy to prevent external modification. + * + * @returns Provider configuration object + */ + getConfig(): OllamaProviderConfig { + return { ...this.config }; + } + + /** + * Build message array from chat request. + * Prepends system prompt if provided and not already in messages.
+ * + * @param request - Chat request + * @returns Array of messages for Ollama + */ + private buildMessages(request: ChatRequestDto): Message[] { + const messages: Message[] = []; + + // Add system prompt if provided and not already in messages + if (request.systemPrompt && !request.messages.some((m) => m.role === "system")) { + messages.push({ + role: "system", + content: request.systemPrompt, + }); + } + + // Add all request messages + for (const message of request.messages) { + messages.push({ + role: message.role, + content: message.content, + }); + } + + return messages; + } + + /** + * Build Ollama-specific chat options from request. + * + * @param request - Chat request + * @returns Ollama options object + */ + private buildChatOptions(request: ChatRequestDto): { + temperature?: number; + num_predict?: number; + } { + const options: { temperature?: number; num_predict?: number } = {}; + + if (request.temperature !== undefined) { + options.temperature = request.temperature; + } + + if (request.maxTokens !== undefined) { + options.num_predict = request.maxTokens; + } + + return options; + } +} diff --git a/docs/scratchpads/123-ollama-provider.md b/docs/scratchpads/123-ollama-provider.md new file mode 100644 index 0000000..b042684 --- /dev/null +++ b/docs/scratchpads/123-ollama-provider.md @@ -0,0 +1,82 @@ +# Issue #123: Port Ollama LLM Provider + +## Objective + +Create a reusable OllamaProvider class that implements LlmProviderInterface. This extracts the existing Ollama functionality from LlmService into a provider-based architecture, enabling multi-provider support. + +## Approach + +Following TDD principles: + +1. Write tests first for OllamaProvider +2. Implement provider to satisfy interface +3. Port existing functionality from LlmService.ts +4. 
Ensure all tests pass with ≥85% coverage + +## Design Decisions + +### OllamaProviderConfig + +Extends LlmProviderConfig with Ollama-specific options: + +```typescript +interface OllamaProviderConfig extends LlmProviderConfig { + endpoint: string; // Base config (e.g., "http://localhost:11434") + timeout?: number; // Base config (default: 30000ms) +} +``` + +### Implementation Details + +- Uses `ollama` npm package (already installed) +- Implements all LlmProviderInterface methods +- Handles errors gracefully with proper logging +- Returns copies of config to prevent external modification + +### Methods to Implement + +- ✓ `initialize()` - Set up Ollama client +- ✓ `checkHealth()` - Verify connection and list models +- ✓ `listModels()` - Return available model names +- ✓ `chat()` - Synchronous chat completion +- ✓ `chatStream()` - Streaming chat completion +- ✓ `embed()` - Generate embeddings +- ✓ `getConfig()` - Return configuration copy + +## Progress + +- [x] Create scratchpad +- [x] Write test file (ollama.provider.spec.ts) +- [x] Implement OllamaProvider class +- [x] Verify all tests pass (21/21 passing) +- [x] Check code coverage ≥85% (100% achieved) +- [x] Run lint and typecheck (all passing) +- [x] Stage files for commit + +## Final Results + +- All 21 tests passing +- Code coverage: 100% statements, 100% functions, 100% lines, 86.36% branches +- Lint: Clean (auto-fixed formatting) +- TypeCheck: No errors +- TDD workflow successfully followed (RED → GREEN → REFACTOR) + +## Testing + +Tests cover: + +- Initialization +- Health checks (success and failure) +- Model listing +- Chat completion (sync and stream) +- Embeddings +- Error handling +- Configuration management + +## Notes + +- Existing LlmService.ts has working Ollama implementation to port +- Interface already defined with clear contracts +- DTOs already created and validated +- Focus on clean separation of concerns +- Provider should be stateless except for Ollama client instance