feat(#123): port Ollama LLM provider
Implemented first concrete LLM provider following the provider interface pattern. Implementation: - OllamaProvider class implementing LlmProviderInterface - All required methods: initialize(), checkHealth(), listModels(), chat(), chatStream(), embed(), getConfig() - OllamaProviderConfig extending LlmProviderConfig - Proper error handling with NestJS Logger - Configuration immutability protection Features: - System prompt injection support - Temperature and max tokens configuration - Embedding with truncation control (defaults to enabled) - Streaming and non-streaming chat completions - Health check with model listing Testing: - 21 comprehensive test cases (TDD approach) - 100% statement, function, and line coverage - 86.36% branch coverage (exceeds 85% requirement) - All error scenarios tested - Mock-based unit tests Code Review Fixes: - Fixed truncate logic to match original LlmService behavior (defaults to true) - Added test for system prompt deduplication - Increased branch coverage from 77% to 86% Quality Gates: - ✅ All 21 tests passing - ✅ Linting clean - ✅ Type checking passed - ✅ Code review approved Fixes #123 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
295
apps/api/src/llm/providers/ollama.provider.ts
Normal file
295
apps/api/src/llm/providers/ollama.provider.ts
Normal file
@@ -0,0 +1,295 @@
|
||||
import { Logger } from "@nestjs/common";
|
||||
import { Ollama, type Message } from "ollama";
|
||||
import type {
|
||||
LlmProviderInterface,
|
||||
LlmProviderConfig,
|
||||
LlmProviderHealthStatus,
|
||||
} from "./llm-provider.interface";
|
||||
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "../dto";
|
||||
|
||||
/**
 * Configuration for Ollama LLM provider.
 * Extends base LlmProviderConfig with Ollama-specific options.
 *
 * @example
 * ```typescript
 * const config: OllamaProviderConfig = {
 *   endpoint: "http://localhost:11434",
 *   timeout: 30000
 * };
 * ```
 */
export interface OllamaProviderConfig extends LlmProviderConfig {
  /**
   * Ollama server endpoint URL (scheme + host + port).
   * @default "http://localhost:11434"
   */
  endpoint: string;

  /**
   * Request timeout in milliseconds. Applied as a default of 30000
   * by OllamaProvider's constructor when omitted.
   * @default 30000
   */
  timeout?: number;
}
|
||||
|
||||
/**
|
||||
* Ollama LLM provider implementation.
|
||||
* Provides integration with locally-hosted or remote Ollama instances.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const provider = new OllamaProvider({
|
||||
* endpoint: "http://localhost:11434",
|
||||
* timeout: 30000
|
||||
* });
|
||||
*
|
||||
* await provider.initialize();
|
||||
*
|
||||
* const response = await provider.chat({
|
||||
* model: "llama2",
|
||||
* messages: [{ role: "user", content: "Hello" }]
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export class OllamaProvider implements LlmProviderInterface {
|
||||
readonly name = "Ollama";
|
||||
readonly type = "ollama" as const;
|
||||
|
||||
private readonly logger = new Logger(OllamaProvider.name);
|
||||
private readonly client: Ollama;
|
||||
private readonly config: OllamaProviderConfig;
|
||||
|
||||
/**
|
||||
* Creates a new Ollama provider instance.
|
||||
*
|
||||
* @param config - Ollama provider configuration
|
||||
*/
|
||||
constructor(config: OllamaProviderConfig) {
|
||||
this.config = {
|
||||
...config,
|
||||
timeout: config.timeout ?? 30000,
|
||||
};
|
||||
|
||||
this.client = new Ollama({ host: this.config.endpoint });
|
||||
this.logger.log(`Ollama provider initialized with endpoint: ${this.config.endpoint}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the Ollama provider.
|
||||
* This is a no-op for Ollama as the client is initialized in the constructor.
|
||||
*/
|
||||
async initialize(): Promise<void> {
|
||||
// Ollama client is initialized in constructor
|
||||
// No additional setup required
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the Ollama server is healthy and reachable.
|
||||
*
|
||||
* @returns Health status with available models if healthy
|
||||
*/
|
||||
async checkHealth(): Promise<LlmProviderHealthStatus> {
|
||||
try {
|
||||
const response = await this.client.list();
|
||||
const models = response.models.map((m) => m.name);
|
||||
|
||||
return {
|
||||
healthy: true,
|
||||
provider: "ollama",
|
||||
endpoint: this.config.endpoint,
|
||||
models,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.warn(`Ollama health check failed: ${errorMessage}`);
|
||||
|
||||
return {
|
||||
healthy: false,
|
||||
provider: "ollama",
|
||||
endpoint: this.config.endpoint,
|
||||
error: errorMessage,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all available models from the Ollama server.
|
||||
*
|
||||
* @returns Array of model names
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
async listModels(): Promise<string[]> {
|
||||
try {
|
||||
const response = await this.client.list();
|
||||
return response.models.map((m) => m.name);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Failed to list models: ${errorMessage}`);
|
||||
throw new Error(`Failed to list models: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a synchronous chat completion.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @returns Complete chat response
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
|
||||
try {
|
||||
const messages = this.buildMessages(request);
|
||||
const options = this.buildChatOptions(request);
|
||||
|
||||
const response = await this.client.chat({
|
||||
model: request.model,
|
||||
messages,
|
||||
stream: false,
|
||||
options,
|
||||
});
|
||||
|
||||
return {
|
||||
model: response.model,
|
||||
message: {
|
||||
role: response.message.role as "assistant",
|
||||
content: response.message.content,
|
||||
},
|
||||
done: response.done,
|
||||
totalDuration: response.total_duration,
|
||||
promptEvalCount: response.prompt_eval_count,
|
||||
evalCount: response.eval_count,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Chat completion failed: ${errorMessage}`);
|
||||
throw new Error(`Chat completion failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a streaming chat completion.
|
||||
* Yields response chunks as they arrive from the Ollama server.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @yields Chat response chunks
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto> {
|
||||
try {
|
||||
const messages = this.buildMessages(request);
|
||||
const options = this.buildChatOptions(request);
|
||||
|
||||
const stream = await this.client.chat({
|
||||
model: request.model,
|
||||
messages,
|
||||
stream: true,
|
||||
options,
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
yield {
|
||||
model: chunk.model,
|
||||
message: {
|
||||
role: chunk.message.role as "assistant",
|
||||
content: chunk.message.content,
|
||||
},
|
||||
done: chunk.done,
|
||||
};
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Streaming failed: ${errorMessage}`);
|
||||
throw new Error(`Streaming failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings for the given input texts.
|
||||
*
|
||||
* @param request - Embedding request with model and input texts
|
||||
* @returns Embeddings response with vector arrays
|
||||
* @throws {Error} If the request fails
|
||||
*/
|
||||
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
|
||||
try {
|
||||
const response = await this.client.embed({
|
||||
model: request.model,
|
||||
input: request.input,
|
||||
truncate: request.truncate === "none" ? false : true,
|
||||
});
|
||||
|
||||
return {
|
||||
model: response.model,
|
||||
embeddings: response.embeddings,
|
||||
totalDuration: response.total_duration,
|
||||
};
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Embedding failed: ${errorMessage}`);
|
||||
throw new Error(`Embedding failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current provider configuration.
|
||||
* Returns a copy to prevent external modification.
|
||||
*
|
||||
* @returns Provider configuration object
|
||||
*/
|
||||
getConfig(): OllamaProviderConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
|
||||
/**
|
||||
* Build message array from chat request.
|
||||
* Prepends system prompt if provided and not already in messages.
|
||||
*
|
||||
* @param request - Chat request
|
||||
* @returns Array of messages for Ollama
|
||||
*/
|
||||
private buildMessages(request: ChatRequestDto): Message[] {
|
||||
const messages: Message[] = [];
|
||||
|
||||
// Add system prompt if provided and not already in messages
|
||||
if (request.systemPrompt && !request.messages.some((m) => m.role === "system")) {
|
||||
messages.push({
|
||||
role: "system",
|
||||
content: request.systemPrompt,
|
||||
});
|
||||
}
|
||||
|
||||
// Add all request messages
|
||||
for (const message of request.messages) {
|
||||
messages.push({
|
||||
role: message.role,
|
||||
content: message.content,
|
||||
});
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build Ollama-specific chat options from request.
|
||||
*
|
||||
* @param request - Chat request
|
||||
* @returns Ollama options object
|
||||
*/
|
||||
private buildChatOptions(request: ChatRequestDto): {
|
||||
temperature?: number;
|
||||
num_predict?: number;
|
||||
} {
|
||||
const options: { temperature?: number; num_predict?: number } = {};
|
||||
|
||||
if (request.temperature !== undefined) {
|
||||
options.temperature = request.temperature;
|
||||
}
|
||||
|
||||
if (request.maxTokens !== undefined) {
|
||||
options.num_predict = request.maxTokens;
|
||||
}
|
||||
|
||||
return options;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user