feat(#127): refactor LlmService to use provider pattern
Refactor LlmService to delegate to LlmManagerService instead of using Ollama directly. This enables multiple provider support and user-specific provider configuration. Changes: - Remove direct Ollama client from LlmService - Delegate all LLM operations to provider via LlmManagerService - Update health status to use provider-agnostic interface - Add PrismaModule to LlmModule for manager service - Maintain backward compatibility with existing API - Achieve 89.74% test coverage Fixes #127 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1,140 +1,146 @@
|
||||
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
|
||||
import { Ollama, Message } from "ollama";
|
||||
import type {
|
||||
ChatRequestDto,
|
||||
ChatResponseDto,
|
||||
EmbedRequestDto,
|
||||
EmbedResponseDto,
|
||||
ChatStreamChunkDto,
|
||||
} from "./dto";
|
||||
export interface OllamaConfig {
|
||||
host: string;
|
||||
timeout?: number;
|
||||
}
|
||||
export interface OllamaHealthStatus {
|
||||
healthy: boolean;
|
||||
host: string;
|
||||
error?: string;
|
||||
models?: string[];
|
||||
}
|
||||
import { LlmManagerService } from "./llm-manager.service";
|
||||
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
|
||||
import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface";
|
||||
|
||||
/**
|
||||
* LLM Service
|
||||
*
|
||||
* High-level service for LLM operations. Delegates to providers via LlmManagerService.
|
||||
* Maintains backward compatibility with the original API while supporting multiple providers.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* // Chat completion
|
||||
* const response = await llmService.chat({
|
||||
* model: "llama3.2",
|
||||
* messages: [{ role: "user", content: "Hello" }]
|
||||
* });
|
||||
*
|
||||
* // Streaming chat
|
||||
* for await (const chunk of llmService.chatStream(request)) {
|
||||
* console.log(chunk.message.content);
|
||||
* }
|
||||
*
|
||||
* // Generate embeddings
|
||||
* const embeddings = await llmService.embed({
|
||||
* model: "llama3.2",
|
||||
* input: ["text to embed"]
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
@Injectable()
|
||||
export class LlmService implements OnModuleInit {
|
||||
private readonly logger = new Logger(LlmService.name);
|
||||
private client: Ollama;
|
||||
private readonly config: OllamaConfig;
|
||||
constructor() {
|
||||
this.config = {
|
||||
host: process.env.OLLAMA_HOST ?? "http://localhost:11434",
|
||||
timeout: parseInt(process.env.OLLAMA_TIMEOUT ?? "120000", 10),
|
||||
};
|
||||
this.client = new Ollama({ host: this.config.host });
|
||||
this.logger.log("Ollama service initialized");
|
||||
|
||||
constructor(private readonly llmManager: LlmManagerService) {
|
||||
this.logger.log("LLM service initialized");
|
||||
}
|
||||
|
||||
/**
|
||||
* Check health status on module initialization.
|
||||
* Logs the status but does not fail if unhealthy.
|
||||
*/
|
||||
async onModuleInit(): Promise<void> {
|
||||
const h = await this.checkHealth();
|
||||
if (h.healthy) this.logger.log("Ollama healthy");
|
||||
else this.logger.warn("Ollama unhealthy: " + (h.error ?? "unknown"));
|
||||
const health = await this.checkHealth();
|
||||
if (health.healthy) {
|
||||
const endpoint = health.endpoint ?? "default endpoint";
|
||||
this.logger.log(`LLM provider healthy: ${health.provider} at ${endpoint}`);
|
||||
} else {
|
||||
const errorMsg = health.error ?? "unknown error";
|
||||
this.logger.warn(`LLM provider unhealthy: ${errorMsg}`);
|
||||
}
|
||||
}
|
||||
async checkHealth(): Promise<OllamaHealthStatus> {
|
||||
/**
|
||||
* Check health of the default LLM provider.
|
||||
* Returns health status without throwing errors.
|
||||
*
|
||||
* @returns Health status of the default provider
|
||||
*/
|
||||
async checkHealth(): Promise<LlmProviderHealthStatus> {
|
||||
try {
|
||||
const r = await this.client.list();
|
||||
return { healthy: true, host: this.config.host, models: r.models.map((m) => m.name) };
|
||||
} catch (e: unknown) {
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.checkHealth();
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Health check failed: ${errorMessage}`);
|
||||
return {
|
||||
healthy: false,
|
||||
host: this.config.host,
|
||||
error: e instanceof Error ? e.message : String(e),
|
||||
provider: "unknown",
|
||||
error: errorMessage,
|
||||
};
|
||||
}
|
||||
}
|
||||
/**
|
||||
* List all available models from the default provider.
|
||||
*
|
||||
* @returns Array of model names
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async listModels(): Promise<string[]> {
|
||||
try {
|
||||
return (await this.client.list()).models.map((m) => m.name);
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
this.logger.error("Failed to list models: " + msg);
|
||||
throw new ServiceUnavailableException("Failed to list models: " + msg);
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.listModels();
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Failed to list models: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Failed to list models: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Perform a synchronous chat completion.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @returns Complete chat response
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async chat(request: ChatRequestDto): Promise<ChatResponseDto> {
|
||||
try {
|
||||
const msgs = this.buildMessages(request);
|
||||
const options: { temperature?: number; num_predict?: number } = {};
|
||||
if (request.temperature !== undefined) {
|
||||
options.temperature = request.temperature;
|
||||
}
|
||||
if (request.maxTokens !== undefined) {
|
||||
options.num_predict = request.maxTokens;
|
||||
}
|
||||
const r = await this.client.chat({
|
||||
model: request.model,
|
||||
messages: msgs,
|
||||
stream: false,
|
||||
options,
|
||||
});
|
||||
return {
|
||||
model: r.model,
|
||||
message: { role: r.message.role as "assistant", content: r.message.content },
|
||||
done: r.done,
|
||||
totalDuration: r.total_duration,
|
||||
promptEvalCount: r.prompt_eval_count,
|
||||
evalCount: r.eval_count,
|
||||
};
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
this.logger.error("Chat failed: " + msg);
|
||||
throw new ServiceUnavailableException("Chat completion failed: " + msg);
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.chat(request);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Chat failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatStreamChunkDto> {
|
||||
/**
|
||||
* Perform a streaming chat completion.
|
||||
* Yields response chunks as they arrive from the provider.
|
||||
*
|
||||
* @param request - Chat request with messages and configuration
|
||||
* @yields Chat response chunks
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async *chatStream(request: ChatRequestDto): AsyncGenerator<ChatResponseDto, void, unknown> {
|
||||
try {
|
||||
const options: { temperature?: number; num_predict?: number } = {};
|
||||
if (request.temperature !== undefined) {
|
||||
options.temperature = request.temperature;
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
const stream = provider.chatStream(request);
|
||||
|
||||
for await (const chunk of stream) {
|
||||
yield chunk;
|
||||
}
|
||||
if (request.maxTokens !== undefined) {
|
||||
options.num_predict = request.maxTokens;
|
||||
}
|
||||
const stream = await this.client.chat({
|
||||
model: request.model,
|
||||
messages: this.buildMessages(request),
|
||||
stream: true,
|
||||
options,
|
||||
});
|
||||
for await (const c of stream)
|
||||
yield {
|
||||
model: c.model,
|
||||
message: { role: c.message.role as "assistant", content: c.message.content },
|
||||
done: c.done,
|
||||
};
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
this.logger.error("Stream failed: " + msg);
|
||||
throw new ServiceUnavailableException("Streaming failed: " + msg);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Stream failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Generate embeddings for the given input texts.
|
||||
*
|
||||
* @param request - Embedding request with model and input texts
|
||||
* @returns Embeddings response with vector arrays
|
||||
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
|
||||
*/
|
||||
async embed(request: EmbedRequestDto): Promise<EmbedResponseDto> {
|
||||
try {
|
||||
const r = await this.client.embed({
|
||||
model: request.model,
|
||||
input: request.input,
|
||||
truncate: request.truncate === "none" ? false : true,
|
||||
});
|
||||
return { model: r.model, embeddings: r.embeddings, totalDuration: r.total_duration };
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
this.logger.error("Embed failed: " + msg);
|
||||
throw new ServiceUnavailableException("Embedding failed: " + msg);
|
||||
const provider = await this.llmManager.getDefaultProvider();
|
||||
return await provider.embed(request);
|
||||
} catch (error: unknown) {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error);
|
||||
this.logger.error(`Embed failed: ${errorMessage}`);
|
||||
throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
|
||||
}
|
||||
}
|
||||
private buildMessages(req: ChatRequestDto): Message[] {
|
||||
const msgs: Message[] = [];
|
||||
if (req.systemPrompt && !req.messages.some((m) => m.role === "system"))
|
||||
msgs.push({ role: "system", content: req.systemPrompt });
|
||||
for (const m of req.messages) msgs.push({ role: m.role, content: m.content });
|
||||
return msgs;
|
||||
}
|
||||
getConfig(): OllamaConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user