// stack/apps/gateway/src/agent/adapters/ollama.adapter.ts

import { Logger } from '@nestjs/common';
import type { ModelRegistry } from '@mariozechner/pi-coding-agent';
import type {
  CompletionEvent,
  CompletionParams,
  IProviderAdapter,
  ModelInfo,
  ProviderHealth,
} from '@mosaic/types';

/** Static description of a single embedding model entry in the catalogue below. */
type OllamaEmbeddingModelSpec = {
  id: string;
  contextWindow: number;
  dimensions: number;
};

/**
 * Catalogue of Ollama embedding models known to this adapter.
 *
 * The list is hard-coded; nothing in this file checks that the target Ollama
 * instance actually has these models available.
 */
const OLLAMA_EMBEDDING_MODELS: readonly OllamaEmbeddingModelSpec[] = [
  {
    id: 'nomic-embed-text',
    contextWindow: 8192,
    dimensions: 768,
  },
  {
    id: 'mxbai-embed-large',
    contextWindow: 512,
    dimensions: 1024,
  },
];

/**
 * JSON body shape expected back from the Ollama embeddings endpoint.
 *
 * `embedding` is optional because the payload is only cast to this type,
 * not validated; consumers must check it at runtime before use.
 */
type OllamaEmbeddingResponse = {
  embedding?: number[];
};

/**
 * Ollama provider adapter.
 *
 * Registers local Ollama models with the Pi ModelRegistry via the OpenAI-compatible
 * completions API. Also exposes embedding models and an `embed()` method for
 * vector generation (used by EmbeddingService / M3-009).
 *
 * Configuration is driven by environment variables:
 *   OLLAMA_BASE_URL or OLLAMA_HOST — base URL of the Ollama instance. OLLAMA_BASE_URL wins
 *                                    when both are non-blank; surrounding whitespace and
 *                                    trailing slashes are ignored.
 *   OLLAMA_MODELS              — comma-separated list of model IDs (default: llama3.2,codellama,mistral)
 */
export class OllamaAdapter implements IProviderAdapter {
  readonly name = 'ollama';

  private readonly logger = new Logger(OllamaAdapter.name);
  private registeredModels: ModelInfo[] = [];

  constructor(private readonly registry: ModelRegistry) {}

  /**
   * Register the configured completion models with the Pi ModelRegistry and
   * record both completion and embedding models for `listModels()`.
   *
   * Does nothing (apart from a debug log) when no base URL is configured.
   */
  async register(): Promise<void> {
    const ollamaUrl = this.resolveBaseUrl();
    if (!ollamaUrl) {
      this.logger.debug('Skipping Ollama provider registration: OLLAMA_BASE_URL not set');
      return;
    }

    const modelsEnv = process.env['OLLAMA_MODELS'] ?? 'llama3.2,codellama,mistral';
    // A Set drops repeated IDs while keeping first-seen order, so a model listed
    // twice in OLLAMA_MODELS is registered only once.
    const modelIds = [
      ...new Set(
        modelsEnv
          .split(',')
          .map((id: string) => id.trim())
          .filter(Boolean),
      ),
    ];

    this.registry.registerProvider('ollama', {
      baseUrl: `${ollamaUrl}/v1`,
      apiKey: 'ollama',
      api: 'openai-completions' as never,
      models: modelIds.map((id) => ({
        id,
        name: id,
        reasoning: false,
        input: ['text'] as ('text' | 'image')[],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow: 8192,
        maxTokens: 4096,
      })),
    });

    // Chat / completion models
    const completionModels: ModelInfo[] = modelIds.map((id) => ({
      id,
      provider: 'ollama',
      name: id,
      reasoning: false,
      contextWindow: 8192,
      maxTokens: 4096,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    }));

    // Embedding models (tracked in registeredModels but not in Pi registry,
    // which only handles completion models)
    const embeddingModels: ModelInfo[] = OLLAMA_EMBEDDING_MODELS.map((em) => ({
      id: em.id,
      provider: 'ollama',
      name: em.id,
      reasoning: false,
      contextWindow: em.contextWindow,
      maxTokens: 0,
      inputTypes: ['text'] as ('text' | 'image')[],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    }));

    this.registeredModels = [...completionModels, ...embeddingModels];

    this.logger.log(
      `Ollama provider registered at ${ollamaUrl} with models: ${modelIds.join(', ')} ` +
        `and embedding models: ${OLLAMA_EMBEDDING_MODELS.map((em) => em.id).join(', ')}`,
    );
  }

  /**
   * List the models recorded by the last successful `register()` call.
   *
   * @returns A fresh array on every call, so callers cannot mutate the adapter's own list.
   */
  listModels(): ModelInfo[] {
    return [...this.registeredModels];
  }

  /**
   * Probe the instance with `GET {baseUrl}/v1/models` (5 s timeout).
   *
   * @returns `healthy` on a 2xx response, `degraded` on any other HTTP status,
   *          and `down` when no base URL is configured or the request itself fails.
   */
  async healthCheck(): Promise<ProviderHealth> {
    const ollamaUrl = this.resolveBaseUrl();
    if (!ollamaUrl) {
      return {
        status: 'down',
        lastChecked: new Date().toISOString(),
        error: 'OLLAMA_BASE_URL not configured',
      };
    }

    const checkUrl = `${ollamaUrl}/v1/models`;
    const start = Date.now();

    try {
      const res = await fetch(checkUrl, {
        method: 'GET',
        headers: { Accept: 'application/json' },
        signal: AbortSignal.timeout(5000),
      });
      const latencyMs = Date.now() - start;

      // Only the status line matters here; release the unread body explicitly.
      await this.discardBody(res);

      if (!res.ok) {
        return {
          status: 'degraded',
          latencyMs,
          lastChecked: new Date().toISOString(),
          error: `HTTP ${res.status}`,
        };
      }

      return { status: 'healthy', latencyMs, lastChecked: new Date().toISOString() };
    } catch (err) {
      const latencyMs = Date.now() - start;
      const error = err instanceof Error ? err.message : String(err);
      return { status: 'down', latencyMs, lastChecked: new Date().toISOString(), error };
    }
  }

  /**
   * Generate an embedding vector for the given text using Ollama's /api/embeddings endpoint.
   *
   * Defaults to 'nomic-embed-text' when no model is specified.
   * Intended for use by EmbeddingService (M3-009).
   *
   * @param text - The input text to embed.
   * @param model - Optional embedding model ID (default: 'nomic-embed-text').
   * @returns A float array representing the embedding vector.
   * @throws Error when no base URL is configured, when the HTTP status is not 2xx,
   *         or when the response body carries no `embedding` array. Network and
   *         timeout (30 s) failures from `fetch` propagate unchanged.
   */
  async embed(text: string, model = 'nomic-embed-text'): Promise<number[]> {
    const ollamaUrl = this.resolveBaseUrl();
    if (!ollamaUrl) {
      throw new Error('OllamaAdapter: OLLAMA_BASE_URL not configured');
    }

    const embeddingUrl = `${ollamaUrl}/api/embeddings`;

    const res = await fetch(embeddingUrl, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt: text }),
      signal: AbortSignal.timeout(30000),
    });

    if (!res.ok) {
      // The error body is not used; release it before surfacing the failure.
      await this.discardBody(res);
      throw new Error(`OllamaAdapter.embed: request failed with HTTP ${res.status}`);
    }

    // The payload is untrusted JSON: it may be `null` or lack the field entirely.
    const json = (await res.json()) as OllamaEmbeddingResponse | null;
    const embedding = json?.embedding;

    if (!Array.isArray(embedding)) {
      throw new Error('OllamaAdapter.embed: unexpected response — missing embedding array');
    }

    return embedding;
  }

  /**
   * createCompletion is reserved for future direct-completion use.
   * The current integration routes completions through Pi SDK's ModelRegistry/AgentSession.
   *
   * @throws Error always; because this is an async generator, the error surfaces
   *         on the first iteration step rather than at call time.
   */
  async *createCompletion(_params: CompletionParams): AsyncIterable<CompletionEvent> {
    throw new Error(
      'OllamaAdapter.createCompletion is not yet implemented. ' +
        'Use Pi SDK AgentSession for completions.',
    );
    // Satisfy the AsyncGenerator return type — unreachable but required for TypeScript.
    yield undefined as never;
  }

  /**
   * Resolve the Ollama base URL from the environment.
   *
   * Checks OLLAMA_BASE_URL, then OLLAMA_HOST, and returns the first value that
   * is non-blank after trimming whitespace and trailing slashes. Normalising
   * here keeps `${baseUrl}/v1` and `${baseUrl}/api/...` free of double slashes.
   *
   * @returns The normalised base URL, or `undefined` when neither variable is usable.
   */
  private resolveBaseUrl(): string | undefined {
    for (const key of ['OLLAMA_BASE_URL', 'OLLAMA_HOST']) {
      const candidate = (process.env[key] ?? '').trim().replace(/\/+$/, '');
      if (candidate) {
        return candidate;
      }
    }
    return undefined;
  }

  /**
   * Cancel a response body that will not be read, so the underlying connection
   * is not left waiting on garbage collection. Cancellation errors are ignored
   * because they must not change the caller's outcome.
   */
  private async discardBody(res: {
    readonly body?: { cancel(): Promise<void> } | null;
  }): Promise<void> {
    await res.body?.cancel().catch(() => undefined);
  }
}