import { Injectable, OnModuleInit, OnModuleDestroy, Logger } from "@nestjs/common";
import { NodeSDK } from "@opentelemetry/sdk-node";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
import { Resource } from "@opentelemetry/resources";
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
import type { Tracer, Span, SpanOptions } from "@opentelemetry/api";
import { trace, SpanStatusCode } from "@opentelemetry/api";

/**
 * Service responsible for OpenTelemetry distributed tracing.
 *
 * On module init it starts the OpenTelemetry Node SDK with an OTLP HTTP trace
 * exporter and Node auto-instrumentations, and it exposes small helpers for
 * creating spans and recording exceptions on them.
 *
 * Configuration is read from environment variables:
 * - `OTEL_ENABLED`: tracing is enabled unless this is exactly `"false"`.
 * - `OTEL_SERVICE_NAME`: service name resource attribute (default `"mosaic-api"`).
 * - `OTEL_EXPORTER_OTLP_ENDPOINT` / `OTEL_EXPORTER_JAEGER_ENDPOINT`: exporter URL
 *   (default `"http://localhost:4318/v1/traces"`).
 *
 * @example
 * ```typescript
 * const span = telemetryService.startSpan('operation-name', {
 *   attributes: { 'custom.key': 'value' }
 * });
 * try {
 *   // Perform operation
 * } catch (error) {
 *   telemetryService.recordException(span, error);
 * } finally {
 *   span.end();
 * }
 * ```
 */
@Injectable()
export class TelemetryService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(TelemetryService.name);
  private sdk?: NodeSDK;
  // Start with the same fallback tracer the disabled/failure paths use, so
  // getTracer()/startSpan() never see `undefined` before onModuleInit runs.
  private tracer: Tracer = trace.getTracer("noop");
  private readonly enabled: boolean;
  private readonly serviceName: string;
  private shutdownPromise?: Promise<void>;

  constructor() {
    this.enabled = process.env.OTEL_ENABLED !== "false";
    this.serviceName = process.env.OTEL_SERVICE_NAME ?? "mosaic-api";
  }

  /**
   * Initialize the OpenTelemetry SDK with the configured exporter.
   * This is called automatically by NestJS when the module is initialized.
   *
   * When tracing is disabled, or when SDK start-up throws, the service keeps
   * working with the tracer named `"noop"` instead of propagating the error.
   */
  onModuleInit(): void {
    if (!this.enabled) {
      this.logger.log("OpenTelemetry tracing is disabled");
      this.tracer = trace.getTracer("noop");
      return;
    }

    try {
      const exporter = this.createExporter();
      const resource = new Resource({
        [ATTR_SERVICE_NAME]: this.serviceName,
      });

      this.sdk = new NodeSDK({
        resource,
        traceExporter: exporter,
        instrumentations: [
          getNodeAutoInstrumentations({
            "@opentelemetry/instrumentation-fs": {
              enabled: false, // Disable file system instrumentation to reduce noise
            },
          }),
        ],
      });

      this.sdk.start();
      this.tracer = trace.getTracer(this.serviceName);
      this.logger.log(`OpenTelemetry SDK started for service: ${this.serviceName}`);
    } catch (error) {
      this.logger.error("Failed to initialize OpenTelemetry SDK", error);
      // Fallback to noop tracer to prevent application failures
      this.tracer = trace.getTracer("noop");
    }
  }

  /**
   * Shutdown the OpenTelemetry SDK gracefully.
   * This is called automatically by NestJS when the module is destroyed.
   *
   * Does nothing when no SDK was created. Repeated or concurrent calls share
   * one shutdown promise, and shutdown errors are logged rather than rethrown.
   *
   * @returns A promise that resolves once the shutdown attempt has finished
   */
  async onModuleDestroy(): Promise<void> {
    const sdk = this.sdk;
    if (!sdk) {
      return;
    }

    // Prevent multiple concurrent shutdowns: reuse the in-flight promise.
    if (!this.shutdownPromise) {
      this.shutdownPromise = (async (): Promise<void> => {
        try {
          await sdk.shutdown();
          this.logger.log("OpenTelemetry SDK shut down successfully");
        } catch (error) {
          this.logger.error("Error shutting down OpenTelemetry SDK", error);
        }
      })();
    }

    return this.shutdownPromise;
  }

  /**
   * Get the tracer instance for creating spans.
   *
   * @returns The tracer currently held by this service
   */
  getTracer(): Tracer {
    return this.tracer;
  }

  /**
   * Start a new span with the given name and options.
   * The caller is responsible for calling `span.end()`.
   *
   * @param name - The name of the span
   * @param options - Optional span configuration
   * @returns A new span instance
   *
   * @example
   * ```typescript
   * const span = telemetryService.startSpan('database-query', {
   *   attributes: {
   *     'db.system': 'postgresql',
   *     'db.statement': 'SELECT * FROM users'
   *   }
   * });
   * ```
   */
  startSpan(name: string, options?: SpanOptions): Span {
    return this.tracer.startSpan(name, options);
  }

  /**
   * Record an exception on a span and set its status to error.
   *
   * Accepts `unknown` so that a `catch` variable can be passed directly under
   * strict compiler settings. Values that are not `Error` instances are wrapped
   * in a new `Error` before being recorded.
   *
   * @param span - The span to record the exception on
   * @param error - The thrown value to record
   *
   * @example
   * ```typescript
   * try {
   *   // Some operation
   * } catch (error) {
   *   telemetryService.recordException(span, error);
   *   throw error;
   * }
   * ```
   */
  recordException(span: Span, error: unknown): void {
    const exception = this.toError(error);
    span.recordException(exception);
    span.setStatus({
      code: SpanStatusCode.ERROR,
      message: exception.message,
    });
  }

  /**
   * Convert an arbitrary thrown value into an `Error`.
   *
   * @param value - The thrown value
   * @returns `value` itself when it is already an `Error`, otherwise a new `Error`
   */
  private toError(value: unknown): Error {
    if (value instanceof Error) {
      return value;
    }
    if (typeof value === "string") {
      return new Error(value);
    }
    try {
      // JSON gives a more useful message than "[object Object]" for plain objects.
      // JSON.stringify returns undefined at runtime for e.g. `undefined` or functions.
      const serialized: string | undefined = JSON.stringify(value);
      return new Error(serialized ?? String(value));
    } catch {
      // JSON.stringify throws on circular structures and bigint values.
      return new Error(String(value));
    }
  }

  /**
   * Create the OTLP HTTP trace exporter from environment configuration.
   *
   * The URL is taken from `OTEL_EXPORTER_OTLP_ENDPOINT`, then
   * `OTEL_EXPORTER_JAEGER_ENDPOINT`, then the local default.
   *
   * @returns Configured trace exporter
   */
  private createExporter(): OTLPTraceExporter {
    const otlpEndpoint =
      process.env.OTEL_EXPORTER_OTLP_ENDPOINT ??
      process.env.OTEL_EXPORTER_JAEGER_ENDPOINT ??
      "http://localhost:4318/v1/traces";

    this.logger.log(`Using OTLP HTTP exporter: ${otlpEndpoint}`);

    // NOTE(review): the value is passed unchanged as the exporter's `url`, so it
    // presumably must already contain the full traces path (as the default
    // does) — confirm against the exporter's `url` option semantics.
    return new OTLPTraceExporter({
      url: otlpEndpoint,
    });
  }
}