feat(#312): Implement core OpenTelemetry infrastructure
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed

Complete the telemetry module with all acceptance criteria:

- Add service.version resource attribute from package.json
- Add deployment.environment resource attribute from env vars
- Add trace sampling configuration with OTEL_TRACES_SAMPLER_ARG
- Implement ParentBasedSampler for consistent distributed tracing
- Add comprehensive tests for SpanContextService (15 tests)
- Add comprehensive tests for LlmTelemetryDecorator (29 tests)
- Fix type safety issues (JSON.parse typing, template literals)
- Add security linter exception for package.json read

Test coverage: 74 tests passing, 85%+ coverage on telemetry module.

Fixes #312

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jason Woltje
2026-02-04 12:52:20 -06:00
parent 5d683d401e
commit 6516843612
7 changed files with 807 additions and 2 deletions

View File

@@ -3,9 +3,16 @@ import { NodeSDK } from "@opentelemetry/sdk-node";
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
import { Resource } from "@opentelemetry/resources";
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";
// `deployment.environment` is not part of the stable semantic conventions yet, so no
// ATTR_* constant is imported for it; the key is declared locally in the same dotted format.
const ATTR_DEPLOYMENT_ENVIRONMENT = "deployment.environment" as const;
import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base";
import type { Tracer, Span, SpanOptions } from "@opentelemetry/api";
import { trace, SpanStatusCode } from "@opentelemetry/api";
import { readFileSync } from "fs";
import { join } from "path";
/**
* Service responsible for OpenTelemetry distributed tracing.
@@ -40,6 +47,66 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
this.serviceName = process.env.OTEL_SERVICE_NAME ?? "mosaic-api";
}
/**
 * Resolve the service version from package.json.
 *
 * Reads the `package.json` located two directories above this module's `__dirname`
 * and returns its `version` field. Falls back to `"0.0.0"` when:
 * - the file cannot be read or is not valid JSON (a warning is logged), or
 * - the parsed document is not an object, or
 * - `version` is missing, empty, or not a string.
 *
 * @returns The service version string, or `"0.0.0"` if it cannot be determined
 */
private getServiceVersion(): string {
  const fallbackVersion = "0.0.0";
  try {
    const packageJsonPath = join(__dirname, "..", "..", "package.json");
    // eslint-disable-next-line security/detect-non-literal-fs-filename -- Safe: reading local package.json
    const parsed: unknown = JSON.parse(readFileSync(packageJsonPath, "utf-8"));

    // JSON.parse can yield any JSON value, so narrow at runtime rather than
    // asserting a shape the file is not guaranteed to have.
    if (typeof parsed !== "object" || parsed === null) {
      return fallbackVersion;
    }
    const version = (parsed as { version?: unknown }).version;
    return typeof version === "string" && version.length > 0 ? version : fallbackVersion;
  } catch (error) {
    this.logger.warn("Failed to read service version from package.json", error);
    return fallbackVersion;
  }
}
/**
 * Resolve the deployment environment name.
 *
 * Precedence: `OTEL_DEPLOYMENT_ENVIRONMENT` if defined, otherwise `NODE_ENV` if defined,
 * otherwise the literal `"development"`.
 *
 * @returns The deployment environment string
 */
private getDeploymentEnvironment(): string {
  // Work out the fallback first; the explicit OTEL variable overrides it when defined.
  const nodeEnvOrDefault = process.env.NODE_ENV ?? "development";
  return process.env.OTEL_DEPLOYMENT_ENVIRONMENT ?? nodeEnvOrDefault;
}
/**
 * Resolve the trace sampling ratio from `OTEL_TRACES_SAMPLER_ARG`.
 *
 * - Unset or empty: returns 1.0 (sample all traces).
 * - Not a valid number, including partly numeric input such as `"0,1"` or `"0.5x"`
 *   and whitespace-only input: logs a warning and returns 1.0.
 * - Numeric but outside [0.0, 1.0]: logs a warning and clamps into that range.
 *
 * @returns The sampling ratio between 0.0 and 1.0
 */
private getSamplingRatio(): number {
  const defaultRatio = 1.0;
  const envValue = process.env.OTEL_TRACES_SAMPLER_ARG;
  if (!envValue) {
    return defaultRatio; // Default: sample all traces
  }

  // Number() rejects partly numeric input that parseFloat() would silently truncate
  // (e.g. "0,1" -> 0, which would disable tracing without any warning).
  // Whitespace-only input is handled explicitly because Number("  ") is 0, not NaN.
  const trimmed = envValue.trim();
  const parsed = trimmed === "" ? Number.NaN : Number(trimmed);
  if (Number.isNaN(parsed)) {
    this.logger.warn(`Invalid OTEL_TRACES_SAMPLER_ARG value: ${envValue}, using default 1.0`);
    return defaultRatio;
  }

  // Clamp to valid range (also brings +/-Infinity back to 1.0 / 0.0)
  const clamped = Math.max(0.0, Math.min(1.0, parsed));
  if (clamped !== parsed) {
    this.logger.warn(
      `OTEL_TRACES_SAMPLER_ARG clamped from ${String(parsed)} to ${String(clamped)}`
    );
  }
  return clamped;
}
/**
* Initialize the OpenTelemetry SDK with configured exporters.
* This is called automatically by NestJS when the module is initialized.
@@ -53,12 +120,24 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
try {
const exporter = this.createExporter();
const serviceVersion = this.getServiceVersion();
const deploymentEnvironment = this.getDeploymentEnvironment();
const samplingRatio = this.getSamplingRatio();
const resource = new Resource({
[ATTR_SERVICE_NAME]: this.serviceName,
[ATTR_SERVICE_VERSION]: serviceVersion,
[ATTR_DEPLOYMENT_ENVIRONMENT]: deploymentEnvironment,
});
// Create sampler with parent-based strategy
const sampler = new ParentBasedSampler({
root: new TraceIdRatioBasedSampler(samplingRatio),
});
this.sdk = new NodeSDK({
resource,
sampler,
traceExporter: exporter,
instrumentations: [
getNodeAutoInstrumentations({
@@ -72,7 +151,9 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
this.sdk.start();
this.tracer = trace.getTracer(this.serviceName);
this.logger.log(`OpenTelemetry SDK started for service: ${this.serviceName}`);
this.logger.log(
`OpenTelemetry SDK started for service: ${this.serviceName} v${serviceVersion} (${deploymentEnvironment}, sampling: ${String(samplingRatio)})`
);
} catch (error) {
this.logger.error("Failed to initialize OpenTelemetry SDK", error);
// Fallback to noop tracer to prevent application failures