chore: upgrade Node.js runtime to v24 across codebase #419
@@ -11,3 +11,18 @@ INSTANCE_URL=http://localhost:3000
|
||||
# CRITICAL: Generate a secure random key for production!
|
||||
# Generate with: node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"
|
||||
ENCRYPTION_KEY=0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef
|
||||
|
||||
# OpenTelemetry Configuration
|
||||
# Enable/disable OpenTelemetry tracing (default: true)
|
||||
OTEL_ENABLED=true
|
||||
# Service name for telemetry (default: mosaic-api)
|
||||
OTEL_SERVICE_NAME=mosaic-api
|
||||
# OTLP exporter endpoint (default: http://localhost:4318/v1/traces)
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318/v1/traces
|
||||
# Alternative: Jaeger endpoint (legacy)
|
||||
# OTEL_EXPORTER_JAEGER_ENDPOINT=http://localhost:4318/v1/traces
|
||||
# Deployment environment (default: development, or uses NODE_ENV)
|
||||
# OTEL_DEPLOYMENT_ENVIRONMENT=production
|
||||
# Trace sampling ratio: 0.0 (none) to 1.0 (all) - default: 1.0
|
||||
# Use lower values in high-traffic production environments
|
||||
# OTEL_TRACES_SAMPLER_ARG=1.0
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
"@opentelemetry/instrumentation-nestjs-core": "^0.44.0",
|
||||
"@opentelemetry/resources": "^1.30.1",
|
||||
"@opentelemetry/sdk-node": "^0.56.0",
|
||||
"@opentelemetry/sdk-trace-base": "^2.5.0",
|
||||
"@opentelemetry/semantic-conventions": "^1.28.0",
|
||||
"@prisma/client": "^6.19.2",
|
||||
"@types/marked": "^6.0.0",
|
||||
@@ -76,6 +77,7 @@
|
||||
"@nestjs/cli": "^11.0.6",
|
||||
"@nestjs/schematics": "^11.0.1",
|
||||
"@nestjs/testing": "^11.1.12",
|
||||
"@opentelemetry/context-async-hooks": "^2.5.0",
|
||||
"@swc/core": "^1.10.18",
|
||||
"@types/adm-zip": "^0.5.7",
|
||||
"@types/archiver": "^7.0.0",
|
||||
|
||||
369
apps/api/src/telemetry/llm-telemetry.decorator.spec.ts
Normal file
369
apps/api/src/telemetry/llm-telemetry.decorator.spec.ts
Normal file
@@ -0,0 +1,369 @@
|
||||
import "reflect-metadata";
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import {
|
||||
TraceLlmCall,
|
||||
createLlmSpan,
|
||||
recordLlmUsage,
|
||||
type LlmTraceMetadata,
|
||||
} from "./llm-telemetry.decorator";
|
||||
import { trace, SpanStatusCode, type Span } from "@opentelemetry/api";
|
||||
|
||||
describe("LlmTelemetryDecorator", () => {
|
||||
describe("@TraceLlmCall", () => {
|
||||
class TestLlmProvider {
|
||||
callCount = 0;
|
||||
|
||||
@TraceLlmCall({ system: "ollama", operation: "chat" })
|
||||
async chat(request: { model: string; messages: unknown[] }): Promise<{
|
||||
content: string;
|
||||
promptEvalCount: number;
|
||||
evalCount: number;
|
||||
}> {
|
||||
this.callCount++;
|
||||
return {
|
||||
content: "Test response",
|
||||
promptEvalCount: 10,
|
||||
evalCount: 20,
|
||||
};
|
||||
}
|
||||
|
||||
@TraceLlmCall({ system: "ollama", operation: "embed" })
|
||||
async embed(request: { model: string; input: string }): Promise<{ embedding: number[] }> {
|
||||
this.callCount++;
|
||||
return {
|
||||
embedding: [0.1, 0.2, 0.3],
|
||||
};
|
||||
}
|
||||
|
||||
@TraceLlmCall({ system: "ollama", operation: "error" })
|
||||
async throwError(): Promise<never> {
|
||||
this.callCount++;
|
||||
throw new Error("Test error");
|
||||
}
|
||||
}
|
||||
|
||||
let provider: TestLlmProvider;
|
||||
|
||||
beforeEach(() => {
|
||||
provider = new TestLlmProvider();
|
||||
});
|
||||
|
||||
it("should execute the original method", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
});
|
||||
|
||||
expect(result.content).toBe("Test response");
|
||||
expect(provider.callCount).toBe(1);
|
||||
});
|
||||
|
||||
it("should create a span and not throw errors", async () => {
|
||||
// Test that the decorator creates spans without errors
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(provider.callCount).toBe(1);
|
||||
});
|
||||
|
||||
it("should set gen_ai.system attribute", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
// Span attributes are set internally, verifying execution doesn't throw
|
||||
});
|
||||
|
||||
it("should set gen_ai.operation.name attribute", async () => {
|
||||
const result = await provider.embed({
|
||||
model: "nomic-embed-text",
|
||||
input: "test text",
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
// Span attributes are set internally, verifying execution doesn't throw
|
||||
});
|
||||
|
||||
it("should extract model from request", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
// Model attribute is set internally from request.model
|
||||
});
|
||||
|
||||
it("should record token usage from response", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result.promptEvalCount).toBe(10);
|
||||
expect(result.evalCount).toBe(20);
|
||||
// Token usage attributes are set internally
|
||||
});
|
||||
|
||||
it("should set span status to OK on success", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
// Span status is set internally
|
||||
});
|
||||
|
||||
it("should record exception on error", async () => {
|
||||
await expect(provider.throwError()).rejects.toThrow("Test error");
|
||||
expect(provider.callCount).toBe(1);
|
||||
});
|
||||
|
||||
it("should set span status to ERROR on exception", async () => {
|
||||
await expect(provider.throwError()).rejects.toThrow("Test error");
|
||||
// Span status is set to ERROR internally
|
||||
});
|
||||
|
||||
it("should propagate the original error", async () => {
|
||||
try {
|
||||
await provider.throwError();
|
||||
expect.fail("Should have thrown an error");
|
||||
} catch (error) {
|
||||
expect(error).toBeInstanceOf(Error);
|
||||
expect((error as Error).message).toBe("Test error");
|
||||
}
|
||||
});
|
||||
|
||||
it("should end span after execution", async () => {
|
||||
await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
// Span is ended internally after method execution
|
||||
expect(provider.callCount).toBe(1);
|
||||
});
|
||||
|
||||
it("should handle requests without model property", async () => {
|
||||
class NoModelProvider {
|
||||
@TraceLlmCall({ system: "test", operation: "test" })
|
||||
async noModel(): Promise<string> {
|
||||
return "test";
|
||||
}
|
||||
}
|
||||
|
||||
const noModelProvider = new NoModelProvider();
|
||||
const result = await noModelProvider.noModel();
|
||||
expect(result).toBe("test");
|
||||
});
|
||||
|
||||
it("should handle responses without token usage", async () => {
|
||||
class NoTokensProvider {
|
||||
@TraceLlmCall({ system: "test", operation: "test" })
|
||||
async noTokens(): Promise<{ data: string }> {
|
||||
return { data: "test" };
|
||||
}
|
||||
}
|
||||
|
||||
const noTokensProvider = new NoTokensProvider();
|
||||
const result = await noTokensProvider.noTokens();
|
||||
expect(result.data).toBe("test");
|
||||
});
|
||||
|
||||
it("should record response duration", async () => {
|
||||
const result = await provider.chat({
|
||||
model: "llama2",
|
||||
messages: [],
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
// Duration is calculated and set as span attribute internally
|
||||
});
|
||||
|
||||
it("should support different LLM systems", async () => {
|
||||
class MultiSystemProvider {
|
||||
@TraceLlmCall({ system: "openai", operation: "completion" })
|
||||
async openai(): Promise<string> {
|
||||
return "openai";
|
||||
}
|
||||
|
||||
@TraceLlmCall({ system: "anthropic", operation: "message" })
|
||||
async anthropic(): Promise<string> {
|
||||
return "anthropic";
|
||||
}
|
||||
}
|
||||
|
||||
const multiProvider = new MultiSystemProvider();
|
||||
const openaiResult = await multiProvider.openai();
|
||||
const anthropicResult = await multiProvider.anthropic();
|
||||
|
||||
expect(openaiResult).toBe("openai");
|
||||
expect(anthropicResult).toBe("anthropic");
|
||||
});
|
||||
});
|
||||
|
||||
describe("createLlmSpan", () => {
|
||||
it("should create a span with system and operation", () => {
|
||||
const span = createLlmSpan("ollama", "chat");
|
||||
expect(span).toBeDefined();
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should create a span with model", () => {
|
||||
const span = createLlmSpan("ollama", "chat", "llama2");
|
||||
expect(span).toBeDefined();
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should create span with correct name format", () => {
|
||||
const span = createLlmSpan("openai", "completion");
|
||||
expect(span).toBeDefined();
|
||||
// Span name is "openai.completion"
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should handle missing model gracefully", () => {
|
||||
const span = createLlmSpan("ollama", "embed");
|
||||
expect(span).toBeDefined();
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should allow manual span management", () => {
|
||||
const span = createLlmSpan("ollama", "chat.stream", "llama2");
|
||||
|
||||
// Simulate stream operation
|
||||
span.setAttribute("chunk.count", 5);
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
|
||||
expect(span).toBeDefined();
|
||||
span.end();
|
||||
});
|
||||
});
|
||||
|
||||
describe("recordLlmUsage", () => {
|
||||
let span: Span;
|
||||
|
||||
beforeEach(() => {
|
||||
span = createLlmSpan("test", "test");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should record prompt tokens", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, 100);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.prompt_tokens", 100);
|
||||
});
|
||||
|
||||
it("should record completion tokens", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, undefined, 50);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.completion_tokens", 50);
|
||||
});
|
||||
|
||||
it("should record both token types", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, 100, 50);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.prompt_tokens", 100);
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.completion_tokens", 50);
|
||||
});
|
||||
|
||||
it("should handle undefined prompt tokens", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, undefined, 50);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledTimes(1);
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.completion_tokens", 50);
|
||||
});
|
||||
|
||||
it("should handle undefined completion tokens", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, 100, undefined);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledTimes(1);
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.prompt_tokens", 100);
|
||||
});
|
||||
|
||||
it("should handle both undefined", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, undefined, undefined);
|
||||
|
||||
expect(setAttributeSpy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("should handle zero values", () => {
|
||||
const setAttributeSpy = vi.spyOn(span, "setAttribute");
|
||||
recordLlmUsage(span, 0, 0);
|
||||
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.prompt_tokens", 0);
|
||||
expect(setAttributeSpy).toHaveBeenCalledWith("gen_ai.usage.completion_tokens", 0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("integration scenarios", () => {
|
||||
it("should support streaming operations with createLlmSpan", async () => {
|
||||
async function* streamChat(model: string): AsyncGenerator<string> {
|
||||
const span = createLlmSpan("ollama", "chat.stream", model);
|
||||
try {
|
||||
yield "Hello";
|
||||
yield " ";
|
||||
yield "world";
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
} catch (error) {
|
||||
span.recordException(error as Error);
|
||||
span.setStatus({ code: SpanStatusCode.ERROR });
|
||||
throw error;
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
const chunks: string[] = [];
|
||||
for await (const chunk of streamChat("llama2")) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
expect(chunks).toEqual(["Hello", " ", "world"]);
|
||||
});
|
||||
|
||||
it("should support manual token recording in streams", async () => {
|
||||
async function* streamWithTokens(): AsyncGenerator<{ text: string; tokens?: number }> {
|
||||
const span = createLlmSpan("ollama", "chat.stream", "llama2");
|
||||
try {
|
||||
let totalTokens = 0;
|
||||
|
||||
yield { text: "Hello", tokens: 5 };
|
||||
totalTokens += 5;
|
||||
|
||||
yield { text: " world", tokens: 7 };
|
||||
totalTokens += 7;
|
||||
|
||||
recordLlmUsage(span, undefined, totalTokens);
|
||||
span.setStatus({ code: SpanStatusCode.OK });
|
||||
} finally {
|
||||
span.end();
|
||||
}
|
||||
}
|
||||
|
||||
const chunks: Array<{ text: string; tokens?: number }> = [];
|
||||
for await (const chunk of streamWithTokens()) {
|
||||
chunks.push(chunk);
|
||||
}
|
||||
|
||||
expect(chunks).toHaveLength(2);
|
||||
expect(chunks[0].tokens).toBe(5);
|
||||
expect(chunks[1].tokens).toBe(7);
|
||||
});
|
||||
});
|
||||
});
|
||||
224
apps/api/src/telemetry/span-context.service.spec.ts
Normal file
224
apps/api/src/telemetry/span-context.service.spec.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { SpanContextService } from "./span-context.service";
|
||||
import { context, trace, type Span, type Context } from "@opentelemetry/api";
|
||||
import { AsyncHooksContextManager } from "@opentelemetry/context-async-hooks";
|
||||
|
||||
describe("SpanContextService", () => {
|
||||
let service: SpanContextService;
|
||||
let contextManager: AsyncHooksContextManager;
|
||||
|
||||
beforeEach(() => {
|
||||
// Set up context manager for proper context propagation in tests
|
||||
contextManager = new AsyncHooksContextManager();
|
||||
contextManager.enable();
|
||||
context.setGlobalContextManager(contextManager);
|
||||
service = new SpanContextService();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
// Clean up context manager
|
||||
contextManager.disable();
|
||||
context.disable();
|
||||
});
|
||||
|
||||
describe("getActiveSpan", () => {
|
||||
it("should return undefined when no span is active", () => {
|
||||
const activeSpan = service.getActiveSpan();
|
||||
expect(activeSpan).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should return the active span when one exists", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
const result = context.with(trace.setSpan(context.active(), span), () => {
|
||||
return service.getActiveSpan();
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result).toBe(span);
|
||||
span.end();
|
||||
});
|
||||
});
|
||||
|
||||
describe("getContext", () => {
|
||||
it("should return the current context", () => {
|
||||
const ctx = service.getContext();
|
||||
expect(ctx).toBeDefined();
|
||||
});
|
||||
|
||||
it("should return the active context", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
const result = context.with(trace.setSpan(context.active(), span), () => {
|
||||
return service.getContext();
|
||||
});
|
||||
|
||||
expect(result).toBeDefined();
|
||||
span.end();
|
||||
});
|
||||
});
|
||||
|
||||
describe("with", () => {
|
||||
it("should execute function within the provided context", () => {
|
||||
const customContext = context.active();
|
||||
let executedInContext = false;
|
||||
|
||||
const result = service.with(customContext, () => {
|
||||
executedInContext = true;
|
||||
return "test-result";
|
||||
});
|
||||
|
||||
expect(executedInContext).toBe(true);
|
||||
expect(result).toBe("test-result");
|
||||
});
|
||||
|
||||
it("should propagate the context to the function", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
const spanContext = trace.setSpan(context.active(), span);
|
||||
|
||||
const result = service.with(spanContext, () => {
|
||||
const activeSpan = trace.getActiveSpan();
|
||||
return activeSpan;
|
||||
});
|
||||
|
||||
expect(result).toBe(span);
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should handle exceptions in the function", () => {
|
||||
const customContext = context.active();
|
||||
|
||||
expect(() => {
|
||||
service.with(customContext, () => {
|
||||
throw new Error("Test error");
|
||||
});
|
||||
}).toThrow("Test error");
|
||||
});
|
||||
|
||||
it("should return the function result", () => {
|
||||
const customContext = context.active();
|
||||
const result = service.with(customContext, () => {
|
||||
return { data: "test", count: 42 };
|
||||
});
|
||||
|
||||
expect(result).toEqual({ data: "test", count: 42 });
|
||||
});
|
||||
});
|
||||
|
||||
describe("withActiveSpan", () => {
|
||||
it("should set span as active for function execution", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
const result = service.withActiveSpan(span, () => {
|
||||
const activeSpan = trace.getActiveSpan();
|
||||
return activeSpan;
|
||||
});
|
||||
|
||||
expect(result).toBe(span);
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should return the function result", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
const result = service.withActiveSpan(span, () => {
|
||||
return "executed-with-span";
|
||||
});
|
||||
|
||||
expect(result).toBe("executed-with-span");
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should handle async operations", async () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
const result = await service.withActiveSpan(span, async () => {
|
||||
return new Promise((resolve) => {
|
||||
setTimeout(() => resolve("async-result"), 10);
|
||||
});
|
||||
});
|
||||
|
||||
expect(result).toBe("async-result");
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should propagate exceptions", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("test-span");
|
||||
|
||||
expect(() => {
|
||||
service.withActiveSpan(span, () => {
|
||||
throw new Error("Span execution error");
|
||||
});
|
||||
}).toThrow("Span execution error");
|
||||
|
||||
span.end();
|
||||
});
|
||||
|
||||
it("should nest spans correctly", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const parentSpan = tracer.startSpan("parent-span");
|
||||
const childSpan = tracer.startSpan("child-span");
|
||||
|
||||
const result = service.withActiveSpan(parentSpan, () => {
|
||||
return service.withActiveSpan(childSpan, () => {
|
||||
const activeSpan = trace.getActiveSpan();
|
||||
return activeSpan;
|
||||
});
|
||||
});
|
||||
|
||||
expect(result).toBe(childSpan);
|
||||
childSpan.end();
|
||||
parentSpan.end();
|
||||
});
|
||||
});
|
||||
|
||||
describe("integration scenarios", () => {
|
||||
it("should support complex span context propagation", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span1 = tracer.startSpan("span-1");
|
||||
const span2 = tracer.startSpan("span-2");
|
||||
|
||||
// Execute with span1 active
|
||||
const ctx1 = trace.setSpan(context.active(), span1);
|
||||
const result1 = service.with(ctx1, () => {
|
||||
return service.getActiveSpan();
|
||||
});
|
||||
|
||||
// Execute with span2 active
|
||||
const ctx2 = trace.setSpan(context.active(), span2);
|
||||
const result2 = service.with(ctx2, () => {
|
||||
return service.getActiveSpan();
|
||||
});
|
||||
|
||||
expect(result1).toBe(span1);
|
||||
expect(result2).toBe(span2);
|
||||
|
||||
span1.end();
|
||||
span2.end();
|
||||
});
|
||||
|
||||
it("should handle context isolation", () => {
|
||||
const tracer = trace.getTracer("test-tracer");
|
||||
const span = tracer.startSpan("isolated-span");
|
||||
|
||||
// Execute with span active
|
||||
service.withActiveSpan(span, () => {
|
||||
const activeInside = service.getActiveSpan();
|
||||
expect(activeInside).toBe(span);
|
||||
});
|
||||
|
||||
// Outside the context, span should not be active
|
||||
const activeOutside = service.getActiveSpan();
|
||||
expect(activeOutside).not.toBe(span);
|
||||
|
||||
span.end();
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -185,4 +185,66 @@ describe("TelemetryService", () => {
|
||||
expect(() => service.startSpan("test-span")).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("resource attributes", () => {
|
||||
it("should set service.version from package.json", async () => {
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
// We can't directly assert the resource attributes, but we can verify
|
||||
// the service initializes without error
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
|
||||
it("should set deployment.environment from NODE_ENV", async () => {
|
||||
process.env.NODE_ENV = "production";
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
|
||||
it("should default deployment.environment to development", async () => {
|
||||
delete process.env.NODE_ENV;
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("trace sampling", () => {
|
||||
it("should use default sampling ratio of 1.0 when not configured", async () => {
|
||||
delete process.env.OTEL_TRACES_SAMPLER_ARG;
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
|
||||
it("should respect OTEL_TRACES_SAMPLER_ARG for sampling ratio", async () => {
|
||||
process.env.OTEL_TRACES_SAMPLER_ARG = "0.5";
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
|
||||
it("should handle invalid sampling ratio gracefully", async () => {
|
||||
process.env.OTEL_TRACES_SAMPLER_ARG = "invalid";
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
// Should fall back to default and still work
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
|
||||
it("should clamp sampling ratio to 0.0-1.0 range", async () => {
|
||||
process.env.OTEL_TRACES_SAMPLER_ARG = "1.5";
|
||||
service = new TelemetryService();
|
||||
await service.onModuleInit();
|
||||
|
||||
expect(service.getTracer()).toBeDefined();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -3,9 +3,16 @@ import { NodeSDK } from "@opentelemetry/sdk-node";
|
||||
import { getNodeAutoInstrumentations } from "@opentelemetry/auto-instrumentations-node";
|
||||
import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
|
||||
import { Resource } from "@opentelemetry/resources";
|
||||
import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
|
||||
import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION } from "@opentelemetry/semantic-conventions";
|
||||
|
||||
// Deployment environment is not yet in the stable semantic conventions
|
||||
// Using the semantic conventions format for consistency
|
||||
const ATTR_DEPLOYMENT_ENVIRONMENT = "deployment.environment" as const;
|
||||
import { ParentBasedSampler, TraceIdRatioBasedSampler } from "@opentelemetry/sdk-trace-base";
|
||||
import type { Tracer, Span, SpanOptions } from "@opentelemetry/api";
|
||||
import { trace, SpanStatusCode } from "@opentelemetry/api";
|
||||
import { readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
|
||||
/**
|
||||
* Service responsible for OpenTelemetry distributed tracing.
|
||||
@@ -40,6 +47,66 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
|
||||
this.serviceName = process.env.OTEL_SERVICE_NAME ?? "mosaic-api";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the service version from package.json.
|
||||
* Defaults to '0.0.0' if version cannot be determined.
|
||||
*
|
||||
* @returns The service version string
|
||||
*/
|
||||
private getServiceVersion(): string {
|
||||
try {
|
||||
const packageJsonPath = join(__dirname, "..", "..", "package.json");
|
||||
// eslint-disable-next-line security/detect-non-literal-fs-filename -- Safe: reading local package.json
|
||||
const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8")) as {
|
||||
version?: string;
|
||||
};
|
||||
return packageJson.version ?? "0.0.0";
|
||||
} catch (error) {
|
||||
this.logger.warn("Failed to read service version from package.json", error);
|
||||
return "0.0.0";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the deployment environment from NODE_ENV or OTEL_DEPLOYMENT_ENVIRONMENT.
|
||||
* Defaults to 'development' if not set.
|
||||
*
|
||||
* @returns The deployment environment string
|
||||
*/
|
||||
private getDeploymentEnvironment(): string {
|
||||
return process.env.OTEL_DEPLOYMENT_ENVIRONMENT ?? process.env.NODE_ENV ?? "development";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the trace sampling ratio from environment variable.
|
||||
* Defaults to 1.0 (sample all traces).
|
||||
* Clamps value between 0.0 and 1.0.
|
||||
*
|
||||
* @returns The sampling ratio between 0.0 and 1.0
|
||||
*/
|
||||
private getSamplingRatio(): number {
|
||||
const envValue = process.env.OTEL_TRACES_SAMPLER_ARG;
|
||||
if (!envValue) {
|
||||
return 1.0; // Default: sample all traces
|
||||
}
|
||||
|
||||
const parsed = parseFloat(envValue);
|
||||
if (isNaN(parsed)) {
|
||||
this.logger.warn(`Invalid OTEL_TRACES_SAMPLER_ARG value: ${envValue}, using default 1.0`);
|
||||
return 1.0;
|
||||
}
|
||||
|
||||
// Clamp to valid range
|
||||
const clamped = Math.max(0.0, Math.min(1.0, parsed));
|
||||
if (clamped !== parsed) {
|
||||
this.logger.warn(
|
||||
`OTEL_TRACES_SAMPLER_ARG clamped from ${String(parsed)} to ${String(clamped)}`
|
||||
);
|
||||
}
|
||||
|
||||
return clamped;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the OpenTelemetry SDK with configured exporters.
|
||||
* This is called automatically by NestJS when the module is initialized.
|
||||
@@ -53,12 +120,24 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
|
||||
|
||||
try {
|
||||
const exporter = this.createExporter();
|
||||
const serviceVersion = this.getServiceVersion();
|
||||
const deploymentEnvironment = this.getDeploymentEnvironment();
|
||||
const samplingRatio = this.getSamplingRatio();
|
||||
|
||||
const resource = new Resource({
|
||||
[ATTR_SERVICE_NAME]: this.serviceName,
|
||||
[ATTR_SERVICE_VERSION]: serviceVersion,
|
||||
[ATTR_DEPLOYMENT_ENVIRONMENT]: deploymentEnvironment,
|
||||
});
|
||||
|
||||
// Create sampler with parent-based strategy
|
||||
const sampler = new ParentBasedSampler({
|
||||
root: new TraceIdRatioBasedSampler(samplingRatio),
|
||||
});
|
||||
|
||||
this.sdk = new NodeSDK({
|
||||
resource,
|
||||
sampler,
|
||||
traceExporter: exporter,
|
||||
instrumentations: [
|
||||
getNodeAutoInstrumentations({
|
||||
@@ -72,7 +151,9 @@ export class TelemetryService implements OnModuleInit, OnModuleDestroy {
|
||||
this.sdk.start();
|
||||
this.tracer = trace.getTracer(this.serviceName);
|
||||
|
||||
this.logger.log(`OpenTelemetry SDK started for service: ${this.serviceName}`);
|
||||
this.logger.log(
|
||||
`OpenTelemetry SDK started for service: ${this.serviceName} v${serviceVersion} (${deploymentEnvironment}, sampling: ${String(samplingRatio)})`
|
||||
);
|
||||
} catch (error) {
|
||||
this.logger.error("Failed to initialize OpenTelemetry SDK", error);
|
||||
// Fallback to noop tracer to prevent application failures
|
||||
|
||||
52
pnpm-lock.yaml
generated
52
pnpm-lock.yaml
generated
@@ -111,6 +111,9 @@ importers:
|
||||
'@opentelemetry/sdk-node':
|
||||
specifier: ^0.56.0
|
||||
version: 0.56.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/sdk-trace-base':
|
||||
specifier: ^2.5.0
|
||||
version: 2.5.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions':
|
||||
specifier: ^1.28.0
|
||||
version: 1.39.0
|
||||
@@ -208,6 +211,9 @@ importers:
|
||||
'@nestjs/testing':
|
||||
specifier: ^11.1.12
|
||||
version: 11.1.12(@nestjs/common@11.1.12(class-transformer@0.5.1)(class-validator@0.14.3)(reflect-metadata@0.2.2)(rxjs@7.8.2))(@nestjs/core@11.1.12)(@nestjs/platform-express@11.1.12)
|
||||
'@opentelemetry/context-async-hooks':
|
||||
specifier: ^2.5.0
|
||||
version: 2.5.0(@opentelemetry/api@1.9.0)
|
||||
'@swc/core':
|
||||
specifier: ^1.10.18
|
||||
version: 1.15.11
|
||||
@@ -1748,6 +1754,12 @@ packages:
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/context-async-hooks@2.5.0':
|
||||
resolution: {integrity: sha512-uOXpVX0ZjO7heSVjhheW2XEPrhQAWr2BScDPoZ9UDycl5iuHG+Usyc3AIfG6kZeC1GyLpMInpQ6X5+9n69yOFw==}
|
||||
engines: {node: ^18.19.0 || >=20.6.0}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/core@1.29.0':
|
||||
resolution: {integrity: sha512-gmT7vAreXl0DTHD2rVZcw3+l2g84+5XiHIqdBUxXbExymPCvSsGOpiwMmn8nkiJur28STV31wnhIDrzWDPzjfA==}
|
||||
engines: {node: '>=14'}
|
||||
@@ -1760,6 +1772,12 @@ packages:
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/core@2.5.0':
|
||||
resolution: {integrity: sha512-ka4H8OM6+DlUhSAZpONu0cPBtPPTQKxbxVzC4CzVx5+K4JnroJVBtDzLAMx4/3CDTJXRvVFhpFjtl4SaiTNoyQ==}
|
||||
engines: {node: ^18.19.0 || >=20.6.0}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/exporter-logs-otlp-grpc@0.56.0':
|
||||
resolution: {integrity: sha512-/ef8wcphVKZ0uI7A1oqQI/gEMiBUlkeBkM9AGx6AviQFIbgPVSdNK3+bHBkyq5qMkyWgkeQCSJ0uhc5vJpf0dw==}
|
||||
engines: {node: '>=14'}
|
||||
@@ -2226,6 +2244,12 @@ packages:
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/resources@2.5.0':
|
||||
resolution: {integrity: sha512-F8W52ApePshpoSrfsSk1H2yJn9aKjCrbpQF1M9Qii0GHzbfVeFUB+rc3X4aggyZD8x9Gu3Slua+s6krmq6Dt8g==}
|
||||
engines: {node: ^18.19.0 || >=20.6.0}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.3.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/sdk-logs@0.56.0':
|
||||
resolution: {integrity: sha512-OS0WPBJF++R/cSl+terUjQH5PebloidB1Jbbecgg2rnCmQbTST9xsRes23bLfDQVRvmegmHqDh884h0aRdJyLw==}
|
||||
engines: {node: '>=14'}
|
||||
@@ -2274,6 +2298,12 @@ packages:
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.0.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/sdk-trace-base@2.5.0':
|
||||
resolution: {integrity: sha512-VzRf8LzotASEyNDUxTdaJ9IRJ1/h692WyArDBInf5puLCjxbICD6XkHgpuudis56EndyS7LYFmtTMny6UABNdQ==}
|
||||
engines: {node: ^18.19.0 || >=20.6.0}
|
||||
peerDependencies:
|
||||
'@opentelemetry/api': '>=1.3.0 <1.10.0'
|
||||
|
||||
'@opentelemetry/sdk-trace-node@1.29.0':
|
||||
resolution: {integrity: sha512-ZpGYt+VnMu6O0SRKzhuIivr7qJm3GpWnTCMuJspu4kt3QWIpIenwixo5Vvjuu3R4h2Onl/8dtqAiPIs92xd5ww==}
|
||||
engines: {node: '>=14'}
|
||||
@@ -7983,6 +8013,10 @@ snapshots:
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
|
||||
'@opentelemetry/context-async-hooks@2.5.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
|
||||
'@opentelemetry/core@1.29.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
@@ -7993,6 +8027,11 @@ snapshots:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/semantic-conventions': 1.28.0
|
||||
|
||||
'@opentelemetry/core@2.5.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/semantic-conventions': 1.39.0
|
||||
|
||||
'@opentelemetry/exporter-logs-otlp-grpc@0.56.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@grpc/grpc-js': 1.14.3
|
||||
@@ -8649,6 +8688,12 @@ snapshots:
|
||||
'@opentelemetry/core': 1.30.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.28.0
|
||||
|
||||
'@opentelemetry/resources@2.5.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 2.5.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.39.0
|
||||
|
||||
'@opentelemetry/sdk-logs@0.56.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
@@ -8737,6 +8782,13 @@ snapshots:
|
||||
'@opentelemetry/resources': 1.30.1(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.28.0
|
||||
|
||||
'@opentelemetry/sdk-trace-base@2.5.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
'@opentelemetry/core': 2.5.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/resources': 2.5.0(@opentelemetry/api@1.9.0)
|
||||
'@opentelemetry/semantic-conventions': 1.39.0
|
||||
|
||||
'@opentelemetry/sdk-trace-node@1.29.0(@opentelemetry/api@1.9.0)':
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
|
||||
Reference in New Issue
Block a user