merge: resolve conflicts with develop (M10-Telemetry + M12-MatrixBridge)
All checks were successful
ci/woodpecker/push/infra Pipeline was successful
ci/woodpecker/push/coordinator Pipeline was successful
ci/woodpecker/push/orchestrator Pipeline was successful
ci/woodpecker/push/api Pipeline was successful
ci/woodpecker/push/web Pipeline was successful

Merge origin/develop into feature/m13-speech-services to incorporate
M10-Telemetry and M12-MatrixBridge changes. Resolved 4 conflicts:
- .env.example: Added speech config alongside telemetry + matrix config
- Makefile: Added speech targets alongside matrix targets
- app.module.ts: Import both MosaicTelemetryModule and SpeechModule
- docs/tasks.md: Combined all milestone task tracking sections

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 12:31:08 -06:00
68 changed files with 12466 additions and 358 deletions


@@ -316,6 +316,22 @@ RATE_LIMIT_STORAGE=redis
# multi-tenant isolation. Each Discord bot instance should be configured for
# a single workspace.
# ======================
# Matrix Bridge (Optional)
# ======================
# Matrix bot integration for chat-based control via Matrix protocol
# Requires a Matrix account with an access token for the bot user
# MATRIX_HOMESERVER_URL=https://matrix.example.com
# MATRIX_ACCESS_TOKEN=
# MATRIX_BOT_USER_ID=@mosaic-bot:example.com
# MATRIX_CONTROL_ROOM_ID=!roomid:example.com
# MATRIX_WORKSPACE_ID=your-workspace-uuid
#
# SECURITY: MATRIX_WORKSPACE_ID must be a valid workspace UUID from your database.
# All Matrix commands will execute within this workspace context for proper
# multi-tenant isolation. Each Matrix bot instance should be configured for
# a single workspace.
# ======================
# Orchestrator Configuration
# ======================
@@ -389,6 +405,54 @@ SPEECH_MAX_DURATION_SECONDS=600
# Maximum text length for TTS in characters (default: 4096)
SPEECH_MAX_TEXT_LENGTH=4096
# ======================
# Mosaic Telemetry (Task Completion Tracking & Predictions)
# ======================
# Telemetry tracks task completion patterns to provide time estimates and predictions.
# Data is sent to the Mosaic Telemetry API (a separate service).
# Master switch: set to false to completely disable telemetry (no HTTP calls will be made)
MOSAIC_TELEMETRY_ENABLED=true
# URL of the telemetry API server
# For Docker Compose (internal): http://telemetry-api:8000
# For production/swarm: https://tel-api.mosaicstack.dev
MOSAIC_TELEMETRY_SERVER_URL=http://telemetry-api:8000
# API key for authenticating with the telemetry server
# Generate with: openssl rand -hex 32
MOSAIC_TELEMETRY_API_KEY=your-64-char-hex-api-key-here
# Unique identifier for this Mosaic Stack instance
# Generate with: uuidgen or python -c "import uuid; print(uuid.uuid4())"
MOSAIC_TELEMETRY_INSTANCE_ID=your-instance-uuid-here
# Dry run mode: set to true to log telemetry events to console instead of sending HTTP requests
# Useful for development and debugging telemetry payloads
MOSAIC_TELEMETRY_DRY_RUN=false
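The three switches above imply three client modes: disabled (no HTTP at all), dry-run (log the payload instead of sending), and live (POST to the server). A minimal sketch of that gating logic; the `sendEvent` helper, event shape, and `/events` path are illustrative assumptions, not the actual `@mosaicstack/telemetry-client` API:

```typescript
// Illustrative sketch of the three telemetry modes implied by the env vars above.
// The helper name, event shape, and endpoint path are assumptions for illustration.
interface TelemetryConfig {
  enabled: boolean;   // MOSAIC_TELEMETRY_ENABLED
  dryRun: boolean;    // MOSAIC_TELEMETRY_DRY_RUN
  serverUrl: string;  // MOSAIC_TELEMETRY_SERVER_URL
  apiKey: string;     // MOSAIC_TELEMETRY_API_KEY
  instanceId: string; // MOSAIC_TELEMETRY_INSTANCE_ID
}

type TelemetryEvent = { name: string; payload: Record<string, unknown> };

// Returns a short description of what happened so the behavior is observable.
async function sendEvent(cfg: TelemetryConfig, event: TelemetryEvent): Promise<string> {
  if (!cfg.enabled) return "skipped"; // master switch: no HTTP calls are made
  const body = { ...event, instanceId: cfg.instanceId };
  if (cfg.dryRun) {
    console.log("[telemetry dry-run]", JSON.stringify(body)); // log instead of sending
    return "logged";
  }
  const res = await fetch(`${cfg.serverUrl}/events`, {
    method: "POST",
    headers: { "Content-Type": "application/json", Authorization: `Bearer ${cfg.apiKey}` },
    body: JSON.stringify(body),
  });
  return res.ok ? "sent" : `error:${res.status}`;
}
```

Note that the disabled check comes first, so a misconfigured URL or key is harmless when telemetry is off.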
# ======================
# Matrix Dev Environment (docker-compose.matrix.yml overlay)
# ======================
# These variables configure the local Matrix dev environment.
# Only used when running: docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml up
#
# Synapse homeserver
# SYNAPSE_CLIENT_PORT=8008
# SYNAPSE_FEDERATION_PORT=8448
# SYNAPSE_POSTGRES_DB=synapse
# SYNAPSE_POSTGRES_USER=synapse
# SYNAPSE_POSTGRES_PASSWORD=synapse_dev_password
#
# Element Web client
# ELEMENT_PORT=8501
#
# Matrix bridge connection (set after running docker/matrix/scripts/setup-bot.sh)
# MATRIX_HOMESERVER_URL=http://localhost:8008
# MATRIX_ACCESS_TOKEN=<obtained from setup-bot.sh>
# MATRIX_BOT_USER_ID=@mosaic-bot:localhost
# MATRIX_SERVER_NAME=localhost
# ======================
# Logging & Debugging
# ======================

.npmrc Normal file

@@ -0,0 +1 @@
@mosaicstack:registry=https://git.mosaicstack.dev/api/packages/mosaic/npm/


@@ -30,7 +30,7 @@ steps:
- python -m venv venv
- . venv/bin/activate
- pip install --no-cache-dir --upgrade "pip>=25.3"
- pip install --no-cache-dir --extra-index-url https://git.mosaicstack.dev/api/packages/mosaic/pypi/simple/ -e ".[dev]"
- pip install --no-cache-dir bandit pip-audit
ruff-check:


@@ -1,4 +1,4 @@
.PHONY: help install dev build test docker-up docker-down docker-logs docker-ps docker-build docker-restart docker-test speech-up speech-down speech-logs clean matrix-up matrix-down matrix-logs matrix-setup-bot
# Default target
help:
@@ -29,6 +29,12 @@ help:
@echo " make speech-down Stop speech services"
@echo " make speech-logs View speech service logs"
@echo ""
@echo "Matrix Dev Environment:"
@echo " make matrix-up Start Matrix services (Synapse + Element)"
@echo " make matrix-down Stop Matrix services"
@echo " make matrix-logs View Matrix service logs"
@echo " make matrix-setup-bot Create bot account and get access token"
@echo ""
@echo "Database:"
@echo " make db-migrate Run database migrations"
@echo " make db-seed Seed development data"
@@ -100,6 +106,19 @@ speech-down:
speech-logs:
docker compose -f docker-compose.yml -f docker-compose.speech.yml logs -f speaches kokoro-tts
# Matrix Dev Environment
matrix-up:
docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml up -d
matrix-down:
docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml down
matrix-logs:
docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml logs -f synapse element-web
matrix-setup-bot:
docker/matrix/scripts/setup-bot.sh
# Database operations
db-migrate:
cd apps/api && pnpm prisma:migrate


@@ -782,6 +782,7 @@ Complete documentation is organized in a Bookstack-compatible structure in the `
- **[Overview](docs/3-architecture/1-overview/)** — System design and components
- **[Authentication](docs/3-architecture/2-authentication/)** — BetterAuth and OIDC integration
- **[Design Principles](docs/3-architecture/3-design-principles/1-pda-friendly.md)** — PDA-friendly patterns (non-negotiable)
- **[Telemetry](docs/telemetry.md)** — AI task completion tracking, predictions, and SDK reference
### 🔌 API Reference


@@ -27,6 +27,7 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.72.1",
"@mosaic/shared": "workspace:*",
"@mosaicstack/telemetry-client": "^0.1.0",
"@nestjs/axios": "^4.0.1",
"@nestjs/bullmq": "^11.0.4",
"@nestjs/common": "^11.1.12",
@@ -64,6 +65,7 @@
"marked": "^17.0.1",
"marked-gfm-heading-id": "^4.1.3",
"marked-highlight": "^2.2.3",
"matrix-bot-sdk": "^0.8.0",
"ollama": "^0.6.3",
"openai": "^6.17.0",
"reflect-metadata": "^0.2.2",


@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "workspaces" ADD COLUMN "matrix_room_id" TEXT;


@@ -265,6 +265,7 @@ model Workspace {
name String
ownerId String @map("owner_id") @db.Uuid
settings Json @default("{}")
matrixRoomId String? @map("matrix_room_id")
createdAt DateTime @default(now()) @map("created_at") @db.Timestamptz
updatedAt DateTime @updatedAt @map("updated_at") @db.Timestamptz


@@ -37,6 +37,7 @@ import { JobStepsModule } from "./job-steps/job-steps.module";
import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module";
import { FederationModule } from "./federation/federation.module";
import { CredentialsModule } from "./credentials/credentials.module";
import { MosaicTelemetryModule } from "./mosaic-telemetry";
import { SpeechModule } from "./speech/speech.module";
import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor";
@@ -98,6 +99,7 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce
CoordinatorIntegrationModule,
FederationModule,
CredentialsModule,
MosaicTelemetryModule,
SpeechModule,
],
controllers: [AppController, CsrfController],


@@ -0,0 +1,15 @@
/**
* Bridge Module Constants
*
* Injection tokens for the bridge module.
*/
/**
* Injection token for the array of active IChatProvider instances.
*
* Use this token to inject all configured chat providers:
* ```
* @Inject(CHAT_PROVIDERS) private readonly chatProviders: IChatProvider[]
* ```
*/
export const CHAT_PROVIDERS = "CHAT_PROVIDERS";
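Consumers that inject `CHAT_PROVIDERS` receive an array and should fan out to every active bridge rather than hard-coding Discord. A hedged sketch of that pattern, with the provider interface trimmed to the two members used here (the real `IChatProvider` has more, and the return type of `sendMessage` is an assumption):

```typescript
// Minimal slice of IChatProvider for illustration; the real interface is larger.
interface ChatProviderLike {
  name: string;
  sendMessage(channelId: string, content: string): Promise<string>; // message id (assumption)
}

// Fan a notification out to every configured bridge; a failure on one
// bridge must not prevent delivery on the others.
async function broadcast(
  providers: ChatProviderLike[],
  channelId: string,
  content: string,
): Promise<Record<string, string>> {
  const results: Record<string, string> = {};
  await Promise.all(
    providers.map(async (p) => {
      try {
        results[p.name] = await p.sendMessage(channelId, content);
      } catch {
        results[p.name] = "failed"; // record the failure, keep broadcasting
      }
    }),
  );
  return results;
}
```

With zero configured providers the loop simply does nothing, which matches the module's "no error if neither bridge is configured" behavior.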


@@ -1,10 +1,13 @@
import { Test, TestingModule } from "@nestjs/testing";
import { BridgeModule } from "./bridge.module";
import { DiscordService } from "./discord/discord.service";
import { MatrixService } from "./matrix/matrix.service";
import { StitcherService } from "../stitcher/stitcher.service";
import { PrismaService } from "../prisma/prisma.service";
import { BullMqService } from "../bullmq/bullmq.service";
import { CHAT_PROVIDERS } from "./bridge.constants";
import type { IChatProvider } from "./interfaces";
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
// Mock discord.js
const mockReadyCallbacks: Array<() => void> = [];
@@ -53,20 +56,93 @@ vi.mock("discord.js", () => {
};
});
// Mock matrix-bot-sdk
vi.mock("matrix-bot-sdk", () => {
return {
MatrixClient: class MockMatrixClient {
start = vi.fn().mockResolvedValue(undefined);
stop = vi.fn();
on = vi.fn();
sendMessage = vi.fn().mockResolvedValue("$mock-event-id");
},
SimpleFsStorageProvider: class MockStorage {
constructor(_path: string) {
// no-op
}
},
AutojoinRoomsMixin: {
setupOnClient: vi.fn(),
},
};
});
/**
* Saved environment variables to restore after each test
*/
interface SavedEnvVars {
DISCORD_BOT_TOKEN?: string;
DISCORD_GUILD_ID?: string;
DISCORD_CONTROL_CHANNEL_ID?: string;
MATRIX_ACCESS_TOKEN?: string;
MATRIX_HOMESERVER_URL?: string;
MATRIX_BOT_USER_ID?: string;
MATRIX_CONTROL_ROOM_ID?: string;
MATRIX_WORKSPACE_ID?: string;
ENCRYPTION_KEY?: string;
}
describe("BridgeModule", () => {
let savedEnv: SavedEnvVars;
beforeEach(() => {
// Save current env vars
savedEnv = {
DISCORD_BOT_TOKEN: process.env.DISCORD_BOT_TOKEN,
DISCORD_GUILD_ID: process.env.DISCORD_GUILD_ID,
DISCORD_CONTROL_CHANNEL_ID: process.env.DISCORD_CONTROL_CHANNEL_ID,
MATRIX_ACCESS_TOKEN: process.env.MATRIX_ACCESS_TOKEN,
MATRIX_HOMESERVER_URL: process.env.MATRIX_HOMESERVER_URL,
MATRIX_BOT_USER_ID: process.env.MATRIX_BOT_USER_ID,
MATRIX_CONTROL_ROOM_ID: process.env.MATRIX_CONTROL_ROOM_ID,
MATRIX_WORKSPACE_ID: process.env.MATRIX_WORKSPACE_ID,
ENCRYPTION_KEY: process.env.ENCRYPTION_KEY,
};
// Clear all bridge env vars
delete process.env.DISCORD_BOT_TOKEN;
delete process.env.DISCORD_GUILD_ID;
delete process.env.DISCORD_CONTROL_CHANNEL_ID;
delete process.env.MATRIX_ACCESS_TOKEN;
delete process.env.MATRIX_HOMESERVER_URL;
delete process.env.MATRIX_BOT_USER_ID;
delete process.env.MATRIX_CONTROL_ROOM_ID;
delete process.env.MATRIX_WORKSPACE_ID;
// Set encryption key (needed by StitcherService)
process.env.ENCRYPTION_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef";
// Clear ready callbacks
mockReadyCallbacks.length = 0;
vi.clearAllMocks();
});
afterEach(() => {
// Restore env vars
for (const [key, value] of Object.entries(savedEnv)) {
if (value === undefined) {
delete process.env[key];
} else {
process.env[key] = value;
}
}
});
/**
* Helper to compile a test module with BridgeModule
*/
async function compileModule(): Promise<TestingModule> {
return Test.createTestingModule({
imports: [BridgeModule],
})
.overrideProvider(PrismaService)
@@ -74,12 +150,38 @@ describe("BridgeModule", () => {
.overrideProvider(BullMqService)
.useValue({})
.compile();
}
/**
* Helper to set Discord env vars
*/
function setDiscordEnv(): void {
process.env.DISCORD_BOT_TOKEN = "test-discord-token";
process.env.DISCORD_GUILD_ID = "test-guild-id";
process.env.DISCORD_CONTROL_CHANNEL_ID = "test-channel-id";
}
/**
* Helper to set Matrix env vars
*/
function setMatrixEnv(): void {
process.env.MATRIX_ACCESS_TOKEN = "test-matrix-token";
process.env.MATRIX_HOMESERVER_URL = "https://matrix.example.com";
process.env.MATRIX_BOT_USER_ID = "@bot:example.com";
process.env.MATRIX_CONTROL_ROOM_ID = "!room:example.com";
process.env.MATRIX_WORKSPACE_ID = "test-workspace-id";
}
describe("with both Discord and Matrix configured", () => {
let module: TestingModule;
beforeEach(async () => {
setDiscordEnv();
setMatrixEnv();
module = await compileModule();
});
it("should compile the module", () => {
expect(module).toBeDefined();
});
@@ -89,9 +191,103 @@ describe("BridgeModule", () => {
expect(discordService).toBeInstanceOf(DiscordService);
});
it("should provide MatrixService", () => {
const matrixService = module.get<MatrixService>(MatrixService);
expect(matrixService).toBeDefined();
expect(matrixService).toBeInstanceOf(MatrixService);
});
it("should provide CHAT_PROVIDERS with both providers", () => {
const chatProviders = module.get<IChatProvider[]>(CHAT_PROVIDERS);
expect(chatProviders).toBeDefined();
expect(chatProviders).toHaveLength(2);
expect(chatProviders[0]).toBeInstanceOf(DiscordService);
expect(chatProviders[1]).toBeInstanceOf(MatrixService);
});
it("should provide StitcherService via StitcherModule", () => {
const stitcherService = module.get<StitcherService>(StitcherService);
expect(stitcherService).toBeDefined();
expect(stitcherService).toBeInstanceOf(StitcherService);
});
});
describe("with only Discord configured", () => {
let module: TestingModule;
beforeEach(async () => {
setDiscordEnv();
module = await compileModule();
});
it("should compile the module", () => {
expect(module).toBeDefined();
});
it("should provide DiscordService", () => {
const discordService = module.get<DiscordService>(DiscordService);
expect(discordService).toBeDefined();
expect(discordService).toBeInstanceOf(DiscordService);
});
it("should provide CHAT_PROVIDERS with only Discord", () => {
const chatProviders = module.get<IChatProvider[]>(CHAT_PROVIDERS);
expect(chatProviders).toBeDefined();
expect(chatProviders).toHaveLength(1);
expect(chatProviders[0]).toBeInstanceOf(DiscordService);
});
});
describe("with only Matrix configured", () => {
let module: TestingModule;
beforeEach(async () => {
setMatrixEnv();
module = await compileModule();
});
it("should compile the module", () => {
expect(module).toBeDefined();
});
it("should provide MatrixService", () => {
const matrixService = module.get<MatrixService>(MatrixService);
expect(matrixService).toBeDefined();
expect(matrixService).toBeInstanceOf(MatrixService);
});
it("should provide CHAT_PROVIDERS with only Matrix", () => {
const chatProviders = module.get<IChatProvider[]>(CHAT_PROVIDERS);
expect(chatProviders).toBeDefined();
expect(chatProviders).toHaveLength(1);
expect(chatProviders[0]).toBeInstanceOf(MatrixService);
});
});
describe("with neither bridge configured", () => {
let module: TestingModule;
beforeEach(async () => {
// No env vars set for either bridge
module = await compileModule();
});
it("should compile the module without errors", () => {
expect(module).toBeDefined();
});
it("should provide CHAT_PROVIDERS as an empty array", () => {
const chatProviders = module.get<IChatProvider[]>(CHAT_PROVIDERS);
expect(chatProviders).toBeDefined();
expect(chatProviders).toHaveLength(0);
expect(Array.isArray(chatProviders)).toBe(true);
});
});
describe("CHAT_PROVIDERS token", () => {
it("should be a string constant", () => {
expect(CHAT_PROVIDERS).toBe("CHAT_PROVIDERS");
expect(typeof CHAT_PROVIDERS).toBe("string");
});
});
});


@@ -1,16 +1,81 @@
import { Logger, Module } from "@nestjs/common";
import { DiscordService } from "./discord/discord.service";
import { MatrixService } from "./matrix/matrix.service";
import { MatrixRoomService } from "./matrix/matrix-room.service";
import { MatrixStreamingService } from "./matrix/matrix-streaming.service";
import { CommandParserService } from "./parser/command-parser.service";
import { StitcherModule } from "../stitcher/stitcher.module";
import { CHAT_PROVIDERS } from "./bridge.constants";
import type { IChatProvider } from "./interfaces";
const logger = new Logger("BridgeModule");
/**
* Bridge Module - Chat platform integrations
*
* Provides integration with chat platforms (Discord, Matrix, etc.)
* for controlling Mosaic Stack via chat commands.
*
* Both services are always registered as providers, but the CHAT_PROVIDERS
* injection token only includes bridges whose environment variables are set:
* - Discord: included when DISCORD_BOT_TOKEN is set
* - Matrix: included when MATRIX_ACCESS_TOKEN is set
*
* Both bridges can run simultaneously, and no error occurs if neither is configured.
* Consumers should inject CHAT_PROVIDERS for bridge-agnostic access to all active providers.
*
* CommandParserService provides shared, platform-agnostic command parsing.
* MatrixRoomService handles workspace-to-Matrix-room mapping.
*/
@Module({
imports: [StitcherModule],
providers: [
CommandParserService,
MatrixRoomService,
MatrixStreamingService,
DiscordService,
MatrixService,
{
provide: CHAT_PROVIDERS,
useFactory: (discord: DiscordService, matrix: MatrixService): IChatProvider[] => {
const providers: IChatProvider[] = [];
if (process.env.DISCORD_BOT_TOKEN) {
providers.push(discord);
logger.log("Discord bridge enabled (DISCORD_BOT_TOKEN detected)");
}
if (process.env.MATRIX_ACCESS_TOKEN) {
const missingVars = [
"MATRIX_HOMESERVER_URL",
"MATRIX_BOT_USER_ID",
"MATRIX_WORKSPACE_ID",
].filter((v) => !process.env[v]);
if (missingVars.length > 0) {
logger.warn(
`Matrix bridge enabled but missing: ${missingVars.join(", ")}. connect() will fail.`
);
}
providers.push(matrix);
logger.log("Matrix bridge enabled (MATRIX_ACCESS_TOKEN detected)");
}
if (providers.length === 0) {
logger.warn("No chat bridges configured. Set DISCORD_BOT_TOKEN or MATRIX_ACCESS_TOKEN.");
}
return providers;
},
inject: [DiscordService, MatrixService],
},
],
exports: [
DiscordService,
MatrixService,
MatrixRoomService,
MatrixStreamingService,
CommandParserService,
CHAT_PROVIDERS,
],
})
export class BridgeModule {}
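The `CHAT_PROVIDERS` factory above gates each bridge on a single env var. That decision is a pure function of the environment, restated here as a standalone sketch (function names are illustrative, not part of the module):

```typescript
// Which bridges would the CHAT_PROVIDERS factory enable for a given environment?
// A pure restatement of the gating rules, for illustration only.
function enabledBridges(env: Record<string, string | undefined>): string[] {
  const bridges: string[] = [];
  if (env.DISCORD_BOT_TOKEN) bridges.push("discord");
  if (env.MATRIX_ACCESS_TOKEN) bridges.push("matrix");
  return bridges; // an empty array is valid: no bridges configured, no error
}

// Matrix is enabled by MATRIX_ACCESS_TOKEN alone, but connect() also needs
// these vars; the factory only warns when they are missing.
function missingMatrixVars(env: Record<string, string | undefined>): string[] {
  return ["MATRIX_HOMESERVER_URL", "MATRIX_BOT_USER_ID", "MATRIX_WORKSPACE_ID"].filter(
    (v) => !env[v],
  );
}
```

Keeping the gate on one token per bridge means partially configured bridges still start (and surface a warning) instead of silently disappearing from `CHAT_PROVIDERS`.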


@@ -187,6 +187,7 @@ describe("DiscordService", () => {
await service.connect();
await service.sendThreadMessage({
threadId: "thread-123",
channelId: "test-channel-id",
content: "Step completed",
});


@@ -305,6 +305,7 @@ export class DiscordService implements IChatProvider {
// Send confirmation to thread
await this.sendThreadMessage({
threadId,
channelId: message.channelId,
content: `Job created: ${result.jobId}\nStatus: ${result.status}\nQueue: ${result.queueName}`,
});
}


@@ -28,6 +28,7 @@ export interface ThreadCreateOptions {
export interface ThreadMessageOptions {
threadId: string;
channelId: string;
content: string;
}
@@ -76,4 +77,17 @@ export interface IChatProvider {
* Parse a command from a message
*/
parseCommand(message: ChatMessage): ChatCommand | null;
/**
* Edit an existing message in a channel.
*
* Optional method for providers that support message editing
* (e.g., Matrix via m.replace, Discord via message.edit).
* Used for streaming AI responses with incremental updates.
*
* @param channelId - The channel/room ID
* @param messageId - The original message/event ID to edit
* @param content - The updated message content
*/
editMessage?(channelId: string, messageId: string, content: string): Promise<void>;
}
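Because `editMessage` is optional, streaming consumers must feature-detect it before calling. A sketch under the assumption of a trimmed provider shape (the real `IChatProvider` has more members, and the `sendThreadMessage` return type here is an assumption):

```typescript
// Trimmed provider shape for illustration only.
interface EditingProvider {
  sendThreadMessage(opts: { threadId: string; channelId: string; content: string }): Promise<string>;
  editMessage?(channelId: string, messageId: string, content: string): Promise<void>;
}

// Edit the previous message in place when the provider supports it
// (Matrix m.replace, Discord message.edit); otherwise append a new message.
async function streamUpdate(
  provider: EditingProvider,
  channelId: string,
  threadId: string,
  messageId: string | null,
  content: string,
): Promise<string> {
  if (messageId && typeof provider.editMessage === "function") {
    await provider.editMessage(channelId, messageId, content);
    return messageId; // same message id: updated in place
  }
  return provider.sendThreadMessage({ threadId, channelId, content });
}
```

The returned id is fed back into the next call, so a provider without `editMessage` degrades gracefully into one message per update.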


@@ -0,0 +1,4 @@
export { MatrixService } from "./matrix.service";
export { MatrixRoomService } from "./matrix-room.service";
export { MatrixStreamingService } from "./matrix-streaming.service";
export type { StreamResponseOptions } from "./matrix-streaming.service";

File diff suppressed because it is too large


@@ -0,0 +1,212 @@
import { Test, TestingModule } from "@nestjs/testing";
import { MatrixRoomService } from "./matrix-room.service";
import { MatrixService } from "./matrix.service";
import { PrismaService } from "../../prisma/prisma.service";
import { vi, describe, it, expect, beforeEach } from "vitest";
// Mock matrix-bot-sdk to avoid native module import errors
vi.mock("matrix-bot-sdk", () => {
return {
MatrixClient: class MockMatrixClient {},
SimpleFsStorageProvider: class MockStorageProvider {
constructor(_filename: string) {
// No-op for testing
}
},
AutojoinRoomsMixin: {
setupOnClient: vi.fn(),
},
};
});
describe("MatrixRoomService", () => {
let service: MatrixRoomService;
const mockCreateRoom = vi.fn().mockResolvedValue("!new-room:example.com");
const mockMatrixClient = {
createRoom: mockCreateRoom,
};
const mockMatrixService = {
isConnected: vi.fn().mockReturnValue(true),
getClient: vi.fn().mockReturnValue(mockMatrixClient),
};
const mockPrismaService = {
workspace: {
findUnique: vi.fn(),
findFirst: vi.fn(),
update: vi.fn(),
},
};
beforeEach(async () => {
process.env.MATRIX_SERVER_NAME = "example.com";
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixRoomService,
{
provide: PrismaService,
useValue: mockPrismaService,
},
{
provide: MatrixService,
useValue: mockMatrixService,
},
],
}).compile();
service = module.get<MatrixRoomService>(MatrixRoomService);
vi.clearAllMocks();
// Restore defaults after clearing
mockMatrixService.isConnected.mockReturnValue(true);
mockCreateRoom.mockResolvedValue("!new-room:example.com");
mockPrismaService.workspace.update.mockResolvedValue({});
});
describe("provisionRoom", () => {
it("should create a Matrix room and store the mapping", async () => {
const roomId = await service.provisionRoom(
"workspace-uuid-1",
"My Workspace",
"my-workspace"
);
expect(roomId).toBe("!new-room:example.com");
expect(mockCreateRoom).toHaveBeenCalledWith({
name: "Mosaic: My Workspace",
room_alias_name: "mosaic-my-workspace",
topic: "Mosaic workspace: My Workspace",
preset: "private_chat",
visibility: "private",
});
expect(mockPrismaService.workspace.update).toHaveBeenCalledWith({
where: { id: "workspace-uuid-1" },
data: { matrixRoomId: "!new-room:example.com" },
});
});
it("should return null when Matrix is not configured (no MatrixService)", async () => {
// Create a service without MatrixService
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixRoomService,
{
provide: PrismaService,
useValue: mockPrismaService,
},
],
}).compile();
const serviceWithoutMatrix = module.get<MatrixRoomService>(MatrixRoomService);
const roomId = await serviceWithoutMatrix.provisionRoom(
"workspace-uuid-1",
"My Workspace",
"my-workspace"
);
expect(roomId).toBeNull();
expect(mockCreateRoom).not.toHaveBeenCalled();
expect(mockPrismaService.workspace.update).not.toHaveBeenCalled();
});
it("should return null when Matrix is not connected", async () => {
mockMatrixService.isConnected.mockReturnValue(false);
const roomId = await service.provisionRoom(
"workspace-uuid-1",
"My Workspace",
"my-workspace"
);
expect(roomId).toBeNull();
expect(mockCreateRoom).not.toHaveBeenCalled();
});
});
describe("getRoomForWorkspace", () => {
it("should return the room ID for a mapped workspace", async () => {
mockPrismaService.workspace.findUnique.mockResolvedValue({
matrixRoomId: "!mapped-room:example.com",
});
const roomId = await service.getRoomForWorkspace("workspace-uuid-1");
expect(roomId).toBe("!mapped-room:example.com");
expect(mockPrismaService.workspace.findUnique).toHaveBeenCalledWith({
where: { id: "workspace-uuid-1" },
select: { matrixRoomId: true },
});
});
it("should return null for an unmapped workspace", async () => {
mockPrismaService.workspace.findUnique.mockResolvedValue({
matrixRoomId: null,
});
const roomId = await service.getRoomForWorkspace("workspace-uuid-2");
expect(roomId).toBeNull();
});
it("should return null for a non-existent workspace", async () => {
mockPrismaService.workspace.findUnique.mockResolvedValue(null);
const roomId = await service.getRoomForWorkspace("non-existent-uuid");
expect(roomId).toBeNull();
});
});
describe("getWorkspaceForRoom", () => {
it("should return the workspace ID for a mapped room", async () => {
mockPrismaService.workspace.findFirst.mockResolvedValue({
id: "workspace-uuid-1",
});
const workspaceId = await service.getWorkspaceForRoom("!mapped-room:example.com");
expect(workspaceId).toBe("workspace-uuid-1");
expect(mockPrismaService.workspace.findFirst).toHaveBeenCalledWith({
where: { matrixRoomId: "!mapped-room:example.com" },
select: { id: true },
});
});
it("should return null for an unmapped room", async () => {
mockPrismaService.workspace.findFirst.mockResolvedValue(null);
const workspaceId = await service.getWorkspaceForRoom("!unknown-room:example.com");
expect(workspaceId).toBeNull();
});
});
describe("linkWorkspaceToRoom", () => {
it("should store the room mapping in the workspace", async () => {
await service.linkWorkspaceToRoom("workspace-uuid-1", "!existing-room:example.com");
expect(mockPrismaService.workspace.update).toHaveBeenCalledWith({
where: { id: "workspace-uuid-1" },
data: { matrixRoomId: "!existing-room:example.com" },
});
});
});
describe("unlinkWorkspace", () => {
it("should remove the room mapping from the workspace", async () => {
await service.unlinkWorkspace("workspace-uuid-1");
expect(mockPrismaService.workspace.update).toHaveBeenCalledWith({
where: { id: "workspace-uuid-1" },
data: { matrixRoomId: null },
});
});
});
});
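The tests above expect `room_alias_name: "mosaic-my-workspace"`, which is only the alias localpart; the homeserver composes the full alias from it and its server name. A sketch of the resulting alias, following the `#mosaic-{workspace_slug}:{server_name}` convention documented in the service (helper name is illustrative):

```typescript
// Given room_alias_name "mosaic-<slug>" on a homeserver named <serverName>,
// the published room alias is "#mosaic-<slug>:<serverName>".
function fullRoomAlias(slug: string, serverName: string): string {
  return `#mosaic-${slug}:${serverName}`;
}
```

This is why the service reads MATRIX_SERVER_NAME: the alias localpart alone is ambiguous without the homeserver's domain.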


@@ -0,0 +1,151 @@
import { Injectable, Logger, Optional, Inject } from "@nestjs/common";
import { PrismaService } from "../../prisma/prisma.service";
import { MatrixService } from "./matrix.service";
import type { MatrixClient, RoomCreateOptions } from "matrix-bot-sdk";
/**
* MatrixRoomService - Workspace-to-Matrix-Room mapping and provisioning
*
* Responsibilities:
* - Provision Matrix rooms for Mosaic workspaces
* - Map workspaces to Matrix room IDs
* - Link/unlink existing rooms to workspaces
*
* Room provisioning creates a private Matrix room with:
* - Name: "Mosaic: {workspace_name}"
* - Alias: #mosaic-{workspace_slug}:{server_name}
* - Room ID stored in workspace.matrixRoomId
*/
@Injectable()
export class MatrixRoomService {
private readonly logger = new Logger(MatrixRoomService.name);
constructor(
private readonly prisma: PrismaService,
@Optional() @Inject(MatrixService) private readonly matrixService: MatrixService | null
) {}
/**
* Provision a Matrix room for a workspace and store the mapping.
*
* @param workspaceId - The workspace UUID
* @param workspaceName - Human-readable workspace name
* @param workspaceSlug - URL-safe workspace identifier for the room alias
* @returns The Matrix room ID, or null if Matrix is not configured
*/
async provisionRoom(
workspaceId: string,
workspaceName: string,
workspaceSlug: string
): Promise<string | null> {
if (!this.matrixService?.isConnected()) {
this.logger.warn("Matrix is not configured or not connected; skipping room provisioning");
return null;
}
const client = this.getMatrixClient();
if (!client) {
this.logger.warn("Matrix client is not available; skipping room provisioning");
return null;
}
const roomOptions: RoomCreateOptions = {
name: `Mosaic: ${workspaceName}`,
room_alias_name: `mosaic-${workspaceSlug}`,
topic: `Mosaic workspace: ${workspaceName}`,
preset: "private_chat",
visibility: "private",
};
this.logger.log(
`Provisioning Matrix room for workspace "${workspaceName}" (${workspaceId})...`
);
const roomId = await client.createRoom(roomOptions);
// Store the room mapping
try {
await this.prisma.workspace.update({
where: { id: workspaceId },
data: { matrixRoomId: roomId },
});
} catch (dbError: unknown) {
this.logger.error(
`Failed to store room mapping for workspace ${workspaceId}, room ${roomId} may be orphaned: ${dbError instanceof Error ? dbError.message : "unknown"}`
);
throw dbError;
}
this.logger.log(`Matrix room ${roomId} provisioned and linked to workspace ${workspaceId}`);
return roomId;
}
/**
* Look up the Matrix room ID mapped to a workspace.
*
* @param workspaceId - The workspace UUID
* @returns The Matrix room ID, or null if no room is mapped
*/
async getRoomForWorkspace(workspaceId: string): Promise<string | null> {
const workspace = await this.prisma.workspace.findUnique({
where: { id: workspaceId },
select: { matrixRoomId: true },
});
return workspace?.matrixRoomId ?? null;
}
/**
* Reverse lookup: find the workspace that owns a given Matrix room.
*
* @param roomId - The Matrix room ID (e.g. "!abc:example.com")
* @returns The workspace ID, or null if the room is not mapped to any workspace
*/
async getWorkspaceForRoom(roomId: string): Promise<string | null> {
const workspace = await this.prisma.workspace.findFirst({
where: { matrixRoomId: roomId },
select: { id: true },
});
return workspace?.id ?? null;
}
/**
* Manually link an existing Matrix room to a workspace.
*
* @param workspaceId - The workspace UUID
* @param roomId - The Matrix room ID to link
*/
async linkWorkspaceToRoom(workspaceId: string, roomId: string): Promise<void> {
await this.prisma.workspace.update({
where: { id: workspaceId },
data: { matrixRoomId: roomId },
});
this.logger.log(`Linked workspace ${workspaceId} to Matrix room ${roomId}`);
}
/**
* Remove the Matrix room mapping from a workspace.
*
* @param workspaceId - The workspace UUID
*/
async unlinkWorkspace(workspaceId: string): Promise<void> {
await this.prisma.workspace.update({
where: { id: workspaceId },
data: { matrixRoomId: null },
});
this.logger.log(`Unlinked Matrix room from workspace ${workspaceId}`);
}
/**
* Access the underlying MatrixClient from the MatrixService
* via the public getClient() accessor.
*/
private getMatrixClient(): MatrixClient | null {
if (!this.matrixService) return null;
return this.matrixService.getClient();
}
}
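The bidirectional mapping contract above can be sketched with an in-memory stand-in. This is a hypothetical illustration, not part of the commit: the real service persists `matrixRoomId` on the workspace row via Prisma, but the lookup semantics in both directions are the same.

```typescript
// Hypothetical in-memory stand-in for the workspace <-> room mapping.
// Illustrates the MatrixRoomService lookup contract only; the real
// implementation stores matrixRoomId on the workspace record.
class InMemoryRoomMap {
  private readonly byWorkspace = new Map<string, string>();

  link(workspaceId: string, roomId: string): void {
    this.byWorkspace.set(workspaceId, roomId);
  }

  unlink(workspaceId: string): void {
    this.byWorkspace.delete(workspaceId);
  }

  // Forward lookup: workspace -> room (getRoomForWorkspace)
  roomForWorkspace(workspaceId: string): string | null {
    return this.byWorkspace.get(workspaceId) ?? null;
  }

  // Reverse lookup: room -> workspace (getWorkspaceForRoom)
  workspaceForRoom(roomId: string): string | null {
    for (const [workspaceId, mappedRoom] of this.byWorkspace) {
      if (mappedRoom === roomId) return workspaceId;
    }
    return null;
  }
}
```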


@@ -0,0 +1,408 @@
import { Test, TestingModule } from "@nestjs/testing";
import { MatrixStreamingService } from "./matrix-streaming.service";
import { MatrixService } from "./matrix.service";
import { vi, describe, it, expect, beforeEach, afterEach } from "vitest";
import type { StreamResponseOptions } from "./matrix-streaming.service";
// Mock matrix-bot-sdk to prevent native module loading
vi.mock("matrix-bot-sdk", () => {
return {
MatrixClient: class MockMatrixClient {},
SimpleFsStorageProvider: class MockStorageProvider {
constructor(_filename: string) {
// No-op for testing
}
},
AutojoinRoomsMixin: {
setupOnClient: vi.fn(),
},
};
});
// Mock MatrixClient
const mockClient = {
sendMessage: vi.fn().mockResolvedValue("$initial-event-id"),
sendEvent: vi.fn().mockResolvedValue("$edit-event-id"),
setTyping: vi.fn().mockResolvedValue(undefined),
};
// Mock MatrixService
const mockMatrixService = {
isConnected: vi.fn().mockReturnValue(true),
getClient: vi.fn().mockReturnValue(mockClient),
};
/**
* Helper: create an async iterable from an array of strings with optional delays
*/
async function* createTokenStream(
tokens: string[],
delayMs = 0
): AsyncGenerator<string, void, undefined> {
for (const token of tokens) {
if (delayMs > 0) {
await new Promise((resolve) => setTimeout(resolve, delayMs));
}
yield token;
}
}
/**
* Helper: create a token stream that throws an error mid-stream
*/
async function* createErrorStream(
tokens: string[],
errorAfter: number
): AsyncGenerator<string, void, undefined> {
let count = 0;
for (const token of tokens) {
if (count >= errorAfter) {
throw new Error("LLM provider connection lost");
}
yield token;
count++;
}
}
describe("MatrixStreamingService", () => {
let service: MatrixStreamingService;
beforeEach(async () => {
vi.useFakeTimers({ shouldAdvanceTime: true });
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixStreamingService,
{
provide: MatrixService,
useValue: mockMatrixService,
},
],
}).compile();
service = module.get<MatrixStreamingService>(MatrixStreamingService);
// Clear all mocks
vi.clearAllMocks();
// Re-apply default mock returns after clearing
mockMatrixService.isConnected.mockReturnValue(true);
mockMatrixService.getClient.mockReturnValue(mockClient);
mockClient.sendMessage.mockResolvedValue("$initial-event-id");
mockClient.sendEvent.mockResolvedValue("$edit-event-id");
mockClient.setTyping.mockResolvedValue(undefined);
});
afterEach(() => {
vi.useRealTimers();
});
describe("editMessage", () => {
it("should send a m.replace event to edit an existing message", async () => {
await service.editMessage("!room:example.com", "$original-event-id", "Updated content");
expect(mockClient.sendEvent).toHaveBeenCalledWith("!room:example.com", "m.room.message", {
"m.new_content": {
msgtype: "m.text",
body: "Updated content",
},
"m.relates_to": {
rel_type: "m.replace",
event_id: "$original-event-id",
},
// Fallback for clients that don't support edits
msgtype: "m.text",
body: "* Updated content",
});
});
it("should throw error when client is not connected", async () => {
mockMatrixService.isConnected.mockReturnValue(false);
await expect(
service.editMessage("!room:example.com", "$event-id", "content")
).rejects.toThrow("Matrix client is not connected");
});
it("should throw error when client is null", async () => {
mockMatrixService.getClient.mockReturnValue(null);
await expect(
service.editMessage("!room:example.com", "$event-id", "content")
).rejects.toThrow("Matrix client is not connected");
});
});
describe("setTypingIndicator", () => {
it("should call client.setTyping with true and timeout", async () => {
await service.setTypingIndicator("!room:example.com", true);
expect(mockClient.setTyping).toHaveBeenCalledWith("!room:example.com", true, 30000);
});
it("should call client.setTyping with false to clear indicator", async () => {
await service.setTypingIndicator("!room:example.com", false);
expect(mockClient.setTyping).toHaveBeenCalledWith("!room:example.com", false, undefined);
});
it("should throw error when client is not connected", async () => {
mockMatrixService.isConnected.mockReturnValue(false);
await expect(service.setTypingIndicator("!room:example.com", true)).rejects.toThrow(
"Matrix client is not connected"
);
});
});
describe("sendStreamingMessage", () => {
it("should send an initial message and return the event ID", async () => {
const eventId = await service.sendStreamingMessage("!room:example.com", "Thinking...");
expect(eventId).toBe("$initial-event-id");
expect(mockClient.sendMessage).toHaveBeenCalledWith("!room:example.com", {
msgtype: "m.text",
body: "Thinking...",
});
});
it("should send a thread message when threadId is provided", async () => {
const eventId = await service.sendStreamingMessage(
"!room:example.com",
"Thinking...",
"$thread-root-id"
);
expect(eventId).toBe("$initial-event-id");
expect(mockClient.sendMessage).toHaveBeenCalledWith("!room:example.com", {
msgtype: "m.text",
body: "Thinking...",
"m.relates_to": {
rel_type: "m.thread",
event_id: "$thread-root-id",
is_falling_back: true,
"m.in_reply_to": {
event_id: "$thread-root-id",
},
},
});
});
it("should throw error when client is not connected", async () => {
mockMatrixService.isConnected.mockReturnValue(false);
await expect(service.sendStreamingMessage("!room:example.com", "Test")).rejects.toThrow(
"Matrix client is not connected"
);
});
});
describe("streamResponse", () => {
it("should send initial 'Thinking...' message and start typing indicator", async () => {
vi.useRealTimers();
const tokens = ["Hello", " world"];
const stream = createTokenStream(tokens);
await service.streamResponse("!room:example.com", stream);
// Should have sent initial message
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!room:example.com",
expect.objectContaining({
msgtype: "m.text",
body: "Thinking...",
})
);
// Should have started typing indicator
expect(mockClient.setTyping).toHaveBeenCalledWith("!room:example.com", true, 30000);
});
it("should use custom initial message when provided", async () => {
vi.useRealTimers();
const tokens = ["Hi"];
const stream = createTokenStream(tokens);
const options: StreamResponseOptions = { initialMessage: "Processing..." };
await service.streamResponse("!room:example.com", stream, options);
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!room:example.com",
expect.objectContaining({
body: "Processing...",
})
);
});
it("should edit message with accumulated tokens on completion", async () => {
vi.useRealTimers();
const tokens = ["Hello", " ", "world", "!"];
const stream = createTokenStream(tokens);
await service.streamResponse("!room:example.com", stream);
// The final edit should contain the full accumulated text
const sendEventCalls = mockClient.sendEvent.mock.calls;
const lastEditCall = sendEventCalls[sendEventCalls.length - 1];
expect(lastEditCall).toBeDefined();
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
expect(lastEditCall[2]["m.new_content"].body).toBe("Hello world!");
});
it("should clear typing indicator on completion", async () => {
vi.useRealTimers();
const tokens = ["Done"];
const stream = createTokenStream(tokens);
await service.streamResponse("!room:example.com", stream);
// Last setTyping call should be false
const typingCalls = mockClient.setTyping.mock.calls;
const lastTypingCall = typingCalls[typingCalls.length - 1];
expect(lastTypingCall).toEqual(["!room:example.com", false, undefined]);
});
it("should rate-limit edits to at most one every 500ms", async () => {
vi.useRealTimers();
// Send tokens with small delays - all within one 500ms window
const tokens = ["a", "b", "c", "d", "e"];
const stream = createTokenStream(tokens, 50); // 50ms between tokens = 250ms total
await service.streamResponse("!room:example.com", stream);
// With 250ms total streaming time (5 tokens * 50ms), all tokens arrive
// within one 500ms window. We expect at most 1 intermediate edit + 1 final edit,
// or just the final edit. The key point is that there should NOT be 5 separate edits.
const editCalls = mockClient.sendEvent.mock.calls.filter(
(call) => call[1] === "m.room.message"
);
// Should have fewer edits than tokens (rate limiting in effect)
expect(editCalls.length).toBeLessThanOrEqual(2);
// Should have at least the final edit
expect(editCalls.length).toBeGreaterThanOrEqual(1);
});
it("should handle errors gracefully and edit message with error notice", async () => {
vi.useRealTimers();
const stream = createErrorStream(["Hello", " ", "world"], 2);
await service.streamResponse("!room:example.com", stream);
// Should edit message with error content
const sendEventCalls = mockClient.sendEvent.mock.calls;
const lastEditCall = sendEventCalls[sendEventCalls.length - 1];
expect(lastEditCall).toBeDefined();
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
const finalBody = lastEditCall[2]["m.new_content"].body as string;
expect(finalBody).toContain("error");
// Should clear typing on error
const typingCalls = mockClient.setTyping.mock.calls;
const lastTypingCall = typingCalls[typingCalls.length - 1];
expect(lastTypingCall).toEqual(["!room:example.com", false, undefined]);
});
it("should include token usage in final message when provided", async () => {
vi.useRealTimers();
const tokens = ["Hello"];
const stream = createTokenStream(tokens);
const options: StreamResponseOptions = {
showTokenUsage: true,
tokenUsage: { prompt: 10, completion: 5, total: 15 },
};
await service.streamResponse("!room:example.com", stream, options);
const sendEventCalls = mockClient.sendEvent.mock.calls;
const lastEditCall = sendEventCalls[sendEventCalls.length - 1];
expect(lastEditCall).toBeDefined();
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
const finalBody = lastEditCall[2]["m.new_content"].body as string;
expect(finalBody).toContain("15");
});
it("should throw error when client is not connected", async () => {
mockMatrixService.isConnected.mockReturnValue(false);
const stream = createTokenStream(["test"]);
await expect(service.streamResponse("!room:example.com", stream)).rejects.toThrow(
"Matrix client is not connected"
);
});
it("should handle empty token stream", async () => {
vi.useRealTimers();
const stream = createTokenStream([]);
await service.streamResponse("!room:example.com", stream);
// Should still send initial message
expect(mockClient.sendMessage).toHaveBeenCalled();
// Should edit with empty/no-content message
const sendEventCalls = mockClient.sendEvent.mock.calls;
expect(sendEventCalls.length).toBeGreaterThanOrEqual(1);
// Should clear typing
const typingCalls = mockClient.setTyping.mock.calls;
const lastTypingCall = typingCalls[typingCalls.length - 1];
expect(lastTypingCall).toEqual(["!room:example.com", false, undefined]);
});
it("should support thread context in streamResponse", async () => {
vi.useRealTimers();
const tokens = ["Reply"];
const stream = createTokenStream(tokens);
const options: StreamResponseOptions = { threadId: "$thread-root" };
await service.streamResponse("!room:example.com", stream, options);
// Initial message should include thread relation
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!room:example.com",
expect.objectContaining({
"m.relates_to": expect.objectContaining({
rel_type: "m.thread",
event_id: "$thread-root",
}),
})
);
});
it("should perform multiple edits for long-running streams", async () => {
vi.useRealTimers();
// Create tokens with 200ms delays - total ~2000ms, should get multiple edit windows
const tokens = Array.from({ length: 10 }, (_, i) => `token${String(i)} `);
const stream = createTokenStream(tokens, 200);
await service.streamResponse("!room:example.com", stream);
// With 10 tokens at 200ms each = 2000ms total, at 500ms intervals
// we expect roughly 3-4 intermediate edits + 1 final = 4-5 total
const editCalls = mockClient.sendEvent.mock.calls.filter(
(call) => call[1] === "m.room.message"
);
// Should have multiple edits (at least 2) but far fewer than 10
expect(editCalls.length).toBeGreaterThanOrEqual(2);
expect(editCalls.length).toBeLessThanOrEqual(8);
});
});
});


@@ -0,0 +1,248 @@
import { Injectable, Logger } from "@nestjs/common";
import type { MatrixClient } from "matrix-bot-sdk";
import { MatrixService } from "./matrix.service";
/**
* Options for the streamResponse method
*/
export interface StreamResponseOptions {
/** Custom initial message (defaults to "Thinking...") */
initialMessage?: string;
/** Thread root event ID for threaded responses */
threadId?: string;
/** Whether to show token usage in the final message */
showTokenUsage?: boolean;
/** Token usage stats to display in the final message */
tokenUsage?: { prompt: number; completion: number; total: number };
}
/**
* Matrix message content for m.room.message events
*/
interface MatrixMessageContent {
msgtype: string;
body: string;
"m.new_content"?: {
msgtype: string;
body: string;
};
"m.relates_to"?: {
rel_type: string;
event_id: string;
is_falling_back?: boolean;
"m.in_reply_to"?: {
event_id: string;
};
};
}
/** Minimum interval between message edits (milliseconds) */
const EDIT_INTERVAL_MS = 500;
/** Typing indicator timeout (milliseconds) */
const TYPING_TIMEOUT_MS = 30000;
/**
* Matrix Streaming Service
*
* Provides streaming AI response capabilities for Matrix rooms using
* incremental message edits. Tokens from an LLM are buffered and the
* response message is edited at rate-limited intervals, providing a
* smooth streaming experience without excessive API calls.
*
* Key features:
* - Rate-limited edits (max every 500ms)
* - Typing indicator management during generation
* - Graceful error handling with user-visible error notices
* - Thread support for contextual responses
* - LLM-agnostic design via AsyncIterable<string> token stream
*/
@Injectable()
export class MatrixStreamingService {
private readonly logger = new Logger(MatrixStreamingService.name);
constructor(private readonly matrixService: MatrixService) {}
/**
* Edit an existing Matrix message using the m.replace relation.
*
* Sends a new event that replaces the content of an existing message.
* Includes fallback content for clients that don't support edits.
*
* @param roomId - The Matrix room ID
* @param eventId - The original event ID to replace
* @param newContent - The updated message text
*/
async editMessage(roomId: string, eventId: string, newContent: string): Promise<void> {
const client = this.getClientOrThrow();
const editContent: MatrixMessageContent = {
"m.new_content": {
msgtype: "m.text",
body: newContent,
},
"m.relates_to": {
rel_type: "m.replace",
event_id: eventId,
},
// Fallback for clients that don't support edits
msgtype: "m.text",
body: `* ${newContent}`,
};
await client.sendEvent(roomId, "m.room.message", editContent);
}
/**
* Set the typing indicator for the bot in a room.
*
* @param roomId - The Matrix room ID
* @param typing - Whether the bot is typing
*/
async setTypingIndicator(roomId: string, typing: boolean): Promise<void> {
const client = this.getClientOrThrow();
await client.setTyping(roomId, typing, typing ? TYPING_TIMEOUT_MS : undefined);
}
/**
* Send an initial message for streaming, optionally in a thread.
*
* Returns the event ID of the sent message, which can be used for
* subsequent edits via editMessage.
*
* @param roomId - The Matrix room ID
* @param content - The initial message content
* @param threadId - Optional thread root event ID
* @returns The event ID of the sent message
*/
async sendStreamingMessage(roomId: string, content: string, threadId?: string): Promise<string> {
const client = this.getClientOrThrow();
const messageContent: MatrixMessageContent = {
msgtype: "m.text",
body: content,
};
if (threadId) {
messageContent["m.relates_to"] = {
rel_type: "m.thread",
event_id: threadId,
is_falling_back: true,
"m.in_reply_to": {
event_id: threadId,
},
};
}
const eventId: string = await client.sendMessage(roomId, messageContent);
return eventId;
}
/**
* Stream an AI response to a Matrix room using incremental message edits.
*
* This is the main streaming method. It:
* 1. Sends an initial "Thinking..." message
* 2. Starts the typing indicator
* 3. Buffers incoming tokens from the async iterable
 * 4. Edits the message at most once every 500ms with the accumulated text
* 5. On completion: sends a final clean edit, clears typing
* 6. On error: edits message with error notice, clears typing
*
* @param roomId - The Matrix room ID
* @param tokenStream - AsyncIterable that yields string tokens
* @param options - Optional configuration for the stream
*/
async streamResponse(
roomId: string,
tokenStream: AsyncIterable<string>,
options?: StreamResponseOptions
): Promise<void> {
// Validate connection before starting
this.getClientOrThrow();
const initialMessage = options?.initialMessage ?? "Thinking...";
const threadId = options?.threadId;
// Step 1: Send initial message
const eventId = await this.sendStreamingMessage(roomId, initialMessage, threadId);
// Step 2: Start typing indicator
await this.setTypingIndicator(roomId, true);
// Step 3: Buffer and stream tokens
let accumulatedText = "";
// lastEditTime starts at 0, so the first token always satisfies the
// EDIT_INTERVAL_MS check and triggers an immediate first edit.
let lastEditTime = 0;
let hasError = false;
try {
for await (const token of tokenStream) {
accumulatedText += token;
const now = Date.now();
const elapsed = now - lastEditTime;
if (elapsed >= EDIT_INTERVAL_MS && accumulatedText.length > 0) {
await this.editMessage(roomId, eventId, accumulatedText);
lastEditTime = now;
}
}
} catch (error: unknown) {
hasError = true;
const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
this.logger.error(`Stream error in room ${roomId}: ${errorMessage}`);
// Edit message to show error
try {
const errorContent = accumulatedText
? `${accumulatedText}\n\n[Streaming error: ${errorMessage}]`
: `[Streaming error: ${errorMessage}]`;
await this.editMessage(roomId, eventId, errorContent);
} catch (editError: unknown) {
this.logger.warn(
`Failed to edit error message in ${roomId}: ${editError instanceof Error ? editError.message : "unknown"}`
);
}
} finally {
// Step 4: Clear typing indicator
try {
await this.setTypingIndicator(roomId, false);
} catch (typingError: unknown) {
this.logger.warn(
`Failed to clear typing indicator in ${roomId}: ${typingError instanceof Error ? typingError.message : "unknown"}`
);
}
}
// Step 5: Final edit with clean output (if no error)
if (!hasError) {
let finalContent = accumulatedText || "(No response generated)";
if (options?.showTokenUsage && options.tokenUsage) {
const { prompt, completion, total } = options.tokenUsage;
finalContent += `\n\n---\nTokens: ${String(total)} (prompt: ${String(prompt)}, completion: ${String(completion)})`;
}
await this.editMessage(roomId, eventId, finalContent);
}
}
/**
* Get the Matrix client from the parent MatrixService, or throw if not connected.
*/
private getClientOrThrow(): MatrixClient {
if (!this.matrixService.isConnected()) {
throw new Error("Matrix client is not connected");
}
const client = this.matrixService.getClient();
if (!client) {
throw new Error("Matrix client is not connected");
}
return client;
}
}
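The edit throttle in `streamResponse` reduces to a small pure predicate. The sketch below is a hypothetical extraction for illustration, not code from this commit; `shouldFlushEdit` and `countEdits` are invented names, and the simulation mirrors why the rate-limiting test above expects far fewer edits than tokens.

```typescript
// Hypothetical pure extraction of the streamResponse throttle: flush when
// there is buffered text and at least intervalMs has elapsed since the last
// edit. In the service, "now" is an epoch timestamp and lastEditMs starts
// at 0, so the first token always flushes immediately.
function shouldFlushEdit(
  nowMs: number,
  lastEditMs: number,
  bufferedLength: number,
  intervalMs = 500
): boolean {
  return bufferedLength > 0 && nowMs - lastEditMs >= intervalMs;
}

// Simulate token arrival times and count how many edits the throttle allows.
function countEdits(arrivalsMs: number[], intervalMs = 500): number {
  let lastEdit = 0;
  let edits = 0;
  let buffered = 0;
  for (const now of arrivalsMs) {
    buffered += 1;
    if (shouldFlushEdit(now, lastEdit, buffered, intervalMs)) {
      edits += 1;
      lastEdit = now;
    }
  }
  return edits;
}
```

Five tokens arriving 50ms apart produce a single throttled edit (plus the final clean edit the service always sends), while tokens spread over two seconds produce several, matching the bounds asserted in the spec.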


@@ -0,0 +1,979 @@
import { Test, TestingModule } from "@nestjs/testing";
import { MatrixService } from "./matrix.service";
import { MatrixRoomService } from "./matrix-room.service";
import { StitcherService } from "../../stitcher/stitcher.service";
import { CommandParserService } from "../parser/command-parser.service";
import { vi, describe, it, expect, beforeEach } from "vitest";
import type { ChatMessage } from "../interfaces";
// Mock matrix-bot-sdk
const mockMessageCallbacks: Array<(roomId: string, event: Record<string, unknown>) => void> = [];
const mockEventCallbacks: Array<(roomId: string, event: Record<string, unknown>) => void> = [];
const mockClient = {
start: vi.fn().mockResolvedValue(undefined),
stop: vi.fn(),
on: vi
.fn()
.mockImplementation(
(event: string, callback: (roomId: string, evt: Record<string, unknown>) => void) => {
if (event === "room.message") {
mockMessageCallbacks.push(callback);
}
if (event === "room.event") {
mockEventCallbacks.push(callback);
}
}
),
sendMessage: vi.fn().mockResolvedValue("$event-id-123"),
sendEvent: vi.fn().mockResolvedValue("$event-id-456"),
};
vi.mock("matrix-bot-sdk", () => {
return {
MatrixClient: class MockMatrixClient {
start = mockClient.start;
stop = mockClient.stop;
on = mockClient.on;
sendMessage = mockClient.sendMessage;
sendEvent = mockClient.sendEvent;
},
SimpleFsStorageProvider: class MockStorageProvider {
constructor(_filename: string) {
// No-op for testing
}
},
AutojoinRoomsMixin: {
setupOnClient: vi.fn(),
},
};
});
describe("MatrixService", () => {
let service: MatrixService;
let stitcherService: StitcherService;
let commandParser: CommandParserService;
let matrixRoomService: MatrixRoomService;
const mockStitcherService = {
dispatchJob: vi.fn().mockResolvedValue({
jobId: "test-job-id",
queueName: "main",
status: "PENDING",
}),
trackJobEvent: vi.fn().mockResolvedValue(undefined),
};
const mockMatrixRoomService = {
getWorkspaceForRoom: vi.fn().mockResolvedValue(null),
getRoomForWorkspace: vi.fn().mockResolvedValue(null),
provisionRoom: vi.fn().mockResolvedValue(null),
linkWorkspaceToRoom: vi.fn().mockResolvedValue(undefined),
unlinkWorkspace: vi.fn().mockResolvedValue(undefined),
};
beforeEach(async () => {
// Set environment variables for testing
process.env.MATRIX_HOMESERVER_URL = "https://matrix.example.com";
process.env.MATRIX_ACCESS_TOKEN = "test-access-token";
process.env.MATRIX_BOT_USER_ID = "@mosaic-bot:example.com";
process.env.MATRIX_CONTROL_ROOM_ID = "!test-room:example.com";
process.env.MATRIX_WORKSPACE_ID = "test-workspace-id";
// Clear callbacks
mockMessageCallbacks.length = 0;
mockEventCallbacks.length = 0;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
service = module.get<MatrixService>(MatrixService);
stitcherService = module.get<StitcherService>(StitcherService);
commandParser = module.get<CommandParserService>(CommandParserService);
matrixRoomService = module.get(MatrixRoomService) as MatrixRoomService;
// Clear all mocks
vi.clearAllMocks();
});
describe("Connection Management", () => {
it("should connect to Matrix", async () => {
await service.connect();
expect(mockClient.start).toHaveBeenCalled();
});
it("should disconnect from Matrix", async () => {
await service.connect();
await service.disconnect();
expect(mockClient.stop).toHaveBeenCalled();
});
it("should check connection status", async () => {
expect(service.isConnected()).toBe(false);
await service.connect();
expect(service.isConnected()).toBe(true);
await service.disconnect();
expect(service.isConnected()).toBe(false);
});
});
describe("Message Handling", () => {
it("should send a message to a room", async () => {
await service.connect();
await service.sendMessage("!test-room:example.com", "Hello, Matrix!");
expect(mockClient.sendMessage).toHaveBeenCalledWith("!test-room:example.com", {
msgtype: "m.text",
body: "Hello, Matrix!",
});
});
it("should throw error if client is not connected", async () => {
await expect(service.sendMessage("!room:example.com", "Test")).rejects.toThrow(
"Matrix client is not connected"
);
});
});
describe("Thread Management", () => {
it("should create a thread by sending an initial message", async () => {
await service.connect();
const threadId = await service.createThread({
channelId: "!test-room:example.com",
name: "Job #42",
message: "Starting job...",
});
expect(threadId).toBe("$event-id-123");
expect(mockClient.sendMessage).toHaveBeenCalledWith("!test-room:example.com", {
msgtype: "m.text",
body: "[Job #42] Starting job...",
});
});
it("should send a message to a thread with m.thread relation", async () => {
await service.connect();
await service.sendThreadMessage({
threadId: "$root-event-id",
channelId: "!test-room:example.com",
content: "Step completed",
});
expect(mockClient.sendMessage).toHaveBeenCalledWith("!test-room:example.com", {
msgtype: "m.text",
body: "Step completed",
"m.relates_to": {
rel_type: "m.thread",
event_id: "$root-event-id",
is_falling_back: true,
"m.in_reply_to": {
event_id: "$root-event-id",
},
},
});
});
it("should fall back to controlRoomId when channelId is empty", async () => {
await service.connect();
await service.sendThreadMessage({
threadId: "$root-event-id",
channelId: "",
content: "Fallback message",
});
expect(mockClient.sendMessage).toHaveBeenCalledWith("!test-room:example.com", {
msgtype: "m.text",
body: "Fallback message",
"m.relates_to": {
rel_type: "m.thread",
event_id: "$root-event-id",
is_falling_back: true,
"m.in_reply_to": {
event_id: "$root-event-id",
},
},
});
});
it("should throw error when creating thread without connection", async () => {
await expect(
service.createThread({
channelId: "!room:example.com",
name: "Test",
message: "Test",
})
).rejects.toThrow("Matrix client is not connected");
});
it("should throw error when sending thread message without connection", async () => {
await expect(
service.sendThreadMessage({
threadId: "$event-id",
channelId: "!room:example.com",
content: "Test",
})
).rejects.toThrow("Matrix client is not connected");
});
});
describe("Command Parsing with shared CommandParserService", () => {
it("should parse @mosaic fix #42 via shared parser", () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix #42",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).not.toBeNull();
expect(command?.command).toBe("fix");
expect(command?.args).toContain("#42");
});
it("should parse !mosaic fix #42 by normalizing to @mosaic for the shared parser", () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "!mosaic fix #42",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).not.toBeNull();
expect(command?.command).toBe("fix");
expect(command?.args).toContain("#42");
});
it("should parse @mosaic status command via shared parser", () => {
const message: ChatMessage = {
id: "msg-2",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic status job-123",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).not.toBeNull();
expect(command?.command).toBe("status");
expect(command?.args).toContain("job-123");
});
it("should parse @mosaic cancel command via shared parser", () => {
const message: ChatMessage = {
id: "msg-3",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic cancel job-456",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).not.toBeNull();
expect(command?.command).toBe("cancel");
});
it("should parse @mosaic help command via shared parser", () => {
const message: ChatMessage = {
id: "msg-6",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic help",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).not.toBeNull();
expect(command?.command).toBe("help");
});
it("should return null for non-command messages", () => {
const message: ChatMessage = {
id: "msg-7",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "Just a regular message",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).toBeNull();
});
it("should return null for messages without @mosaic or !mosaic mention", () => {
const message: ChatMessage = {
id: "msg-8",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "fix 42",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).toBeNull();
});
it("should return null for @mosaic mention without a command", () => {
const message: ChatMessage = {
id: "msg-11",
channelId: "!room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic",
timestamp: new Date(),
};
const command = service.parseCommand(message);
expect(command).toBeNull();
});
});
describe("Event-driven message reception", () => {
it("should ignore messages from the bot itself", async () => {
await service.connect();
const parseCommandSpy = vi.spyOn(commandParser, "parseCommand");
// Simulate a message from the bot
expect(mockMessageCallbacks.length).toBeGreaterThan(0);
const callback = mockMessageCallbacks[0];
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@mosaic-bot:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic fix #42",
},
});
// Should not attempt to parse
expect(parseCommandSpy).not.toHaveBeenCalled();
});
it("should ignore messages in unmapped rooms", async () => {
// MatrixRoomService returns null for unknown rooms
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue(null);
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!unknown-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic fix #42",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should not dispatch to stitcher
expect(stitcherService.dispatchJob).not.toHaveBeenCalled();
});
it("should process commands in the control room (fallback workspace)", async () => {
// MatrixRoomService returns null, but room matches controlRoomId
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue(null);
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic help",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should send help message
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Available commands:"),
})
);
});
it("should process commands in rooms mapped via MatrixRoomService", async () => {
// MatrixRoomService resolves the workspace
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("mapped-workspace-id");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!mapped-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic fix #42",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should dispatch with the mapped workspace ID
expect(stitcherService.dispatchJob).toHaveBeenCalledWith(
expect.objectContaining({
workspaceId: "mapped-workspace-id",
})
);
});
it("should handle !mosaic prefix in incoming messages", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("test-workspace-id");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "!mosaic help",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should send help message (normalized !mosaic -> @mosaic for parser)
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Available commands:"),
})
);
});
it("should send help text when user tries an unknown command", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("test-workspace-id");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic invalidcommand",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should send error/help message (CommandParserService returns help text for unknown actions)
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Available commands"),
})
);
});
it("should ignore non-text messages", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("test-workspace-id");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.image",
body: "photo.jpg",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should not attempt any message sending
expect(mockClient.sendMessage).not.toHaveBeenCalled();
});
});
describe("Command Execution", () => {
it("should forward fix command to stitcher and create a thread", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix 42",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "fix",
args: ["42"],
message,
});
expect(stitcherService.dispatchJob).toHaveBeenCalledWith({
workspaceId: "test-workspace-id",
type: "code-task",
priority: 10,
metadata: {
issueNumber: 42,
command: "fix",
channelId: "!test-room:example.com",
threadId: "$event-id-123",
authorId: "@user:example.com",
authorName: "@user:example.com",
},
});
});
it("should handle fix with #-prefixed issue number", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix #42",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "fix",
args: ["#42"],
message,
});
expect(stitcherService.dispatchJob).toHaveBeenCalledWith(
expect.objectContaining({
metadata: expect.objectContaining({
issueNumber: 42,
}),
})
);
});
it("should respond with help message", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic help",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "help",
args: [],
message,
});
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Available commands:"),
})
);
});
it("should include retry command in help output", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic help",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "help",
args: [],
message,
});
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("retry"),
})
);
});
it("should send error for fix command without issue number", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "fix",
args: [],
message,
});
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Usage:"),
})
);
});
it("should send error for fix command with non-numeric issue", async () => {
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix abc",
timestamp: new Date(),
};
await service.connect();
await service.handleCommand({
command: "fix",
args: ["abc"],
message,
});
expect(mockClient.sendMessage).toHaveBeenCalledWith(
"!test-room:example.com",
expect.objectContaining({
body: expect.stringContaining("Invalid issue number"),
})
);
});
it("should dispatch fix command with workspace from MatrixRoomService", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("dynamic-workspace-id");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!mapped-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic fix #99",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
expect(stitcherService.dispatchJob).toHaveBeenCalledWith(
expect.objectContaining({
workspaceId: "dynamic-workspace-id",
metadata: expect.objectContaining({
issueNumber: 99,
}),
})
);
});
});
describe("Configuration", () => {
it("should throw error if MATRIX_HOMESERVER_URL is not set", async () => {
delete process.env.MATRIX_HOMESERVER_URL;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
const newService = module.get<MatrixService>(MatrixService);
await expect(newService.connect()).rejects.toThrow("MATRIX_HOMESERVER_URL is required");
// Restore for other tests
process.env.MATRIX_HOMESERVER_URL = "https://matrix.example.com";
});
it("should throw error if MATRIX_ACCESS_TOKEN is not set", async () => {
delete process.env.MATRIX_ACCESS_TOKEN;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
const newService = module.get<MatrixService>(MatrixService);
await expect(newService.connect()).rejects.toThrow("MATRIX_ACCESS_TOKEN is required");
// Restore for other tests
process.env.MATRIX_ACCESS_TOKEN = "test-access-token";
});
it("should throw error if MATRIX_BOT_USER_ID is not set", async () => {
delete process.env.MATRIX_BOT_USER_ID;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
const newService = module.get<MatrixService>(MatrixService);
await expect(newService.connect()).rejects.toThrow("MATRIX_BOT_USER_ID is required");
// Restore for other tests
process.env.MATRIX_BOT_USER_ID = "@mosaic-bot:example.com";
});
it("should throw error if MATRIX_WORKSPACE_ID is not set", async () => {
delete process.env.MATRIX_WORKSPACE_ID;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
const newService = module.get<MatrixService>(MatrixService);
await expect(newService.connect()).rejects.toThrow("MATRIX_WORKSPACE_ID is required");
// Restore for other tests
process.env.MATRIX_WORKSPACE_ID = "test-workspace-id";
});
it("should use configured workspace ID from environment", async () => {
const testWorkspaceId = "configured-workspace-456";
process.env.MATRIX_WORKSPACE_ID = testWorkspaceId;
const module: TestingModule = await Test.createTestingModule({
providers: [
MatrixService,
CommandParserService,
{
provide: StitcherService,
useValue: mockStitcherService,
},
{
provide: MatrixRoomService,
useValue: mockMatrixRoomService,
},
],
}).compile();
const newService = module.get<MatrixService>(MatrixService);
const message: ChatMessage = {
id: "msg-1",
channelId: "!test-room:example.com",
authorId: "@user:example.com",
authorName: "@user:example.com",
content: "@mosaic fix 42",
timestamp: new Date(),
};
await newService.connect();
await newService.handleCommand({
command: "fix",
args: ["42"],
message,
});
expect(mockStitcherService.dispatchJob).toHaveBeenCalledWith(
expect.objectContaining({
workspaceId: testWorkspaceId,
})
);
// Restore for other tests
process.env.MATRIX_WORKSPACE_ID = "test-workspace-id";
});
});
describe("Error Logging Security", () => {
it("should sanitize sensitive data in error logs", async () => {
const loggerErrorSpy = vi.spyOn(
(service as Record<string, unknown>)["logger"] as { error: (...args: unknown[]) => void },
"error"
);
await service.connect();
// Trigger room.event handler with null event to exercise error path
expect(mockEventCallbacks.length).toBeGreaterThan(0);
mockEventCallbacks[0]?.("!room:example.com", null as unknown as Record<string, unknown>);
// Verify error was logged
expect(loggerErrorSpy).toHaveBeenCalled();
// Get the logged error
const loggedArgs = loggerErrorSpy.mock.calls[0];
const loggedError = loggedArgs?.[1] as Record<string, unknown>;
// Verify non-sensitive error info is preserved
expect(loggedError).toBeDefined();
expect((loggedError as { message: string }).message).toBe("Received null event from Matrix");
});
it("should not include access token in error output", () => {
// Verify the access token is stored privately and not exposed
const serviceAsRecord = service as unknown as Record<string, unknown>;
// The accessToken should exist but should not appear in any public-facing method output
expect(serviceAsRecord["accessToken"]).toBe("test-access-token");
// Verify isConnected does not leak token
const connected = service.isConnected();
expect(String(connected)).not.toContain("test-access-token");
});
});
describe("MatrixRoomService reverse lookup", () => {
it("should call getWorkspaceForRoom when processing messages", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue("resolved-workspace");
await service.connect();
const callback = mockMessageCallbacks[0];
callback?.("!some-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic help",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
expect(matrixRoomService.getWorkspaceForRoom).toHaveBeenCalledWith("!some-room:example.com");
});
it("should fall back to control room workspace when MatrixRoomService returns null", async () => {
mockMatrixRoomService.getWorkspaceForRoom.mockResolvedValue(null);
await service.connect();
const callback = mockMessageCallbacks[0];
// Send to the control room (fallback path)
callback?.("!test-room:example.com", {
event_id: "$msg-1",
sender: "@user:example.com",
origin_server_ts: Date.now(),
content: {
msgtype: "m.text",
body: "@mosaic fix #10",
},
});
// Wait for async processing
await new Promise((resolve) => setTimeout(resolve, 50));
// Should dispatch with the env-configured workspace
expect(stitcherService.dispatchJob).toHaveBeenCalledWith(
expect.objectContaining({
workspaceId: "test-workspace-id",
})
);
});
});
});
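The event-driven tests above never touch a live homeserver: the mocked client records every `room.message` listener, and the tests fire synthetic events through the captured callback. A minimal, self-contained sketch of that capture pattern (the mock shape here is illustrative, not the suite's actual fixture):

```typescript
// Minimal sketch of the callback-capture pattern: the mock client records
// "room.message" listeners so a test can invoke them directly, simulating
// an incoming Matrix event without a homeserver.
type MessageCallback = (roomId: string, event: Record<string, unknown>) => void;

const messageCallbacks: MessageCallback[] = [];

const mockClient = {
  on(eventName: string, cb: MessageCallback): void {
    if (eventName === "room.message") {
      messageCallbacks.push(cb);
    }
  },
};

// The code under test registers its handler on the mock...
let received: string | null = null;
mockClient.on("room.message", (_roomId, event) => {
  received = (event.content as { body: string }).body;
});

// ...and the test drives it by firing a synthetic event through the
// captured callback, as the suite does with mockMessageCallbacks[0].
messageCallbacks[0]?.("!room:example.com", {
  content: { msgtype: "m.text", body: "@mosaic help" },
});
```

Because the real handler does async work after the callback returns, the suite follows each fired event with a short `setTimeout` wait before asserting.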


@@ -0,0 +1,649 @@
import { Injectable, Logger, Optional, Inject } from "@nestjs/common";
import { MatrixClient, SimpleFsStorageProvider, AutojoinRoomsMixin } from "matrix-bot-sdk";
import { StitcherService } from "../../stitcher/stitcher.service";
import { CommandParserService } from "../parser/command-parser.service";
import { CommandAction } from "../parser/command.interface";
import type { ParsedCommand } from "../parser/command.interface";
import { MatrixRoomService } from "./matrix-room.service";
import { sanitizeForLogging } from "../../common/utils";
import type {
IChatProvider,
ChatMessage,
ChatCommand,
ThreadCreateOptions,
ThreadMessageOptions,
} from "../interfaces";
/**
* Matrix room message event content
*/
interface MatrixMessageContent {
msgtype: string;
body: string;
"m.relates_to"?: MatrixRelatesTo;
}
/**
* Matrix relationship metadata for threads (MSC3440)
*/
interface MatrixRelatesTo {
rel_type: string;
event_id: string;
is_falling_back?: boolean;
"m.in_reply_to"?: {
event_id: string;
};
}
/**
* Matrix room event structure
*/
interface MatrixRoomEvent {
event_id: string;
sender: string;
origin_server_ts: number;
content: MatrixMessageContent;
}
/**
* Matrix Service - Matrix chat platform integration
*
* Responsibilities:
* - Connect to Matrix via access token
* - Listen for commands in mapped rooms (via MatrixRoomService)
* - Parse commands using shared CommandParserService
* - Forward commands to stitcher
* - Receive status updates from herald
* - Post updates to threads (MSC3440)
*/
@Injectable()
export class MatrixService implements IChatProvider {
private readonly logger = new Logger(MatrixService.name);
private client: MatrixClient | null = null;
private connected = false;
private readonly homeserverUrl: string;
private readonly accessToken: string;
private readonly botUserId: string;
private readonly controlRoomId: string;
private readonly workspaceId: string;
constructor(
private readonly stitcherService: StitcherService,
@Optional()
@Inject(CommandParserService)
private readonly commandParser: CommandParserService | null,
@Optional()
@Inject(MatrixRoomService)
private readonly matrixRoomService: MatrixRoomService | null
) {
this.homeserverUrl = process.env.MATRIX_HOMESERVER_URL ?? "";
this.accessToken = process.env.MATRIX_ACCESS_TOKEN ?? "";
this.botUserId = process.env.MATRIX_BOT_USER_ID ?? "";
this.controlRoomId = process.env.MATRIX_CONTROL_ROOM_ID ?? "";
this.workspaceId = process.env.MATRIX_WORKSPACE_ID ?? "";
}
/**
* Connect to Matrix homeserver
*/
async connect(): Promise<void> {
if (!this.homeserverUrl) {
throw new Error("MATRIX_HOMESERVER_URL is required");
}
if (!this.accessToken) {
throw new Error("MATRIX_ACCESS_TOKEN is required");
}
if (!this.workspaceId) {
throw new Error("MATRIX_WORKSPACE_ID is required");
}
if (!this.botUserId) {
throw new Error("MATRIX_BOT_USER_ID is required");
}
this.logger.log("Connecting to Matrix...");
const storage = new SimpleFsStorageProvider("matrix-bot-storage.json");
this.client = new MatrixClient(this.homeserverUrl, this.accessToken, storage);
// Auto-join rooms when invited
AutojoinRoomsMixin.setupOnClient(this.client);
// Setup event handlers
this.setupEventHandlers();
// Start syncing
await this.client.start();
this.connected = true;
this.logger.log(`Matrix bot connected as ${this.botUserId}`);
}
/**
* Setup event handlers for Matrix client
*/
private setupEventHandlers(): void {
if (!this.client) return;
this.client.on("room.message", (roomId: string, event: MatrixRoomEvent) => {
// Ignore messages from the bot itself
if (event.sender === this.botUserId) return;
// Only handle text messages
if (event.content.msgtype !== "m.text") return;
this.handleRoomMessage(roomId, event).catch((error: unknown) => {
this.logger.error(
`Error handling room message in ${roomId}:`,
error instanceof Error ? error.message : error
);
});
});
this.client.on("room.event", (_roomId: string, event: MatrixRoomEvent | null) => {
// Handle errors emitted as events
if (!event) {
const error = new Error("Received null event from Matrix");
const sanitizedError = sanitizeForLogging(error);
this.logger.error("Matrix client error:", sanitizedError);
}
});
}
/**
* Handle an incoming room message.
*
* Resolves the workspace for the room (via MatrixRoomService or fallback
* to the control room), then delegates to the shared CommandParserService
* for platform-agnostic command parsing and dispatches the result.
*/
private async handleRoomMessage(roomId: string, event: MatrixRoomEvent): Promise<void> {
// Resolve workspace: try MatrixRoomService first, fall back to control room
let resolvedWorkspaceId: string | null = null;
if (this.matrixRoomService) {
resolvedWorkspaceId = await this.matrixRoomService.getWorkspaceForRoom(roomId);
}
// Fallback: if the room is the configured control room, use the env workspace
if (!resolvedWorkspaceId && roomId === this.controlRoomId) {
resolvedWorkspaceId = this.workspaceId;
}
// If room is not mapped to any workspace, ignore the message
if (!resolvedWorkspaceId) {
return;
}
const messageContent = event.content.body;
// Build ChatMessage for interface compatibility
const chatMessage: ChatMessage = {
id: event.event_id,
channelId: roomId,
authorId: event.sender,
authorName: event.sender,
content: messageContent,
timestamp: new Date(event.origin_server_ts),
...(event.content["m.relates_to"]?.rel_type === "m.thread" && {
threadId: event.content["m.relates_to"].event_id,
}),
};
// Use shared CommandParserService if available
if (this.commandParser) {
// Normalize !mosaic to @mosaic for the shared parser
const normalizedContent = messageContent.replace(/^!mosaic/i, "@mosaic");
const result = this.commandParser.parseCommand(normalizedContent);
if (result.success) {
await this.handleParsedCommand(result.command, chatMessage, resolvedWorkspaceId);
} else if (normalizedContent.toLowerCase().startsWith("@mosaic")) {
// The user tried to use a command but it failed to parse -- send help
await this.sendMessage(roomId, result.error.help ?? result.error.message);
}
return;
}
// Fallback: use the built-in parseCommand if CommandParserService not injected
const command = this.parseCommand(chatMessage);
if (command) {
await this.handleCommand(command);
}
}
/**
* Handle a command parsed by the shared CommandParserService.
*
* Routes the ParsedCommand to the appropriate handler, passing
* along workspace context for job dispatch.
*/
private async handleParsedCommand(
parsed: ParsedCommand,
message: ChatMessage,
workspaceId: string
): Promise<void> {
this.logger.log(
`Handling command: ${parsed.action} from ${message.authorName} in workspace ${workspaceId}`
);
switch (parsed.action) {
case CommandAction.FIX:
await this.handleFixCommand(parsed.rawArgs, message, workspaceId);
break;
case CommandAction.STATUS:
await this.handleStatusCommand(parsed.rawArgs, message);
break;
case CommandAction.CANCEL:
await this.handleCancelCommand(parsed.rawArgs, message);
break;
case CommandAction.VERBOSE:
await this.handleVerboseCommand(parsed.rawArgs, message);
break;
case CommandAction.QUIET:
await this.handleQuietCommand(parsed.rawArgs, message);
break;
case CommandAction.HELP:
await this.handleHelpCommand(parsed.rawArgs, message);
break;
case CommandAction.RETRY:
await this.handleRetryCommand(parsed.rawArgs, message);
break;
default:
await this.sendMessage(
message.channelId,
`Unknown command. Type \`@mosaic help\` or \`!mosaic help\` for available commands.`
);
}
}
/**
* Disconnect from Matrix
*/
disconnect(): Promise<void> {
this.logger.log("Disconnecting from Matrix...");
this.connected = false;
if (this.client) {
this.client.stop();
}
return Promise.resolve();
}
/**
* Check if the provider is connected
*/
isConnected(): boolean {
return this.connected;
}
/**
* Get the underlying MatrixClient instance.
*
* Used by MatrixStreamingService for low-level operations
* (message edits, typing indicators) that require direct client access.
*
* @returns The MatrixClient instance, or null if not connected
*/
getClient(): MatrixClient | null {
return this.client;
}
/**
* Send a message to a room
*/
async sendMessage(roomId: string, content: string): Promise<void> {
if (!this.client) {
throw new Error("Matrix client is not connected");
}
const messageContent: MatrixMessageContent = {
msgtype: "m.text",
body: content,
};
await this.client.sendMessage(roomId, messageContent);
}
/**
* Create a thread for job updates (MSC3440)
*
* Matrix threads are created by sending an initial message
* and then replying with m.thread relation. The initial
* message event ID becomes the thread root.
*/
async createThread(options: ThreadCreateOptions): Promise<string> {
if (!this.client) {
throw new Error("Matrix client is not connected");
}
const { channelId, name, message } = options;
// Send the initial message that becomes the thread root
const initialContent: MatrixMessageContent = {
msgtype: "m.text",
body: `[${name}] ${message}`,
};
const eventId = await this.client.sendMessage(channelId, initialContent);
return eventId;
}
/**
* Send a message to a thread (MSC3440)
*
* Uses m.thread relation to associate the message with the thread root event.
*/
async sendThreadMessage(options: ThreadMessageOptions): Promise<void> {
if (!this.client) {
throw new Error("Matrix client is not connected");
}
const { threadId, channelId, content } = options;
// Use the channelId from options (threads are room-scoped), fall back to control room
const roomId = channelId || this.controlRoomId;
const threadContent: MatrixMessageContent = {
msgtype: "m.text",
body: content,
"m.relates_to": {
rel_type: "m.thread",
event_id: threadId,
is_falling_back: true,
"m.in_reply_to": {
event_id: threadId,
},
},
};
await this.client.sendMessage(roomId, threadContent);
}
/**
* Parse a command from a message (IChatProvider interface).
*
* Delegates to the shared CommandParserService when available,
* falling back to built-in parsing for backwards compatibility.
*/
parseCommand(message: ChatMessage): ChatCommand | null {
const { content } = message;
// Try shared parser first
if (this.commandParser) {
const normalizedContent = content.replace(/^!mosaic/i, "@mosaic");
const result = this.commandParser.parseCommand(normalizedContent);
if (result.success) {
return {
command: result.command.action,
args: result.command.rawArgs,
message,
};
}
return null;
}
// Fallback: built-in parsing for when CommandParserService is not injected
const lowerContent = content.toLowerCase();
if (!lowerContent.includes("@mosaic") && !lowerContent.includes("!mosaic")) {
return null;
}
const parts = content.trim().split(/\s+/);
const mosaicIndex = parts.findIndex(
(part) => part.toLowerCase().includes("@mosaic") || part.toLowerCase().includes("!mosaic")
);
if (mosaicIndex === -1 || mosaicIndex === parts.length - 1) {
return null;
}
const commandPart = parts[mosaicIndex + 1];
if (!commandPart) {
return null;
}
const command = commandPart.toLowerCase();
const args = parts.slice(mosaicIndex + 2);
const validCommands = ["fix", "status", "cancel", "verbose", "quiet", "help"];
if (!validCommands.includes(command)) {
return null;
}
return {
command,
args,
message,
};
}
/**
* Handle a parsed command (ChatCommand format, used by fallback path)
*/
async handleCommand(command: ChatCommand): Promise<void> {
const { command: cmd, args, message } = command;
this.logger.log(
`Handling command: ${cmd} with args: ${args.join(", ")} from ${message.authorName}`
);
switch (cmd) {
case "fix":
await this.handleFixCommand(args, message, this.workspaceId);
break;
case "status":
await this.handleStatusCommand(args, message);
break;
case "cancel":
await this.handleCancelCommand(args, message);
break;
case "verbose":
await this.handleVerboseCommand(args, message);
break;
case "quiet":
await this.handleQuietCommand(args, message);
break;
case "help":
await this.handleHelpCommand(args, message);
break;
default:
await this.sendMessage(
message.channelId,
`Unknown command: ${cmd}. Type \`@mosaic help\` or \`!mosaic help\` for available commands.`
);
}
}
/**
* Handle fix command - Start a job for an issue
*/
private async handleFixCommand(
args: string[],
message: ChatMessage,
workspaceId?: string
): Promise<void> {
if (args.length === 0 || !args[0]) {
await this.sendMessage(
message.channelId,
"Usage: `@mosaic fix <issue-number>` or `!mosaic fix <issue-number>`"
);
return;
}
// Parse issue number: handle both "#42" and "42" formats
const issueArg = args[0].replace(/^#/, "");
const issueNumber = parseInt(issueArg, 10);
if (isNaN(issueNumber)) {
await this.sendMessage(
message.channelId,
"Invalid issue number. Please provide a numeric issue number."
);
return;
}
const targetWorkspaceId = workspaceId ?? this.workspaceId;
// Create thread for job updates
const threadId = await this.createThread({
channelId: message.channelId,
name: `Job #${String(issueNumber)}`,
message: `Starting job for issue #${String(issueNumber)}...`,
});
// Dispatch job to stitcher
try {
const result = await this.stitcherService.dispatchJob({
workspaceId: targetWorkspaceId,
type: "code-task",
priority: 10,
metadata: {
issueNumber,
command: "fix",
channelId: message.channelId,
threadId: threadId,
authorId: message.authorId,
authorName: message.authorName,
},
});
// Send confirmation to thread
await this.sendThreadMessage({
threadId,
channelId: message.channelId,
content: `Job created: ${result.jobId}\nStatus: ${result.status}\nQueue: ${result.queueName}`,
});
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : "Unknown error";
this.logger.error(
`Failed to dispatch job for issue #${String(issueNumber)}: ${errorMessage}`
);
await this.sendThreadMessage({
threadId,
channelId: message.channelId,
content: `Failed to start job: ${errorMessage}`,
});
}
}
/**
* Handle status command - Get job status
*/
private async handleStatusCommand(args: string[], message: ChatMessage): Promise<void> {
if (args.length === 0 || !args[0]) {
await this.sendMessage(
message.channelId,
"Usage: `@mosaic status <job-id>` or `!mosaic status <job-id>`"
);
return;
}
const jobId = args[0];
// TODO: Implement job status retrieval from stitcher
await this.sendMessage(
message.channelId,
`Status command not yet implemented for job: ${jobId}`
);
}
/**
* Handle cancel command - Cancel a running job
*/
private async handleCancelCommand(args: string[], message: ChatMessage): Promise<void> {
if (args.length === 0 || !args[0]) {
await this.sendMessage(
message.channelId,
"Usage: `@mosaic cancel <job-id>` or `!mosaic cancel <job-id>`"
);
return;
}
const jobId = args[0];
// TODO: Implement job cancellation in stitcher
await this.sendMessage(
message.channelId,
`Cancel command not yet implemented for job: ${jobId}`
);
}
/**
* Handle retry command - Retry a failed job
*/
private async handleRetryCommand(args: string[], message: ChatMessage): Promise<void> {
if (args.length === 0 || !args[0]) {
await this.sendMessage(
message.channelId,
"Usage: `@mosaic retry <job-id>` or `!mosaic retry <job-id>`"
);
return;
}
const jobId = args[0];
// TODO: Implement job retry in stitcher
await this.sendMessage(
message.channelId,
`Retry command not yet implemented for job: ${jobId}`
);
}
/**
* Handle verbose command - Stream full logs to thread
*/
private async handleVerboseCommand(args: string[], message: ChatMessage): Promise<void> {
if (args.length === 0 || !args[0]) {
await this.sendMessage(
message.channelId,
"Usage: `@mosaic verbose <job-id>` or `!mosaic verbose <job-id>`"
);
return;
}
const jobId = args[0];
// TODO: Implement verbose logging
await this.sendMessage(message.channelId, `Verbose mode not yet implemented for job: ${jobId}`);
}
/**
* Handle quiet command - Reduce notifications
*/
private async handleQuietCommand(_args: string[], message: ChatMessage): Promise<void> {
// TODO: Implement quiet mode
await this.sendMessage(
message.channelId,
"Quiet mode not yet implemented. Currently showing milestone updates only."
);
}
/**
* Handle help command - Show available commands
*/
private async handleHelpCommand(_args: string[], message: ChatMessage): Promise<void> {
const helpMessage = `
**Available commands:**
\`@mosaic fix <issue>\` or \`!mosaic fix <issue>\` - Start job for issue
\`@mosaic status <job>\` or \`!mosaic status <job>\` - Get job status
\`@mosaic cancel <job>\` or \`!mosaic cancel <job>\` - Cancel running job
\`@mosaic retry <job>\` or \`!mosaic retry <job>\` - Retry failed job
\`@mosaic verbose <job>\` or \`!mosaic verbose <job>\` - Stream full logs to thread
\`@mosaic quiet\` or \`!mosaic quiet\` - Reduce notifications
\`@mosaic help\` or \`!mosaic help\` - Show this help message
**Noise Management:**
- Main room: Low verbosity (milestones only)
- Job threads: Medium verbosity (step completions)
- DMs: Configurable per user
`.trim();
await this.sendMessage(message.channelId, helpMessage);
}
}
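The `m.relates_to` shape used by `sendThreadMessage` can be seen in isolation. A hedged sketch of building an MSC3440 threaded-reply payload (the event ID below is a made-up placeholder):

```typescript
// Build an MSC3440 threaded-message payload: rel_type "m.thread" attaches
// the message to the thread root, while the m.in_reply_to fallback lets
// clients without thread support render it as an ordinary reply.
interface ThreadedContent {
  msgtype: string;
  body: string;
  "m.relates_to": {
    rel_type: string;
    event_id: string;
    is_falling_back: boolean;
    "m.in_reply_to": { event_id: string };
  };
}

function buildThreadReply(threadRootId: string, body: string): ThreadedContent {
  return {
    msgtype: "m.text",
    body,
    "m.relates_to": {
      rel_type: "m.thread",
      event_id: threadRootId,
      is_falling_back: true,
      "m.in_reply_to": { event_id: threadRootId },
    },
  };
}

const reply = buildThreadReply("$root-event:example.com", "Job started");
```

Both `event_id` fields point at the same thread-root event, matching what the service passes to `client.sendMessage`.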


@@ -10,7 +10,7 @@ import { BridgeModule } from "../bridge/bridge.module";
  * - Subscribe to job events
  * - Format status messages with PDA-friendly language
  * - Route to appropriate channels based on workspace config
- * - Support Discord (via bridge) and PR comments
+ * - Broadcast to ALL active chat providers via CHAT_PROVIDERS token
  */
 @Module({
   imports: [PrismaModule, BridgeModule],


@@ -2,7 +2,8 @@ import { Test, TestingModule } from "@nestjs/testing";
import { vi, describe, it, expect, beforeEach } from "vitest"; import { vi, describe, it, expect, beforeEach } from "vitest";
import { HeraldService } from "./herald.service"; import { HeraldService } from "./herald.service";
import { PrismaService } from "../prisma/prisma.service"; import { PrismaService } from "../prisma/prisma.service";
import { DiscordService } from "../bridge/discord/discord.service"; import { CHAT_PROVIDERS } from "../bridge/bridge.constants";
import type { IChatProvider } from "../bridge/interfaces/chat-provider.interface";
import { import {
JOB_CREATED, JOB_CREATED,
JOB_STARTED, JOB_STARTED,
@@ -14,10 +15,31 @@ import {
   GATE_FAILED,
 } from "../job-events/event-types";
 
+function createMockProvider(
+  name: string,
+  connected = true
+): IChatProvider & {
+  sendMessage: ReturnType<typeof vi.fn>;
+  sendThreadMessage: ReturnType<typeof vi.fn>;
+  createThread: ReturnType<typeof vi.fn>;
+  isConnected: ReturnType<typeof vi.fn>;
+  connect: ReturnType<typeof vi.fn>;
+  disconnect: ReturnType<typeof vi.fn>;
+  parseCommand: ReturnType<typeof vi.fn>;
+} {
+  return {
+    connect: vi.fn().mockResolvedValue(undefined),
+    disconnect: vi.fn().mockResolvedValue(undefined),
+    isConnected: vi.fn().mockReturnValue(connected),
+    sendMessage: vi.fn().mockResolvedValue(undefined),
+    createThread: vi.fn().mockResolvedValue("thread-id"),
+    sendThreadMessage: vi.fn().mockResolvedValue(undefined),
+    parseCommand: vi.fn().mockReturnValue(null),
+  };
+}
+
 describe("HeraldService", () => {
   let service: HeraldService;
-  let prisma: PrismaService;
-  let discord: DiscordService;
 
   const mockPrisma = {
     workspace: {
@@ -31,14 +53,15 @@ describe("HeraldService", () => {
     },
   };
 
-  const mockDiscord = {
-    isConnected: vi.fn(),
-    sendMessage: vi.fn(),
-    sendThreadMessage: vi.fn(),
-    createThread: vi.fn(),
-  };
+  let mockProviderA: ReturnType<typeof createMockProvider>;
+  let mockProviderB: ReturnType<typeof createMockProvider>;
+  let chatProviders: IChatProvider[];
 
   beforeEach(async () => {
+    mockProviderA = createMockProvider("providerA", true);
+    mockProviderB = createMockProvider("providerB", true);
+    chatProviders = [mockProviderA, mockProviderB];
     const module: TestingModule = await Test.createTestingModule({
       providers: [
         HeraldService,
@@ -47,25 +70,47 @@ describe("HeraldService", () => {
           useValue: mockPrisma,
         },
         {
-          provide: DiscordService,
-          useValue: mockDiscord,
+          provide: CHAT_PROVIDERS,
+          useValue: chatProviders,
         },
       ],
     }).compile();
 
     service = module.get<HeraldService>(HeraldService);
-    prisma = module.get<PrismaService>(PrismaService);
-    discord = module.get<DiscordService>(DiscordService);
 
     // Reset mocks
     vi.clearAllMocks();
+    // Restore default connected state after clearAllMocks
+    mockProviderA.isConnected.mockReturnValue(true);
+    mockProviderB.isConnected.mockReturnValue(true);
   });
 
   describe("broadcastJobEvent", () => {
-    it("should broadcast job.created event to configured channel", async () => {
-      // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
+    const baseSetup = (): {
+      jobId: string;
+      workspaceId: string;
+    } => {
+      const workspaceId = "workspace-1";
+      const jobId = "job-1";
+      mockPrisma.runnerJob.findUnique.mockResolvedValue({
+        id: jobId,
+        workspaceId,
+        type: "code-task",
+      });
+      mockPrisma.jobEvent.findFirst.mockResolvedValue({
+        payload: {
+          metadata: { issueNumber: 42, threadId: "thread-123", channelId: "channel-abc" },
+        },
+      });
+      return { jobId, workspaceId };
+    };
+
+    it("should broadcast to all connected providers", async () => {
+      // Arrange
+      const { jobId } = baseSetup();
       const event = {
         id: "event-1",
         jobId,
@@ -75,46 +120,25 @@ describe("HeraldService", () => {
         payload: { issueNumber: 42 },
       };
 
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: {
-          herald: {
-            channelMappings: {
-              "code-task": "channel-123",
-            },
-          },
-        },
-      });
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { issueNumber: 42, threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-      mockDiscord.sendThreadMessage.mockResolvedValue(undefined);
-
       // Act
       await service.broadcastJobEvent(jobId, event);
 
       // Assert
-      expect(mockDiscord.sendThreadMessage).toHaveBeenCalledWith({
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledWith({
         threadId: "thread-123",
+        channelId: "channel-abc",
+        content: expect.stringContaining("Job created"),
+      });
+      expect(mockProviderB.sendThreadMessage).toHaveBeenCalledWith({
+        threadId: "thread-123",
+        channelId: "channel-abc",
         content: expect.stringContaining("Job created"),
       });
     });
 
-    it("should broadcast job.started event", async () => {
+    it("should broadcast job.started event to all providers", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
+      const { jobId } = baseSetup();
       const event = {
         id: "event-1",
         jobId,
@@ -124,40 +148,25 @@ describe("HeraldService", () => {
         payload: {},
       };
 
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: { herald: { channelMappings: {} } },
-      });
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-      mockDiscord.sendThreadMessage.mockResolvedValue(undefined);
-
       // Act
       await service.broadcastJobEvent(jobId, event);
 
       // Assert
-      expect(mockDiscord.sendThreadMessage).toHaveBeenCalledWith({
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledWith({
         threadId: "thread-123",
+        channelId: "channel-abc",
+        content: expect.stringContaining("Job started"),
+      });
+      expect(mockProviderB.sendThreadMessage).toHaveBeenCalledWith({
+        threadId: "thread-123",
+        channelId: "channel-abc",
         content: expect.stringContaining("Job started"),
       });
     });
 
     it("should broadcast job.completed event with success message", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
+      const { jobId } = baseSetup();
       const event = {
         id: "event-1",
         jobId,
@@ -167,40 +176,20 @@ describe("HeraldService", () => {
         payload: { duration: 120 },
       };
 
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: { herald: { channelMappings: {} } },
-      });
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-      mockDiscord.sendThreadMessage.mockResolvedValue(undefined);
-
       // Act
       await service.broadcastJobEvent(jobId, event);
 
       // Assert
-      expect(mockDiscord.sendThreadMessage).toHaveBeenCalledWith({
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledWith({
         threadId: "thread-123",
+        channelId: "channel-abc",
         content: expect.stringContaining("completed"),
       });
     });
 
     it("should broadcast job.failed event with PDA-friendly language", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
+      const { jobId } = baseSetup();
       const event = {
         id: "event-1",
         jobId,
@@ -210,43 +199,30 @@ describe("HeraldService", () => {
         payload: { error: "Build failed" },
       };
 
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: { herald: { channelMappings: {} } },
-      });
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-      mockDiscord.sendThreadMessage.mockResolvedValue(undefined);
-
       // Act
       await service.broadcastJobEvent(jobId, event);
 
       // Assert
-      expect(mockDiscord.sendThreadMessage).toHaveBeenCalledWith({
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledWith({
         threadId: "thread-123",
+        channelId: "channel-abc",
         content: expect.stringContaining("encountered an issue"),
       });
 
       // Verify the actual message doesn't contain demanding language
-      const actualCall = mockDiscord.sendThreadMessage.mock.calls[0][0];
+      const actualCall = mockProviderA.sendThreadMessage.mock.calls[0][0] as {
+        threadId: string;
+        channelId: string;
+        content: string;
+      };
       expect(actualCall.content).not.toMatch(/FAILED|ERROR|CRITICAL|URGENT/);
     });
 
-    it("should skip broadcasting if Discord is not connected", async () => {
+    it("should skip disconnected providers", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
+      const { jobId } = baseSetup();
+      mockProviderA.isConnected.mockReturnValue(true);
+      mockProviderB.isConnected.mockReturnValue(false);
       const event = {
         id: "event-1",
         jobId,
@@ -256,14 +232,36 @@ describe("HeraldService", () => {
         payload: {},
       };
 
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: { herald: { channelMappings: {} } },
+      // Act
+      await service.broadcastJobEvent(jobId, event);
+
+      // Assert
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledTimes(1);
+      expect(mockProviderB.sendThreadMessage).not.toHaveBeenCalled();
     });
 
+    it("should handle empty providers array without crashing", async () => {
+      // Arrange — rebuild module with empty providers
+      const module: TestingModule = await Test.createTestingModule({
+        providers: [
+          HeraldService,
+          {
+            provide: PrismaService,
+            useValue: mockPrisma,
+          },
+          {
+            provide: CHAT_PROVIDERS,
+            useValue: [],
+          },
+        ],
+      }).compile();
+      const emptyService = module.get<HeraldService>(HeraldService);
+
+      const jobId = "job-1";
       mockPrisma.runnerJob.findUnique.mockResolvedValue({
         id: jobId,
-        workspaceId,
+        workspaceId: "workspace-1",
         type: "code-task",
       });
@@ -273,36 +271,68 @@ describe("HeraldService", () => {
         },
       });
 
-      mockDiscord.isConnected.mockReturnValue(false);
+      const event = {
+        id: "event-1",
+        jobId,
+        type: JOB_CREATED,
+        timestamp: new Date(),
+        actor: "system",
+        payload: {},
+      };
 
-      // Act
-      await service.broadcastJobEvent(jobId, event);
+      // Act & Assert — should not throw
+      await expect(emptyService.broadcastJobEvent(jobId, event)).resolves.not.toThrow();
+    });
 
-      // Assert
-      expect(mockDiscord.sendThreadMessage).not.toHaveBeenCalled();
+    it("should continue broadcasting when one provider errors", async () => {
+      // Arrange
+      const { jobId } = baseSetup();
+      mockProviderA.sendThreadMessage.mockRejectedValue(new Error("Provider A rate limit"));
+      mockProviderB.sendThreadMessage.mockResolvedValue(undefined);
+
+      const event = {
+        id: "event-1",
+        jobId,
+        type: JOB_CREATED,
+        timestamp: new Date(),
+        actor: "system",
+        payload: {},
+      };
+
+      // Act — should not throw despite provider A failing
+      await service.broadcastJobEvent(jobId, event);
+
+      // Assert — provider B should still have been called
+      expect(mockProviderA.sendThreadMessage).toHaveBeenCalledTimes(1);
+      expect(mockProviderB.sendThreadMessage).toHaveBeenCalledTimes(1);
+    });
+
+    it("should not throw when all providers error", async () => {
+      // Arrange
+      const { jobId } = baseSetup();
+      mockProviderA.sendThreadMessage.mockRejectedValue(new Error("Provider A down"));
+      mockProviderB.sendThreadMessage.mockRejectedValue(new Error("Provider B down"));
+
+      const event = {
+        id: "event-1",
+        jobId,
+        type: JOB_CREATED,
+        timestamp: new Date(),
+        actor: "system",
+        payload: {},
+      };
+
+      // Act & Assert — should not throw; provider errors are logged, not propagated
+      await expect(service.broadcastJobEvent(jobId, event)).resolves.not.toThrow();
     });
 
     it("should skip broadcasting if job has no threadId", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
       const jobId = "job-1";
-      const event = {
-        id: "event-1",
-        jobId,
-        type: JOB_CREATED,
-        timestamp: new Date(),
-        actor: "system",
-        payload: {},
-      };
-
-      mockPrisma.workspace.findUnique.mockResolvedValue({
-        id: workspaceId,
-        settings: { herald: { channelMappings: {} } },
-      });
       mockPrisma.runnerJob.findUnique.mockResolvedValue({
         id: jobId,
-        workspaceId,
+        workspaceId: "workspace-1",
        type: "code-task",
      });
@@ -312,16 +342,45 @@ describe("HeraldService", () => {
         },
       });
 
-      mockDiscord.isConnected.mockReturnValue(true);
+      const event = {
+        id: "event-1",
+        jobId,
+        type: JOB_CREATED,
+        timestamp: new Date(),
+        actor: "system",
+        payload: {},
+      };
 
       // Act
       await service.broadcastJobEvent(jobId, event);
 
       // Assert
-      expect(mockDiscord.sendThreadMessage).not.toHaveBeenCalled();
+      expect(mockProviderA.sendThreadMessage).not.toHaveBeenCalled();
+      expect(mockProviderB.sendThreadMessage).not.toHaveBeenCalled();
     });
 
-    // ERROR HANDLING TESTS - Issue #185
+    it("should skip broadcasting if job not found", async () => {
+      // Arrange
+      const jobId = "nonexistent-job";
+      mockPrisma.runnerJob.findUnique.mockResolvedValue(null);
+
+      const event = {
+        id: "event-1",
+        jobId,
+        type: JOB_CREATED,
+        timestamp: new Date(),
+        actor: "system",
+        payload: {},
+      };
+
+      // Act
+      await service.broadcastJobEvent(jobId, event);
+
+      // Assert
+      expect(mockProviderA.sendThreadMessage).not.toHaveBeenCalled();
+    });
+
+    // ERROR HANDLING TESTS - database errors should still propagate
 
     it("should propagate database errors when job lookup fails", async () => {
       // Arrange
@@ -344,43 +403,8 @@ describe("HeraldService", () => {
       );
     });
 
-    it("should propagate Discord send failures with context", async () => {
-      // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "job-1";
-      const event = {
-        id: "event-1",
-        jobId,
-        type: JOB_CREATED,
-        timestamp: new Date(),
-        actor: "system",
-        payload: {},
-      };
-
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-
-      const discordError = new Error("Rate limit exceeded");
-      mockDiscord.sendThreadMessage.mockRejectedValue(discordError);
-
-      // Act & Assert
-      await expect(service.broadcastJobEvent(jobId, event)).rejects.toThrow("Rate limit exceeded");
-    });
-
     it("should propagate errors when fetching job events fails", async () => {
       // Arrange
-      const workspaceId = "workspace-1";
       const jobId = "job-1";
       const event = {
         id: "event-1",
@@ -393,61 +417,16 @@ describe("HeraldService", () => {
       mockPrisma.runnerJob.findUnique.mockResolvedValue({
         id: jobId,
-        workspaceId,
+        workspaceId: "workspace-1",
         type: "code-task",
       });
 
       const dbError = new Error("Query timeout");
       mockPrisma.jobEvent.findFirst.mockRejectedValue(dbError);
-      mockDiscord.isConnected.mockReturnValue(true);
 
       // Act & Assert
       await expect(service.broadcastJobEvent(jobId, event)).rejects.toThrow("Query timeout");
     });
-
-    it("should include job context in error messages", async () => {
-      // Arrange
-      const workspaceId = "workspace-1";
-      const jobId = "test-job-123";
-      const event = {
-        id: "event-1",
-        jobId,
-        type: JOB_COMPLETED,
-        timestamp: new Date(),
-        actor: "system",
-        payload: {},
-      };
-
-      mockPrisma.runnerJob.findUnique.mockResolvedValue({
-        id: jobId,
-        workspaceId,
-        type: "code-task",
-      });
-      mockPrisma.jobEvent.findFirst.mockResolvedValue({
-        payload: {
-          metadata: { threadId: "thread-123" },
-        },
-      });
-      mockDiscord.isConnected.mockReturnValue(true);
-
-      const discordError = new Error("Network failure");
-      mockDiscord.sendThreadMessage.mockRejectedValue(discordError);
-
-      // Act & Assert
-      try {
-        await service.broadcastJobEvent(jobId, event);
-        // Should not reach here
-        expect(true).toBe(false);
-      } catch (error) {
-        // Verify error was thrown
-        expect(error).toBeDefined();
-        // Verify original error is preserved
-        expect((error as Error).message).toContain("Network failure");
-      }
-    });
   });
 
   describe("formatJobEventMessage", () => {
@@ -473,7 +452,6 @@ describe("HeraldService", () => {
       const message = service.formatJobEventMessage(event, job, metadata);
 
       // Assert
-      expect(message).toContain("🟢");
       expect(message).toContain("Job created");
       expect(message).toContain("#42");
       expect(message.length).toBeLessThan(200); // Keep it scannable
@@ -526,7 +504,6 @@ describe("HeraldService", () => {
       const message = service.formatJobEventMessage(event, job, metadata);
 
       // Assert
-      expect(message).toMatch(/✅|🟢/);
       expect(message).toContain("completed");
       expect(message).not.toMatch(/COMPLETED|SUCCESS/);
     });

View File

@@ -1,6 +1,7 @@
import { Injectable, Logger } from "@nestjs/common"; import { Inject, Injectable, Logger } from "@nestjs/common";
import { PrismaService } from "../prisma/prisma.service"; import { PrismaService } from "../prisma/prisma.service";
import { DiscordService } from "../bridge/discord/discord.service"; import { CHAT_PROVIDERS } from "../bridge/bridge.constants";
import type { IChatProvider } from "../bridge/interfaces/chat-provider.interface";
import { import {
JOB_CREATED, JOB_CREATED,
JOB_STARTED, JOB_STARTED,
@@ -21,7 +22,7 @@ import {
  * - Subscribe to job events
  * - Format status messages with PDA-friendly language
  * - Route to appropriate channels based on workspace config
- * - Support Discord (via bridge) and PR comments
+ * - Broadcast to ALL active chat providers (Discord, Matrix, etc.)
  */
 @Injectable()
 export class HeraldService {
@@ -29,11 +30,11 @@ export class HeraldService {
   constructor(
     private readonly prisma: PrismaService,
-    private readonly discord: DiscordService
+    @Inject(CHAT_PROVIDERS) private readonly chatProviders: IChatProvider[]
   ) {}
 
   /**
-   * Broadcast a job event to the appropriate channel
+   * Broadcast a job event to all connected chat providers
    */
   async broadcastJobEvent(
     jobId: string,
@@ -47,7 +48,6 @@ export class HeraldService {
       payload: unknown;
     }
   ): Promise<void> {
-    try {
     // Get job details
     const job = await this.prisma.runnerJob.findUnique({
       where: { id: jobId },
@@ -63,12 +63,6 @@ export class HeraldService {
return; return;
} }
// Check if Discord is connected
if (!this.discord.isConnected()) {
this.logger.debug("Discord not connected, skipping broadcast");
return;
}
// Get threadId from first event payload (job.created event has metadata) // Get threadId from first event payload (job.created event has metadata)
const firstEvent = await this.prisma.jobEvent.findFirst({ const firstEvent = await this.prisma.jobEvent.findFirst({
where: { where: {
@@ -83,6 +77,7 @@ export class HeraldService {
     const firstEventPayload = firstEvent?.payload as Record<string, unknown> | undefined;
     const metadata = firstEventPayload?.metadata as Record<string, unknown> | undefined;
     const threadId = metadata?.threadId as string | undefined;
+    const channelId = metadata?.channelId as string | undefined;
 
     if (!threadId) {
       this.logger.debug(`Job ${jobId} has no threadId, skipping broadcast`);
@@ -92,21 +87,29 @@ export class HeraldService {
     // Format message
     const message = this.formatJobEventMessage(event, job, metadata);
 
-    // Send to thread
-    await this.discord.sendThreadMessage({
-      threadId,
-      content: message,
-    });
+    // Broadcast to all connected providers
+    for (const provider of this.chatProviders) {
+      if (!provider.isConnected()) {
+        continue;
+      }
+      try {
+        await provider.sendThreadMessage({
+          threadId,
+          channelId: channelId ?? "",
+          content: message,
+        });
+      } catch (error: unknown) {
+        // Log and continue — one provider failure must not block others
+        const providerName = provider.constructor.name;
+        this.logger.error(
+          `Failed to broadcast event ${event.type} for job ${jobId} via ${providerName}:`,
+          error instanceof Error ? error.message : error
+        );
+      }
+    }
 
     this.logger.debug(`Broadcasted event ${event.type} for job ${jobId} to thread ${threadId}`);
-    } catch (error) {
-      // Log the error with full context for debugging
-      this.logger.error(`Failed to broadcast event ${event.type} for job ${jobId}:`, error);
-
-      // Re-throw the error so callers can handle it appropriately
-      // This enables proper error tracking, retry logic, and alerting
-      throw error;
-    }
   }
 
   /**

View File

@@ -0,0 +1,109 @@
/**
* LLM Cost Table
*
* Maps model names to per-token costs in microdollars (USD * 1,000,000).
* For example, $0.003 per 1K tokens = 3,000 microdollars per 1K tokens = 3 microdollars per token.
*
* Costs are split into input (prompt) and output (completion) pricing.
* Ollama models run locally and are free (0 cost).
*/
/**
* Per-token cost in microdollars for a single model.
*/
export interface ModelCost {
/** Cost per input token in microdollars */
inputPerToken: number;
/** Cost per output token in microdollars */
outputPerToken: number;
}
/**
* Cost table mapping model name prefixes to per-token pricing.
*
* Model matching is prefix-based: "claude-sonnet-4-5" matches "claude-sonnet-4-5-20250929".
* More specific prefixes are checked first (longest match wins).
*
* Prices sourced from provider pricing pages as of 2026-02.
*/
const MODEL_COSTS: Record<string, ModelCost> = {
// Anthropic Claude models (per-token microdollars)
// claude-sonnet-4-5: $3/M input, $15/M output
"claude-sonnet-4-5": { inputPerToken: 3, outputPerToken: 15 },
// claude-opus-4: $15/M input, $75/M output
"claude-opus-4": { inputPerToken: 15, outputPerToken: 75 },
// claude-3-5-haiku / claude-haiku-4-5: $0.80/M input, $4/M output
"claude-haiku-4-5": { inputPerToken: 0.8, outputPerToken: 4 },
"claude-3-5-haiku": { inputPerToken: 0.8, outputPerToken: 4 },
// claude-3-5-sonnet: $3/M input, $15/M output
"claude-3-5-sonnet": { inputPerToken: 3, outputPerToken: 15 },
// claude-3-opus: $15/M input, $75/M output
"claude-3-opus": { inputPerToken: 15, outputPerToken: 75 },
// claude-3-sonnet: $3/M input, $15/M output
"claude-3-sonnet": { inputPerToken: 3, outputPerToken: 15 },
// claude-3-haiku: $0.25/M input, $1.25/M output
"claude-3-haiku": { inputPerToken: 0.25, outputPerToken: 1.25 },
// OpenAI models (per-token microdollars)
// gpt-4o: $2.50/M input, $10/M output
"gpt-4o-mini": { inputPerToken: 0.15, outputPerToken: 0.6 },
"gpt-4o": { inputPerToken: 2.5, outputPerToken: 10 },
// gpt-4-turbo: $10/M input, $30/M output
"gpt-4-turbo": { inputPerToken: 10, outputPerToken: 30 },
// gpt-4: $30/M input, $60/M output
"gpt-4": { inputPerToken: 30, outputPerToken: 60 },
// gpt-3.5-turbo: $0.50/M input, $1.50/M output
"gpt-3.5-turbo": { inputPerToken: 0.5, outputPerToken: 1.5 },
// Ollama / local models: free
// These are catch-all entries; any model not matched above falls through to getModelCost default
};
/**
* Sorted model prefixes from longest to shortest for greedy prefix matching.
* Ensures "gpt-4o-mini" matches before "gpt-4o" and "claude-3-5-haiku" before "claude-3-haiku".
*/
const SORTED_PREFIXES = Object.keys(MODEL_COSTS).sort((a, b) => b.length - a.length);
/**
* Look up per-token cost for a given model name.
*
* Uses longest-prefix matching: the model name is compared against known
* prefixes from longest to shortest. If no prefix matches, returns zero cost
* (assumes local/free model).
*
* @param modelName - Full model name (e.g. "claude-sonnet-4-5-20250929", "gpt-4o")
* @returns Per-token cost in microdollars
*/
export function getModelCost(modelName: string): ModelCost {
const normalized = modelName.toLowerCase();
for (const prefix of SORTED_PREFIXES) {
if (normalized.startsWith(prefix)) {
const cost = MODEL_COSTS[prefix];
if (cost !== undefined) {
return cost;
}
}
}
// Unknown or local model — assume free
return { inputPerToken: 0, outputPerToken: 0 };
}
/**
* Calculate total cost in microdollars for a given model and token counts.
*
* @param modelName - Full model name
* @param inputTokens - Number of input (prompt) tokens
* @param outputTokens - Number of output (completion) tokens
* @returns Total cost in microdollars (USD * 1,000,000)
*/
export function calculateCostMicrodollars(
modelName: string,
inputTokens: number,
outputTokens: number
): number {
const cost = getModelCost(modelName);
return Math.round(cost.inputPerToken * inputTokens + cost.outputPerToken * outputTokens);
}

View File

@@ -0,0 +1,487 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { TaskCompletionEvent, EventBuilderParams } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import {
LlmTelemetryTrackerService,
estimateTokens,
mapProviderType,
mapHarness,
inferTaskType,
} from "./llm-telemetry-tracker.service";
import type { LlmCompletionParams } from "./llm-telemetry-tracker.service";
import { getModelCost, calculateCostMicrodollars } from "./llm-cost-table";
// ---------- Cost Table Tests ----------
describe("llm-cost-table", () => {
describe("getModelCost", () => {
it("should return cost for claude-sonnet-4-5 models", () => {
const cost = getModelCost("claude-sonnet-4-5-20250929");
expect(cost.inputPerToken).toBe(3);
expect(cost.outputPerToken).toBe(15);
});
it("should return cost for claude-opus-4 models", () => {
const cost = getModelCost("claude-opus-4-6");
expect(cost.inputPerToken).toBe(15);
expect(cost.outputPerToken).toBe(75);
});
it("should return cost for claude-haiku-4-5 models", () => {
const cost = getModelCost("claude-haiku-4-5-20251001");
expect(cost.inputPerToken).toBe(0.8);
expect(cost.outputPerToken).toBe(4);
});
it("should return cost for gpt-4o", () => {
const cost = getModelCost("gpt-4o");
expect(cost.inputPerToken).toBe(2.5);
expect(cost.outputPerToken).toBe(10);
});
it("should return cost for gpt-4o-mini (longer prefix matches first)", () => {
const cost = getModelCost("gpt-4o-mini");
expect(cost.inputPerToken).toBe(0.15);
expect(cost.outputPerToken).toBe(0.6);
});
it("should return zero cost for unknown/local models", () => {
const cost = getModelCost("llama3.2");
expect(cost.inputPerToken).toBe(0);
expect(cost.outputPerToken).toBe(0);
});
it("should return zero cost for ollama models", () => {
const cost = getModelCost("mistral:7b");
expect(cost.inputPerToken).toBe(0);
expect(cost.outputPerToken).toBe(0);
});
it("should be case-insensitive", () => {
const cost = getModelCost("Claude-Sonnet-4-5-20250929");
expect(cost.inputPerToken).toBe(3);
});
});
describe("calculateCostMicrodollars", () => {
it("should calculate cost for claude-sonnet-4-5 with token counts", () => {
// 1000 input tokens * 3 + 500 output tokens * 15 = 3000 + 7500 = 10500
const cost = calculateCostMicrodollars("claude-sonnet-4-5-20250929", 1000, 500);
expect(cost).toBe(10500);
});
it("should return 0 for local models", () => {
const cost = calculateCostMicrodollars("llama3.2", 1000, 500);
expect(cost).toBe(0);
});
it("should return 0 when token counts are 0", () => {
const cost = calculateCostMicrodollars("claude-opus-4-6", 0, 0);
expect(cost).toBe(0);
});
it("should round the result to integer microdollars", () => {
// gpt-4o-mini: 0.15 * 3 + 0.6 * 7 = 0.45 + 4.2 = 4.65 -> rounds to 5
const cost = calculateCostMicrodollars("gpt-4o-mini", 3, 7);
expect(cost).toBe(5);
});
});
});
// ---------- Helper Function Tests ----------
describe("helper functions", () => {
describe("estimateTokens", () => {
it("should estimate ~1 token per 4 characters", () => {
expect(estimateTokens("abcd")).toBe(1);
expect(estimateTokens("abcdefgh")).toBe(2);
});
it("should round up for partial tokens", () => {
expect(estimateTokens("abc")).toBe(1);
expect(estimateTokens("abcde")).toBe(2);
});
it("should return 0 for empty string", () => {
expect(estimateTokens("")).toBe(0);
});
});
describe("mapProviderType", () => {
it("should map claude to ANTHROPIC", () => {
expect(mapProviderType("claude")).toBe(Provider.ANTHROPIC);
});
it("should map openai to OPENAI", () => {
expect(mapProviderType("openai")).toBe(Provider.OPENAI);
});
it("should map ollama to OLLAMA", () => {
expect(mapProviderType("ollama")).toBe(Provider.OLLAMA);
});
});
describe("mapHarness", () => {
it("should map ollama to OLLAMA_LOCAL", () => {
expect(mapHarness("ollama")).toBe(Harness.OLLAMA_LOCAL);
});
it("should map claude to API_DIRECT", () => {
expect(mapHarness("claude")).toBe(Harness.API_DIRECT);
});
it("should map openai to API_DIRECT", () => {
expect(mapHarness("openai")).toBe(Harness.API_DIRECT);
});
});
describe("inferTaskType", () => {
it("should return IMPLEMENTATION for embed operation", () => {
expect(inferTaskType("embed")).toBe(TaskType.IMPLEMENTATION);
});
it("should return UNKNOWN when no context provided for chat", () => {
expect(inferTaskType("chat")).toBe(TaskType.UNKNOWN);
});
it("should return PLANNING for brain context", () => {
expect(inferTaskType("chat", "brain")).toBe(TaskType.PLANNING);
});
it("should return PLANNING for planning context", () => {
expect(inferTaskType("chat", "planning")).toBe(TaskType.PLANNING);
});
it("should return CODE_REVIEW for review context", () => {
expect(inferTaskType("chat", "code-review")).toBe(TaskType.CODE_REVIEW);
});
it("should return TESTING for test context", () => {
expect(inferTaskType("chat", "test-generation")).toBe(TaskType.TESTING);
});
it("should return DEBUGGING for debug context", () => {
expect(inferTaskType("chatStream", "debug-session")).toBe(TaskType.DEBUGGING);
});
it("should return REFACTORING for refactor context", () => {
expect(inferTaskType("chat", "refactor")).toBe(TaskType.REFACTORING);
});
it("should return DOCUMENTATION for doc context", () => {
expect(inferTaskType("chat", "documentation")).toBe(TaskType.DOCUMENTATION);
});
it("should return CONFIGURATION for config context", () => {
expect(inferTaskType("chat", "config-update")).toBe(TaskType.CONFIGURATION);
});
it("should return SECURITY_AUDIT for security context", () => {
expect(inferTaskType("chat", "security-check")).toBe(TaskType.SECURITY_AUDIT);
});
it("should return IMPLEMENTATION for chat context", () => {
expect(inferTaskType("chat", "chat")).toBe(TaskType.IMPLEMENTATION);
});
it("should be case-insensitive", () => {
expect(inferTaskType("chat", "BRAIN")).toBe(TaskType.PLANNING);
});
it("should return UNKNOWN for unrecognized context", () => {
expect(inferTaskType("chat", "something-else")).toBe(TaskType.UNKNOWN);
});
});
});
// ---------- LlmTelemetryTrackerService Tests ----------
describe("LlmTelemetryTrackerService", () => {
let service: LlmTelemetryTrackerService;
let mockTelemetryService: {
eventBuilder: { build: ReturnType<typeof vi.fn> } | null;
trackTaskCompletion: ReturnType<typeof vi.fn>;
isEnabled: boolean;
};
const mockEvent: TaskCompletionEvent = {
instance_id: "test-instance",
event_id: "test-event",
schema_version: "1.0.0",
timestamp: new Date().toISOString(),
task_duration_ms: 1000,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.LOW,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5-20250929",
provider: Provider.ANTHROPIC,
estimated_input_tokens: 100,
estimated_output_tokens: 200,
actual_input_tokens: 100,
actual_output_tokens: 200,
estimated_cost_usd_micros: 3300,
actual_cost_usd_micros: 3300,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: Outcome.SUCCESS,
retry_count: 0,
};
beforeEach(async () => {
mockTelemetryService = {
eventBuilder: {
build: vi.fn().mockReturnValue(mockEvent),
},
trackTaskCompletion: vi.fn(),
isEnabled: true,
};
const module: TestingModule = await Test.createTestingModule({
providers: [
LlmTelemetryTrackerService,
{
provide: MosaicTelemetryService,
useValue: mockTelemetryService,
},
],
}).compile();
service = module.get<LlmTelemetryTrackerService>(LlmTelemetryTrackerService);
});
it("should be defined", () => {
expect(service).toBeDefined();
});
describe("trackLlmCompletion", () => {
const baseParams: LlmCompletionParams = {
model: "claude-sonnet-4-5-20250929",
providerType: "claude",
operation: "chat",
durationMs: 1200,
inputTokens: 150,
outputTokens: 300,
callingContext: "chat",
success: true,
};
it("should build and track a telemetry event for Anthropic provider", () => {
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_duration_ms: 1200,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.LOW,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5-20250929",
provider: Provider.ANTHROPIC,
actual_input_tokens: 150,
actual_output_tokens: 300,
outcome: Outcome.SUCCESS,
})
);
expect(mockTelemetryService.trackTaskCompletion).toHaveBeenCalledWith(mockEvent);
});
it("should build and track a telemetry event for OpenAI provider", () => {
service.trackLlmCompletion({
...baseParams,
model: "gpt-4o",
providerType: "openai",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
model: "gpt-4o",
provider: Provider.OPENAI,
harness: Harness.API_DIRECT,
})
);
});
it("should build and track a telemetry event for Ollama provider", () => {
service.trackLlmCompletion({
...baseParams,
model: "llama3.2",
providerType: "ollama",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
provider: Provider.OLLAMA,
harness: Harness.OLLAMA_LOCAL,
})
);
});
it("should calculate cost in microdollars correctly", () => {
service.trackLlmCompletion(baseParams);
// claude-sonnet-4-5: 150 * 3 + 300 * 15 = 450 + 4500 = 4950
const expectedActualCost = 4950;
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
// Estimated values are 0 when no PredictionService is injected
estimated_cost_usd_micros: 0,
actual_cost_usd_micros: expectedActualCost,
})
);
});
it("should calculate zero cost for ollama models", () => {
service.trackLlmCompletion({
...baseParams,
model: "llama3.2",
providerType: "ollama",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
estimated_cost_usd_micros: 0,
actual_cost_usd_micros: 0,
})
);
});
it("should track FAILURE outcome when success is false", () => {
service.trackLlmCompletion({
...baseParams,
success: false,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
outcome: Outcome.FAILURE,
})
);
});
it("should infer task type from calling context", () => {
service.trackLlmCompletion({
...baseParams,
callingContext: "brain",
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_type: TaskType.PLANNING,
})
);
});
it("should set empty quality gates arrays for direct LLM calls", () => {
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
})
);
});
it("should silently skip when telemetry is disabled (eventBuilder is null)", () => {
mockTelemetryService.eventBuilder = null;
// Should not throw
service.trackLlmCompletion(baseParams);
expect(mockTelemetryService.trackTaskCompletion).not.toHaveBeenCalled();
});
it("should not throw when eventBuilder.build throws an error", () => {
mockTelemetryService.eventBuilder = {
build: vi.fn().mockImplementation(() => {
throw new Error("Build failed");
}),
};
// Should not throw
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
});
it("should not throw when trackTaskCompletion throws an error", () => {
mockTelemetryService.trackTaskCompletion.mockImplementation(() => {
throw new Error("Track failed");
});
// Should not throw
expect(() => service.trackLlmCompletion(baseParams)).not.toThrow();
});
it("should handle streaming operation with estimated tokens", () => {
service.trackLlmCompletion({
...baseParams,
operation: "chatStream",
inputTokens: 50,
outputTokens: 100,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
actual_input_tokens: 50,
actual_output_tokens: 100,
// Estimated values are 0 when no PredictionService is injected
estimated_input_tokens: 0,
estimated_output_tokens: 0,
})
);
});
it("should handle embed operation", () => {
service.trackLlmCompletion({
...baseParams,
operation: "embed",
outputTokens: 0,
callingContext: undefined,
});
expect(mockTelemetryService.eventBuilder?.build).toHaveBeenCalledWith(
expect.objectContaining({
task_type: TaskType.IMPLEMENTATION,
actual_output_tokens: 0,
})
);
});
it("should pass all required EventBuilderParams fields", () => {
service.trackLlmCompletion(baseParams);
const buildCall = (mockTelemetryService.eventBuilder?.build as ReturnType<typeof vi.fn>).mock
.calls[0][0] as EventBuilderParams;
// Verify all required fields are present
expect(buildCall).toHaveProperty("task_duration_ms");
expect(buildCall).toHaveProperty("task_type");
expect(buildCall).toHaveProperty("complexity");
expect(buildCall).toHaveProperty("harness");
expect(buildCall).toHaveProperty("model");
expect(buildCall).toHaveProperty("provider");
expect(buildCall).toHaveProperty("estimated_input_tokens");
expect(buildCall).toHaveProperty("estimated_output_tokens");
expect(buildCall).toHaveProperty("actual_input_tokens");
expect(buildCall).toHaveProperty("actual_output_tokens");
expect(buildCall).toHaveProperty("estimated_cost_usd_micros");
expect(buildCall).toHaveProperty("actual_cost_usd_micros");
expect(buildCall).toHaveProperty("quality_gate_passed");
expect(buildCall).toHaveProperty("quality_gates_run");
expect(buildCall).toHaveProperty("quality_gates_failed");
expect(buildCall).toHaveProperty("context_compactions");
expect(buildCall).toHaveProperty("context_rotations");
expect(buildCall).toHaveProperty("context_utilization_final");
expect(buildCall).toHaveProperty("outcome");
expect(buildCall).toHaveProperty("retry_count");
});
});
});
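The context-keyword inference exercised by the tests above can be sketched as a standalone function. This is a simplified illustration, not the service code itself: task types are plain strings here rather than the `TaskType` enum from `@mosaicstack/telemetry-client`, and the keyword list mirrors the behavior the assertions pin down (lowercasing, priority order, `UNKNOWN` fallback).

```typescript
// Simplified sketch of keyword-based task-type inference. Match order
// matters: "code-review" must hit "review" before the generic "chat" check.
function inferTaskTypeSketch(operation: string, callingContext?: string): string {
  if (operation === "embed") return "IMPLEMENTATION"; // embeddings ~ indexing work
  if (!callingContext) return "UNKNOWN";
  const ctx = callingContext.toLowerCase(); // case-insensitive matching
  if (ctx.includes("brain") || ctx.includes("plan")) return "PLANNING";
  if (ctx.includes("review")) return "CODE_REVIEW";
  if (ctx.includes("test")) return "TESTING";
  if (ctx.includes("debug")) return "DEBUGGING";
  if (ctx.includes("refactor")) return "REFACTORING";
  if (ctx.includes("doc")) return "DOCUMENTATION";
  if (ctx.includes("config")) return "CONFIGURATION";
  if (ctx.includes("security") || ctx.includes("audit")) return "SECURITY_AUDIT";
  if (ctx.includes("chat") || ctx.includes("implement")) return "IMPLEMENTATION";
  return "UNKNOWN";
}
```

The first-match-wins ordering is what makes a context like "code-review" resolve to code review rather than falling through to the chat branch.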


@@ -0,0 +1,224 @@
import { Injectable, Logger, Optional } from "@nestjs/common";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import { PredictionService } from "../mosaic-telemetry/prediction.service";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
import type { LlmProviderType } from "./providers/llm-provider.interface";
import { calculateCostMicrodollars } from "./llm-cost-table";
/**
* Parameters for tracking an LLM completion event.
*/
export interface LlmCompletionParams {
/** Full model name (e.g. "claude-sonnet-4-5-20250929") */
model: string;
/** Provider type discriminator */
providerType: LlmProviderType;
/** Operation type that was performed */
operation: "chat" | "chatStream" | "embed";
/** Duration of the LLM call in milliseconds */
durationMs: number;
/** Number of input (prompt) tokens consumed */
inputTokens: number;
/** Number of output (completion) tokens generated */
outputTokens: number;
/**
* Optional calling context hint for task type inference.
* Examples: "brain", "chat", "embed", "planning", "code-review"
*/
callingContext?: string | undefined;
/** Whether the call succeeded or failed */
success: boolean;
}
/**
* Estimate token count from text length.
* Uses a rough approximation of ~4 characters per token (GPT/Claude average).
*/
export function estimateTokens(text: string): number {
return Math.ceil(text.length / 4);
}
/** Map LLM provider type to telemetry Provider enum */
export function mapProviderType(providerType: LlmProviderType): Provider {
switch (providerType) {
case "claude":
return Provider.ANTHROPIC;
case "openai":
return Provider.OPENAI;
case "ollama":
return Provider.OLLAMA;
default:
return Provider.UNKNOWN;
}
}
/** Map LLM provider type to telemetry Harness enum */
export function mapHarness(providerType: LlmProviderType): Harness {
switch (providerType) {
case "ollama":
return Harness.OLLAMA_LOCAL;
default:
return Harness.API_DIRECT;
}
}
/**
* Infer the task type from calling context and operation.
*
* @param operation - The LLM operation (chat, chatStream, embed)
* @param callingContext - Optional hint about the caller's purpose
* @returns Inferred TaskType
*/
export function inferTaskType(
operation: "chat" | "chatStream" | "embed",
callingContext?: string
): TaskType {
// Embedding operations are typically for indexing/search
if (operation === "embed") {
return TaskType.IMPLEMENTATION;
}
if (!callingContext) {
return TaskType.UNKNOWN;
}
const ctx = callingContext.toLowerCase();
if (ctx.includes("brain") || ctx.includes("planning") || ctx.includes("plan")) {
return TaskType.PLANNING;
}
if (ctx.includes("review") || ctx.includes("code-review")) {
return TaskType.CODE_REVIEW;
}
if (ctx.includes("test")) {
return TaskType.TESTING;
}
if (ctx.includes("debug")) {
return TaskType.DEBUGGING;
}
if (ctx.includes("refactor")) {
return TaskType.REFACTORING;
}
if (ctx.includes("doc")) {
return TaskType.DOCUMENTATION;
}
if (ctx.includes("config")) {
return TaskType.CONFIGURATION;
}
if (ctx.includes("security") || ctx.includes("audit")) {
return TaskType.SECURITY_AUDIT;
}
if (ctx.includes("chat") || ctx.includes("implement")) {
return TaskType.IMPLEMENTATION;
}
return TaskType.UNKNOWN;
}
/**
* LLM Telemetry Tracker Service
*
* Builds and submits telemetry events for LLM completions.
* All tracking is non-blocking and fire-and-forget; telemetry errors
* never propagate to the caller.
*
* @example
* ```typescript
* // After a successful chat completion
* this.telemetryTracker.trackLlmCompletion({
* model: "claude-sonnet-4-5-20250929",
* providerType: "claude",
* operation: "chat",
* durationMs: 1200,
* inputTokens: 150,
* outputTokens: 300,
* callingContext: "chat",
* success: true,
* });
* ```
*/
@Injectable()
export class LlmTelemetryTrackerService {
private readonly logger = new Logger(LlmTelemetryTrackerService.name);
constructor(
private readonly telemetry: MosaicTelemetryService,
@Optional() private readonly predictionService?: PredictionService
) {}
/**
* Track an LLM completion event via Mosaic Telemetry.
*
* This method is intentionally fire-and-forget. It catches all errors
* internally and logs them without propagating to the caller.
*
* @param params - LLM completion parameters
*/
trackLlmCompletion(params: LlmCompletionParams): void {
try {
const builder = this.telemetry.eventBuilder;
if (!builder) {
// Telemetry is disabled — silently skip
return;
}
const taskType = inferTaskType(params.operation, params.callingContext);
const provider = mapProviderType(params.providerType);
const costMicrodollars = calculateCostMicrodollars(
params.model,
params.inputTokens,
params.outputTokens
);
// Query predictions for estimated fields (graceful degradation)
let estimatedInputTokens = 0;
let estimatedOutputTokens = 0;
let estimatedCostMicros = 0;
if (this.predictionService) {
const prediction = this.predictionService.getEstimate(
taskType,
params.model,
provider,
Complexity.LOW
);
if (prediction?.prediction && prediction.metadata.confidence !== "none") {
estimatedInputTokens = prediction.prediction.input_tokens.median;
estimatedOutputTokens = prediction.prediction.output_tokens.median;
estimatedCostMicros = prediction.prediction.cost_usd_micros.median ?? 0;
}
}
const event = builder.build({
task_duration_ms: params.durationMs,
task_type: taskType,
complexity: Complexity.LOW,
harness: mapHarness(params.providerType),
model: params.model,
provider,
estimated_input_tokens: estimatedInputTokens,
estimated_output_tokens: estimatedOutputTokens,
actual_input_tokens: params.inputTokens,
actual_output_tokens: params.outputTokens,
estimated_cost_usd_micros: estimatedCostMicros,
actual_cost_usd_micros: costMicrodollars,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: params.success ? Outcome.SUCCESS : Outcome.FAILURE,
retry_count: 0,
});
this.telemetry.trackTaskCompletion(event);
} catch (error: unknown) {
// Never let telemetry errors propagate
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to track LLM telemetry event: ${msg}`);
}
}
}
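`calculateCostMicrodollars` is imported from `./llm-cost-table` but not shown in this diff. A minimal sketch of the microdollar arithmetic, assuming the per-token rates implied by the spec above (claude-sonnet-4-5 at 3 µ$ per input token and 15 µ$ per output token, i.e. $3/$15 per million tokens, with unlisted models such as Ollama costing zero); the rate table here is a hypothetical stand-in, not the actual `llm-cost-table` contents:

```typescript
// Hypothetical per-token rates in microdollars (1 USD = 1_000_000 micros),
// inferred from the test expectation 150 * 3 + 300 * 15 = 4950.
const RATES_MICROS: Record<string, { input: number; output: number }> = {
  "claude-sonnet-4-5-20250929": { input: 3, output: 15 },
};

function costMicrodollarsSketch(
  model: string,
  inputTokens: number,
  outputTokens: number
): number {
  const rate = RATES_MICROS[model];
  if (!rate) return 0; // unknown/local models (e.g. Ollama) are treated as free
  return inputTokens * rate.input + outputTokens * rate.output;
}
```

Working in integer microdollars keeps the event payload free of floating-point cost fields.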


@@ -3,13 +3,14 @@ import { LlmController } from "./llm.controller";
import { LlmProviderAdminController } from "./llm-provider-admin.controller";
import { LlmService } from "./llm.service";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
import { PrismaModule } from "../prisma/prisma.module";
import { LlmUsageModule } from "../llm-usage/llm-usage.module";
@Module({
imports: [PrismaModule, LlmUsageModule],
controllers: [LlmController, LlmProviderAdminController],
providers: [LlmService, LlmManagerService, LlmTelemetryTrackerService],
exports: [LlmService, LlmManagerService],
})
export class LlmModule {}


@@ -3,6 +3,7 @@ import { Test, TestingModule } from "@nestjs/testing";
import { ServiceUnavailableException } from "@nestjs/common";
import { LlmService } from "./llm.service";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService } from "./llm-telemetry-tracker.service";
import type { ChatRequestDto, EmbedRequestDto, ChatResponseDto, EmbedResponseDto } from "./dto";
import type {
LlmProviderInterface,
@@ -14,6 +15,9 @@ describe("LlmService", () => {
let mockManagerService: {
getDefaultProvider: ReturnType<typeof vi.fn>;
};
let mockTelemetryTracker: {
trackLlmCompletion: ReturnType<typeof vi.fn>;
};
let mockProvider: {
chat: ReturnType<typeof vi.fn>;
chatStream: ReturnType<typeof vi.fn>;
@@ -41,6 +45,11 @@ describe("LlmService", () => {
getDefaultProvider: vi.fn().mockResolvedValue(mockProvider),
};
// Create mock telemetry tracker
mockTelemetryTracker = {
trackLlmCompletion: vi.fn(),
};
const module: TestingModule = await Test.createTestingModule({
providers: [
LlmService,
@@ -48,6 +57,10 @@ describe("LlmService", () => {
{
provide: LlmManagerService,
useValue: mockManagerService,
},
{
provide: LlmTelemetryTrackerService,
useValue: mockTelemetryTracker,
},
],
}).compile();
@@ -135,6 +148,45 @@ describe("LlmService", () => {
expect(result).toEqual(response);
});
it("should track telemetry on successful chat", async () => {
const response: ChatResponseDto = {
model: "llama3.2",
message: { role: "assistant", content: "Hello" },
done: true,
promptEvalCount: 10,
evalCount: 20,
};
mockProvider.chat.mockResolvedValue(response);
await service.chat(request, "chat");
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "chat",
inputTokens: 10,
outputTokens: 20,
callingContext: "chat",
success: true,
})
);
});
it("should track telemetry on failed chat", async () => {
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
await expect(service.chat(request)).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
operation: "chat",
success: false,
})
);
});
it("should throw ServiceUnavailableException on error", async () => {
mockProvider.chat.mockRejectedValue(new Error("Chat failed"));
@@ -177,6 +229,94 @@ describe("LlmService", () => {
expect(chunks[1].message.content).toBe(" world");
});
it("should track telemetry after stream completes", async () => {
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
yield {
model: "llama3.2",
message: { role: "assistant", content: "Hello" },
done: false,
};
yield {
model: "llama3.2",
message: { role: "assistant", content: " world" },
done: true,
promptEvalCount: 5,
evalCount: 10,
};
}
mockProvider.chatStream.mockReturnValue(mockGenerator());
const chunks: ChatResponseDto[] = [];
for await (const chunk of service.chatStream(request, "brain")) {
chunks.push(chunk);
}
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "chatStream",
inputTokens: 5,
outputTokens: 10,
callingContext: "brain",
success: true,
})
);
});
it("should estimate tokens when provider does not return counts in stream", async () => {
async function* mockGenerator(): AsyncGenerator<ChatResponseDto> {
yield {
model: "llama3.2",
message: { role: "assistant", content: "Hello world" },
done: false,
};
yield {
model: "llama3.2",
message: { role: "assistant", content: "" },
done: true,
};
}
mockProvider.chatStream.mockReturnValue(mockGenerator());
const chunks: ChatResponseDto[] = [];
for await (const chunk of service.chatStream(request)) {
chunks.push(chunk);
}
// Should use estimated tokens since no actual counts provided
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "chatStream",
success: true,
// Input estimated from "Hi" -> ceil(2/4) = 1
inputTokens: 1,
// Output estimated from "Hello world" -> ceil(11/4) = 3
outputTokens: 3,
})
);
});
it("should track telemetry on stream failure", async () => {
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
throw new Error("Stream failed");
}
mockProvider.chatStream.mockReturnValue(errorGenerator());
const generator = service.chatStream(request);
await expect(generator.next()).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "chatStream",
success: false,
})
);
});
it("should throw ServiceUnavailableException on error", async () => {
async function* errorGenerator(): AsyncGenerator<ChatResponseDto> {
throw new Error("Stream failed");
@@ -210,6 +350,41 @@ describe("LlmService", () => {
expect(result).toEqual(response);
});
it("should track telemetry on successful embed", async () => {
const response: EmbedResponseDto = {
model: "llama3.2",
embeddings: [[0.1, 0.2, 0.3]],
totalDuration: 500,
};
mockProvider.embed.mockResolvedValue(response);
await service.embed(request, "embed");
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
model: "llama3.2",
providerType: "ollama",
operation: "embed",
outputTokens: 0,
callingContext: "embed",
success: true,
})
);
});
it("should track telemetry on failed embed", async () => {
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));
await expect(service.embed(request)).rejects.toThrow(ServiceUnavailableException);
expect(mockTelemetryTracker.trackLlmCompletion).toHaveBeenCalledWith(
expect.objectContaining({
operation: "embed",
success: false,
})
);
});
it("should throw ServiceUnavailableException on error", async () => {
mockProvider.embed.mockRejectedValue(new Error("Embedding failed"));


@@ -1,13 +1,15 @@
import { Injectable, OnModuleInit, Logger, ServiceUnavailableException } from "@nestjs/common";
import { LlmManagerService } from "./llm-manager.service";
import { LlmTelemetryTrackerService, estimateTokens } from "./llm-telemetry-tracker.service";
import type { ChatRequestDto, ChatResponseDto, EmbedRequestDto, EmbedResponseDto } from "./dto";
import type { LlmProviderHealthStatus, LlmProviderType } from "./providers/llm-provider.interface";
/**
* LLM Service
*
* High-level service for LLM operations. Delegates to providers via LlmManagerService.
* Maintains backward compatibility with the original API while supporting multiple providers.
* Automatically tracks completions via Mosaic Telemetry (non-blocking).
*
* @example
* ```typescript
@@ -33,7 +35,10 @@ import type { LlmProviderHealthStatus } from "./providers/llm-provider.interface
export class LlmService implements OnModuleInit {
private readonly logger = new Logger(LlmService.name);
constructor(
private readonly llmManager: LlmManagerService,
private readonly telemetryTracker: LlmTelemetryTrackerService
) {
this.logger.log("LLM service initialized");
}
@@ -91,14 +96,45 @@ export class LlmService implements OnModuleInit {
* Perform a synchronous chat completion.
*
* @param request - Chat request with messages and configuration
* @param callingContext - Optional context hint for telemetry task type inference
* @returns Complete chat response
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async chat(request: ChatRequestDto, callingContext?: string): Promise<ChatResponseDto> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
try {
const provider = await this.llmManager.getDefaultProvider();
providerType = provider.type;
const response = await provider.chat(request);
// Fire-and-forget telemetry tracking
this.telemetryTracker.trackLlmCompletion({
model: response.model,
providerType,
operation: "chat",
durationMs: Date.now() - startTime,
inputTokens: response.promptEvalCount ?? 0,
outputTokens: response.evalCount ?? 0,
callingContext,
success: true,
});
return response;
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chat",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Chat failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Chat completion failed: ${errorMessage}`);
@@ -107,20 +143,75 @@ export class LlmService implements OnModuleInit {
/**
* Perform a streaming chat completion.
* Yields response chunks as they arrive from the provider.
* Aggregates token usage and tracks telemetry after the stream ends.
*
* @param request - Chat request with messages and configuration
* @param callingContext - Optional context hint for telemetry task type inference
* @yields Chat response chunks
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async *chatStream(
request: ChatRequestDto,
callingContext?: string
): AsyncGenerator<ChatResponseDto, void, unknown> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
let aggregatedContent = "";
let lastChunkInputTokens = 0;
let lastChunkOutputTokens = 0;
try {
const provider = await this.llmManager.getDefaultProvider();
providerType = provider.type;
const stream = provider.chatStream(request);
for await (const chunk of stream) {
// Accumulate content for token estimation
aggregatedContent += chunk.message.content;
// Some providers include token counts on the final chunk
if (chunk.promptEvalCount !== undefined) {
lastChunkInputTokens = chunk.promptEvalCount;
}
if (chunk.evalCount !== undefined) {
lastChunkOutputTokens = chunk.evalCount;
}
yield chunk;
}
// After stream completes, track telemetry
// Use actual token counts if available, otherwise estimate from content length
const inputTokens =
lastChunkInputTokens > 0
? lastChunkInputTokens
: estimateTokens(request.messages.map((m) => m.content).join(" "));
const outputTokens =
lastChunkOutputTokens > 0 ? lastChunkOutputTokens : estimateTokens(aggregatedContent);
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chatStream",
durationMs: Date.now() - startTime,
inputTokens,
outputTokens,
callingContext,
success: true,
});
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "chatStream",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Stream failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Streaming failed: ${errorMessage}`);
@@ -130,14 +221,48 @@ export class LlmService implements OnModuleInit {
* Generate embeddings for the given input texts.
*
* @param request - Embedding request with model and input texts
* @param callingContext - Optional context hint for telemetry task type inference
* @returns Embeddings response with vector arrays
* @throws {ServiceUnavailableException} If provider is unavailable or request fails
*/
async embed(request: EmbedRequestDto, callingContext?: string): Promise<EmbedResponseDto> {
const startTime = Date.now();
let providerType: LlmProviderType = "ollama";
try {
const provider = await this.llmManager.getDefaultProvider();
providerType = provider.type;
const response = await provider.embed(request);
// Estimate input tokens from the input text
const inputTokens = estimateTokens(request.input.join(" "));
// Fire-and-forget telemetry tracking
this.telemetryTracker.trackLlmCompletion({
model: response.model,
providerType,
operation: "embed",
durationMs: Date.now() - startTime,
inputTokens,
outputTokens: 0, // Embeddings don't produce output tokens
callingContext,
success: true,
});
return response;
} catch (error: unknown) {
// Track failure (fire-and-forget)
this.telemetryTracker.trackLlmCompletion({
model: request.model,
providerType,
operation: "embed",
durationMs: Date.now() - startTime,
inputTokens: 0,
outputTokens: 0,
callingContext,
success: false,
});
const errorMessage = error instanceof Error ? error.message : String(error);
this.logger.error(`Embed failed: ${errorMessage}`);
throw new ServiceUnavailableException(`Embedding failed: ${errorMessage}`);
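The stream accounting in `chatStream` prefers provider-reported token counts from the final chunk and falls back to the ~4-characters-per-token estimate. A standalone sketch of that fallback (hypothetical helper names; the real code uses `estimateTokens` from the tracker module):

```typescript
// ~4 characters per token, matching the estimateTokens heuristic.
function estimateTokensSketch(text: string): number {
  return Math.ceil(text.length / 4);
}

// Prefer the provider-reported count (non-zero) from the final stream
// chunk; otherwise estimate from the accumulated message content.
function resolveTokenCount(reported: number, text: string): number {
  return reported > 0 ? reported : estimateTokensSketch(text);
}
```

This is why the spec expects 1 input token for "Hi" and 3 output tokens for "Hello world" when the mock provider returns no counts.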


@@ -0,0 +1,17 @@
/**
* Mosaic Telemetry module — task completion tracking and crowd-sourced predictions.
*
* **Not to be confused with the OpenTelemetry (OTEL) TelemetryModule** at
* `src/telemetry/`, which handles distributed request tracing.
*
* @module mosaic-telemetry
*/
export { MosaicTelemetryModule } from "./mosaic-telemetry.module";
export { MosaicTelemetryService } from "./mosaic-telemetry.service";
export {
loadMosaicTelemetryConfig,
toSdkConfig,
MOSAIC_TELEMETRY_ENV,
type MosaicTelemetryModuleConfig,
} from "./mosaic-telemetry.config";


@@ -0,0 +1,78 @@
import type { ConfigService } from "@nestjs/config";
import type { TelemetryConfig } from "@mosaicstack/telemetry-client";
/**
* Configuration interface for the Mosaic Telemetry module.
* Maps environment variables to SDK configuration.
*/
export interface MosaicTelemetryModuleConfig {
/** Whether telemetry collection is enabled. Default: true */
enabled: boolean;
/** Base URL of the telemetry server */
serverUrl: string;
/** API key for authentication (64-char hex string) */
apiKey: string;
/** Instance UUID for this client */
instanceId: string;
/** If true, log events instead of sending them. Default: false */
dryRun: boolean;
}
/**
* Environment variable names used by the Mosaic Telemetry module.
*/
export const MOSAIC_TELEMETRY_ENV = {
ENABLED: "MOSAIC_TELEMETRY_ENABLED",
SERVER_URL: "MOSAIC_TELEMETRY_SERVER_URL",
API_KEY: "MOSAIC_TELEMETRY_API_KEY",
INSTANCE_ID: "MOSAIC_TELEMETRY_INSTANCE_ID",
DRY_RUN: "MOSAIC_TELEMETRY_DRY_RUN",
} as const;
/**
* Read Mosaic Telemetry configuration from environment variables via NestJS ConfigService.
*
* @param configService - NestJS ConfigService instance
* @returns Parsed module configuration
*/
export function loadMosaicTelemetryConfig(
configService: ConfigService
): MosaicTelemetryModuleConfig {
const enabledRaw = configService.get<string>(MOSAIC_TELEMETRY_ENV.ENABLED, "true");
const dryRunRaw = configService.get<string>(MOSAIC_TELEMETRY_ENV.DRY_RUN, "false");
return {
enabled: enabledRaw.toLowerCase() === "true",
serverUrl: configService.get<string>(MOSAIC_TELEMETRY_ENV.SERVER_URL, ""),
apiKey: configService.get<string>(MOSAIC_TELEMETRY_ENV.API_KEY, ""),
instanceId: configService.get<string>(MOSAIC_TELEMETRY_ENV.INSTANCE_ID, ""),
dryRun: dryRunRaw.toLowerCase() === "true",
};
}
/**
* Convert module config to SDK TelemetryConfig format.
* Includes the onError callback for NestJS Logger integration.
*
* @param config - Module configuration
* @param onError - Error callback (typically NestJS Logger)
* @returns SDK-compatible TelemetryConfig
*/
export function toSdkConfig(
config: MosaicTelemetryModuleConfig,
onError?: (error: Error) => void
): TelemetryConfig {
const sdkConfig: TelemetryConfig = {
serverUrl: config.serverUrl,
apiKey: config.apiKey,
instanceId: config.instanceId,
enabled: config.enabled,
dryRun: config.dryRun,
};
if (onError) {
sdkConfig.onError = onError;
}
return sdkConfig;
}
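Note that `loadMosaicTelemetryConfig` treats boolean env vars strictly: only a case-insensitive `"true"` enables a flag, and anything else (including typos like `"yes"` or `"1"`) is false. A standalone sketch of that parsing rule, without the NestJS `ConfigService` dependency:

```typescript
// Standalone sketch of the boolean env parsing used by
// loadMosaicTelemetryConfig. Only a case-insensitive "true" enables the
// flag; any other value, including "1" or "yes", resolves to false.
function parseBooleanEnv(raw: string | undefined, defaultValue: string): boolean {
  return (raw ?? defaultValue).toLowerCase() === "true";
}

console.log(parseBooleanEnv("TRUE", "false")); // case-insensitive match
console.log(parseBooleanEnv(undefined, "true")); // falls back to the default
console.log(parseBooleanEnv("yes", "false")); // non-"true" values are false
```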


@@ -0,0 +1,92 @@
import { Controller, Get, Query, UseGuards, BadRequestException } from "@nestjs/common";
import { AuthGuard } from "../auth/guards/auth.guard";
import { PredictionService } from "./prediction.service";
import {
TaskType,
Complexity,
Provider,
type PredictionResponse,
} from "@mosaicstack/telemetry-client";
/**
* Valid values for query parameter validation.
*/
const VALID_TASK_TYPES = new Set<string>(Object.values(TaskType));
const VALID_COMPLEXITIES = new Set<string>(Object.values(Complexity));
const VALID_PROVIDERS = new Set<string>(Object.values(Provider));
/**
* Response DTO for the estimate endpoint.
*/
interface EstimateResponseDto {
data: PredictionResponse | null;
}
/**
* Mosaic Telemetry Controller
*
* Provides API endpoints for accessing telemetry prediction data.
* All endpoints require authentication via AuthGuard.
*
* This controller is intentionally lightweight - it delegates to PredictionService
* for the actual prediction logic and returns results directly to the frontend.
*/
@Controller("telemetry")
@UseGuards(AuthGuard)
export class MosaicTelemetryController {
constructor(private readonly predictionService: PredictionService) {}
/**
* GET /api/telemetry/estimate
*
* Get a cost/token estimate for a given task configuration.
* Returns prediction data including confidence level, or null if
* no prediction is available.
*
* @param taskType - Task type enum value (e.g. "implementation", "planning")
* @param model - Model name (e.g. "claude-sonnet-4-5")
* @param provider - Provider enum value (e.g. "anthropic", "openai")
* @param complexity - Complexity level (e.g. "low", "medium", "high")
* @returns Prediction response with estimates and confidence
*/
@Get("estimate")
getEstimate(
@Query("taskType") taskType: string,
@Query("model") model: string,
@Query("provider") provider: string,
@Query("complexity") complexity: string
): EstimateResponseDto {
if (!taskType || !model || !provider || !complexity) {
throw new BadRequestException(
"Missing query parameters. Required: taskType, model, provider, complexity"
);
}
if (!VALID_TASK_TYPES.has(taskType)) {
throw new BadRequestException(
`Invalid taskType "${taskType}". Valid values: ${[...VALID_TASK_TYPES].join(", ")}`
);
}
if (!VALID_PROVIDERS.has(provider)) {
throw new BadRequestException(
`Invalid provider "${provider}". Valid values: ${[...VALID_PROVIDERS].join(", ")}`
);
}
if (!VALID_COMPLEXITIES.has(complexity)) {
throw new BadRequestException(
`Invalid complexity "${complexity}". Valid values: ${[...VALID_COMPLEXITIES].join(", ")}`
);
}
const prediction = this.predictionService.getEstimate(
taskType as TaskType,
model,
provider as Provider,
complexity as Complexity
);
return { data: prediction };
}
}
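The controller's query validation boils down to membership checks against `Set`s built from the SDK enums. A minimal standalone sketch of that pattern, using illustrative values — the real `TaskType`/`Provider`/`Complexity` values come from `@mosaicstack/telemetry-client` and may differ:

```typescript
// Minimal sketch of the Set-based enum validation used by the controller.
// The enum values here are illustrative stand-ins; the controller throws
// NestJS BadRequestException where this sketch throws a plain Error.
const VALID_TASK_TYPES = new Set<string>(["implementation", "planning", "code_review"]);

function validateTaskType(taskType: string): void {
  if (!VALID_TASK_TYPES.has(taskType)) {
    throw new Error(
      `Invalid taskType "${taskType}". Valid values: ${[...VALID_TASK_TYPES].join(", ")}`
    );
  }
}

validateTaskType("planning"); // passes silently for a known value
```

Building the `Set`s once at module load keeps each request's validation O(1) per parameter, and the error message enumerates the accepted values so API consumers can self-correct.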


@@ -0,0 +1,212 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { ConfigModule } from "@nestjs/config";
import { MosaicTelemetryModule } from "./mosaic-telemetry.module";
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
// Mock the telemetry client to avoid real HTTP calls
vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => {
const actual = await importOriginal<typeof import("@mosaicstack/telemetry-client")>();
class MockTelemetryClient {
private _isRunning = false;
constructor(_config: unknown) {
// no-op
}
get eventBuilder() {
return { build: vi.fn().mockReturnValue({ event_id: "test-event-id" }) };
}
start(): void {
this._isRunning = true;
}
async stop(): Promise<void> {
this._isRunning = false;
}
track(_event: unknown): void {
// no-op
}
getPrediction(_query: unknown): unknown {
return null;
}
async refreshPredictions(_queries: unknown): Promise<void> {
// no-op
}
get queueSize(): number {
return 0;
}
get isRunning(): boolean {
return this._isRunning;
}
}
return {
...actual,
TelemetryClient: MockTelemetryClient,
};
});
describe("MosaicTelemetryModule", () => {
let module: TestingModule;
beforeEach(() => {
vi.clearAllMocks();
});
describe("module initialization", () => {
it("should compile the module successfully", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
expect(module).toBeDefined();
await module.close();
});
it("should provide MosaicTelemetryService", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
expect(service).toBeDefined();
expect(service).toBeInstanceOf(MosaicTelemetryService);
await module.close();
});
it("should export MosaicTelemetryService for injection in other modules", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
const service = module.get(MosaicTelemetryService);
expect(service).toBeDefined();
await module.close();
});
});
describe("lifecycle integration", () => {
it("should initialize service on module init when enabled", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "true",
MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local",
MOSAIC_TELEMETRY_API_KEY: "a".repeat(64),
MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000",
MOSAIC_TELEMETRY_DRY_RUN: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
await module.init();
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
expect(service.isEnabled).toBe(true);
await module.close();
});
it("should not start client when disabled via env", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
await module.init();
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
expect(service.isEnabled).toBe(false);
await module.close();
});
it("should cleanly shut down on module destroy", async () => {
module = await Test.createTestingModule({
imports: [
ConfigModule.forRoot({
isGlobal: true,
envFilePath: [],
load: [
() => ({
MOSAIC_TELEMETRY_ENABLED: "true",
MOSAIC_TELEMETRY_SERVER_URL: "https://tel.test.local",
MOSAIC_TELEMETRY_API_KEY: "a".repeat(64),
MOSAIC_TELEMETRY_INSTANCE_ID: "550e8400-e29b-41d4-a716-446655440000",
MOSAIC_TELEMETRY_DRY_RUN: "false",
}),
],
}),
MosaicTelemetryModule,
],
}).compile();
await module.init();
const service = module.get<MosaicTelemetryService>(MosaicTelemetryService);
expect(service.isEnabled).toBe(true);
await expect(module.close()).resolves.not.toThrow();
});
});
});


@@ -0,0 +1,41 @@
import { Module, Global } from "@nestjs/common";
import { ConfigModule } from "@nestjs/config";
import { AuthModule } from "../auth/auth.module";
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
import { PredictionService } from "./prediction.service";
import { MosaicTelemetryController } from "./mosaic-telemetry.controller";
/**
* Global module providing Mosaic Telemetry integration via @mosaicstack/telemetry-client.
*
* Tracks task completion events and provides crowd-sourced predictions for
* token usage, cost estimation, and quality metrics.
*
* **This is separate from the OpenTelemetry (OTEL) TelemetryModule** which
* handles distributed request tracing. This module is specifically for
* Mosaic Stack's own telemetry aggregation service.
*
* Configuration via environment variables:
* - MOSAIC_TELEMETRY_ENABLED (boolean, default: true)
* - MOSAIC_TELEMETRY_SERVER_URL (string)
* - MOSAIC_TELEMETRY_API_KEY (string, 64-char hex)
* - MOSAIC_TELEMETRY_INSTANCE_ID (string, UUID)
* - MOSAIC_TELEMETRY_DRY_RUN (boolean, default: false)
*
* @example
* ```typescript
* // In any service (no need to import module — it's global):
* @Injectable()
* export class MyService {
* constructor(private readonly telemetry: MosaicTelemetryService) {}
* }
* ```
*/
@Global()
@Module({
imports: [ConfigModule, AuthModule],
controllers: [MosaicTelemetryController],
providers: [MosaicTelemetryService, PredictionService],
exports: [MosaicTelemetryService, PredictionService],
})
export class MosaicTelemetryModule {}


@@ -0,0 +1,504 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { ConfigService } from "@nestjs/config";
import { MOSAIC_TELEMETRY_ENV } from "./mosaic-telemetry.config";
import type {
TaskCompletionEvent,
PredictionQuery,
PredictionResponse,
} from "@mosaicstack/telemetry-client";
import { TaskType, Complexity, Provider, Outcome } from "@mosaicstack/telemetry-client";
// Track mock instances created during tests
const mockStartFn = vi.fn();
const mockStopFn = vi.fn().mockResolvedValue(undefined);
const mockTrackFn = vi.fn();
const mockGetPredictionFn = vi.fn().mockReturnValue(null);
const mockRefreshPredictionsFn = vi.fn().mockResolvedValue(undefined);
const mockBuildFn = vi.fn().mockReturnValue({ event_id: "test-event-id" });
vi.mock("@mosaicstack/telemetry-client", async (importOriginal) => {
const actual = await importOriginal<typeof import("@mosaicstack/telemetry-client")>();
class MockTelemetryClient {
private _isRunning = false;
constructor(_config: unknown) {
// no-op
}
get eventBuilder() {
return { build: mockBuildFn };
}
start(): void {
this._isRunning = true;
mockStartFn();
}
async stop(): Promise<void> {
this._isRunning = false;
await mockStopFn();
}
track(event: unknown): void {
mockTrackFn(event);
}
getPrediction(query: unknown): unknown {
return mockGetPredictionFn(query);
}
async refreshPredictions(queries: unknown): Promise<void> {
await mockRefreshPredictionsFn(queries);
}
get queueSize(): number {
return 0;
}
get isRunning(): boolean {
return this._isRunning;
}
}
return {
...actual,
TelemetryClient: MockTelemetryClient,
};
});
// Lazy-import the service after the mock is in place
const { MosaicTelemetryService } = await import("./mosaic-telemetry.service");
/**
* Create a ConfigService mock that returns environment values from the provided map.
*/
function createConfigService(envMap: Record<string, string | undefined> = {}): ConfigService {
const configService = {
get: vi.fn((key: string, defaultValue?: string): string => {
const value = envMap[key];
if (value !== undefined) {
return value;
}
return defaultValue ?? "";
}),
} as unknown as ConfigService;
return configService;
}
/**
* Default env config for an enabled telemetry service.
*/
const ENABLED_CONFIG: Record<string, string> = {
[MOSAIC_TELEMETRY_ENV.ENABLED]: "true",
[MOSAIC_TELEMETRY_ENV.SERVER_URL]: "https://tel.test.local",
[MOSAIC_TELEMETRY_ENV.API_KEY]: "a".repeat(64),
[MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "550e8400-e29b-41d4-a716-446655440000",
[MOSAIC_TELEMETRY_ENV.DRY_RUN]: "false",
};
/**
* Create a minimal TaskCompletionEvent for testing.
*/
function createTestEvent(): TaskCompletionEvent {
return {
schema_version: "1.0.0",
event_id: "test-event-123",
timestamp: new Date().toISOString(),
instance_id: "550e8400-e29b-41d4-a716-446655440000",
task_duration_ms: 5000,
task_type: TaskType.FEATURE,
complexity: Complexity.MEDIUM,
harness: "claude-code" as TaskCompletionEvent["harness"],
model: "claude-sonnet-4-20250514",
provider: Provider.ANTHROPIC,
estimated_input_tokens: 1000,
estimated_output_tokens: 500,
actual_input_tokens: 1100,
actual_output_tokens: 450,
estimated_cost_usd_micros: 5000,
actual_cost_usd_micros: 4800,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0.45,
outcome: Outcome.SUCCESS,
retry_count: 0,
};
}
describe("MosaicTelemetryService", () => {
let service: InstanceType<typeof MosaicTelemetryService>;
afterEach(async () => {
if (service) {
await service.onModuleDestroy();
}
vi.clearAllMocks();
});
describe("onModuleInit", () => {
it("should initialize the client when enabled with valid config", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(mockStartFn).toHaveBeenCalledOnce();
expect(service.isEnabled).toBe(true);
});
it("should not initialize client when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(mockStartFn).not.toHaveBeenCalled();
expect(service.isEnabled).toBe(false);
});
it("should disable when server URL is missing", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.SERVER_URL]: "",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.isEnabled).toBe(false);
});
it("should disable when API key is missing", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.API_KEY]: "",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.isEnabled).toBe(false);
});
it("should disable when instance ID is missing", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.INSTANCE_ID]: "",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.isEnabled).toBe(false);
});
it("should log dry-run mode when configured", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(mockStartFn).toHaveBeenCalledOnce();
});
});
describe("onModuleDestroy", () => {
it("should stop the client on shutdown", async () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
await service.onModuleDestroy();
expect(mockStopFn).toHaveBeenCalledOnce();
});
it("should not throw when client is not initialized (disabled)", async () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
await expect(service.onModuleDestroy()).resolves.not.toThrow();
});
it("should not throw when called multiple times", async () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
await service.onModuleDestroy();
await expect(service.onModuleDestroy()).resolves.not.toThrow();
});
});
describe("trackTaskCompletion", () => {
it("should queue event via client.track() when enabled", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const event = createTestEvent();
service.trackTaskCompletion(event);
expect(mockTrackFn).toHaveBeenCalledWith(event);
});
it("should be a no-op when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const event = createTestEvent();
service.trackTaskCompletion(event);
expect(mockTrackFn).not.toHaveBeenCalled();
});
});
describe("getPrediction", () => {
const testQuery: PredictionQuery = {
task_type: TaskType.FEATURE,
model: "claude-sonnet-4-20250514",
provider: Provider.ANTHROPIC,
complexity: Complexity.MEDIUM,
};
it("should return cached prediction when available", () => {
const mockPrediction: PredictionResponse = {
prediction: {
input_tokens: { p10: 100, p25: 200, median: 300, p75: 400, p90: 500 },
output_tokens: { p10: 50, p25: 100, median: 150, p75: 200, p90: 250 },
cost_usd_micros: { median: 5000 },
duration_ms: { median: 10000 },
correction_factors: { input: 1.0, output: 1.0 },
quality: { gate_pass_rate: 0.95, success_rate: 0.9 },
},
metadata: {
sample_size: 100,
fallback_level: 0,
confidence: "high",
last_updated: new Date().toISOString(),
cache_hit: true,
},
};
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
mockGetPredictionFn.mockReturnValueOnce(mockPrediction);
const result = service.getPrediction(testQuery);
expect(result).toEqual(mockPrediction);
expect(mockGetPredictionFn).toHaveBeenCalledWith(testQuery);
});
it("should return null when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const result = service.getPrediction(testQuery);
expect(result).toBeNull();
});
it("should return null when no cached prediction exists", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
mockGetPredictionFn.mockReturnValueOnce(null);
const result = service.getPrediction(testQuery);
expect(result).toBeNull();
});
});
describe("refreshPredictions", () => {
const testQueries: PredictionQuery[] = [
{
task_type: TaskType.FEATURE,
model: "claude-sonnet-4-20250514",
provider: Provider.ANTHROPIC,
complexity: Complexity.MEDIUM,
},
];
it("should call client.refreshPredictions when enabled", async () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
await service.refreshPredictions(testQueries);
expect(mockRefreshPredictionsFn).toHaveBeenCalledWith(testQueries);
});
it("should be a no-op when disabled", async () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
await service.refreshPredictions(testQueries);
expect(mockRefreshPredictionsFn).not.toHaveBeenCalled();
});
});
describe("eventBuilder", () => {
it("should return EventBuilder when enabled", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const builder = service.eventBuilder;
expect(builder).toBeDefined();
expect(builder).not.toBeNull();
expect(typeof builder?.build).toBe("function");
});
it("should return null when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const builder = service.eventBuilder;
expect(builder).toBeNull();
});
});
describe("isEnabled", () => {
it("should return true when client is running", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.isEnabled).toBe(true);
});
it("should return false when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.isEnabled).toBe(false);
});
});
describe("queueSize", () => {
it("should return 0 when disabled", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.queueSize).toBe(0);
});
it("should delegate to client.queueSize when enabled", () => {
const configService = createConfigService(ENABLED_CONFIG);
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(service.queueSize).toBe(0);
});
});
describe("disabled mode (comprehensive)", () => {
beforeEach(() => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.ENABLED]: "false",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
});
it("should not make any HTTP calls when disabled", () => {
const event = createTestEvent();
service.trackTaskCompletion(event);
expect(mockTrackFn).not.toHaveBeenCalled();
expect(mockStartFn).not.toHaveBeenCalled();
});
it("should safely handle all method calls when disabled", async () => {
expect(() => service.trackTaskCompletion(createTestEvent())).not.toThrow();
expect(
service.getPrediction({
task_type: TaskType.FEATURE,
model: "test",
provider: Provider.ANTHROPIC,
complexity: Complexity.LOW,
})
).toBeNull();
await expect(service.refreshPredictions([])).resolves.not.toThrow();
expect(service.eventBuilder).toBeNull();
expect(service.isEnabled).toBe(false);
expect(service.queueSize).toBe(0);
});
});
describe("dry-run mode", () => {
it("should create client in dry-run mode", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
expect(mockStartFn).toHaveBeenCalledOnce();
expect(service.isEnabled).toBe(true);
});
it("should accept events in dry-run mode", () => {
const configService = createConfigService({
...ENABLED_CONFIG,
[MOSAIC_TELEMETRY_ENV.DRY_RUN]: "true",
});
service = new MosaicTelemetryService(configService);
service.onModuleInit();
const event = createTestEvent();
service.trackTaskCompletion(event);
expect(mockTrackFn).toHaveBeenCalledWith(event);
});
});
});


@@ -0,0 +1,164 @@
import { Injectable, Logger, OnModuleInit, OnModuleDestroy } from "@nestjs/common";
import { ConfigService } from "@nestjs/config";
import {
TelemetryClient,
type TaskCompletionEvent,
type PredictionQuery,
type PredictionResponse,
type EventBuilder,
} from "@mosaicstack/telemetry-client";
import {
loadMosaicTelemetryConfig,
toSdkConfig,
type MosaicTelemetryModuleConfig,
} from "./mosaic-telemetry.config";
/**
* NestJS service wrapping the @mosaicstack/telemetry-client SDK.
*
* Provides convenience methods for tracking task completions and reading
* crowd-sourced predictions. When telemetry is disabled via
* MOSAIC_TELEMETRY_ENABLED=false, all methods are safe no-ops.
*
* This service is provided globally by MosaicTelemetryModule — any service
* can inject it without importing the module explicitly.
*
* @example
* ```typescript
* @Injectable()
* export class TasksService {
* constructor(private readonly telemetry: MosaicTelemetryService) {}
*
* async completeTask(taskId: string): Promise<void> {
* // ... complete the task ...
 *     const event = this.telemetry.eventBuilder?.build({ ... });
 *     if (event) {
 *       this.telemetry.trackTaskCompletion(event);
 *     }
* }
* }
* ```
*/
@Injectable()
export class MosaicTelemetryService implements OnModuleInit, OnModuleDestroy {
private readonly logger = new Logger(MosaicTelemetryService.name);
private client: TelemetryClient | null = null;
private config: MosaicTelemetryModuleConfig | null = null;
constructor(private readonly configService: ConfigService) {}
/**
* Initialize the telemetry client on module startup.
* Reads configuration from environment variables and starts background submission.
*/
onModuleInit(): void {
this.config = loadMosaicTelemetryConfig(this.configService);
if (!this.config.enabled) {
this.logger.log("Mosaic Telemetry is disabled");
return;
}
if (!this.config.serverUrl || !this.config.apiKey || !this.config.instanceId) {
this.logger.warn(
"Mosaic Telemetry is enabled but missing configuration " +
"(MOSAIC_TELEMETRY_SERVER_URL, MOSAIC_TELEMETRY_API_KEY, or MOSAIC_TELEMETRY_INSTANCE_ID). " +
"Telemetry will remain disabled."
);
this.config = { ...this.config, enabled: false };
return;
}
const sdkConfig = toSdkConfig(this.config, (error: Error) => {
this.logger.error(`Telemetry client error: ${error.message}`, error.stack);
});
this.client = new TelemetryClient(sdkConfig);
this.client.start();
const mode = this.config.dryRun ? "dry-run" : "live";
this.logger.log(`Mosaic Telemetry client started (${mode}) -> ${this.config.serverUrl}`);
}
/**
* Stop the telemetry client on module shutdown.
* Flushes any remaining queued events before stopping.
*/
async onModuleDestroy(): Promise<void> {
if (this.client) {
this.logger.log("Stopping Mosaic Telemetry client...");
await this.client.stop();
this.client = null;
this.logger.log("Mosaic Telemetry client stopped");
}
}
/**
* Queue a task completion event for batch submission.
* No-op when telemetry is disabled.
*
* @param event - The task completion event to track
*/
trackTaskCompletion(event: TaskCompletionEvent): void {
if (!this.client) {
return;
}
this.client.track(event);
}
/**
* Get a cached prediction for the given query.
* Returns null when telemetry is disabled or if not cached/expired.
*
* @param query - The prediction query parameters
* @returns Cached prediction response, or null
*/
getPrediction(query: PredictionQuery): PredictionResponse | null {
if (!this.client) {
return null;
}
return this.client.getPrediction(query);
}
/**
* Force-refresh predictions from the telemetry server.
* No-op when telemetry is disabled.
*
* @param queries - Array of prediction queries to refresh
*/
async refreshPredictions(queries: PredictionQuery[]): Promise<void> {
if (!this.client) {
return;
}
await this.client.refreshPredictions(queries);
}
/**
* Get the EventBuilder for constructing TaskCompletionEvent objects.
* Returns null when telemetry is disabled.
*
* @returns EventBuilder instance, or null if disabled
*/
get eventBuilder(): EventBuilder | null {
if (!this.client) {
return null;
}
return this.client.eventBuilder;
}
/**
* Whether the telemetry client is currently active and running.
*/
get isEnabled(): boolean {
return this.client?.isRunning ?? false;
}
/**
* Number of events currently queued for submission.
* Returns 0 when telemetry is disabled.
*/
get queueSize(): number {
if (!this.client) {
return 0;
}
return this.client.queueSize;
}
}


@@ -0,0 +1,297 @@
import { describe, it, expect, beforeEach, vi } from "vitest";
import { Test, TestingModule } from "@nestjs/testing";
import { TaskType, Complexity, Provider } from "@mosaicstack/telemetry-client";
import type { PredictionResponse, PredictionQuery } from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
import { PredictionService } from "./prediction.service";
describe("PredictionService", () => {
let service: PredictionService;
let mockTelemetryService: {
isEnabled: boolean;
getPrediction: ReturnType<typeof vi.fn>;
refreshPredictions: ReturnType<typeof vi.fn>;
};
const mockPredictionResponse: PredictionResponse = {
prediction: {
input_tokens: {
p10: 50,
p25: 80,
median: 120,
p75: 200,
p90: 350,
},
output_tokens: {
p10: 100,
p25: 150,
median: 250,
p75: 400,
p90: 600,
},
cost_usd_micros: {
p10: 500,
p25: 800,
median: 1200,
p75: 2000,
p90: 3500,
},
duration_ms: {
p10: 200,
p25: 400,
median: 800,
p75: 1500,
p90: 3000,
},
correction_factors: {
input: 1.0,
output: 1.0,
},
quality: {
gate_pass_rate: 0.95,
success_rate: 0.92,
},
},
metadata: {
sample_size: 150,
fallback_level: 0,
confidence: "high",
last_updated: "2026-02-15T00:00:00Z",
cache_hit: true,
},
};
const nullPredictionResponse: PredictionResponse = {
prediction: null,
metadata: {
sample_size: 0,
fallback_level: 3,
confidence: "none",
last_updated: null,
cache_hit: false,
},
};
beforeEach(async () => {
mockTelemetryService = {
isEnabled: true,
getPrediction: vi.fn().mockReturnValue(mockPredictionResponse),
refreshPredictions: vi.fn().mockResolvedValue(undefined),
};
const module: TestingModule = await Test.createTestingModule({
providers: [
PredictionService,
{
provide: MosaicTelemetryService,
useValue: mockTelemetryService,
},
],
}).compile();
service = module.get<PredictionService>(PredictionService);
});
it("should be defined", () => {
expect(service).toBeDefined();
});
// ---------- getEstimate ----------
describe("getEstimate", () => {
it("should return prediction response for valid query", () => {
const result = service.getEstimate(
TaskType.IMPLEMENTATION,
"claude-sonnet-4-5",
Provider.ANTHROPIC,
Complexity.LOW
);
expect(result).toEqual(mockPredictionResponse);
expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({
task_type: TaskType.IMPLEMENTATION,
model: "claude-sonnet-4-5",
provider: Provider.ANTHROPIC,
complexity: Complexity.LOW,
});
});
it("should pass correct query parameters to telemetry service", () => {
service.getEstimate(TaskType.CODE_REVIEW, "gpt-4o", Provider.OPENAI, Complexity.HIGH);
expect(mockTelemetryService.getPrediction).toHaveBeenCalledWith({
task_type: TaskType.CODE_REVIEW,
model: "gpt-4o",
provider: Provider.OPENAI,
complexity: Complexity.HIGH,
});
});
it("should return null when telemetry returns null", () => {
mockTelemetryService.getPrediction.mockReturnValue(null);
const result = service.getEstimate(
TaskType.IMPLEMENTATION,
"claude-sonnet-4-5",
Provider.ANTHROPIC,
Complexity.LOW
);
expect(result).toBeNull();
});
it("should return null prediction response when confidence is none", () => {
mockTelemetryService.getPrediction.mockReturnValue(nullPredictionResponse);
const result = service.getEstimate(
TaskType.IMPLEMENTATION,
"unknown-model",
Provider.UNKNOWN,
Complexity.LOW
);
expect(result).toEqual(nullPredictionResponse);
expect(result?.metadata.confidence).toBe("none");
});
it("should return null and not throw when getPrediction throws", () => {
mockTelemetryService.getPrediction.mockImplementation(() => {
throw new Error("Prediction fetch failed");
});
const result = service.getEstimate(
TaskType.IMPLEMENTATION,
"claude-sonnet-4-5",
Provider.ANTHROPIC,
Complexity.LOW
);
expect(result).toBeNull();
});
it("should handle non-Error thrown objects gracefully", () => {
mockTelemetryService.getPrediction.mockImplementation(() => {
throw "string error";
});
const result = service.getEstimate(
TaskType.IMPLEMENTATION,
"claude-sonnet-4-5",
Provider.ANTHROPIC,
Complexity.LOW
);
expect(result).toBeNull();
});
});
// ---------- refreshCommonPredictions ----------
describe("refreshCommonPredictions", () => {
it("should call refreshPredictions with multiple query combinations", async () => {
await service.refreshCommonPredictions();
expect(mockTelemetryService.refreshPredictions).toHaveBeenCalledTimes(1);
const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];
// Should have queries for cross-product of models, task types, and complexities
expect(queries.length).toBeGreaterThan(0);
// Verify all queries have valid structure
for (const query of queries) {
expect(query).toHaveProperty("task_type");
expect(query).toHaveProperty("model");
expect(query).toHaveProperty("provider");
expect(query).toHaveProperty("complexity");
}
});
it("should include Anthropic model predictions", async () => {
await service.refreshCommonPredictions();
const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];
const anthropicQueries = queries.filter(
(q: PredictionQuery) => q.provider === Provider.ANTHROPIC
);
expect(anthropicQueries.length).toBeGreaterThan(0);
});
it("should include OpenAI model predictions", async () => {
await service.refreshCommonPredictions();
const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];
const openaiQueries = queries.filter((q: PredictionQuery) => q.provider === Provider.OPENAI);
expect(openaiQueries.length).toBeGreaterThan(0);
});
it("should not call refreshPredictions when telemetry is disabled", async () => {
mockTelemetryService.isEnabled = false;
await service.refreshCommonPredictions();
expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled();
});
it("should not throw when refreshPredictions rejects", async () => {
mockTelemetryService.refreshPredictions.mockRejectedValue(new Error("Server unreachable"));
// Should not throw
await expect(service.refreshCommonPredictions()).resolves.not.toThrow();
});
it("should include common task types in queries", async () => {
await service.refreshCommonPredictions();
const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];
const taskTypes = new Set(queries.map((q: PredictionQuery) => q.task_type));
expect(taskTypes.has(TaskType.IMPLEMENTATION)).toBe(true);
expect(taskTypes.has(TaskType.PLANNING)).toBe(true);
expect(taskTypes.has(TaskType.CODE_REVIEW)).toBe(true);
});
it("should include common complexity levels in queries", async () => {
await service.refreshCommonPredictions();
const queries: PredictionQuery[] = mockTelemetryService.refreshPredictions.mock.calls[0][0];
const complexities = new Set(queries.map((q: PredictionQuery) => q.complexity));
expect(complexities.has(Complexity.LOW)).toBe(true);
expect(complexities.has(Complexity.MEDIUM)).toBe(true);
});
});
// ---------- onModuleInit ----------
describe("onModuleInit", () => {
it("should trigger refreshCommonPredictions on init when telemetry is enabled", () => {
// refreshPredictions is async, but onModuleInit fires it and forgets
service.onModuleInit();
// Give the promise microtask a chance to execute
expect(mockTelemetryService.isEnabled).toBe(true);
// refreshPredictions will be called asynchronously
});
it("should not refresh when telemetry is disabled", () => {
mockTelemetryService.isEnabled = false;
service.onModuleInit();
// refreshPredictions should not be called since we returned early
expect(mockTelemetryService.refreshPredictions).not.toHaveBeenCalled();
});
it("should not throw when refresh fails on init", () => {
mockTelemetryService.refreshPredictions.mockRejectedValue(new Error("Connection refused"));
// Should not throw
expect(() => service.onModuleInit()).not.toThrow();
});
});
});

View File

@@ -0,0 +1,161 @@
import { Injectable, Logger, OnModuleInit } from "@nestjs/common";
import {
TaskType,
Complexity,
Provider,
type PredictionQuery,
type PredictionResponse,
} from "@mosaicstack/telemetry-client";
import { MosaicTelemetryService } from "./mosaic-telemetry.service";
/**
* Common model-provider combinations used for pre-fetching predictions.
* These represent the most frequently used LLM configurations.
*/
const COMMON_MODELS: { model: string; provider: Provider }[] = [
{ model: "claude-sonnet-4-5", provider: Provider.ANTHROPIC },
{ model: "claude-opus-4", provider: Provider.ANTHROPIC },
{ model: "claude-haiku-4-5", provider: Provider.ANTHROPIC },
{ model: "gpt-4o", provider: Provider.OPENAI },
{ model: "gpt-4o-mini", provider: Provider.OPENAI },
];
/**
* Common task types to pre-fetch predictions for.
*/
const COMMON_TASK_TYPES: TaskType[] = [
TaskType.IMPLEMENTATION,
TaskType.PLANNING,
TaskType.CODE_REVIEW,
];
/**
* Common complexity levels to pre-fetch predictions for.
*/
const COMMON_COMPLEXITIES: Complexity[] = [Complexity.LOW, Complexity.MEDIUM];
/**
* PredictionService
*
* Provides pre-task cost and token estimates using crowd-sourced prediction data
* from the Mosaic Telemetry server. Predictions are cached by the underlying SDK
* with a 6-hour TTL.
*
* This service is intentionally non-blocking: if predictions are unavailable
* (telemetry disabled, server unreachable, no data), all methods return null
* without throwing errors. Task execution should never be blocked by prediction
* failures.
*
* @example
* ```typescript
* const estimate = this.predictionService.getEstimate(
* TaskType.IMPLEMENTATION,
* "claude-sonnet-4-5",
* Provider.ANTHROPIC,
* Complexity.LOW,
* );
* if (estimate?.prediction) {
* console.log(`Estimated cost: ${estimate.prediction.cost_usd_micros}`);
* }
* ```
*/
@Injectable()
export class PredictionService implements OnModuleInit {
private readonly logger = new Logger(PredictionService.name);
constructor(private readonly telemetry: MosaicTelemetryService) {}
/**
* Refresh common predictions on startup.
* Runs asynchronously and never blocks module initialization.
*/
onModuleInit(): void {
if (!this.telemetry.isEnabled) {
this.logger.log("Telemetry disabled - skipping prediction refresh");
return;
}
// Fire-and-forget: refresh in the background
this.refreshCommonPredictions().catch((error: unknown) => {
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to refresh common predictions on startup: ${msg}`);
});
}
/**
* Get a cost/token estimate for a given task configuration.
*
* Returns the cached prediction from the SDK, or null if:
* - Telemetry is disabled
* - No prediction data exists for this combination
* - The prediction has expired
*
* @param taskType - The type of task to estimate
* @param model - The model name (e.g. "claude-sonnet-4-5")
* @param provider - The provider enum value
* @param complexity - The complexity level
* @returns Prediction response with estimates and confidence, or null
*/
getEstimate(
taskType: TaskType,
model: string,
provider: Provider,
complexity: Complexity
): PredictionResponse | null {
try {
const query: PredictionQuery = {
task_type: taskType,
model,
provider,
complexity,
};
return this.telemetry.getPrediction(query);
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to get prediction estimate: ${msg}`);
return null;
}
}
/**
* Refresh predictions for commonly used (taskType, model, provider, complexity) combinations.
*
* Generates the cross-product of common models, task types, and complexities,
* then batch-refreshes them from the telemetry server. The SDK caches the
* results with a 6-hour TTL.
*
* This method is safe to call at any time. If telemetry is disabled or the
* server is unreachable, it completes without error.
*/
async refreshCommonPredictions(): Promise<void> {
if (!this.telemetry.isEnabled) {
return;
}
const queries: PredictionQuery[] = [];
for (const { model, provider } of COMMON_MODELS) {
for (const taskType of COMMON_TASK_TYPES) {
for (const complexity of COMMON_COMPLEXITIES) {
queries.push({
task_type: taskType,
model,
provider,
complexity,
});
}
}
}
this.logger.log(`Refreshing ${String(queries.length)} common prediction queries...`);
try {
await this.telemetry.refreshPredictions(queries);
this.logger.log(`Successfully refreshed ${String(queries.length)} predictions`);
} catch (error: unknown) {
const msg = error instanceof Error ? error.message : String(error);
this.logger.warn(`Failed to refresh predictions: ${msg}`);
}
}
}
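As a standalone sketch of the query fan-out that `refreshCommonPredictions()` performs above (ported to Python for illustration; the real service uses the `TaskType`/`Provider`/`Complexity` enums from `@mosaicstack/telemetry-client`, simplified to plain strings here):

```python
# Cross-product of common models, task types, and complexities, mirroring
# the nested loops in refreshCommonPredictions(). Values are illustrative
# strings, not the SDK's enum members.
from itertools import product

MODELS = [
    ("claude-sonnet-4-5", "anthropic"),
    ("claude-opus-4", "anthropic"),
    ("claude-haiku-4-5", "anthropic"),
    ("gpt-4o", "openai"),
    ("gpt-4o-mini", "openai"),
]
TASK_TYPES = ["implementation", "planning", "code_review"]
COMPLEXITIES = ["low", "medium"]

# 5 models x 3 task types x 2 complexities = 30 queries per refresh.
queries = [
    {"task_type": t, "model": m, "provider": p, "complexity": c}
    for (m, p), t, c in product(MODELS, TASK_TYPES, COMPLEXITIES)
]
```

Batch-refreshing this fixed set on startup keeps the SDK's 6-hour cache warm for the combinations most likely to be queried.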

View File

@@ -9,8 +9,9 @@ RUN apt-get update && \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Copy dependency files and private registry config
COPY pyproject.toml .
COPY pip.conf /etc/pip.conf
# Create virtual environment and install dependencies
RUN python -m venv /opt/venv

View File

@@ -0,0 +1,2 @@
[global]
extra-index-url = https://git.mosaicstack.dev/api/packages/mosaic/pypi/simple/

View File

@@ -15,6 +15,7 @@ dependencies = [
"opentelemetry-sdk>=1.20.0",
"opentelemetry-instrumentation-fastapi>=0.41b0",
"opentelemetry-exporter-otlp>=1.20.0",
"mosaicstack-telemetry>=0.1.0",
]
[project.optional-dependencies]

View File

@@ -2,12 +2,24 @@
import asyncio
import logging
import time
from typing import TYPE_CHECKING, Any
from mosaicstack_telemetry import (  # type: ignore[import-untyped]
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
TaskType,
TelemetryClient,
)
from src.circuit_breaker import CircuitBreaker, CircuitBreakerError
from src.context_monitor import ContextMonitor
from src.forced_continuation import ForcedContinuationService
from src.models import ContextAction
from src.mosaic_telemetry import build_task_event
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
from src.queue import QueueItem, QueueManager
from src.tracing_decorators import trace_agent_operation
@@ -17,6 +29,49 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Agent-name → telemetry-field mapping helpers
# ---------------------------------------------------------------------------
# Maps assigned_agent strings to (model, Provider, Harness)
_AGENT_TELEMETRY_MAP: dict[str, tuple[str, Provider, Harness]] = {
"sonnet": ("claude-sonnet-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
"opus": ("claude-opus-4-20250514", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
"haiku": ("claude-haiku-3.5-20241022", Provider.ANTHROPIC, Harness.CLAUDE_CODE),
"glm": ("glm-4", Provider.CUSTOM, Harness.CUSTOM),
"minimax": ("minimax", Provider.CUSTOM, Harness.CUSTOM),
}
_DIFFICULTY_TO_COMPLEXITY: dict[str, Complexity] = {
"easy": Complexity.LOW,
"medium": Complexity.MEDIUM,
"hard": Complexity.HIGH,
}
_GATE_NAME_TO_ENUM: dict[str, QualityGate] = {
"build": QualityGate.BUILD,
"lint": QualityGate.LINT,
"test": QualityGate.TEST,
"coverage": QualityGate.COVERAGE,
}
def _resolve_agent_fields(
assigned_agent: str,
) -> tuple[str, Provider, Harness]:
"""Resolve agent name to (model, provider, harness) for telemetry.
Args:
assigned_agent: The agent name string from issue metadata.
Returns:
Tuple of (model_name, Provider, Harness).
"""
return _AGENT_TELEMETRY_MAP.get(
assigned_agent,
("unknown", Provider.UNKNOWN, Harness.UNKNOWN),
)
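The fallback behavior of `_resolve_agent_fields` can be shown with a standalone sketch (plain strings stand in for the `Provider`/`Harness` enum members, and the map is truncated for brevity):

```python
# dict.get with a default tuple means unrecognised agent names degrade to
# "unknown" telemetry fields instead of raising KeyError mid-task.
AGENT_MAP: dict[str, tuple[str, str, str]] = {
    "sonnet": ("claude-sonnet-4-20250514", "anthropic", "claude_code"),
    "opus": ("claude-opus-4-20250514", "anthropic", "claude_code"),
}

def resolve_agent(name: str) -> tuple[str, str, str]:
    # Lookup never fails: unknown agents map to placeholder fields.
    return AGENT_MAP.get(name, ("unknown", "unknown", "unknown"))
```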
class Coordinator:
"""Main orchestration loop for processing the issue queue.
@@ -41,6 +96,8 @@ class Coordinator:
poll_interval: float = 5.0,
circuit_breaker_threshold: int = 5,
circuit_breaker_cooldown: float = 30.0,
telemetry_client: TelemetryClient | None = None,
instance_id: str = "",
) -> None:
"""Initialize the Coordinator.
@@ -49,12 +106,16 @@ class Coordinator:
poll_interval: Seconds between queue polls (default: 5.0)
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
telemetry_client: Optional Mosaic telemetry client for tracking task events
instance_id: UUID identifying this coordinator instance for telemetry
"""
self.queue_manager = queue_manager
self.poll_interval = poll_interval
self._running = False
self._stop_event: asyncio.Event | None = None
self._active_agents: dict[int, dict[str, Any]] = {}
self._telemetry_client = telemetry_client
self._instance_id = instance_id
# Circuit breaker for preventing infinite retry loops (SEC-ORCH-7)
self._circuit_breaker = CircuitBreaker(
@@ -197,7 +258,8 @@ class Coordinator:
"""Process the next ready item from the queue.
Gets the next ready item, spawns an agent to process it,
and marks it complete on success. Emits a Mosaic telemetry
TaskCompletionEvent after each task attempt.
Returns:
The QueueItem that was processed, or None if queue is empty
@@ -218,6 +280,10 @@ class Coordinator:
# Mark as in progress
self.queue_manager.mark_in_progress(item.issue_number)
# Track timing for telemetry
start_mono = time.monotonic()
outcome = Outcome.FAILURE
# Spawn agent (stub implementation)
try:
success = await self.spawn_agent(item)
@@ -225,6 +291,7 @@ class Coordinator:
if success:
# Mark as complete
self.queue_manager.mark_complete(item.issue_number)
outcome = Outcome.SUCCESS
logger.info(f"Issue #{item.issue_number} completed successfully")
else:
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
@@ -233,8 +300,81 @@ class Coordinator:
logger.error(f"Error spawning agent for issue #{item.issue_number}: {e}")
# Item remains in progress on error
finally:
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
self._emit_task_telemetry(item, outcome=outcome, duration_ms=elapsed_ms)
return item
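The duration measurement in the `finally` block above uses a monotonic clock delta, which (unlike `time.time()`) cannot go backwards under NTP or wall-clock adjustments. A minimal sketch of the same pattern:

```python
import time

# Capture a monotonic start timestamp, do the work, then convert the
# elapsed seconds to integer milliseconds for the telemetry event.
start_mono = time.monotonic()
# ... task work would happen here ...
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
```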
def _emit_task_telemetry(
self,
item: QueueItem,
*,
outcome: Outcome,
duration_ms: int,
retry_count: int = 0,
actual_input_tokens: int = 0,
actual_output_tokens: int = 0,
quality_passed: bool = False,
quality_gates_run: list[QualityGate] | None = None,
quality_gates_failed: list[QualityGate] | None = None,
) -> None:
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
This method never raises; any telemetry errors are logged and swallowed
so they do not interfere with task processing.
Args:
item: The QueueItem that was processed.
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
duration_ms: Wall-clock duration in milliseconds.
retry_count: Number of retries before this attempt.
actual_input_tokens: Actual input tokens consumed by the harness.
actual_output_tokens: Actual output tokens consumed by the harness.
quality_passed: Whether all quality gates passed.
quality_gates_run: Quality gates that were executed.
quality_gates_failed: Quality gates that failed.
"""
if self._telemetry_client is None or not self._instance_id:
return
try:
model, provider, harness = _resolve_agent_fields(
item.metadata.assigned_agent,
)
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
item.metadata.difficulty, Complexity.MEDIUM
)
event = build_task_event(
instance_id=self._instance_id,
task_type=TaskType.IMPLEMENTATION,
complexity=complexity,
outcome=outcome,
duration_ms=duration_ms,
model=model,
provider=provider,
harness=harness,
actual_input_tokens=actual_input_tokens,
actual_output_tokens=actual_output_tokens,
estimated_input_tokens=item.metadata.estimated_context,
quality_passed=quality_passed,
quality_gates_run=quality_gates_run,
quality_gates_failed=quality_gates_failed,
retry_count=retry_count,
)
self._telemetry_client.track(event)
logger.debug(
"Telemetry event emitted for issue #%d (outcome=%s)",
item.issue_number,
outcome.value,
)
except Exception:
logger.exception(
"Failed to emit telemetry for issue #%d (non-fatal)",
item.issue_number,
)
@trace_agent_operation(operation_name="spawn_agent")
async def spawn_agent(self, item: QueueItem) -> bool:
"""Spawn an agent to process the given item.
@@ -294,6 +434,8 @@ class OrchestrationLoop:
poll_interval: float = 5.0,
circuit_breaker_threshold: int = 5,
circuit_breaker_cooldown: float = 30.0,
telemetry_client: TelemetryClient | None = None,
instance_id: str = "",
) -> None:
"""Initialize the OrchestrationLoop.
@@ -305,6 +447,8 @@ class OrchestrationLoop:
poll_interval: Seconds between queue polls (default: 5.0)
circuit_breaker_threshold: Consecutive failures before opening circuit (default: 5)
circuit_breaker_cooldown: Seconds to wait before retry after circuit opens (default: 30)
telemetry_client: Optional Mosaic telemetry client for tracking task events
instance_id: UUID identifying this coordinator instance for telemetry
"""
self.queue_manager = queue_manager
self.quality_orchestrator = quality_orchestrator
@@ -314,6 +458,11 @@ class OrchestrationLoop:
self._running = False
self._stop_event: asyncio.Event | None = None
self._active_agents: dict[int, dict[str, Any]] = {}
self._telemetry_client = telemetry_client
self._instance_id = instance_id
# Per-issue retry tracking
self._retry_counts: dict[int, int] = {}
# Metrics tracking
self._processed_count = 0
@@ -493,6 +642,7 @@ class OrchestrationLoop:
3. Spawns an agent to process it
4. Runs quality gates on completion
5. Handles rejection with forced continuation or marks complete
6. Emits a Mosaic telemetry TaskCompletionEvent
Returns:
The QueueItem that was processed, or None if queue is empty
@@ -524,12 +674,21 @@ class OrchestrationLoop:
"status": "running",
}
# Track timing for telemetry
start_mono = time.monotonic()
outcome = Outcome.FAILURE
quality_passed = False
gates_run: list[QualityGate] = []
gates_failed: list[QualityGate] = []
retry_count = self._retry_counts.get(item.issue_number, 0)
try:
# Spawn agent (stub implementation)
agent_success = await self._spawn_agent(item)
if not agent_success:
logger.warning(f"Issue #{item.issue_number} agent failed - remains in progress")
self._retry_counts[item.issue_number] = retry_count + 1
return item
# Check context usage (stub - no real monitoring in Phase 0)
@@ -538,24 +697,123 @@ class OrchestrationLoop:
# Run quality gates on completion
verification = await self._verify_quality(item)
# Map gate results for telemetry
gates_run = [
_GATE_NAME_TO_ENUM[name]
for name in verification.gate_results
if name in _GATE_NAME_TO_ENUM
]
gates_failed = [
_GATE_NAME_TO_ENUM[name]
for name, result in verification.gate_results.items()
if name in _GATE_NAME_TO_ENUM and not result.passed
]
quality_passed = verification.all_passed
if verification.all_passed:
# All gates passed - mark as complete
self.queue_manager.mark_complete(item.issue_number)
self._success_count += 1
outcome = Outcome.SUCCESS
# Clear retry counter on success
self._retry_counts.pop(item.issue_number, None)
logger.info(
f"Issue #{item.issue_number} completed successfully - all gates passed"
)
else:
# Gates failed - generate continuation prompt
self._rejection_count += 1
outcome = Outcome.FAILURE
self._retry_counts[item.issue_number] = retry_count + 1
await self._handle_rejection(item, verification)
except Exception as e:
logger.error(f"Error processing issue #{item.issue_number}: {e}")
# Item remains in progress on error
finally:
elapsed_ms = int((time.monotonic() - start_mono) * 1000)
self._emit_task_telemetry(
item,
outcome=outcome,
duration_ms=elapsed_ms,
retry_count=retry_count,
quality_passed=quality_passed,
quality_gates_run=gates_run,
quality_gates_failed=gates_failed,
)
return item
def _emit_task_telemetry(
self,
item: QueueItem,
*,
outcome: Outcome,
duration_ms: int,
retry_count: int = 0,
actual_input_tokens: int = 0,
actual_output_tokens: int = 0,
quality_passed: bool = False,
quality_gates_run: list[QualityGate] | None = None,
quality_gates_failed: list[QualityGate] | None = None,
) -> None:
"""Emit a Mosaic telemetry TaskCompletionEvent (non-blocking).
This method never raises; any telemetry errors are logged and swallowed
so they do not interfere with task processing.
Args:
item: The QueueItem that was processed.
outcome: Task outcome (SUCCESS, FAILURE, TIMEOUT, etc.).
duration_ms: Wall-clock duration in milliseconds.
retry_count: Number of retries before this attempt.
actual_input_tokens: Actual input tokens consumed by the harness.
actual_output_tokens: Actual output tokens consumed by the harness.
quality_passed: Whether all quality gates passed.
quality_gates_run: Quality gates that were executed.
quality_gates_failed: Quality gates that failed.
"""
if self._telemetry_client is None or not self._instance_id:
return
try:
model, provider, harness = _resolve_agent_fields(
item.metadata.assigned_agent,
)
complexity = _DIFFICULTY_TO_COMPLEXITY.get(
item.metadata.difficulty, Complexity.MEDIUM
)
event = build_task_event(
instance_id=self._instance_id,
task_type=TaskType.IMPLEMENTATION,
complexity=complexity,
outcome=outcome,
duration_ms=duration_ms,
model=model,
provider=provider,
harness=harness,
actual_input_tokens=actual_input_tokens,
actual_output_tokens=actual_output_tokens,
estimated_input_tokens=item.metadata.estimated_context,
quality_passed=quality_passed,
quality_gates_run=quality_gates_run,
quality_gates_failed=quality_gates_failed,
retry_count=retry_count,
)
self._telemetry_client.track(event)
logger.debug(
"Telemetry event emitted for issue #%d (outcome=%s)",
item.issue_number,
outcome.value,
)
except Exception:
logger.exception(
"Failed to emit telemetry for issue #%d (non-fatal)",
item.issue_number,
)
async def _spawn_agent(self, item: QueueItem) -> bool:
"""Spawn an agent to process the given item.

View File

@@ -9,6 +9,7 @@ from pathlib import Path
from typing import Any
from fastapi import FastAPI
from mosaicstack_telemetry import TelemetryClient # type: ignore[import-untyped]
from pydantic import BaseModel
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
@@ -18,6 +19,7 @@ from starlette.responses import Response
from .config import settings
from .coordinator import Coordinator
from .mosaic_telemetry import create_telemetry_config
from .queue import QueueManager
from .telemetry import TelemetryService, shutdown_telemetry
from .webhook import router as webhook_router
@@ -76,6 +78,18 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
telemetry_service.initialize()
logger.info("OpenTelemetry telemetry initialized")
# Initialize Mosaic telemetry client
mosaic_telemetry_config = create_telemetry_config()
mosaic_telemetry_client: TelemetryClient | None = None
if mosaic_telemetry_config.enabled:
mosaic_telemetry_client = TelemetryClient(mosaic_telemetry_config)
await mosaic_telemetry_client.start_async()
app.state.mosaic_telemetry = mosaic_telemetry_client
logger.info("Mosaic telemetry client started")
else:
app.state.mosaic_telemetry = None
logger.info("Mosaic telemetry disabled via configuration")
# Initialize queue manager
queue_file = Path("queue.json")
queue_manager = QueueManager(queue_file=queue_file)
@@ -86,6 +100,8 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
_coordinator = Coordinator(
queue_manager=queue_manager,
poll_interval=settings.coordinator_poll_interval,
telemetry_client=mosaic_telemetry_client,
instance_id=mosaic_telemetry_config.instance_id or "",
)
logger.info(
f"Coordinator initialized (poll interval: {settings.coordinator_poll_interval}s, "
@@ -115,6 +131,11 @@ async def lifespan(app: FastAPI) -> AsyncIterator[dict[str, Any]]:
pass
logger.info("Coordinator stopped")
# Shutdown Mosaic telemetry client
if mosaic_telemetry_client is not None:
await mosaic_telemetry_client.stop_async()
logger.info("Mosaic telemetry client stopped")
# Shutdown OpenTelemetry
if telemetry_enabled:
shutdown_telemetry()

View File

@@ -0,0 +1,157 @@
"""Mosaic Stack telemetry integration for the Coordinator.
This module provides helpers for tracking task completion events using the
mosaicstack-telemetry SDK. It is separate from the OpenTelemetry distributed
tracing configured in telemetry.py.
Environment variables (auto-read by the SDK):
MOSAIC_TELEMETRY_ENABLED: Enable/disable telemetry (default: true)
MOSAIC_TELEMETRY_SERVER_URL: Telemetry server endpoint
MOSAIC_TELEMETRY_API_KEY: API key for authentication
MOSAIC_TELEMETRY_INSTANCE_ID: UUID identifying this coordinator instance
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
Complexity,
EventBuilder,
Harness,
Outcome,
Provider,
QualityGate,
TaskType,
TelemetryClient,
TelemetryConfig,
)
if TYPE_CHECKING:
from fastapi import FastAPI
from mosaicstack_telemetry import TaskCompletionEvent
logger = logging.getLogger(__name__)
def get_telemetry_client(app: FastAPI) -> TelemetryClient | None:
"""Retrieve the Mosaic telemetry client from FastAPI app state.
Args:
app: The FastAPI application instance.
Returns:
The TelemetryClient if initialised and telemetry is enabled,
or None if telemetry is disabled or not yet initialised.
"""
client: TelemetryClient | None = getattr(app.state, "mosaic_telemetry", None)
return client
def build_task_event(
*,
instance_id: str,
task_type: TaskType = TaskType.IMPLEMENTATION,
complexity: Complexity = Complexity.MEDIUM,
outcome: Outcome = Outcome.SUCCESS,
duration_ms: int = 0,
model: str = "claude-sonnet-4-20250514",
provider: Provider = Provider.ANTHROPIC,
harness: Harness = Harness.CLAUDE_CODE,
estimated_input_tokens: int = 0,
estimated_output_tokens: int = 0,
actual_input_tokens: int = 0,
actual_output_tokens: int = 0,
estimated_cost_micros: int = 0,
actual_cost_micros: int = 0,
quality_passed: bool = False,
quality_gates_run: list[QualityGate] | None = None,
quality_gates_failed: list[QualityGate] | None = None,
context_compactions: int = 0,
context_rotations: int = 0,
context_utilization: float = 0.0,
retry_count: int = 0,
language: str | None = "typescript",
) -> TaskCompletionEvent:
"""Build a TaskCompletionEvent for a coordinator task.
Provides sensible defaults for the coordinator context (Claude Code harness,
Anthropic provider, TypeScript language).
Args:
instance_id: UUID identifying this coordinator instance.
task_type: The kind of task that was performed.
complexity: Complexity level of the task.
outcome: Whether the task succeeded, failed, etc.
duration_ms: Task duration in milliseconds.
model: The AI model used.
provider: The AI model provider.
harness: The coding harness used.
estimated_input_tokens: Estimated input token count.
estimated_output_tokens: Estimated output token count.
actual_input_tokens: Actual input token count.
actual_output_tokens: Actual output token count.
estimated_cost_micros: Estimated cost in USD micros.
actual_cost_micros: Actual cost in USD micros.
quality_passed: Whether all quality gates passed.
quality_gates_run: List of quality gates that were executed.
quality_gates_failed: List of quality gates that failed.
context_compactions: Number of context compactions during the task.
context_rotations: Number of context rotations during the task.
context_utilization: Final context window utilization (0.0-1.0).
retry_count: Number of retries before the task completed.
language: Primary programming language (default: typescript).
Returns:
A fully populated TaskCompletionEvent ready to be tracked.
"""
builder = (
EventBuilder(instance_id=instance_id)
.task_type(task_type)
.complexity_level(complexity)
.harness_type(harness)
.model(model)
.provider(provider)
.duration_ms(duration_ms)
.outcome_value(outcome)
.tokens(
estimated_in=estimated_input_tokens,
estimated_out=estimated_output_tokens,
actual_in=actual_input_tokens,
actual_out=actual_output_tokens,
)
.cost(estimated=estimated_cost_micros, actual=actual_cost_micros)
.quality(
passed=quality_passed,
gates_run=quality_gates_run or [],
gates_failed=quality_gates_failed or [],
)
.context(
compactions=context_compactions,
rotations=context_rotations,
utilization=context_utilization,
)
.retry_count(retry_count)
.language(language)
)
return builder.build()
def create_telemetry_config() -> TelemetryConfig:
"""Create a TelemetryConfig instance.
The config reads from MOSAIC_TELEMETRY_* environment variables automatically.
Validation warnings are logged but do not prevent creation.
Returns:
A TelemetryConfig instance with env-var overrides applied.
"""
config = TelemetryConfig()
errors = config.validate()
if errors and config.enabled:
logger.warning(
"Mosaic telemetry config has validation issues (telemetry may not submit): %s",
"; ".join(errors),
)
return config
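

# ---------------------------------------------------------------------------
# Usage sketch (illustrative): how the helpers above fit together from a
# request handler or background job. Assumes the app lifespan has already
# stored a started TelemetryClient on app.state.mosaic_telemetry;
# `_track_example_task`, `instance_id`, and `duration_ms` are hypothetical
# names, not part of the coordinator's public API.
# ---------------------------------------------------------------------------
def _track_example_task(app: "FastAPI", instance_id: str, duration_ms: int) -> None:
    """Build and submit a task event, silently skipping if telemetry is off."""
    client = get_telemetry_client(app)
    if client is None:
        return  # telemetry disabled or not yet initialised
    event = build_task_event(
        instance_id=instance_id,
        outcome=Outcome.SUCCESS,
        duration_ms=duration_ms,
    )
    client.track(event)  # enqueues for background submission; non-blocking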


@@ -0,0 +1,426 @@
"""Tests for Mosaic Stack telemetry integration (mosaic_telemetry module).
These tests cover the mosaicstack-telemetry SDK integration, NOT the
OpenTelemetry distributed tracing (which is tested in test_telemetry.py).
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from fastapi import FastAPI
from mosaicstack_telemetry import (
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
TaskCompletionEvent,
TaskType,
TelemetryClient,
TelemetryConfig,
)
from src.mosaic_telemetry import (
build_task_event,
create_telemetry_config,
get_telemetry_client,
)
# ---------------------------------------------------------------------------
# TelemetryConfig creation from environment variables
# ---------------------------------------------------------------------------
class TestCreateTelemetryConfig:
"""Tests for create_telemetry_config helper."""
def test_config_reads_enabled_from_env(self) -> None:
"""TelemetryConfig should read MOSAIC_TELEMETRY_ENABLED from env."""
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_ENABLED": "true"},
clear=False,
):
config = create_telemetry_config()
assert config.enabled is True
def test_config_disabled_from_env(self) -> None:
"""TelemetryConfig should be disabled when env var is false."""
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_ENABLED": "false"},
clear=False,
):
config = create_telemetry_config()
assert config.enabled is False
def test_config_reads_server_url_from_env(self) -> None:
"""TelemetryConfig should read MOSAIC_TELEMETRY_SERVER_URL from env."""
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com"},
clear=False,
):
config = create_telemetry_config()
assert config.server_url == "https://telemetry.example.com"
def test_config_reads_api_key_from_env(self) -> None:
"""TelemetryConfig should read MOSAIC_TELEMETRY_API_KEY from env."""
api_key = "a" * 64 # 64-char hex string
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_API_KEY": api_key},
clear=False,
):
config = create_telemetry_config()
assert config.api_key == api_key
def test_config_reads_instance_id_from_env(self) -> None:
"""TelemetryConfig should read MOSAIC_TELEMETRY_INSTANCE_ID from env."""
instance_id = "12345678-1234-1234-1234-123456789abc"
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_INSTANCE_ID": instance_id},
clear=False,
):
config = create_telemetry_config()
assert config.instance_id == instance_id
def test_config_defaults_to_enabled(self) -> None:
"""TelemetryConfig should default to enabled when env var is not set."""
with patch.dict(
"os.environ",
{},
clear=True,
):
config = create_telemetry_config()
assert config.enabled is True
def test_config_logs_validation_warnings_when_enabled(self) -> None:
"""Config creation should log warnings for validation errors when enabled."""
with (
patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_ENABLED": "true"},
clear=True,
),
patch("src.mosaic_telemetry.logger") as mock_logger,
):
config = create_telemetry_config()
# server_url, api_key, and instance_id are all empty = validation errors
assert config.enabled is True
mock_logger.warning.assert_called_once()
warning_msg = mock_logger.warning.call_args[0][0]
assert "validation issues" in warning_msg
def test_config_no_warnings_when_disabled(self) -> None:
"""Config creation should not log warnings when telemetry is disabled."""
with (
patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_ENABLED": "false"},
clear=True,
),
patch("src.mosaic_telemetry.logger") as mock_logger,
):
create_telemetry_config()
mock_logger.warning.assert_not_called()
def test_config_strips_trailing_slashes(self) -> None:
"""TelemetryConfig should strip trailing slashes from server_url."""
with patch.dict(
"os.environ",
{"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com/"},
clear=False,
):
config = create_telemetry_config()
assert config.server_url == "https://telemetry.example.com"
# ---------------------------------------------------------------------------
# get_telemetry_client from app state
# ---------------------------------------------------------------------------
class TestGetTelemetryClient:
"""Tests for get_telemetry_client helper."""
def test_returns_client_when_set(self) -> None:
"""Should return the telemetry client from app state."""
app = FastAPI()
mock_client = MagicMock(spec=TelemetryClient)
app.state.mosaic_telemetry = mock_client
result = get_telemetry_client(app)
assert result is mock_client
def test_returns_none_when_not_set(self) -> None:
"""Should return None when mosaic_telemetry is not in app state."""
app = FastAPI()
# Do not set app.state.mosaic_telemetry
result = get_telemetry_client(app)
assert result is None
def test_returns_none_when_explicitly_none(self) -> None:
"""Should return None when mosaic_telemetry is explicitly set to None."""
app = FastAPI()
app.state.mosaic_telemetry = None
result = get_telemetry_client(app)
assert result is None
# ---------------------------------------------------------------------------
# build_task_event helper
# ---------------------------------------------------------------------------
class TestBuildTaskEvent:
"""Tests for build_task_event helper."""
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
def test_builds_event_with_defaults(self) -> None:
"""Should build a TaskCompletionEvent with default values."""
event = build_task_event(instance_id=self.VALID_INSTANCE_ID)
assert isinstance(event, TaskCompletionEvent)
assert str(event.instance_id) == self.VALID_INSTANCE_ID
assert event.task_type == TaskType.IMPLEMENTATION
assert event.complexity == Complexity.MEDIUM
assert event.outcome == Outcome.SUCCESS
assert event.harness == Harness.CLAUDE_CODE
assert event.provider == Provider.ANTHROPIC
assert event.language == "typescript"
def test_builds_event_with_custom_task_type(self) -> None:
"""Should respect custom task_type parameter."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
task_type=TaskType.TESTING,
)
assert event.task_type == TaskType.TESTING
def test_builds_event_with_custom_outcome(self) -> None:
"""Should respect custom outcome parameter."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
outcome=Outcome.FAILURE,
)
assert event.outcome == Outcome.FAILURE
def test_builds_event_with_duration(self) -> None:
"""Should set duration_ms correctly."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
duration_ms=45000,
)
assert event.task_duration_ms == 45000
def test_builds_event_with_token_counts(self) -> None:
"""Should set all token counts correctly."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
estimated_input_tokens=1000,
estimated_output_tokens=500,
actual_input_tokens=1100,
actual_output_tokens=480,
)
assert event.estimated_input_tokens == 1000
assert event.estimated_output_tokens == 500
assert event.actual_input_tokens == 1100
assert event.actual_output_tokens == 480
def test_builds_event_with_cost(self) -> None:
"""Should set cost fields correctly."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
estimated_cost_micros=50000,
actual_cost_micros=48000,
)
assert event.estimated_cost_usd_micros == 50000
assert event.actual_cost_usd_micros == 48000
def test_builds_event_with_quality_gates(self) -> None:
"""Should set quality gate information correctly."""
gates_run = [QualityGate.LINT, QualityGate.TEST, QualityGate.BUILD]
gates_failed = [QualityGate.TEST]
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
quality_passed=False,
quality_gates_run=gates_run,
quality_gates_failed=gates_failed,
)
assert event.quality_gate_passed is False
assert event.quality_gates_run == gates_run
assert event.quality_gates_failed == gates_failed
def test_builds_event_with_context_info(self) -> None:
"""Should set context compaction/rotation/utilization correctly."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
context_compactions=2,
context_rotations=1,
context_utilization=0.75,
)
assert event.context_compactions == 2
assert event.context_rotations == 1
assert event.context_utilization_final == 0.75
def test_builds_event_with_retry_count(self) -> None:
"""Should set retry count correctly."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
retry_count=3,
)
assert event.retry_count == 3
def test_builds_event_with_custom_language(self) -> None:
"""Should allow overriding the default language."""
event = build_task_event(
instance_id=self.VALID_INSTANCE_ID,
language="python",
)
assert event.language == "python"
# ---------------------------------------------------------------------------
# TelemetryClient lifecycle (disabled mode)
# ---------------------------------------------------------------------------
class TestTelemetryDisabledMode:
"""Tests for disabled telemetry mode (no HTTP calls)."""
def test_disabled_client_does_not_start(self) -> None:
"""Client start_async should be a no-op when disabled."""
config = TelemetryConfig(enabled=False)
client = TelemetryClient(config)
# Should not raise
assert client.is_running is False
def test_disabled_client_track_is_noop(self) -> None:
"""Tracking events when disabled should silently drop them."""
config = TelemetryConfig(enabled=False)
client = TelemetryClient(config)
event = build_task_event(
instance_id="12345678-1234-1234-1234-123456789abc",
)
# Should not raise, should silently drop
client.track(event)
assert client.queue_size == 0
@pytest.mark.asyncio
async def test_disabled_client_start_stop_async(self) -> None:
"""Async start/stop should be safe when disabled."""
config = TelemetryConfig(enabled=False)
client = TelemetryClient(config)
await client.start_async()
assert client.is_running is False
await client.stop_async()
# ---------------------------------------------------------------------------
# Lifespan integration
# ---------------------------------------------------------------------------
class TestLifespanIntegration:
"""Tests for Mosaic telemetry in the FastAPI lifespan."""
@pytest.mark.asyncio
async def test_lifespan_sets_mosaic_telemetry_on_app_state(self) -> None:
"""Lifespan should store mosaic_telemetry client on app.state."""
with patch.dict(
"os.environ",
{
"GITEA_WEBHOOK_SECRET": "test-secret",
"GITEA_URL": "https://git.mosaicstack.dev",
"ANTHROPIC_API_KEY": "test-key",
"MOSAIC_TELEMETRY_ENABLED": "true",
"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com",
"MOSAIC_TELEMETRY_API_KEY": "a" * 64,
"MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc",
"OTEL_ENABLED": "false",
"COORDINATOR_ENABLED": "false",
},
):
# Reload config to pick up test env vars
import importlib
from src import config
importlib.reload(config)
from src.main import lifespan
app = FastAPI()
async with lifespan(app) as _state:
client = getattr(app.state, "mosaic_telemetry", None)
assert client is not None
assert isinstance(client, TelemetryClient)
@pytest.mark.asyncio
async def test_lifespan_sets_none_when_disabled(self) -> None:
"""Lifespan should set mosaic_telemetry to None when disabled."""
with patch.dict(
"os.environ",
{
"GITEA_WEBHOOK_SECRET": "test-secret",
"GITEA_URL": "https://git.mosaicstack.dev",
"ANTHROPIC_API_KEY": "test-key",
"MOSAIC_TELEMETRY_ENABLED": "false",
"OTEL_ENABLED": "false",
"COORDINATOR_ENABLED": "false",
},
):
import importlib
from src import config
importlib.reload(config)
from src.main import lifespan
app = FastAPI()
async with lifespan(app) as _state:
client = getattr(app.state, "mosaic_telemetry", None)
assert client is None
@pytest.mark.asyncio
async def test_lifespan_stops_client_on_shutdown(self) -> None:
"""Lifespan should call stop_async on shutdown."""
with patch.dict(
"os.environ",
{
"GITEA_WEBHOOK_SECRET": "test-secret",
"GITEA_URL": "https://git.mosaicstack.dev",
"ANTHROPIC_API_KEY": "test-key",
"MOSAIC_TELEMETRY_ENABLED": "true",
"MOSAIC_TELEMETRY_SERVER_URL": "https://telemetry.example.com",
"MOSAIC_TELEMETRY_API_KEY": "a" * 64,
"MOSAIC_TELEMETRY_INSTANCE_ID": "12345678-1234-1234-1234-123456789abc",
"OTEL_ENABLED": "false",
"COORDINATOR_ENABLED": "false",
},
):
import importlib
from src import config
importlib.reload(config)
from src.main import lifespan
app = FastAPI()
async with lifespan(app) as _state:
client = app.state.mosaic_telemetry
assert isinstance(client, TelemetryClient)
                # The client was started inside the lifespan context.
            # After the lifespan exits, stop_async should have run during
            # shutdown, so the client is no longer running.
assert not client.is_running


@@ -0,0 +1,796 @@
"""Tests for task completion telemetry instrumentation in the coordinator.
These tests verify that the Coordinator and OrchestrationLoop correctly
emit TaskCompletionEvents via the Mosaic telemetry SDK after each task
dispatch attempt.
"""
from __future__ import annotations
import tempfile
from collections.abc import Generator
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from mosaicstack_telemetry import ( # type: ignore[import-untyped]
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
TaskCompletionEvent,
TaskType,
TelemetryClient,
)
from src.coordinator import (
_AGENT_TELEMETRY_MAP,
_DIFFICULTY_TO_COMPLEXITY,
_GATE_NAME_TO_ENUM,
Coordinator,
OrchestrationLoop,
_resolve_agent_fields,
)
from src.gates.quality_gate import GateResult
from src.models import IssueMetadata
from src.quality_orchestrator import QualityOrchestrator, VerificationResult
from src.queue import QueueManager
VALID_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def temp_queue_file() -> Generator[Path, None, None]:
"""Create a temporary file for queue persistence."""
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f:
temp_path = Path(f.name)
yield temp_path
if temp_path.exists():
temp_path.unlink()
@pytest.fixture
def queue_manager(temp_queue_file: Path) -> QueueManager:
"""Create a queue manager with temporary storage."""
return QueueManager(queue_file=temp_queue_file)
@pytest.fixture
def mock_telemetry_client() -> MagicMock:
"""Create a mock TelemetryClient."""
client = MagicMock(spec=TelemetryClient)
client.track = MagicMock()
return client
@pytest.fixture
def sonnet_metadata() -> IssueMetadata:
"""Metadata for a sonnet agent task."""
return IssueMetadata(
assigned_agent="sonnet",
difficulty="medium",
estimated_context=50000,
)
@pytest.fixture
def opus_metadata() -> IssueMetadata:
"""Metadata for an opus agent task (hard difficulty)."""
return IssueMetadata(
assigned_agent="opus",
difficulty="hard",
estimated_context=120000,
)
# ---------------------------------------------------------------------------
# _resolve_agent_fields tests
# ---------------------------------------------------------------------------
class TestResolveAgentFields:
"""Tests for the _resolve_agent_fields helper."""
def test_known_agent_sonnet(self) -> None:
"""Should return correct fields for sonnet agent."""
model, provider, harness = _resolve_agent_fields("sonnet")
assert model == "claude-sonnet-4-20250514"
assert provider == Provider.ANTHROPIC
assert harness == Harness.CLAUDE_CODE
def test_known_agent_opus(self) -> None:
"""Should return correct fields for opus agent."""
model, provider, harness = _resolve_agent_fields("opus")
assert model == "claude-opus-4-20250514"
assert provider == Provider.ANTHROPIC
assert harness == Harness.CLAUDE_CODE
def test_known_agent_haiku(self) -> None:
"""Should return correct fields for haiku agent."""
model, provider, harness = _resolve_agent_fields("haiku")
assert model == "claude-haiku-3.5-20241022"
assert provider == Provider.ANTHROPIC
assert harness == Harness.CLAUDE_CODE
def test_known_agent_glm(self) -> None:
"""Should return correct fields for glm (self-hosted) agent."""
model, provider, harness = _resolve_agent_fields("glm")
assert model == "glm-4"
assert provider == Provider.CUSTOM
assert harness == Harness.CUSTOM
def test_known_agent_minimax(self) -> None:
"""Should return correct fields for minimax (self-hosted) agent."""
model, provider, harness = _resolve_agent_fields("minimax")
assert model == "minimax"
assert provider == Provider.CUSTOM
assert harness == Harness.CUSTOM
def test_unknown_agent_returns_defaults(self) -> None:
"""Should return unknown values for unrecognised agent names."""
model, provider, harness = _resolve_agent_fields("nonexistent")
assert model == "unknown"
assert provider == Provider.UNKNOWN
assert harness == Harness.UNKNOWN
def test_all_map_entries_covered(self) -> None:
"""Ensure every entry in _AGENT_TELEMETRY_MAP is resolvable."""
for agent_name in _AGENT_TELEMETRY_MAP:
model, provider, harness = _resolve_agent_fields(agent_name)
assert model != "unknown"
# ---------------------------------------------------------------------------
# Coordinator telemetry emission tests
# ---------------------------------------------------------------------------
class TestCoordinatorTelemetry:
"""Tests for telemetry emission in the Coordinator class."""
@pytest.mark.asyncio
async def test_emits_success_event_on_completion(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit a SUCCESS event when task completes successfully."""
queue_manager.enqueue(100, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
await coordinator.process_queue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert isinstance(event, TaskCompletionEvent)
assert event.outcome == Outcome.SUCCESS
assert event.task_type == TaskType.IMPLEMENTATION
assert event.complexity == Complexity.MEDIUM
assert event.provider == Provider.ANTHROPIC
assert event.harness == Harness.CLAUDE_CODE
assert str(event.instance_id) == VALID_INSTANCE_ID
assert event.task_duration_ms >= 0
@pytest.mark.asyncio
async def test_emits_failure_event_when_agent_fails(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit a FAILURE event when spawn_agent returns False."""
queue_manager.enqueue(101, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
# Override spawn_agent to fail
coordinator.spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign]
await coordinator.process_queue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert event.outcome == Outcome.FAILURE
@pytest.mark.asyncio
async def test_emits_failure_event_on_exception(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit a FAILURE event when spawn_agent raises an exception."""
queue_manager.enqueue(102, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
coordinator.spawn_agent = AsyncMock(side_effect=RuntimeError("agent crashed")) # type: ignore[method-assign]
await coordinator.process_queue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert event.outcome == Outcome.FAILURE
@pytest.mark.asyncio
async def test_maps_difficulty_to_complexity(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
opus_metadata: IssueMetadata,
) -> None:
"""Should map difficulty='hard' to Complexity.HIGH in the event."""
queue_manager.enqueue(103, opus_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
await coordinator.process_queue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.complexity == Complexity.HIGH
@pytest.mark.asyncio
async def test_maps_agent_to_model_and_provider(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
opus_metadata: IssueMetadata,
) -> None:
"""Should map 'opus' agent to opus model and ANTHROPIC provider."""
queue_manager.enqueue(104, opus_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
await coordinator.process_queue()
event = mock_telemetry_client.track.call_args[0][0]
assert "opus" in event.model
assert event.provider == Provider.ANTHROPIC
assert event.harness == Harness.CLAUDE_CODE
@pytest.mark.asyncio
async def test_no_event_when_telemetry_disabled(
self,
queue_manager: QueueManager,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should not call track when telemetry_client is None."""
queue_manager.enqueue(105, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=None,
instance_id=VALID_INSTANCE_ID,
)
# Should not raise
await coordinator.process_queue()
@pytest.mark.asyncio
async def test_no_event_when_instance_id_empty(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should not call track when instance_id is empty."""
queue_manager.enqueue(106, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id="",
)
await coordinator.process_queue()
mock_telemetry_client.track.assert_not_called()
@pytest.mark.asyncio
async def test_telemetry_exception_does_not_propagate(
self,
queue_manager: QueueManager,
sonnet_metadata: IssueMetadata,
) -> None:
"""Telemetry failures must never break task processing."""
queue_manager.enqueue(107, sonnet_metadata)
bad_client = MagicMock(spec=TelemetryClient)
bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=bad_client,
instance_id=VALID_INSTANCE_ID,
)
# Should complete without raising, despite telemetry failure
result = await coordinator.process_queue()
assert result is not None
assert result.issue_number == 107
@pytest.mark.asyncio
async def test_no_event_when_queue_empty(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
) -> None:
"""Should not emit any event when the queue is empty."""
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
result = await coordinator.process_queue()
assert result is None
mock_telemetry_client.track.assert_not_called()
@pytest.mark.asyncio
async def test_estimated_input_tokens_from_metadata(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should set estimated_input_tokens from issue metadata."""
queue_manager.enqueue(108, sonnet_metadata)
coordinator = Coordinator(
queue_manager=queue_manager,
telemetry_client=mock_telemetry_client,
instance_id=VALID_INSTANCE_ID,
)
await coordinator.process_queue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.estimated_input_tokens == 50000
# ---------------------------------------------------------------------------
# OrchestrationLoop telemetry emission tests
# ---------------------------------------------------------------------------
def _make_orchestration_loop(
queue_manager: QueueManager,
telemetry_client: TelemetryClient | None = None,
instance_id: str = VALID_INSTANCE_ID,
quality_result: VerificationResult | None = None,
) -> OrchestrationLoop:
"""Create an OrchestrationLoop with mocked dependencies.
Args:
queue_manager: Queue manager instance.
telemetry_client: Optional telemetry client.
instance_id: Coordinator instance ID.
quality_result: Override quality verification result.
Returns:
Configured OrchestrationLoop.
"""
# Create quality orchestrator mock
qo = MagicMock(spec=QualityOrchestrator)
default_result = quality_result or VerificationResult(
all_passed=True,
gate_results={
"build": GateResult(passed=True, message="Build OK"),
"lint": GateResult(passed=True, message="Lint OK"),
"test": GateResult(passed=True, message="Test OK"),
"coverage": GateResult(passed=True, message="Coverage OK"),
},
)
qo.verify_completion = AsyncMock(return_value=default_result)
# Continuation service mock
from src.forced_continuation import ForcedContinuationService
cs = MagicMock(spec=ForcedContinuationService)
cs.generate_prompt = MagicMock(return_value="Fix: build failed")
# Context monitor mock
from src.context_monitor import ContextMonitor
cm = MagicMock(spec=ContextMonitor)
cm.determine_action = AsyncMock(return_value="continue")
return OrchestrationLoop(
queue_manager=queue_manager,
quality_orchestrator=qo,
continuation_service=cs,
context_monitor=cm,
telemetry_client=telemetry_client,
instance_id=instance_id,
)
class TestOrchestrationLoopTelemetry:
"""Tests for telemetry emission in the OrchestrationLoop class."""
@pytest.mark.asyncio
async def test_emits_success_with_quality_gates(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit SUCCESS event with quality gate details."""
queue_manager.enqueue(200, sonnet_metadata)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
await loop.process_next_issue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert event.outcome == Outcome.SUCCESS
assert event.quality_gate_passed is True
assert set(event.quality_gates_run) == {
QualityGate.BUILD,
QualityGate.LINT,
QualityGate.TEST,
QualityGate.COVERAGE,
}
assert event.quality_gates_failed == []
@pytest.mark.asyncio
async def test_emits_failure_with_failed_gates(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit FAILURE event with failed gate details."""
queue_manager.enqueue(201, sonnet_metadata)
failed_result = VerificationResult(
all_passed=False,
gate_results={
"build": GateResult(passed=True, message="Build OK"),
"lint": GateResult(passed=True, message="Lint OK"),
"test": GateResult(passed=False, message="3 tests failed"),
"coverage": GateResult(passed=False, message="Coverage 70% < 85%"),
},
)
loop = _make_orchestration_loop(
queue_manager,
telemetry_client=mock_telemetry_client,
quality_result=failed_result,
)
await loop.process_next_issue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert event.outcome == Outcome.FAILURE
assert event.quality_gate_passed is False
assert set(event.quality_gates_failed) == {
QualityGate.TEST,
QualityGate.COVERAGE,
}
assert set(event.quality_gates_run) == {
QualityGate.BUILD,
QualityGate.LINT,
QualityGate.TEST,
QualityGate.COVERAGE,
}
@pytest.mark.asyncio
async def test_retry_count_starts_at_zero(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""First attempt should report retry_count=0."""
queue_manager.enqueue(202, sonnet_metadata)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
await loop.process_next_issue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.retry_count == 0
@pytest.mark.asyncio
async def test_retry_count_increments_on_failure(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Retry count should increment after a quality gate failure."""
queue_manager.enqueue(203, sonnet_metadata)
failed_result = VerificationResult(
all_passed=False,
gate_results={
"build": GateResult(passed=False, message="Build failed"),
},
)
loop = _make_orchestration_loop(
queue_manager,
telemetry_client=mock_telemetry_client,
quality_result=failed_result,
)
# First attempt
await loop.process_next_issue()
event1 = mock_telemetry_client.track.call_args[0][0]
assert event1.retry_count == 0
# Re-enqueue and process again (simulates retry)
queue_manager.enqueue(203, sonnet_metadata)
mock_telemetry_client.track.reset_mock()
await loop.process_next_issue()
event2 = mock_telemetry_client.track.call_args[0][0]
assert event2.retry_count == 1
@pytest.mark.asyncio
async def test_retry_count_clears_on_success(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Retry count should be cleared after a successful completion."""
queue_manager.enqueue(204, sonnet_metadata)
# First: fail
failed_result = VerificationResult(
all_passed=False,
gate_results={
"build": GateResult(passed=False, message="Build failed"),
},
)
loop = _make_orchestration_loop(
queue_manager,
telemetry_client=mock_telemetry_client,
quality_result=failed_result,
)
await loop.process_next_issue()
assert loop._retry_counts.get(204) == 1
# Now succeed
success_result = VerificationResult(
all_passed=True,
gate_results={
"build": GateResult(passed=True, message="Build OK"),
},
)
loop.quality_orchestrator.verify_completion = AsyncMock(return_value=success_result) # type: ignore[method-assign]
queue_manager.enqueue(204, sonnet_metadata)
mock_telemetry_client.track.reset_mock()
await loop.process_next_issue()
assert 204 not in loop._retry_counts
@pytest.mark.asyncio
async def test_emits_failure_when_agent_spawn_fails(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should emit FAILURE when _spawn_agent returns False."""
queue_manager.enqueue(205, sonnet_metadata)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
loop._spawn_agent = AsyncMock(return_value=False) # type: ignore[method-assign]
await loop.process_next_issue()
mock_telemetry_client.track.assert_called_once()
event = mock_telemetry_client.track.call_args[0][0]
assert event.outcome == Outcome.FAILURE
@pytest.mark.asyncio
async def test_no_event_when_telemetry_disabled(
self,
queue_manager: QueueManager,
sonnet_metadata: IssueMetadata,
) -> None:
"""Should not call track when telemetry_client is None."""
queue_manager.enqueue(206, sonnet_metadata)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=None
)
# Should not raise
result = await loop.process_next_issue()
assert result is not None
@pytest.mark.asyncio
async def test_telemetry_exception_does_not_propagate(
self,
queue_manager: QueueManager,
sonnet_metadata: IssueMetadata,
) -> None:
"""Telemetry failures must never disrupt task processing."""
queue_manager.enqueue(207, sonnet_metadata)
bad_client = MagicMock(spec=TelemetryClient)
bad_client.track = MagicMock(side_effect=RuntimeError("telemetry down"))
loop = _make_orchestration_loop(
queue_manager, telemetry_client=bad_client
)
result = await loop.process_next_issue()
assert result is not None
assert result.issue_number == 207
@pytest.mark.asyncio
async def test_duration_is_positive(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Duration should be a non-negative integer."""
queue_manager.enqueue(208, sonnet_metadata)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
await loop.process_next_issue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.task_duration_ms >= 0
@pytest.mark.asyncio
async def test_maps_glm_agent_correctly(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
) -> None:
"""Should map GLM (self-hosted) agent to CUSTOM provider/harness."""
glm_meta = IssueMetadata(
assigned_agent="glm",
difficulty="medium",
estimated_context=30000,
)
queue_manager.enqueue(209, glm_meta)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
await loop.process_next_issue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.model == "glm-4"
assert event.provider == Provider.CUSTOM
assert event.harness == Harness.CUSTOM
@pytest.mark.asyncio
async def test_maps_easy_difficulty_to_low_complexity(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
) -> None:
"""Should map difficulty='easy' to Complexity.LOW."""
easy_meta = IssueMetadata(
assigned_agent="haiku",
difficulty="easy",
estimated_context=10000,
)
queue_manager.enqueue(210, easy_meta)
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
await loop.process_next_issue()
event = mock_telemetry_client.track.call_args[0][0]
assert event.complexity == Complexity.LOW
@pytest.mark.asyncio
async def test_no_event_when_queue_empty(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
) -> None:
"""Should not emit an event when queue is empty."""
loop = _make_orchestration_loop(
queue_manager, telemetry_client=mock_telemetry_client
)
result = await loop.process_next_issue()
assert result is None
mock_telemetry_client.track.assert_not_called()
@pytest.mark.asyncio
async def test_unknown_gate_names_excluded(
self,
queue_manager: QueueManager,
mock_telemetry_client: MagicMock,
sonnet_metadata: IssueMetadata,
) -> None:
"""Gate names not in _GATE_NAME_TO_ENUM should be excluded from telemetry."""
queue_manager.enqueue(211, sonnet_metadata)
result_with_unknown = VerificationResult(
all_passed=False,
gate_results={
"build": GateResult(passed=True, message="Build OK"),
"unknown_gate": GateResult(passed=False, message="Unknown gate"),
},
)
loop = _make_orchestration_loop(
queue_manager,
telemetry_client=mock_telemetry_client,
quality_result=result_with_unknown,
)
await loop.process_next_issue()
event = mock_telemetry_client.track.call_args[0][0]
assert QualityGate.BUILD in event.quality_gates_run
# unknown_gate should not appear
assert len(event.quality_gates_run) == 1
assert len(event.quality_gates_failed) == 0
# ---------------------------------------------------------------------------
# Mapping dict completeness tests
# ---------------------------------------------------------------------------
class TestMappingCompleteness:
"""Tests to verify mapping dicts cover expected values."""
def test_difficulty_map_covers_all_metadata_values(self) -> None:
"""All valid difficulty levels should have Complexity mappings."""
expected_difficulties = {"easy", "medium", "hard"}
assert expected_difficulties == set(_DIFFICULTY_TO_COMPLEXITY.keys())
def test_gate_name_map_covers_all_orchestrator_gates(self) -> None:
"""All gate names used by QualityOrchestrator should be mappable."""
expected_gates = {"build", "lint", "test", "coverage"}
assert expected_gates == set(_GATE_NAME_TO_ENUM.keys())
def test_agent_map_covers_all_configured_agents(self) -> None:
"""All agents used by the coordinator should have telemetry mappings."""
expected_agents = {"sonnet", "opus", "haiku", "glm", "minimax"}
assert expected_agents == set(_AGENT_TELEMETRY_MAP.keys())


@@ -33,6 +33,7 @@
"react": "^19.0.0", "react": "^19.0.0",
"react-dom": "^19.0.0", "react-dom": "^19.0.0",
"react-grid-layout": "^2.2.2", "react-grid-layout": "^2.2.2",
"recharts": "^3.7.0",
"socket.io-client": "^4.8.3" "socket.io-client": "^4.8.3"
}, },
"devDependencies": { "devDependencies": {


@@ -0,0 +1,288 @@
import { describe, it, expect, vi, beforeEach } from "vitest";
import { render, screen, waitFor, fireEvent } from "@testing-library/react";
import type { ReactNode } from "react";
import UsagePage from "./page";
// ─── Component Prop Types ────────────────────────────────────────────
interface ChildrenProps {
children: ReactNode;
}
interface StyledChildrenProps extends ChildrenProps {
className?: string;
}
// ─── Mocks ───────────────────────────────────────────────────────────
// Mock @/components/ui/card — @mosaic/ui can't be resolved in vitest
vi.mock("@/components/ui/card", () => ({
Card: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
<div className={className}>{children}</div>
),
CardHeader: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
CardContent: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
<div className={className}>{children}</div>
),
CardFooter: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
CardTitle: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
<h3 className={className}>{children}</h3>
),
CardDescription: ({ children, className }: StyledChildrenProps): React.JSX.Element => (
<p className={className}>{children}</p>
),
}));
// Mock recharts — jsdom has no SVG layout engine, so we render stubs
vi.mock("recharts", () => ({
LineChart: ({ children }: ChildrenProps): React.JSX.Element => (
<div data-testid="recharts-line-chart">{children}</div>
),
Line: (): React.JSX.Element => <div />,
BarChart: ({ children }: ChildrenProps): React.JSX.Element => (
<div data-testid="recharts-bar-chart">{children}</div>
),
Bar: (): React.JSX.Element => <div />,
PieChart: ({ children }: ChildrenProps): React.JSX.Element => (
<div data-testid="recharts-pie-chart">{children}</div>
),
Pie: (): React.JSX.Element => <div />,
Cell: (): React.JSX.Element => <div />,
XAxis: (): React.JSX.Element => <div />,
YAxis: (): React.JSX.Element => <div />,
CartesianGrid: (): React.JSX.Element => <div />,
Tooltip: (): React.JSX.Element => <div />,
ResponsiveContainer: ({ children }: ChildrenProps): React.JSX.Element => <div>{children}</div>,
Legend: (): React.JSX.Element => <div />,
}));
// Mock the telemetry API module
vi.mock("@/lib/api/telemetry", () => ({
fetchUsageSummary: vi.fn(),
fetchTokenUsage: vi.fn(),
fetchCostBreakdown: vi.fn(),
fetchTaskOutcomes: vi.fn(),
}));
// Import mocked modules after vi.mock
import {
fetchUsageSummary,
fetchTokenUsage,
fetchCostBreakdown,
fetchTaskOutcomes,
} from "@/lib/api/telemetry";
// ─── Test Data ───────────────────────────────────────────────────────
const mockSummary = {
totalTokens: 245800,
totalCost: 3.42,
taskCount: 47,
avgQualityGatePassRate: 0.87,
};
const mockTokenUsage = [
{ date: "2026-02-08", inputTokens: 10000, outputTokens: 5000, totalTokens: 15000 },
{ date: "2026-02-09", inputTokens: 12000, outputTokens: 6000, totalTokens: 18000 },
];
const mockCostBreakdown = [
{ model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 },
{ model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 },
];
const mockTaskOutcomes = [
{ outcome: "Success", count: 312, color: "#6EBF8B" },
{ outcome: "Partial", count: 48, color: "#F5C862" },
];
function setupMocks(overrides?: { empty?: boolean; error?: boolean }): void {
if (overrides?.error) {
vi.mocked(fetchUsageSummary).mockRejectedValue(new Error("Network error"));
vi.mocked(fetchTokenUsage).mockRejectedValue(new Error("Network error"));
vi.mocked(fetchCostBreakdown).mockRejectedValue(new Error("Network error"));
vi.mocked(fetchTaskOutcomes).mockRejectedValue(new Error("Network error"));
return;
}
const summary = overrides?.empty ? { ...mockSummary, taskCount: 0 } : mockSummary;
vi.mocked(fetchUsageSummary).mockResolvedValue(summary);
vi.mocked(fetchTokenUsage).mockResolvedValue(mockTokenUsage);
vi.mocked(fetchCostBreakdown).mockResolvedValue(mockCostBreakdown);
vi.mocked(fetchTaskOutcomes).mockResolvedValue(mockTaskOutcomes);
}
// ─── Tests ───────────────────────────────────────────────────────────
describe("UsagePage", (): void => {
beforeEach((): void => {
vi.clearAllMocks();
});
it("should render the page title and subtitle", (): void => {
setupMocks();
render(<UsagePage />);
expect(screen.getByRole("heading", { level: 1 })).toHaveTextContent("Usage");
expect(screen.getByText("Token usage and cost overview")).toBeInTheDocument();
});
it("should have proper layout structure", (): void => {
setupMocks();
const { container } = render(<UsagePage />);
const main = container.querySelector("main");
expect(main).toBeInTheDocument();
});
it("should show loading skeleton initially", (): void => {
setupMocks();
render(<UsagePage />);
expect(screen.getByTestId("loading-skeleton")).toBeInTheDocument();
});
it("should render summary cards after loading", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
});
// Check summary card values
expect(screen.getByText("Total Tokens")).toBeInTheDocument();
expect(screen.getByText("245.8K")).toBeInTheDocument();
expect(screen.getByText("Estimated Cost")).toBeInTheDocument();
expect(screen.getByText("$3.42")).toBeInTheDocument();
expect(screen.getByText("Task Count")).toBeInTheDocument();
expect(screen.getByText("47")).toBeInTheDocument();
expect(screen.getByText("Quality Gate Pass Rate")).toBeInTheDocument();
expect(screen.getByText("87.0%")).toBeInTheDocument();
});
it("should render all chart sections after loading", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByTestId("token-usage-chart")).toBeInTheDocument();
expect(screen.getByTestId("cost-breakdown-chart")).toBeInTheDocument();
expect(screen.getByTestId("task-outcomes-chart")).toBeInTheDocument();
});
});
it("should render the time range selector with three options", (): void => {
setupMocks();
render(<UsagePage />);
expect(screen.getByText("7 Days")).toBeInTheDocument();
expect(screen.getByText("30 Days")).toBeInTheDocument();
expect(screen.getByText("90 Days")).toBeInTheDocument();
});
it("should have 30 Days selected by default", (): void => {
setupMocks();
render(<UsagePage />);
const button30d = screen.getByText("30 Days");
expect(button30d).toHaveAttribute("aria-pressed", "true");
});
it("should change time range when a different option is clicked", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
// Wait for initial load
await waitFor((): void => {
expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
});
// Click 7 Days
const button7d = screen.getByText("7 Days");
fireEvent.click(button7d);
expect(button7d).toHaveAttribute("aria-pressed", "true");
expect(screen.getByText("30 Days")).toHaveAttribute("aria-pressed", "false");
});
it("should refetch data when time range changes", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
// Wait for initial load (30d default)
await waitFor((): void => {
expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
});
// Initial call was with "30d"
expect(fetchUsageSummary).toHaveBeenCalledWith("30d");
// Change to 7d
fireEvent.click(screen.getByText("7 Days"));
await waitFor((): void => {
expect(fetchUsageSummary).toHaveBeenCalledWith("7d");
});
});
it("should show empty state when no tasks exist", async (): Promise<void> => {
setupMocks({ empty: true });
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByTestId("empty-state")).toBeInTheDocument();
});
expect(screen.getByText("No usage data yet")).toBeInTheDocument();
});
it("should show error state on fetch failure", async (): Promise<void> => {
setupMocks({ error: true });
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByText("Network error")).toBeInTheDocument();
});
expect(screen.getByText("Try again")).toBeInTheDocument();
});
it("should retry loading when Try again button is clicked after error", async (): Promise<void> => {
setupMocks({ error: true });
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByText("Try again")).toBeInTheDocument();
});
// Now set up success mocks and click retry
setupMocks();
fireEvent.click(screen.getByText("Try again"));
await waitFor((): void => {
expect(screen.getByTestId("summary-cards")).toBeInTheDocument();
});
});
it("should display chart section titles", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByText("Token Usage Over Time")).toBeInTheDocument();
expect(screen.getByText("Cost by Model")).toBeInTheDocument();
expect(screen.getByText("Task Outcomes")).toBeInTheDocument();
});
});
it("should render recharts components within chart containers", async (): Promise<void> => {
setupMocks();
render(<UsagePage />);
await waitFor((): void => {
expect(screen.getByTestId("recharts-line-chart")).toBeInTheDocument();
expect(screen.getByTestId("recharts-bar-chart")).toBeInTheDocument();
expect(screen.getByTestId("recharts-pie-chart")).toBeInTheDocument();
});
});
});


@@ -0,0 +1,430 @@
"use client";
import { useState, useEffect, useCallback } from "react";
import type { ReactElement } from "react";
import {
LineChart,
Line,
BarChart,
Bar,
PieChart,
Pie,
XAxis,
YAxis,
CartesianGrid,
Tooltip,
ResponsiveContainer,
Legend,
} from "recharts";
import { Card, CardHeader, CardContent, CardTitle, CardDescription } from "@/components/ui/card";
import {
fetchUsageSummary,
fetchTokenUsage,
fetchCostBreakdown,
fetchTaskOutcomes,
} from "@/lib/api/telemetry";
import type {
TimeRange,
UsageSummary,
TokenUsagePoint,
CostBreakdownItem,
TaskOutcomeItem,
} from "@/lib/api/telemetry";
// ─── Constants ───────────────────────────────────────────────────────
const TIME_RANGES: { value: TimeRange; label: string }[] = [
{ value: "7d", label: "7 Days" },
{ value: "30d", label: "30 Days" },
{ value: "90d", label: "90 Days" },
];
// Calm, PDA-friendly chart colors (no aggressive reds)
const CHART_COLORS = {
inputTokens: "#6366F1", // Indigo
outputTokens: "#38BDF8", // Sky blue
grid: "#E2E8F0", // Slate 200
barFill: "#818CF8", // Indigo 400
};
// ─── Helpers ─────────────────────────────────────────────────────────
function formatNumber(value: number): string {
if (value >= 1_000_000) {
return `${(value / 1_000_000).toFixed(1)}M`;
}
if (value >= 1_000) {
return `${(value / 1_000).toFixed(1)}K`;
}
return value.toFixed(0);
}
function formatCurrency(value: number): string {
return `$${value.toFixed(2)}`;
}
function formatPercent(value: number): string {
return `${(value * 100).toFixed(1)}%`;
}
function formatDateLabel(dateStr: string): string {
const date = new Date(dateStr + "T00:00:00");
return date.toLocaleDateString("en-US", { month: "short", day: "numeric" });
}
/**
* Map TaskOutcomeItem[] to recharts-compatible data with `fill` property.
* This replaces deprecated Cell component (removed in Recharts 4.0).
*/
function toFillData(
outcomes: TaskOutcomeItem[]
): { outcome: string; count: number; fill: string }[] {
return outcomes.map((item) => ({
outcome: item.outcome,
count: item.count,
fill: item.color,
}));
}
// ─── Sub-components ──────────────────────────────────────────────────
function SummaryCard({
title,
value,
subtitle,
}: {
title: string;
value: string;
subtitle?: string;
}): ReactElement {
return (
<Card>
<CardContent className="pt-6">
<p className="text-sm font-medium text-gray-500">{title}</p>
<p className="text-2xl font-bold text-gray-900 mt-1">{value}</p>
{subtitle ? <p className="text-xs text-gray-400 mt-1">{subtitle}</p> : null}
</CardContent>
</Card>
);
}
function LoadingSkeleton(): ReactElement {
return (
<div className="space-y-6" data-testid="loading-skeleton">
{/* Summary cards skeleton */}
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
{Array.from({ length: 4 }).map((_, i) => (
<Card key={i}>
<CardContent className="pt-6">
<div className="h-4 bg-gray-200 rounded w-24 animate-pulse" />
<div className="h-8 bg-gray-200 rounded w-16 mt-2 animate-pulse" />
</CardContent>
</Card>
))}
</div>
{/* Chart skeletons */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{Array.from({ length: 3 }).map((_, i) => (
<Card key={i} className={i === 0 ? "lg:col-span-2" : ""}>
<CardHeader>
<div className="h-6 bg-gray-200 rounded w-40 animate-pulse" />
</CardHeader>
<CardContent>
<div className="h-64 bg-gray-100 rounded animate-pulse" />
</CardContent>
</Card>
))}
</div>
</div>
);
}
function EmptyState(): ReactElement {
return (
<div
className="flex flex-col items-center justify-center py-16 text-center"
data-testid="empty-state"
>
<div className="text-4xl mb-4">📊</div>
<h2 className="text-xl font-semibold text-gray-700 mb-2">No usage data yet</h2>
<p className="text-gray-500 max-w-md">
Once you start using AI-powered features, your token usage and cost data will appear here.
</p>
</div>
);
}
// ─── Main Page Component ─────────────────────────────────────────────
export default function UsagePage(): ReactElement {
const [timeRange, setTimeRange] = useState<TimeRange>("30d");
const [isLoading, setIsLoading] = useState(true);
const [isEmpty, setIsEmpty] = useState(false);
const [error, setError] = useState<string | null>(null);
const [summary, setSummary] = useState<UsageSummary | null>(null);
const [tokenUsage, setTokenUsage] = useState<TokenUsagePoint[]>([]);
const [costBreakdown, setCostBreakdown] = useState<CostBreakdownItem[]>([]);
const [taskOutcomes, setTaskOutcomes] = useState<TaskOutcomeItem[]>([]);
const loadData = useCallback(async (range: TimeRange): Promise<void> => {
setIsLoading(true);
setError(null);
try {
const [summaryData, tokenData, costData, outcomeData] = await Promise.all([
fetchUsageSummary(range),
fetchTokenUsage(range),
fetchCostBreakdown(range),
fetchTaskOutcomes(range),
]);
setSummary(summaryData);
setTokenUsage(tokenData);
setCostBreakdown(costData);
setTaskOutcomes(outcomeData);
// Check if there's any meaningful data
setIsEmpty(summaryData.taskCount === 0);
} catch (err) {
setError(
err instanceof Error
? err.message
: "We had trouble loading usage data. Please try again when you're ready."
);
} finally {
setIsLoading(false);
}
}, []);
useEffect(() => {
void loadData(timeRange);
}, [timeRange, loadData]);
function handleTimeRangeChange(range: TimeRange): void {
setTimeRange(range);
}
return (
<main className="container mx-auto px-4 py-8">
{/* Header */}
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between mb-8 gap-4">
<div>
<h1 className="text-3xl font-bold text-gray-900">Usage</h1>
<p className="text-gray-600 mt-1">Token usage and cost overview</p>
</div>
{/* Time range selector */}
<div className="flex gap-1 bg-gray-100 rounded-lg p-1" role="group" aria-label="Time range">
{TIME_RANGES.map(({ value, label }) => (
<button
key={value}
onClick={() => {
handleTimeRangeChange(value);
}}
className={`px-4 py-2 text-sm font-medium rounded-md transition-colors ${
timeRange === value
? "bg-white text-gray-900 shadow-sm"
: "text-gray-600 hover:text-gray-900"
}`}
aria-pressed={timeRange === value}
>
{label}
</button>
))}
</div>
</div>
{/* Error state */}
{error !== null ? (
<div className="rounded-lg border border-amber-200 bg-amber-50 p-6 text-center">
<p className="text-amber-800">{error}</p>
<button
onClick={() => void loadData(timeRange)}
className="mt-4 rounded-md bg-amber-600 px-4 py-2 text-sm font-medium text-white hover:bg-amber-700 transition-colors"
>
Try again
</button>
</div>
) : isLoading ? (
<LoadingSkeleton />
) : isEmpty ? (
<EmptyState />
) : (
<div className="space-y-6">
{/* Summary Cards */}
<div
className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4"
data-testid="summary-cards"
>
<SummaryCard
title="Total Tokens"
value={summary ? formatNumber(summary.totalTokens) : "0"}
subtitle="Input + Output"
/>
<SummaryCard
title="Estimated Cost"
value={summary ? formatCurrency(summary.totalCost) : "$0.00"}
subtitle="Based on provider pricing"
/>
<SummaryCard
title="Task Count"
value={summary ? formatNumber(summary.taskCount) : "0"}
subtitle="AI-assisted tasks"
/>
<SummaryCard
title="Quality Gate Pass Rate"
value={summary ? formatPercent(summary.avgQualityGatePassRate) : "0%"}
subtitle="Build, lint, test, typecheck"
/>
</div>
{/* Charts */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* Token Usage Over Time — Full width */}
<Card className="lg:col-span-2">
<CardHeader>
<CardTitle className="text-lg">Token Usage Over Time</CardTitle>
<CardDescription>Input and output tokens by day</CardDescription>
</CardHeader>
<CardContent>
<div className="h-72" data-testid="token-usage-chart">
<ResponsiveContainer width="100%" height="100%">
<LineChart data={tokenUsage}>
<CartesianGrid strokeDasharray="3 3" stroke={CHART_COLORS.grid} />
<XAxis
dataKey="date"
tickFormatter={formatDateLabel}
tick={{ fontSize: 12, fill: "#64748B" }}
interval="preserveStartEnd"
/>
<YAxis
tickFormatter={formatNumber}
tick={{ fontSize: 12, fill: "#64748B" }}
width={60}
/>
<Tooltip
formatter={
((value: number, name: string) => [
formatNumber(value),
name === "inputTokens" ? "Input Tokens" : "Output Tokens",
]) as never
}
labelFormatter={((label: string) => formatDateLabel(label)) as never}
contentStyle={{
borderRadius: "8px",
border: "1px solid #E2E8F0",
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
}}
/>
<Legend
formatter={(value: string) =>
value === "inputTokens" ? "Input Tokens" : "Output Tokens"
}
/>
<Line
type="monotone"
dataKey="inputTokens"
stroke={CHART_COLORS.inputTokens}
strokeWidth={2}
dot={false}
activeDot={{ r: 4 }}
/>
<Line
type="monotone"
dataKey="outputTokens"
stroke={CHART_COLORS.outputTokens}
strokeWidth={2}
dot={false}
activeDot={{ r: 4 }}
/>
</LineChart>
</ResponsiveContainer>
</div>
</CardContent>
</Card>
{/* Cost Breakdown by Model */}
<Card>
<CardHeader>
<CardTitle className="text-lg">Cost by Model</CardTitle>
<CardDescription>Estimated cost breakdown</CardDescription>
</CardHeader>
<CardContent>
<div className="h-72" data-testid="cost-breakdown-chart">
<ResponsiveContainer width="100%" height="100%">
<BarChart data={costBreakdown} layout="vertical">
<CartesianGrid strokeDasharray="3 3" stroke={CHART_COLORS.grid} />
<XAxis
type="number"
tickFormatter={(v: number) => formatCurrency(v)}
tick={{ fontSize: 12, fill: "#64748B" }}
/>
<YAxis
type="category"
dataKey="model"
tick={{ fontSize: 11, fill: "#64748B" }}
width={140}
/>
<Tooltip
formatter={((value: number) => [formatCurrency(value), "Cost"]) as never}
contentStyle={{
borderRadius: "8px",
border: "1px solid #E2E8F0",
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
}}
/>
<Bar dataKey="cost" fill={CHART_COLORS.barFill} radius={[0, 4, 4, 0]} />
</BarChart>
</ResponsiveContainer>
</div>
</CardContent>
</Card>
{/* Task Outcomes */}
<Card>
<CardHeader>
<CardTitle className="text-lg">Task Outcomes</CardTitle>
<CardDescription>Distribution of task completion results</CardDescription>
</CardHeader>
<CardContent>
<div
className="h-72 flex items-center justify-center"
data-testid="task-outcomes-chart"
>
<ResponsiveContainer width="100%" height="100%">
<PieChart>
<Pie
data={toFillData(taskOutcomes)}
cx="50%"
cy="50%"
innerRadius={60}
outerRadius={100}
paddingAngle={2}
dataKey="count"
nameKey="outcome"
label={
((props: { outcome?: string; count?: number }) =>
`${props.outcome ?? ""}: ${String(props.count ?? 0)}`) as never
}
/>
<Tooltip
formatter={((value: number, name: string) => [value, name]) as never}
contentStyle={{
borderRadius: "8px",
border: "1px solid #E2E8F0",
boxShadow: "0 2px 8px rgba(0,0,0,0.08)",
}}
/>
<Legend />
</PieChart>
</ResponsiveContainer>
</div>
</CardContent>
</Card>
</div>
</div>
)}
</main>
);
}


@@ -16,6 +16,7 @@ export function Navigation(): React.JSX.Element {
{ href: "/tasks", label: "Tasks" }, { href: "/tasks", label: "Tasks" },
{ href: "/calendar", label: "Calendar" }, { href: "/calendar", label: "Calendar" },
{ href: "/knowledge", label: "Knowledge" }, { href: "/knowledge", label: "Knowledge" },
{ href: "/usage", label: "Usage" },
]; ];
// Global keyboard shortcut for search (Cmd+K or Ctrl+K) // Global keyboard shortcut for search (Cmd+K or Ctrl+K)


@@ -12,3 +12,4 @@ export * from "./knowledge";
export * from "./domains"; export * from "./domains";
export * from "./teams"; export * from "./teams";
export * from "./personalities"; export * from "./personalities";
export * from "./telemetry";


@@ -0,0 +1,187 @@
/**
* Telemetry API Client
* Handles telemetry data fetching for the usage dashboard.
*
* NOTE: Currently returns mock/placeholder data since the telemetry API
* aggregation endpoints don't exist yet. The important thing is the UI structure.
* When the backend endpoints are ready, replace mock calls with real apiGet() calls.
*/
import { apiGet, type ApiResponse } from "./client";
// ─── Types ───────────────────────────────────────────────────────────
export type TimeRange = "7d" | "30d" | "90d";
export interface UsageSummary {
totalTokens: number;
totalCost: number;
taskCount: number;
avgQualityGatePassRate: number;
}
export interface TokenUsagePoint {
date: string;
inputTokens: number;
outputTokens: number;
totalTokens: number;
}
export interface CostBreakdownItem {
model: string;
provider: string;
cost: number;
taskCount: number;
}
export interface TaskOutcomeItem {
outcome: string;
count: number;
color: string;
}
export interface EstimateParams {
taskType: string;
model: string;
provider: string;
complexity: string;
}
export interface EstimateResponse {
prediction: {
input_tokens: { median: number; p75: number; p90: number };
output_tokens: { median: number; p75: number; p90: number };
cost_usd_micros: Record<string, number>;
quality: { gate_pass_rate: number; success_rate: number };
} | null;
metadata: {
sample_size: number;
confidence: "none" | "low" | "medium" | "high";
};
}
// ─── Mock Data Generators ────────────────────────────────────────────
function generateDateRange(range: TimeRange): string[] {
const days = range === "7d" ? 7 : range === "30d" ? 30 : 90;
const dates: string[] = [];
const now = new Date();
for (let i = days - 1; i >= 0; i--) {
const d = new Date(now);
d.setDate(d.getDate() - i);
dates.push(d.toISOString().split("T")[0] ?? "");
}
return dates;
}
function generateMockTokenUsage(range: TimeRange): TokenUsagePoint[] {
const dates = generateDateRange(range);
return dates.map((date) => {
const baseInput = 8000 + Math.floor(Math.random() * 12000);
const baseOutput = 3000 + Math.floor(Math.random() * 7000);
return {
date,
inputTokens: baseInput,
outputTokens: baseOutput,
totalTokens: baseInput + baseOutput,
};
});
}
function generateMockSummary(range: TimeRange): UsageSummary {
const multiplier = range === "7d" ? 1 : range === "30d" ? 4 : 12;
return {
totalTokens: 245_800 * multiplier,
totalCost: 3.42 * multiplier,
taskCount: 47 * multiplier,
avgQualityGatePassRate: 0.87,
};
}
function generateMockCostBreakdown(): CostBreakdownItem[] {
return [
{ model: "claude-sonnet-4-5", provider: "anthropic", cost: 18.5, taskCount: 124 },
{ model: "gpt-4o", provider: "openai", cost: 12.3, taskCount: 89 },
{ model: "claude-haiku-3.5", provider: "anthropic", cost: 4.2, taskCount: 156 },
{ model: "llama-3.3-70b", provider: "ollama", cost: 0, taskCount: 67 },
{ model: "gemini-2.0-flash", provider: "google", cost: 2.8, taskCount: 42 },
];
}
// PDA-friendly colors: calm, no aggressive reds
function generateMockTaskOutcomes(): TaskOutcomeItem[] {
return [
{ outcome: "Success", count: 312, color: "#6EBF8B" },
{ outcome: "Partial", count: 48, color: "#F5C862" },
{ outcome: "Timeout", count: 18, color: "#94A3B8" },
{ outcome: "Incomplete", count: 22, color: "#C4A5DE" },
];
}
// ─── API Functions ───────────────────────────────────────────────────
/**
* Fetch usage summary data (total tokens, cost, task count, quality rate)
*/
export async function fetchUsageSummary(timeRange: TimeRange): Promise<UsageSummary> {
// TODO: Replace with real API call when backend aggregation endpoints are ready
// const response = await apiGet<ApiResponse<UsageSummary>>(`/api/telemetry/summary?range=${timeRange}`);
// return response.data;
await new Promise((resolve) => setTimeout(resolve, 200));
return generateMockSummary(timeRange);
}
/**
* Fetch token usage time series for charts
*/
export async function fetchTokenUsage(timeRange: TimeRange): Promise<TokenUsagePoint[]> {
// TODO: Replace with real API call
// const response = await apiGet<ApiResponse<TokenUsagePoint[]>>(`/api/telemetry/tokens?range=${timeRange}`);
// return response.data;
await new Promise((resolve) => setTimeout(resolve, 250));
return generateMockTokenUsage(timeRange);
}
/**
* Fetch cost breakdown by model
*/
export async function fetchCostBreakdown(timeRange: TimeRange): Promise<CostBreakdownItem[]> {
// TODO: Replace with real API call
// const response = await apiGet<ApiResponse<CostBreakdownItem[]>>(`/api/telemetry/costs?range=${timeRange}`);
// return response.data;
await new Promise((resolve) => setTimeout(resolve, 200));
void timeRange;
return generateMockCostBreakdown();
}
/**
* Fetch task outcome distribution
*/
export async function fetchTaskOutcomes(timeRange: TimeRange): Promise<TaskOutcomeItem[]> {
// TODO: Replace with real API call
// const response = await apiGet<ApiResponse<TaskOutcomeItem[]>>(`/api/telemetry/outcomes?range=${timeRange}`);
// return response.data;
await new Promise((resolve) => setTimeout(resolve, 150));
void timeRange;
return generateMockTaskOutcomes();
}
/**
* Fetch cost/token estimate for a given task configuration.
* Uses the real GET /api/telemetry/estimate endpoint from TEL-006.
*/
export async function fetchEstimate(params: EstimateParams): Promise<EstimateResponse> {
const query = new URLSearchParams({
taskType: params.taskType,
model: params.model,
provider: params.provider,
complexity: params.complexity,
}).toString();
const response = await apiGet<ApiResponse<EstimateResponse>>(`/api/telemetry/estimate?${query}`);
return response.data;
}


@@ -255,6 +255,12 @@ services:
COORDINATOR_POLL_INTERVAL: ${COORDINATOR_POLL_INTERVAL:-5.0}
COORDINATOR_MAX_CONCURRENT_AGENTS: ${COORDINATOR_MAX_CONCURRENT_AGENTS:-10}
COORDINATOR_ENABLED: ${COORDINATOR_ENABLED:-true}
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
healthcheck:
test:
[
@@ -295,6 +301,12 @@ services:
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT:-http://ollama:11434}
OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200}
ENCRYPTION_KEY: ${ENCRYPTION_KEY}
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
healthcheck:
test:
[


@@ -283,6 +283,12 @@ services:
COORDINATOR_POLL_INTERVAL: ${COORDINATOR_POLL_INTERVAL:-5.0}
COORDINATOR_MAX_CONCURRENT_AGENTS: ${COORDINATOR_MAX_CONCURRENT_AGENTS:-10}
COORDINATOR_ENABLED: ${COORDINATOR_ENABLED:-true}
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
healthcheck:
test:
[
@@ -324,6 +330,12 @@ services:
OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200}
ORCHESTRATOR_URL: ${ORCHESTRATOR_URL:-http://orchestrator:3001}
ENCRYPTION_KEY: ${ENCRYPTION_KEY}
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-https://tel-api.mosaicstack.dev}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
healthcheck:
test:
[


@@ -377,6 +377,12 @@ services:
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT:-http://ollama:11434}
# OpenBao (optional)
OPENBAO_ADDR: ${OPENBAO_ADDR:-http://openbao:8200}
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
volumes:
- openbao_init:/openbao/init:ro
ports:


@@ -0,0 +1,129 @@
# ==============================================
# Matrix Dev Environment (Synapse + Element Web)
# ==============================================
#
# Development-only overlay for testing the Matrix bridge locally.
# NOT for production — use docker-compose.sample.matrix.yml for production.
#
# Usage:
# docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml up -d
#
# Or with Makefile:
# make matrix-up
#
# This overlay:
# - Adds Synapse homeserver (localhost:8008) using shared PostgreSQL
# - Adds Element Web client (localhost:8501)
# - Creates a separate 'synapse' database in the shared PostgreSQL instance
# - Enables open registration for easy dev testing
#
# After first startup, create the bot account:
# docker/matrix/scripts/setup-bot.sh
#
# ==============================================
services:
# ======================
# Synapse Database Init
# ======================
# Creates the 'synapse' database and user in the shared PostgreSQL instance.
# Runs once and exits — idempotent, safe to run repeatedly.
synapse-db-init:
image: postgres:17-alpine
container_name: mosaic-synapse-db-init
restart: "no"
environment:
PGHOST: postgres
PGPORT: 5432
PGUSER: ${POSTGRES_USER:-mosaic}
PGPASSWORD: ${POSTGRES_PASSWORD:-mosaic_dev_password}
SYNAPSE_DB: ${SYNAPSE_POSTGRES_DB:-synapse}
SYNAPSE_USER: ${SYNAPSE_POSTGRES_USER:-synapse}
SYNAPSE_PASSWORD: ${SYNAPSE_POSTGRES_PASSWORD:-synapse_dev_password}
entrypoint: ["sh", "-c"]
command:
- |
until pg_isready -h postgres -p 5432 -U $${PGUSER}; do
echo "Waiting for PostgreSQL..."
sleep 2
done
echo "PostgreSQL is ready. Creating Synapse database and user..."
psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_roles WHERE rolname='$${SYNAPSE_USER}'" | grep -q 1 || \
psql -h postgres -U $${PGUSER} -c "CREATE USER $${SYNAPSE_USER} WITH PASSWORD '$${SYNAPSE_PASSWORD}';"
psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_database WHERE datname='$${SYNAPSE_DB}'" | grep -q 1 || \
psql -h postgres -U $${PGUSER} -c "CREATE DATABASE $${SYNAPSE_DB} OWNER $${SYNAPSE_USER} ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0;"
echo "Synapse database ready: $${SYNAPSE_DB}"
depends_on:
postgres:
condition: service_healthy
networks:
- mosaic-network
# ======================
# Synapse (Matrix Homeserver)
# ======================
synapse:
image: matrixdotorg/synapse:latest
container_name: mosaic-synapse
restart: unless-stopped
environment:
SYNAPSE_CONFIG_DIR: /data
SYNAPSE_CONFIG_PATH: /data/homeserver.yaml
ports:
- "${SYNAPSE_CLIENT_PORT:-8008}:8008"
- "${SYNAPSE_FEDERATION_PORT:-8448}:8448"
volumes:
- ./matrix/synapse/homeserver.yaml:/data/homeserver.yaml:ro
- synapse_data:/data/media_store
- synapse_signing_key:/data/keys
depends_on:
postgres:
condition: service_healthy
synapse-db-init:
condition: service_completed_successfully
healthcheck:
test: ["CMD-SHELL", "curl -fSs http://localhost:8008/health || exit 1"]
interval: 15s
timeout: 5s
retries: 5
start_period: 30s
networks:
- mosaic-network
labels:
com.mosaic.service: "matrix-synapse"
com.mosaic.description: "Matrix homeserver (dev)"
# ======================
# Element Web (Matrix Client)
# ======================
element-web:
image: vectorim/element-web:latest
container_name: mosaic-element-web
restart: unless-stopped
ports:
- "${ELEMENT_PORT:-8501}:80"
volumes:
- ./matrix/element/config.json:/app/config.json:ro
depends_on:
synapse:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:80 || exit 1"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
networks:
- mosaic-network
labels:
com.mosaic.service: "matrix-element"
com.mosaic.description: "Element Web client (dev)"
volumes:
synapse_data:
name: mosaic-synapse-data
synapse_signing_key:
name: mosaic-synapse-signing-key


@@ -51,6 +51,12 @@ services:
LOG_LEVEL: ${LOG_LEVEL:-info}
HOST: 0.0.0.0
PORT: 8000
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
ports:
- "8000:8000"
healthcheck:
@@ -122,6 +128,29 @@ services:
com.mosaic.service: "secrets-init"
com.mosaic.description: "OpenBao auto-initialization sidecar"
# ======================
# Telemetry API (Optional - for local development)
# ======================
# Uncomment to run the telemetry API locally for self-contained development.
# For production, use an external telemetry API URL instead.
# telemetry-api:
# image: git.mosaicstack.dev/mosaic/telemetry-api:latest
# container_name: mosaic-telemetry-api
# restart: unless-stopped
# environment:
# HOST: 0.0.0.0
# PORT: 8000
# ports:
# - "8001:8000"
# healthcheck:
# test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
# interval: 30s
# timeout: 10s
# retries: 3
# start_period: 10s
# networks:
# - mosaic-network
volumes:
postgres_data:
name: mosaic-postgres-data


@@ -0,0 +1,30 @@
{
"default_server_config": {
"m.homeserver": {
"base_url": "http://localhost:8008",
"server_name": "localhost"
}
},
"brand": "Mosaic Stack Dev",
"default_theme": "dark",
"room_directory": {
"servers": ["localhost"]
},
"features": {
"feature_video_rooms": false,
"feature_group_calls": false
},
"show_labs_settings": true,
"piwik": false,
"posthog": {
"enabled": false
},
"privacy_policy_url": null,
"terms_and_conditions_links": [],
"setting_defaults": {
"breadcrumbs": true,
"custom_themes": []
},
"disable_guests": true,
"disable_3pid_login": true
}


@@ -0,0 +1,203 @@
#!/usr/bin/env bash
# ==============================================
# Matrix Bot Account Setup Script
# ==============================================
#
# Creates the Mosaic bot user on the local Synapse instance and retrieves
# an access token. Idempotent — safe to run multiple times.
#
# Usage:
# docker/matrix/scripts/setup-bot.sh
# docker/matrix/scripts/setup-bot.sh --username custom-bot --password custom-pass
#
# Prerequisites:
# - Synapse must be running (docker compose -f ... up synapse)
# - Synapse must be healthy (check with: curl http://localhost:8008/health)
#
# Output:
# Prints the environment variables needed for MatrixService configuration.
#
# ==============================================
set -euo pipefail
# Defaults
SYNAPSE_URL="${SYNAPSE_URL:-http://localhost:8008}"
BOT_USERNAME="${BOT_USERNAME:-mosaic-bot}"
BOT_PASSWORD="${BOT_PASSWORD:-mosaic-bot-dev-password}"
BOT_DISPLAY_NAME="${BOT_DISPLAY_NAME:-Mosaic Bot}"
ADMIN_USERNAME="${ADMIN_USERNAME:-admin}"
ADMIN_PASSWORD="${ADMIN_PASSWORD:-admin-dev-password}"
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--username) BOT_USERNAME="$2"; shift 2 ;;
--password) BOT_PASSWORD="$2"; shift 2 ;;
--synapse-url) SYNAPSE_URL="$2"; shift 2 ;;
--admin-username) ADMIN_USERNAME="$2"; shift 2 ;;
--admin-password) ADMIN_PASSWORD="$2"; shift 2 ;;
--help|-h)
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " --username NAME Bot username (default: mosaic-bot)"
echo " --password PASS Bot password (default: mosaic-bot-dev-password)"
echo " --synapse-url URL Synapse URL (default: http://localhost:8008)"
echo " --admin-username NAME Admin username (default: admin)"
echo " --admin-password PASS Admin password (default: admin-dev-password)"
echo " --help, -h Show this help"
exit 0
;;
*) echo "Unknown option: $1"; exit 1 ;;
esac
done
echo "=== Mosaic Stack — Matrix Bot Setup ==="
echo ""
echo "Synapse URL: ${SYNAPSE_URL}"
echo "Bot username: ${BOT_USERNAME}"
echo ""
# Wait for Synapse to be ready
echo "Checking Synapse health..."
for i in $(seq 1 30); do
if curl -fsSo /dev/null "${SYNAPSE_URL}/health" 2>/dev/null; then
echo "Synapse is healthy."
break
fi
if [ "$i" -eq 30 ]; then
echo "ERROR: Synapse is not responding at ${SYNAPSE_URL}/health after 30 attempts."
echo "Make sure Synapse is running:"
echo " docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml up -d"
exit 1
fi
echo " Waiting for Synapse... (attempt ${i}/30)"
sleep 2
done
echo ""
# Step 1: Register admin account (if not exists)
# The /_synapse/admin/v1/register nonce flow requires an HMAC signed with the
# registration shared secret, so for dev we register via the Synapse CLI instead.
echo "Step 1: Registering admin account '${ADMIN_USERNAME}'..."
docker exec mosaic-synapse register_new_matrix_user \
-u "${ADMIN_USERNAME}" \
-p "${ADMIN_PASSWORD}" \
-a \
-c /data/homeserver.yaml \
http://localhost:8008 2>/dev/null && echo "  Admin account created." || echo "  Admin account already exists (or registration failed — continuing)."
echo ""
# Step 2: Get admin access token
echo "Step 2: Obtaining admin access token..."
ADMIN_LOGIN_RESPONSE=$(curl -sS -X POST "${SYNAPSE_URL}/_matrix/client/v3/login" \
-H "Content-Type: application/json" \
-d "$(jq -n \
--arg user "$ADMIN_USERNAME" \
--arg pw "$ADMIN_PASSWORD" \
'{type: "m.login.password", identifier: {type: "m.id.user", user: $user}, password: $pw}')" \
2>/dev/null)
ADMIN_TOKEN=$(echo "${ADMIN_LOGIN_RESPONSE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('access_token',''))" 2>/dev/null || true)
if [ -z "${ADMIN_TOKEN}" ]; then
echo "ERROR: Could not obtain admin access token."
echo "Response: ${ADMIN_LOGIN_RESPONSE}"
echo ""
echo "Try registering the admin account manually:"
echo " docker exec -it mosaic-synapse register_new_matrix_user -u ${ADMIN_USERNAME} -p ${ADMIN_PASSWORD} -a -c /data/homeserver.yaml http://localhost:8008"
exit 1
fi
echo " Admin token obtained."
echo ""
# Step 3: Register bot account via admin API (idempotent)
echo "Step 3: Registering bot account '${BOT_USERNAME}'..."
BOT_REGISTER_RESPONSE=$(curl -sS -X PUT "${SYNAPSE_URL}/_synapse/admin/v2/users/@${BOT_USERNAME}:localhost" \
-H "Authorization: Bearer ${ADMIN_TOKEN}" \
-H "Content-Type: application/json" \
-d "$(jq -n \
--arg pw "$BOT_PASSWORD" \
--arg dn "$BOT_DISPLAY_NAME" \
'{password: $pw, displayname: $dn, admin: false, deactivated: false}')" \
2>/dev/null)
BOT_EXISTS=$(echo "${BOT_REGISTER_RESPONSE}" | python3 -c "import sys,json; d=json.load(sys.stdin); print('yes' if d.get('name') else 'no')" 2>/dev/null || echo "no")
if [ "${BOT_EXISTS}" = "yes" ]; then
echo " Bot account '@${BOT_USERNAME}:localhost' is ready."
else
echo " WARNING: Bot registration response unexpected: ${BOT_REGISTER_RESPONSE}"
echo " Continuing anyway — bot may already exist."
fi
echo ""
# Step 4: Get bot access token
echo "Step 4: Obtaining bot access token..."
BOT_LOGIN_RESPONSE=$(curl -sS -X POST "${SYNAPSE_URL}/_matrix/client/v3/login" \
-H "Content-Type: application/json" \
-d "$(jq -n \
--arg user "$BOT_USERNAME" \
--arg pw "$BOT_PASSWORD" \
'{type: "m.login.password", identifier: {type: "m.id.user", user: $user}, password: $pw}')" \
2>/dev/null)
BOT_TOKEN=$(echo "${BOT_LOGIN_RESPONSE}" | python3 -c "import sys,json; print(json.load(sys.stdin).get('access_token',''))" 2>/dev/null || true)
if [ -z "${BOT_TOKEN}" ]; then
echo "ERROR: Could not obtain bot access token."
echo "Response: ${BOT_LOGIN_RESPONSE}"
exit 1
fi
echo " Bot token obtained."
echo ""
# Step 5: Output configuration
echo "============================================"
echo " Matrix Bot Setup Complete"
echo "============================================"
echo ""
echo "Add the following to your .env file:"
echo ""
echo " # Matrix Bridge Configuration"
echo " MATRIX_HOMESERVER_URL=http://localhost:8008"
echo " MATRIX_ACCESS_TOKEN=${BOT_TOKEN}"
echo " MATRIX_BOT_USER_ID=@${BOT_USERNAME}:localhost"
echo " MATRIX_SERVER_NAME=localhost"
echo ""
echo "Or, if running the API inside Docker (same compose network):"
echo ""
echo " MATRIX_HOMESERVER_URL=http://synapse:8008"
echo " MATRIX_ACCESS_TOKEN=${BOT_TOKEN}"
echo " MATRIX_BOT_USER_ID=@${BOT_USERNAME}:localhost"
echo " MATRIX_SERVER_NAME=localhost"
echo ""
echo "Element Web is available at: http://localhost:8501"
echo " Login with any registered user to test messaging."
echo ""
echo "Admin account: ${ADMIN_USERNAME} / ${ADMIN_PASSWORD}"
echo "Bot account: ${BOT_USERNAME} / ${BOT_PASSWORD}"
echo "============================================"


@@ -0,0 +1,131 @@
# ==============================================
# Synapse Homeserver Configuration — Development Only
# ==============================================
#
# This config is for LOCAL DEVELOPMENT with the Mosaic Stack docker-compose overlay.
# Do NOT use this in production. See docker-compose.sample.matrix.yml for production.
#
# Server name is set to 'localhost' — this is permanent and cannot be changed
# after the database has been initialized.
#
# ==============================================
server_name: "localhost"
pid_file: /data/homeserver.pid
public_baseurl: "http://localhost:8008/"
# ======================
# Network Listeners
# ======================
listeners:
# Client API (used by Element Web, Mosaic bridge, etc.)
- port: 8008
tls: false
type: http
x_forwarded: true
bind_addresses: ["0.0.0.0"]
resources:
- names: [client, federation]
compress: false
# ======================
# Database (Shared PostgreSQL)
# ======================
database:
name: psycopg2
txn_limit: 10000
args:
user: "synapse"
password: "synapse_dev_password"
database: "synapse"
host: "postgres"
port: 5432
cp_min: 5
cp_max: 10
# ======================
# Media Storage
# ======================
media_store_path: /data/media_store
max_upload_size: 50M
url_preview_enabled: true
url_preview_ip_range_blacklist:
- "127.0.0.0/8"
- "10.0.0.0/8"
- "172.16.0.0/12"
- "192.168.0.0/16"
- "100.64.0.0/10"
- "192.0.0.0/24"
- "169.254.0.0/16"
- "198.18.0.0/15"
- "::1/128"
- "fe80::/10"
- "fc00::/7"
- "2001:db8::/32"
- "ff00::/8"
- "fec0::/10"
# ======================
# Registration (Dev Only)
# ======================
enable_registration: true
enable_registration_without_verification: true
# ======================
# Signing Keys
# ======================
# Auto-generated on first startup and persisted in the signing_key volume
signing_key_path: "/data/keys/localhost.signing.key"
# Suppress warning about trusted key servers in dev
suppress_key_server_warning: true
trusted_key_servers: []
# ======================
# Room Configuration
# ======================
enable_room_list_search: true
allow_public_rooms_over_federation: false
# ======================
# Rate Limiting (Relaxed for Dev)
# ======================
rc_message:
per_second: 100
burst_count: 200
rc_registration:
per_second: 10
burst_count: 50
rc_login:
address:
per_second: 10
burst_count: 50
account:
per_second: 10
burst_count: 50
# ======================
# Logging
# ======================
log_config: "/data/localhost.log.config"
# Inline log config — write to stdout for docker logs
# Synapse falls back to a basic console logger if the log_config file is missing,
# so we leave log_config pointing to a non-existent file intentionally.
# Override: mount a custom log config file at /data/localhost.log.config
# ======================
# Miscellaneous
# ======================
report_stats: false
macaroon_secret_key: "dev-macaroon-secret-change-in-production"
form_secret: "dev-form-secret-change-in-production"
# Enable presence for dev
use_presence: true
# Retention policy (optional, keep messages for 180 days in dev)
retention:
enabled: false

docs/MATRIX-BRIDGE.md Normal file

@@ -0,0 +1,537 @@
# Matrix Bridge
Integration between Mosaic Stack and the Matrix protocol, enabling workspace management
and job orchestration through Matrix chat rooms.
## Overview
The Matrix bridge connects Mosaic Stack to any Matrix homeserver (Synapse, Dendrite, Conduit,
etc.), allowing users to interact with the platform through Matrix clients like Element,
FluffyChat, or any other Matrix-compatible application.
Key capabilities:
- **Command interface** -- Issue bot commands (`@mosaic fix #42`) from any mapped Matrix room
- **Workspace-room mapping** -- Each Mosaic workspace can be linked to a Matrix room
- **Threaded job updates** -- Job progress is posted to MSC3440 threads, keeping rooms clean
- **Streaming AI responses** -- LLM output streams to Matrix via rate-limited message edits
- **Multi-provider broadcasting** -- HeraldService broadcasts status updates to all active
chat providers (Discord and Matrix can run simultaneously)
### Architecture
```
Matrix Client (Element, FluffyChat, etc.)
|
v
Synapse Homeserver
|
matrix-bot-sdk
|
v
+------------------+ +---------------------+
| MatrixService |<----->| CommandParserService |
| (IChatProvider) | | (shared, all platforms)
+------------------+ +---------------------+
| |
| v
| +--------------------+
| | MatrixRoomService | workspace <-> room mapping
| +--------------------+
| |
v v
+------------------+ +----------------+
| StitcherService | | PrismaService |
| (job dispatch) | | (database) |
+------------------+ +----------------+
|
v
+------------------+
| HeraldService | broadcasts to CHAT_PROVIDERS[]
+------------------+
|
v
+---------------------------+
| MatrixStreamingService | streaming AI responses
| (m.replace edits, typing) |
+---------------------------+
```
## Quick Start
### 1. Start the dev environment
The Matrix dev environment uses a Docker Compose overlay that adds Synapse and Element Web
alongside the existing Mosaic Stack services.
```bash
# Using Makefile (recommended)
make matrix-up
# Or manually
docker compose -f docker/docker-compose.yml -f docker/docker-compose.matrix.yml up -d
```
This starts:
| Service | URL | Purpose |
| ----------- | --------------------- | ----------------------- |
| Synapse | http://localhost:8008 | Matrix homeserver |
| Element Web | http://localhost:8501 | Web-based Matrix client |
Both services share the existing Mosaic PostgreSQL instance. A `synapse-db-init` container
runs once to create the `synapse` database and user, then exits.
### 2. Create the bot account
After Synapse is healthy, run the setup script to create admin and bot accounts:
```bash
make matrix-setup-bot
# Or directly
docker/matrix/scripts/setup-bot.sh
```
The script:
1. Registers an admin account (`admin` / `admin-dev-password`)
2. Obtains an admin access token
3. Creates the bot account (`mosaic-bot` / `mosaic-bot-dev-password`)
4. Retrieves the bot access token
5. Prints the environment variables to add to `.env`
Custom credentials can be passed:
```bash
docker/matrix/scripts/setup-bot.sh \
--username custom-bot \
--password custom-pass \
--admin-username myadmin \
--admin-password myadmin-pass
```
### 3. Configure environment variables
Copy the output from the setup script into your `.env` file:
```bash
# Matrix Bridge Configuration
MATRIX_HOMESERVER_URL=http://localhost:8008
MATRIX_ACCESS_TOKEN=<token from setup-bot.sh>
MATRIX_BOT_USER_ID=@mosaic-bot:localhost
MATRIX_CONTROL_ROOM_ID=!roomid:localhost
MATRIX_WORKSPACE_ID=<your-workspace-uuid>
```
If running the API inside the Docker Compose network, use the internal hostname:
```bash
MATRIX_HOMESERVER_URL=http://synapse:8008
```
### 4. Restart the API
```bash
pnpm dev:api
# or
make docker-restart
```
The BridgeModule will detect `MATRIX_ACCESS_TOKEN` and enable the Matrix bridge
automatically.
### 5. Test in Element Web
1. Open http://localhost:8501
2. Register or log in with any account
3. Create a room and invite `@mosaic-bot:localhost`
4. Send `@mosaic help` or `!mosaic help`
## Configuration
### Environment Variables
| Variable | Description | Example |
| ------------------------ | --------------------------------------------- | ----------------------------- |
| `MATRIX_HOMESERVER_URL` | Matrix server URL | `http://localhost:8008` |
| `MATRIX_ACCESS_TOKEN` | Bot access token (from setup script or login) | `syt_bW9z...` |
| `MATRIX_BOT_USER_ID` | Bot's full Matrix user ID | `@mosaic-bot:localhost` |
| `MATRIX_CONTROL_ROOM_ID` | Default room for status broadcasts | `!abcdef:localhost` |
| `MATRIX_WORKSPACE_ID` | Default workspace UUID for the control room | `550e8400-e29b-41d4-a716-...` |
All variables are read from `process.env` at service construction time. The bridge activates
only when `MATRIX_ACCESS_TOKEN` is set.
### Dev Environment Variables (docker-compose.matrix.yml)
These configure the local Synapse and Element Web instances:
| Variable | Default | Purpose |
| --------------------------- | ---------------------- | ------------------------- |
| `SYNAPSE_POSTGRES_DB` | `synapse` | Synapse database name |
| `SYNAPSE_POSTGRES_USER` | `synapse` | Synapse database user |
| `SYNAPSE_POSTGRES_PASSWORD` | `synapse_dev_password` | Synapse database password |
| `SYNAPSE_CLIENT_PORT` | `8008` | Synapse client API port |
| `SYNAPSE_FEDERATION_PORT` | `8448` | Synapse federation port |
| `ELEMENT_PORT` | `8501` | Element Web port |
## Architecture
### Service Responsibilities
**MatrixService** (`apps/api/src/bridge/matrix/matrix.service.ts`)
The primary Matrix integration. Implements the `IChatProvider` interface.
- Connects to the homeserver using `matrix-bot-sdk`
- Listens for `room.message` events in all joined rooms
- Resolves workspace context via MatrixRoomService (or falls back to control room)
- Normalizes `!mosaic` prefix to `@mosaic` for the shared CommandParserService
- Dispatches parsed commands to StitcherService for job execution
- Creates MSC3440 threads for job updates
- Auto-joins rooms when invited (`AutojoinRoomsMixin`)
**MatrixRoomService** (`apps/api/src/bridge/matrix/matrix-room.service.ts`)
Manages the mapping between Mosaic workspaces and Matrix rooms.
- **Provision**: Creates a private Matrix room named `Mosaic: {workspace_name}` with alias
`#mosaic-{slug}:{server}`
- **Link/Unlink**: Maps existing rooms to workspaces via `workspace.matrixRoomId`
- **Lookup**: Forward lookup (workspace -> room) and reverse lookup (room -> workspace)
- Room mappings are stored in the `workspace` table's `matrixRoomId` column
**MatrixStreamingService** (`apps/api/src/bridge/matrix/matrix-streaming.service.ts`)
Streams AI responses to Matrix rooms using incremental message edits.
- Sends an initial "Thinking..." placeholder message
- Activates typing indicator during generation
- Buffers incoming tokens and edits the message every 500ms (rate-limited)
- On completion, sends a final clean edit with optional token usage stats
- On error, edits the message with an error notice
- Supports threaded responses via MSC3440
**CommandParserService** (`apps/api/src/bridge/parser/command-parser.service.ts`)
Shared, platform-agnostic command parser used by both Discord and Matrix bridges.
- Parses `@mosaic <action> [args]` commands
- Supports issue references in multiple formats: `#42`, `owner/repo#42`, full URL
- Returns typed `ParsedCommand` objects or structured parse errors with help text
**BridgeModule** (`apps/api/src/bridge/bridge.module.ts`)
Conditional module loader. Inspects environment variables at startup:
- If `DISCORD_BOT_TOKEN` is set, Discord bridge is added to `CHAT_PROVIDERS`
- If `MATRIX_ACCESS_TOKEN` is set, Matrix bridge is added to `CHAT_PROVIDERS`
- Both can run simultaneously; neither is a dependency of the other
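The env-based selection can be sketched as a small pure function. This is a simplified stand-in for the NestJS module logic: `selectProviders` and its string return values are illustrative, while the real module registers service classes under the `CHAT_PROVIDERS` injection token.

```typescript
// Sketch of BridgeModule's env-based provider selection (illustrative names;
// the real module registers DiscordService/MatrixService class providers).
function selectProviders(env: Record<string, string | undefined>): string[] {
  const providers: string[] = [];
  if (env.DISCORD_BOT_TOKEN) providers.push("DiscordService");
  if (env.MATRIX_ACCESS_TOKEN) providers.push("MatrixService");
  return providers; // both can be active at once; neither requires the other
}
```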
**HeraldService** (`apps/api/src/herald/herald.service.ts`)
Status broadcaster that sends job event updates to all active chat providers.
- Iterates over the `CHAT_PROVIDERS` injection token
- Sends thread messages for job lifecycle events (created, started, completed, failed, etc.)
- Uses PDA-friendly language (no "OVERDUE", "URGENT", etc.)
- If one provider fails, others still receive the broadcast
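The failure-isolation behavior can be sketched with `Promise.allSettled`, so one failing provider never blocks the rest. The trimmed interface and the `broadcast` helper are illustrative, not the actual HeraldService code:

```typescript
interface ThreadMessage { channelId: string; threadId: string; content: string }
interface ChatProviderLike {
  sendThreadMessage(msg: ThreadMessage): Promise<void>;
}

// Broadcast to every provider; a rejection from one is recorded, not rethrown.
// Returns the number of providers that received the message.
async function broadcast(providers: ChatProviderLike[], msg: ThreadMessage): Promise<number> {
  const results = await Promise.allSettled(providers.map((p) => p.sendThreadMessage(msg)));
  return results.filter((r) => r.status === "fulfilled").length;
}
```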
### Data Flow
```
1. User sends "@mosaic fix #42" in a Matrix room
2. MatrixService receives room.message event
3. MatrixRoomService resolves room -> workspace mapping
4. CommandParserService parses the command (action=FIX, issue=#42)
5. MatrixService creates a thread (MSC3440) for job updates
6. StitcherService dispatches the job with workspace context
7. HeraldService receives job events and broadcasts to all CHAT_PROVIDERS
8. Thread messages appear in the Matrix room thread
```
### Thread Model (MSC3440)
Matrix threads are implemented per [MSC3440](https://github.com/matrix-org/matrix-spec-proposals/pull/3440):
- A **thread root** is created by sending a regular `m.room.message` event
- Subsequent messages reference the root via `m.relates_to` with `rel_type: "m.thread"`
- The `is_falling_back: true` flag and `m.in_reply_to` provide compatibility with clients
that do not support threads
- Thread root event IDs are stored in job metadata for HeraldService to post updates
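A minimal builder for such a threaded message body, following the MSC3440 event shape described above (`threadMessageContent` is an illustrative helper, not part of the codebase):

```typescript
// Builds an m.room.message body that posts into an MSC3440 thread.
// is_falling_back plus m.in_reply_to keep non-thread clients usable:
// they render the message as a plain reply to the thread root.
function threadMessageContent(rootEventId: string, body: string) {
  return {
    msgtype: "m.text",
    body,
    "m.relates_to": {
      rel_type: "m.thread",
      event_id: rootEventId,
      is_falling_back: true,
      "m.in_reply_to": { event_id: rootEventId },
    },
  };
}
```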
## Commands
All commands accept either `@mosaic` or `!mosaic` prefix. The `!mosaic` form is
normalized to `@mosaic` internally before parsing.
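The normalization is essentially a prefix rewrite, e.g. (a sketch; the real MatrixService may also trim whitespace or rendered mentions):

```typescript
// Rewrites a leading "!mosaic" to "@mosaic" so one shared parser handles both.
const normalizeCommandPrefix = (body: string): string =>
  body.replace(/^!mosaic\b/, "@mosaic");
```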
| Command | Description | Example |
| -------------------------- | ----------------------------- | ---------------------------- |
| `@mosaic fix <issue>` | Start a job for an issue | `@mosaic fix #42` |
| `@mosaic status <job-id>` | Check job status | `@mosaic status job-abc123` |
| `@mosaic cancel <job-id>` | Cancel a running job | `@mosaic cancel job-abc123` |
| `@mosaic retry <job-id>` | Retry a failed job | `@mosaic retry job-abc123` |
| `@mosaic verbose <job-id>` | Stream full logs to thread | `@mosaic verbose job-abc123` |
| `@mosaic quiet` | Reduce notification verbosity | `@mosaic quiet` |
| `@mosaic help` | Show available commands | `@mosaic help` |
### Issue Reference Formats
The `fix` command accepts issue references in multiple formats:
```
@mosaic fix #42 # Current repo
@mosaic fix owner/repo#42 # Cross-repo
@mosaic fix https://git.example.com/o/r/issues/42 # Full URL
```
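The corresponding parsing logic can be sketched as below. The regexes and the `IssueRef` shape are assumptions for illustration; the real CommandParserService may accept more variants:

```typescript
type IssueRef = { owner?: string; repo?: string; number: number };

// Parses the three accepted forms: "#42", "owner/repo#42", and a full issue URL.
// Returns null for anything unrecognized.
function parseIssueRef(arg: string): IssueRef | null {
  let m = /^#(\d+)$/.exec(arg);
  if (m) return { number: Number(m[1]) };
  m = /^([\w.-]+)\/([\w.-]+)#(\d+)$/.exec(arg);
  if (m) return { owner: m[1], repo: m[2], number: Number(m[3]) };
  m = /^https?:\/\/[^/]+\/([\w.-]+)\/([\w.-]+)\/issues\/(\d+)$/.exec(arg);
  if (m) return { owner: m[1], repo: m[2], number: Number(m[3]) };
  return null;
}
```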
### Noise Management
Job updates are scoped to threads to keep main rooms clean:
- **Main room**: Low verbosity -- milestone completions only
- **Job threads**: Medium verbosity -- step completions and status changes
- **DMs**: Configurable per user (planned)
## Workspace-Room Mapping
Each Mosaic workspace can be associated with one Matrix room. The mapping is stored in the
`workspace` table's `matrixRoomId` column.
### Automatic Provisioning
When a workspace needs a Matrix room, MatrixRoomService provisions one:
```
Room name: "Mosaic: My Workspace"
Room alias: #mosaic-my-workspace:localhost
Visibility: private
```
The room ID is then stored in `workspace.matrixRoomId`.
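The provisioning request body can be sketched as follows. The slug rule (lowercase, non-alphanumerics collapsed to hyphens) is an assumption inferred from the alias format above:

```typescript
// Builds a Matrix /createRoom request body for workspace provisioning.
// Slug derivation is an assumption inferred from "#mosaic-my-workspace".
function provisionRoomRequest(workspaceName: string) {
  const slug = workspaceName
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/(^-+|-+$)/g, "");
  return {
    name: `Mosaic: ${workspaceName}`,
    room_alias_name: `mosaic-${slug}`, // local part; homeserver appends :<server_name>
    visibility: "private" as const,
    preset: "private_chat" as const,
  };
}
```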
### Manual Linking
Existing rooms can be linked to workspaces:
```typescript
await matrixRoomService.linkWorkspaceToRoom(workspaceId, "!roomid:localhost");
```
And unlinked:
```typescript
await matrixRoomService.unlinkWorkspace(workspaceId);
```
### Message Routing
When a message arrives in a room:
1. MatrixRoomService performs a reverse lookup: room ID -> workspace ID
2. If no mapping is found, the service checks if the room is the configured control room
(`MATRIX_CONTROL_ROOM_ID`) and uses `MATRIX_WORKSPACE_ID` as fallback
3. If still unmapped, the message is ignored
This ensures commands only execute within a valid workspace context.
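In sketch form, with an injected `lookup` standing in for MatrixRoomService's reverse lookup (the function and option names are illustrative):

```typescript
interface RoutingEnv { controlRoomId?: string; workspaceId?: string }

// Reverse lookup with control-room fallback. Returns the workspace ID, or
// null for unmapped rooms (in which case the message is ignored).
async function resolveWorkspace(
  roomId: string,
  lookup: (roomId: string) => Promise<string | null>,
  env: RoutingEnv,
): Promise<string | null> {
  const mapped = await lookup(roomId);
  if (mapped) return mapped;
  if (env.controlRoomId === roomId && env.workspaceId) return env.workspaceId;
  return null;
}
```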
## Streaming Responses
MatrixStreamingService enables real-time AI response streaming in Matrix rooms.
### How It Works
1. An initial placeholder message ("Thinking...") is sent to the room
2. The bot's typing indicator is activated
3. Tokens from the LLM arrive via an `AsyncIterable<string>`
4. Tokens are buffered and the message is edited via `m.replace` events
5. Edits are rate-limited to a maximum of once every **500ms** to avoid flooding the
homeserver
6. When streaming completes, a final clean edit is sent and the typing indicator clears
7. On error, the message is edited to include an error notice
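The buffering loop can be sketched as below. `edit` stands in for the `m.replace` send, and the clock is injected so the 500ms throttle is testable; the real MatrixStreamingService also manages the placeholder message and typing indicator:

```typescript
// Accumulates tokens and flushes an edit at most once per `intervalMs`,
// plus a final clean edit when the stream ends. Returns the full text.
async function streamWithEdits(
  tokens: AsyncIterable<string>,
  edit: (fullText: string) => Promise<void>,
  intervalMs = 500,
  now: () => number = Date.now,
): Promise<string> {
  let text = "";
  let lastEdit = 0;
  for await (const tok of tokens) {
    text += tok;
    if (now() - lastEdit >= intervalMs) {
      await edit(text); // rate-limited incremental edit
      lastEdit = now();
    }
  }
  await edit(text); // final clean edit
  return text;
}
```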
### Message Edit Format (m.replace)
```json
{
  "m.new_content": {
    "msgtype": "m.text",
    "body": "Updated response text"
  },
  "m.relates_to": {
    "rel_type": "m.replace",
    "event_id": "$original_event_id"
  },
  "msgtype": "m.text",
  "body": "* Updated response text"
}
```
The top-level `body` prefixed with `*` serves as a fallback for clients that do not
support message edits.
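A minimal helper that assembles this payload might look as follows (`buildEdit` is a hypothetical name, not part of matrix-bot-sdk):

```typescript
// Builds an m.replace edit event for a previously sent message.
function buildEdit(originalEventId: string, newBody: string) {
  return {
    "m.new_content": { msgtype: "m.text", body: newBody },
    "m.relates_to": { rel_type: "m.replace", event_id: originalEventId },
    msgtype: "m.text",
    body: `* ${newBody}`, // fallback text for clients without edit support
  };
}
```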
### Thread Support
Streaming responses can target a specific thread by passing `threadId` in the options.
The initial message and all edits will include the `m.thread` relation.
## Development
### Running Tests
```bash
# All bridge tests
pnpm test -- --filter @mosaic/api -- matrix
# Individual service tests
pnpm test -- --filter @mosaic/api -- matrix.service
pnpm test -- --filter @mosaic/api -- matrix-room.service
pnpm test -- --filter @mosaic/api -- matrix-streaming.service
pnpm test -- --filter @mosaic/api -- command-parser
pnpm test -- --filter @mosaic/api -- bridge.module
```
### Adding a New Command
1. Add the action to the `CommandAction` enum in
`apps/api/src/bridge/parser/command.interface.ts`
2. Add parsing logic in `CommandParserService.parseActionArguments()`
(`apps/api/src/bridge/parser/command-parser.service.ts`)
3. Add the handler case in `MatrixService.handleParsedCommand()`
(`apps/api/src/bridge/matrix/matrix.service.ts`)
4. Implement the handler method (e.g., `handleNewCommand()`)
5. Update the help text in `MatrixService.handleHelpCommand()`
6. Add tests for the new command in both the parser and service spec files
### Extending the Bridge
The `IChatProvider` interface (`apps/api/src/bridge/interfaces/chat-provider.interface.ts`)
defines the contract all chat bridges implement:
```typescript
interface IChatProvider {
  connect(): Promise<void>;
  disconnect(): Promise<void>;
  isConnected(): boolean;
  sendMessage(channelId: string, content: string): Promise<void>;
  createThread(options: ThreadCreateOptions): Promise<string>;
  sendThreadMessage(options: ThreadMessageOptions): Promise<void>;
  parseCommand(message: ChatMessage): ChatCommand | null;
  editMessage?(channelId: string, messageId: string, content: string): Promise<void>;
}
```
To add a new chat platform:
1. Create a new service implementing `IChatProvider`
2. Register it in `BridgeModule` with a conditional check on its environment variable
3. Add it to the `CHAT_PROVIDERS` factory
4. HeraldService will automatically broadcast to it with no further changes
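Steps 2 and 3 amount to a factory that instantiates a provider only when its environment variable is present. A plain-TypeScript sketch (the real factory lives in `BridgeModule` and uses NestJS dependency injection):

```typescript
interface ProviderCandidate {
  envVar: string;                 // env variable that enables this provider
  create: () => { name: string }; // provider constructor (simplified)
}

// Returns one provider instance per candidate whose env variable is set.
function buildChatProviders(
  env: Record<string, string | undefined>,
  candidates: ProviderCandidate[],
): { name: string }[] {
  return candidates.filter((c) => Boolean(env[c.envVar])).map((c) => c.create());
}
```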
### File Layout
```
apps/api/src/
  bridge/
    bridge.module.ts                   # Conditional module loader
    bridge.constants.ts                # CHAT_PROVIDERS injection token
    interfaces/
      chat-provider.interface.ts       # IChatProvider contract
      index.ts
    parser/
      command-parser.service.ts        # Shared command parser
      command-parser.spec.ts
      command.interface.ts             # Command types and enums
    matrix/
      matrix.service.ts                # Core Matrix integration
      matrix.service.spec.ts
      matrix-room.service.ts           # Workspace-room mapping
      matrix-room.service.spec.ts
      matrix-streaming.service.ts      # Streaming AI responses
      matrix-streaming.service.spec.ts
    discord/
      discord.service.ts               # Discord integration (parallel)
  herald/
    herald.module.ts
    herald.service.ts                  # Status broadcasting
    herald.service.spec.ts
docker/
  docker-compose.matrix.yml            # Dev overlay (Synapse + Element)
  docker-compose.sample.matrix.yml     # Production sample (Swarm)
matrix/
  synapse/
    homeserver.yaml                    # Dev Synapse config
  element/
    config.json                        # Dev Element Web config
scripts/
  setup-bot.sh                         # Bot account setup
```
## Deployment
### Production Considerations
The dev environment uses relaxed settings that are not suitable for production.
Review and address the following before deploying:
**Synapse Configuration**
- Set a proper `server_name` (this is permanent and cannot change after first run)
- Disable open registration (`enable_registration: false`)
- Replace dev secrets (`macaroon_secret_key`, `form_secret`) with strong random values
- Configure proper rate limiting (dev config allows 100 msg/sec)
- Set up TLS termination (via reverse proxy or Synapse directly)
- Consider a dedicated PostgreSQL instance rather than the shared Mosaic database
**Bot Security**
- Generate a strong bot password (not the dev default)
- Store the access token securely (use a secrets manager or encrypted `.env`)
- The bot auto-joins rooms when invited -- consider restricting this in production
by removing `AutojoinRoomsMixin` and implementing allow-list logic
**Environment Variables**
- `MATRIX_WORKSPACE_ID` should be a valid workspace UUID from your database; all
commands from the control room execute within this workspace context
**Network**
- If Synapse runs on a separate host, ensure `MATRIX_HOMESERVER_URL` points to the
correct endpoint
- For federation, configure DNS SRV records and `.well-known` delegation
### Sample Production Stack
A production-ready Docker Swarm compose file is provided at
`docker/docker-compose.sample.matrix.yml`. It includes:
- Synapse with Traefik labels for automatic TLS
- Element Web with its own domain
- Dedicated PostgreSQL instance for Synapse
- Optional coturn (TURN/STUN) for voice/video
Deploy via Portainer or Docker Swarm CLI:
```bash
docker stack deploy -c docker/docker-compose.sample.matrix.yml matrix
```
After deploying, follow the post-deploy steps in the compose file comments to create
accounts and configure the Mosaic Stack connection.
### Makefile Targets
| Target | Description |
| ----------------------- | ----------------------------------------- |
| `make matrix-up` | Start Synapse + Element Web (dev overlay) |
| `make matrix-down` | Stop Matrix services |
| `make matrix-logs` | Follow Synapse and Element logs |
| `make matrix-setup-bot` | Run bot account setup script |


@@ -1,4 +1,102 @@
# Tasks
## M10-Telemetry (0.0.10) — Telemetry Integration
**Orchestrator:** Claude Code
**Started:** 2026-02-15
**Branch:** feature/m10-telemetry
**Milestone:** M10-Telemetry (0.0.10)
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used |
| ------- | ------ | ------------------------------------------------------------ | ----- | ----------- | --------------------- | --------------- | ----------------------- | ----- | ----------------- | ----------------- | -------- | ---- |
| TEL-001 | done | Install @mosaicstack/telemetry-client in API + NestJS module | #369 | api | feature/m10-telemetry | | TEL-004,TEL-006,TEL-007 | w-1 | 2026-02-15T10:00Z | 2026-02-15T10:37Z | 20K | 25K |
| TEL-002 | done | Install mosaicstack-telemetry in Coordinator | #370 | coordinator | feature/m10-telemetry | | TEL-005,TEL-006 | w-2 | 2026-02-15T10:00Z | 2026-02-15T10:34Z | 15K | 20K |
| TEL-003 | done | Add telemetry config to docker-compose and .env | #374 | devops | feature/m10-telemetry | | | w-3 | 2026-02-15T10:38Z | 2026-02-15T10:40Z | 8K | 10K |
| TEL-004 | done | Track LLM task completions via Mosaic Telemetry | #371 | api | feature/m10-telemetry | TEL-001 | TEL-007 | w-4 | 2026-02-15T10:38Z | 2026-02-15T10:44Z | 25K | 30K |
| TEL-005 | done | Track orchestrator agent task completions | #372 | coordinator | feature/m10-telemetry | TEL-002 | | w-5 | 2026-02-15T10:45Z | 2026-02-15T10:52Z | 20K | 25K |
| TEL-006 | done | Prediction integration for cost estimation | #373 | api | feature/m10-telemetry | TEL-001,TEL-002 | TEL-007 | w-6 | 2026-02-15T10:45Z | 2026-02-15T10:51Z | 20K | 25K |
| TEL-007 | done | Frontend: Token usage and cost dashboard | #375 | web | feature/m10-telemetry | TEL-004,TEL-006 | TEL-008 | w-7 | 2026-02-15T10:53Z | 2026-02-15T11:03Z | 30K | 115K |
| TEL-008 | done | Documentation: Telemetry integration guide | #376 | docs | feature/m10-telemetry | TEL-007 | | w-8 | 2026-02-15T10:53Z | 2026-02-15T10:58Z | 15K | 75K |
---
## M11-CIPipeline (0.0.11) — CI Pipeline #360 Remediation
**Orchestrator:** Claude Code
**Started:** 2026-02-12
**Branch:** fix/ci-\*
**Epic:** #360
### CI Fix Round 6
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used |
| ----------- | ------ | -------------------------------------------------------------------------------------------- | ----- | ------------ | ---------- | ----------------------- | ----------- | ----- | ----------------- | ----------------- | -------- | ---- |
| CI-FIX6-001 | done | Add @mosaic/ui build to web.yml build-shared step (fixes 10 test suites + 20 typecheck errs) | | ci | fix/ci-366 | | CI-FIX6-003 | w-14 | 2026-02-12T21:00Z | 2026-02-12T21:01Z | 3K | 3K |
| CI-FIX6-002 | done | Move spec file removal to builder stage (layer-aware); add tar CVEs to .trivyignore | | orchestrator | fix/ci-366 | | CI-FIX6-004 | w-15 | 2026-02-12T21:00Z | 2026-02-12T21:15Z | 3K | 5K |
| CI-FIX6-003 | done | Add React.ChangeEvent types to ~10 web files with untyped event handlers (49 lint + 19 TS) | | web | fix/ci-366 | CI-FIX6-001 | CI-FIX6-004 | w-16 | 2026-02-12T21:02Z | 2026-02-12T21:08Z | 12K | 8K |
| CI-FIX6-004 | done | Verification: pnpm lint && pnpm typecheck && pnpm test on web; Dockerfile find validation | | all | fix/ci-366 | CI-FIX6-002,CI-FIX6-003 | | orch | 2026-02-12T21:08Z | 2026-02-12T21:10Z | 5K | 2K |
---
## M12-MatrixBridge (0.0.12) — Matrix/Element Bridge Integration
**Orchestrator:** Claude Code
**Started:** 2026-02-15
**Branch:** feature/m12-matrix-bridge
**Epic:** #377
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used |
| ------ | ------ | --------------------------------------------------------------- | ----- | ------ | ------------------------- | ----------------------------------------- | ----------------------------------------- | -------- | ----------------- | ----------------- | -------- | ---- |
| MB-001 | done | Install matrix-bot-sdk and create MatrixService skeleton | #378 | api | feature/m12-matrix-bridge | | MB-003,MB-004,MB-005,MB-006,MB-007,MB-008 | worker-1 | 2026-02-15T10:00Z | 2026-02-15T10:20Z | 20K | 15K |
| MB-002 | done | Add Synapse + Element Web to docker-compose for dev | #384 | docker | feature/m12-matrix-bridge | | | worker-2 | 2026-02-15T10:00Z | 2026-02-15T10:15Z | 15K | 5K |
| MB-003 | done | Register MatrixService in BridgeModule with conditional loading | #379 | api | feature/m12-matrix-bridge | MB-001 | MB-008 | worker-3 | 2026-02-15T10:25Z | 2026-02-15T10:35Z | 12K | 20K |
| MB-004 | done | Workspace-to-Matrix-Room mapping and provisioning | #380 | api | feature/m12-matrix-bridge | MB-001 | MB-005,MB-006,MB-008 | worker-4 | 2026-02-15T10:25Z | 2026-02-15T10:35Z | 20K | 39K |
| MB-005 | done | Matrix command handling — receive and dispatch commands | #381 | api | feature/m12-matrix-bridge | MB-001,MB-004 | MB-007,MB-008 | worker-5 | 2026-02-15T10:40Z | 2026-02-15T14:27Z | 20K | 27K |
| MB-006 | done | Herald Service: Add Matrix output adapter | #382 | api | feature/m12-matrix-bridge | MB-001,MB-004 | MB-008 | worker-6 | 2026-02-15T10:40Z | 2026-02-15T14:25Z | 18K | 109K |
| MB-007 | done | Streaming AI responses via Matrix message edits | #383 | api | feature/m12-matrix-bridge | MB-001,MB-005 | MB-008 | worker-7 | 2026-02-15T14:30Z | 2026-02-15T14:35Z | 20K | 28K |
| MB-008 | done | Matrix bridge E2E integration tests | #385 | api | feature/m12-matrix-bridge | MB-001,MB-003,MB-004,MB-005,MB-006,MB-007 | MB-009 | worker-8 | 2026-02-15T14:38Z | 2026-02-15T14:40Z | 25K | 35K |
| MB-009 | done | Documentation: Matrix bridge setup and architecture | #386 | docs | feature/m12-matrix-bridge | MB-008 | | worker-9 | 2026-02-15T14:38Z | 2026-02-15T14:39Z | 10K | 12K |
| MB-010 | done | Sample Matrix swarm deployment compose file | #387 | docker | feature/m12-matrix-bridge | | | | | 2026-02-15 | 0 | 0 |
| MB-011 | done | Remediate code review and security review findings | #377 | api | feature/m12-matrix-bridge | MB-001..MB-010 | | worker-10 | 2026-02-15T15:00Z | 2026-02-15T15:10Z | 30K | 145K |
### Phase Summary
| Phase | Tasks | Description |
| ---------------------- | -------------- | --------------------------------------- |
| 1 - Foundation | MB-001, MB-002 | SDK install, dev infrastructure |
| 2 - Module Integration | MB-003, MB-004 | Module registration, DB mapping |
| 3 - Core Features | MB-005, MB-006 | Command handling, Herald adapter |
| 4 - Advanced Features | MB-007 | Streaming responses |
| 5 - Testing | MB-008 | E2E integration tests |
| 6 - Documentation | MB-009 | Setup guide, architecture docs |
| 7 - Review Remediation | MB-011 | Fix all code review + security findings |
### Review Findings Resolved (MB-011)
| # | Severity | Finding | Fix |
| --- | -------- | ---------------------------------------------------------- | -------------------------------------------------------------- |
| 1 | CRITICAL | sendThreadMessage hardcodes controlRoomId — wrong room | Added channelId to ThreadMessageOptions, use options.channelId |
| 2 | CRITICAL | void handleRoomMessage swallows ALL errors | Added .catch() with logger.error |
| 3 | CRITICAL | handleFixCommand: dead thread on dispatch failure | Wrapped dispatch in try-catch with user-visible error |
| 4 | CRITICAL | provisionRoom: orphaned Matrix room on DB failure | try-catch around DB update with logged warning |
| 5 | HIGH | Missing MATRIX_BOT_USER_ID validation (infinite loop risk) | Added throw in connect() if missing |
| 6 | HIGH | streamResponse finally block can throw/mask errors | Wrapped setTypingIndicator in nested try-catch |
| 7 | HIGH | streamResponse catch editMessage can throw/mask | Wrapped editMessage in nested try-catch |
| 8 | HIGH | HeraldService error log missing provider identity | Added provider.constructor.name to error log |
| 9 | HIGH | MatrixRoomService uses unsafe type assertion | Replaced with public getClient() method |
| 10 | HIGH | BridgeModule factory incomplete env var validation | Added warnings for missing vars when token set |
| 11 | MEDIUM | setup-bot.sh JSON injection via shell variables | Replaced with jq -n for safe JSON construction |
### Notes
- #387 already completed in commit 6e20fc5
- #377 is the EPIC issue — closed after all reviews remediated
- 187 tests passing after remediation (41 matrix, 20 streaming, 10 room, 26 integration, 27 herald, 25 discord, + others)
---
## M13-SpeechServices (0.0.13) — TTS & STT Integration
**Orchestrator:** Claude Code
**Started:** 2026-02-15
@@ -6,14 +104,14 @@
**Milestone:** M13-SpeechServices (0.0.13)
**Epic:** #388
### Phase 1: Foundation (Config + Module + Providers)
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| ---------- | ------ | ------------------------------------------------------------------------ | ----- | ---- | --------------------------- | ---------- | -------------------------------- | -------- | ----------------- | ----------------- | -------- | ---- | ----------------- |
| SP-CFG-001 | done | #401: Speech services environment variables and ConfigModule integration | #401 | api | feature/m13-speech-services | | SP-MOD-001,SP-DOC-001 | worker-1 | 2026-02-15T06:00Z | 2026-02-15T06:07Z | 15K | 15K | 51 tests, 4cc43be |
| SP-MOD-001 | done | #389: Create SpeechModule with provider abstraction layer | #389 | api | feature/m13-speech-services | SP-CFG-001 | SP-STT-001,SP-TTS-001,SP-MID-001 | worker-2 | 2026-02-15T06:08Z | 2026-02-15T06:14Z | 25K | 25K | 27 tests, c40373f |
### Phase 2: Providers (STT + TTS)
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| ---------- | ------ | ---------------------------------------------------------------------- | ----- | ---- | --------------------------- | ---------- | ------------------------------------------ | -------- | ----------------- | ----------------- | -------- | ---- | ----------------- |
@@ -23,7 +121,7 @@
| SP-TTS-003 | done | #394: Implement Chatterbox TTS provider (premium tier, voice cloning) | #394 | api | feature/m13-speech-services | SP-TTS-001 | SP-EP-002 | worker-7 | 2026-02-15T06:26Z | 2026-02-15T06:34Z | 15K | 25K | 26 tests, d37c78f |
| SP-TTS-004 | done | #395: Implement Piper TTS provider via OpenedAI Speech (fallback tier) | #395 | api | feature/m13-speech-services | SP-TTS-001 | SP-EP-002 | worker-8 | 2026-02-15T06:35Z | 2026-02-15T06:44Z | 12K | 15K | 37 tests, 6c46556 |
### Phase 3: Middleware + REST Endpoints
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| ---------- | ------ | ---------------------------------------------------------- | ----- | ---- | --------------------------- | ------------------------------------------- | ------------------- | --------- | ----------------- | ----------------- | -------- | ---- | ----------------- |
@@ -31,20 +129,20 @@
| SP-EP-001 | done | #392: Create /api/speech/transcribe REST endpoint | #392 | api | feature/m13-speech-services | SP-STT-001,SP-MID-001 | SP-WS-001,SP-FE-001 | worker-10 | 2026-02-15T06:45Z | 2026-02-15T06:52Z | 20K | 25K | 10 tests, 527262a |
| SP-EP-002 | done | #396: Create /api/speech/synthesize REST endpoint | #396 | api | feature/m13-speech-services | SP-TTS-002,SP-TTS-003,SP-TTS-004,SP-MID-001 | SP-FE-002 | worker-11 | 2026-02-15T06:45Z | 2026-02-15T06:53Z | 20K | 35K | 17 tests, 527262a |
### Phase 4: WebSocket Streaming
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| --------- | ------ | ---------------------------------------------------------- | ----- | ---- | --------------------------- | -------------------- | --------- | --------- | ----------------- | ----------------- | -------- | ---- | ----------------- |
| SP-WS-001 | done | #397: Implement WebSocket streaming transcription endpoint | #397 | api | feature/m13-speech-services | SP-STT-001,SP-EP-001 | SP-FE-001 | worker-12 | 2026-02-15T06:54Z | 2026-02-15T07:00Z | 20K | 30K | 29 tests, 28c9e6f |
### Phase 5: Docker/DevOps
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| ---------- | ------ | -------------------------------------------------------------- | ----- | ------ | --------------------------- | ---------- | ---------- | --------- | ----------------- | ----------------- | -------- | ---- | ------- |
| SP-DOC-001 | done | #399: Docker Compose dev overlay for speech services | #399 | devops | feature/m13-speech-services | SP-CFG-001 | SP-DOC-002 | worker-3 | 2026-02-15T06:08Z | 2026-02-15T06:10Z | 10K | 15K | 52553c8 |
| SP-DOC-002 | done | #400: Docker Compose swarm/prod deployment for speech services | #400 | devops | feature/m13-speech-services | SP-DOC-001 | | worker-13 | 2026-02-15T06:54Z | 2026-02-15T06:56Z | 10K | 8K | b3d6d73 |
### Phase 6: Frontend
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| --------- | ------ | ------------------------------------------------------------------------- | ----- | ---- | --------------------------- | ------------------- | ---------- | --------- | ----------------- | ----------------- | -------- | ---- | ----------------- |
@@ -52,7 +150,7 @@
| SP-FE-002 | done | #403: Frontend audio playback component for TTS output | #403 | web | feature/m13-speech-services | SP-EP-002 | SP-FE-003 | worker-15 | 2026-02-15T07:01Z | 2026-02-15T07:11Z | 20K | 50K | 32 tests, 74d6c10 |
| SP-FE-003 | done | #404: Frontend speech settings page (provider selection, voice config) | #404 | web | feature/m13-speech-services | SP-FE-001,SP-FE-002 | SP-E2E-001 | worker-16 | 2026-02-15T07:13Z | 2026-02-15T07:22Z | 20K | 35K | 30 tests, bc86947 |
### Phase 7: Testing + Documentation
| id | status | description | issue | repo | branch | depends_on | blocks | agent | started_at | completed_at | estimate | used | notes |
| ----------- | ------ | ----------------------------------------------------------------------- | ----- | ---- | --------------------------- | --------------------------------------- | ----------- | --------- | ----------------- | ----------------- | -------- | ---- | ----------------- |

docs/telemetry.md

@@ -0,0 +1,735 @@
# Mosaic Telemetry Integration Guide
## 1. Overview
### What is Mosaic Telemetry?
Mosaic Telemetry is a task completion tracking system purpose-built for AI operations within Mosaic Stack. It captures detailed metrics about every AI task execution -- token usage, cost, duration, outcome, and quality gate results -- and submits them to a central telemetry API for aggregation and analysis.
The aggregated data powers a **prediction system** that provides pre-task estimates for cost, token usage, and expected quality, enabling informed decisions before dispatching work to AI agents.
### How It Differs from OpenTelemetry
Mosaic Stack uses **two separate telemetry systems** that serve different purposes:
| Aspect | OpenTelemetry (OTEL) | Mosaic Telemetry |
| --------------------------------- | --------------------------------------------- | -------------------------------------------- |
| **Purpose** | Distributed request tracing and observability | AI task completion metrics and predictions |
| **What it tracks** | HTTP requests, spans, latency, errors | Token counts, costs, outcomes, quality gates |
| **Data destination** | OTEL Collector (Jaeger, Grafana, etc.) | Mosaic Telemetry API (PostgreSQL-backed) |
| **Module location (API)** | `apps/api/src/telemetry/` | `apps/api/src/mosaic-telemetry/` |
| **Module location (Coordinator)** | `apps/coordinator/src/telemetry.py` | `apps/coordinator/src/mosaic_telemetry.py` |
Both systems can run simultaneously. They are completely independent.
### Architecture
```
+------------------+        +------------------+
|    Mosaic API    |        |   Coordinator    |
|     (NestJS)     |        |    (FastAPI)     |
+--------+---------+        +--------+---------+
         |                           |
    Track events                Track events
         |                           |
         v                           v
+------------------------------------------+
|           Telemetry Client SDK           |
|   (JS: @mosaicstack/telemetry-client)    |
|       (Py: mosaicstack-telemetry)        |
|                                          |
|  - Event queue (in-memory)               |
|  - Batch submission (5-min intervals)    |
|  - Prediction cache (6hr TTL)            |
+-------------------+----------------------+
                    |
           HTTP POST /events
           HTTP POST /predictions
                    |
                    v
+------------------------------------------+
|          Mosaic Telemetry API            |
|            (Separate service)            |
|                                          |
|  - Event ingestion & validation          |
|  - Aggregation & statistics              |
|  - Prediction generation                 |
+-------------------+----------------------+
                    |
                    v
           +---------------+
           |  PostgreSQL   |
           +---------------+
```
**Data flow:**
1. Application code calls `trackTaskCompletion()` (JS) or `client.track()` (Python)
2. Events are queued in memory (up to 1,000 events)
3. A background timer flushes the queue every 5 minutes in batches of up to 100
4. The telemetry API ingests events, validates them, and stores them in PostgreSQL
5. Prediction queries are served from aggregated data with a 6-hour cache TTL
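Steps 2 and 3 above can be sketched as a capped queue drained in batches. This is an illustration of the described behavior, not the SDK's actual internals:

```typescript
// In-memory queue capped at 1,000 events, drained in batches of up to 100.
class EventQueue<T> {
  private events: T[] = [];

  constructor(
    private readonly maxSize = 1000,
    private readonly batchSize = 100,
  ) {}

  // Returns false (drops the event) when the queue is full.
  enqueue(event: T): boolean {
    if (this.events.length >= this.maxSize) return false;
    this.events.push(event);
    return true;
  }

  // Called by the periodic flush timer; removes and returns one batch.
  nextBatch(): T[] {
    return this.events.splice(0, this.batchSize);
  }

  get size(): number {
    return this.events.length;
  }
}
```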
---
## 2. Configuration Guide
### Environment Variables
All configuration is done through environment variables prefixed with `MOSAIC_TELEMETRY_`:
| Variable | Type | Default | Description |
| ------------------------------ | ------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------ |
| `MOSAIC_TELEMETRY_ENABLED` | boolean | `true` | Master switch. Set to `false` to completely disable telemetry (no HTTP calls). |
| `MOSAIC_TELEMETRY_SERVER_URL` | string | (none) | URL of the telemetry API server. For Docker Compose: `http://telemetry-api:8000`. For production: `https://tel-api.mosaicstack.dev`. |
| `MOSAIC_TELEMETRY_API_KEY` | string | (none) | API key for authenticating with the telemetry server. Generate with: `openssl rand -hex 32` (64-char hex string). |
| `MOSAIC_TELEMETRY_INSTANCE_ID` | string | (none) | Unique UUID identifying this Mosaic Stack instance. Generate with: `uuidgen` or `python -c "import uuid; print(uuid.uuid4())"`. |
| `MOSAIC_TELEMETRY_DRY_RUN` | boolean | `false` | When `true`, events are logged to console instead of being sent via HTTP. Useful for development. |
### Enabling Telemetry
To enable telemetry, set all three required variables in your `.env` file:
```bash
MOSAIC_TELEMETRY_ENABLED=true
MOSAIC_TELEMETRY_SERVER_URL=http://telemetry-api:8000
MOSAIC_TELEMETRY_API_KEY=<your-64-char-hex-api-key>
MOSAIC_TELEMETRY_INSTANCE_ID=<your-uuid>
```
If `MOSAIC_TELEMETRY_ENABLED` is `true` but any of `SERVER_URL`, `API_KEY`, or `INSTANCE_ID` is missing, the service logs a warning and disables telemetry gracefully. This is intentional: telemetry configuration issues never prevent the application from starting.
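This "warn and disable" behavior boils down to a check like the following (illustrative names, not the actual module code):

```typescript
interface TelemetryConfig {
  enabled: boolean;
  serverUrl?: string;
  apiKey?: string;
  instanceId?: string;
}

// Returns whether telemetry should actually run; warns instead of throwing so
// a misconfiguration never prevents the application from starting.
function telemetryActive(cfg: TelemetryConfig, warn: (msg: string) => void): boolean {
  if (!cfg.enabled) return false;
  if (!cfg.serverUrl || !cfg.apiKey || !cfg.instanceId) {
    warn("Telemetry enabled but SERVER_URL, API_KEY, or INSTANCE_ID is missing; disabling.");
    return false;
  }
  return true;
}
```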
### Disabling Telemetry
Set `MOSAIC_TELEMETRY_ENABLED=false` in your `.env`. No HTTP calls will be made, and all tracking methods become safe no-ops.
### Dry-Run Mode
For local development and debugging, enable dry-run mode:
```bash
MOSAIC_TELEMETRY_ENABLED=true
MOSAIC_TELEMETRY_DRY_RUN=true
MOSAIC_TELEMETRY_SERVER_URL=http://localhost:8000 # Not actually called
MOSAIC_TELEMETRY_API_KEY=0000000000000000000000000000000000000000000000000000000000000000
MOSAIC_TELEMETRY_INSTANCE_ID=00000000-0000-0000-0000-000000000000
```
In dry-run mode, the SDK logs event payloads to the console instead of submitting them via HTTP. This lets you verify that tracking points are firing correctly without needing a running telemetry API.
### Docker Compose Configuration
Both `docker-compose.yml` (root) and `docker/docker-compose.yml` pass telemetry environment variables to the API service:
```yaml
services:
mosaic-api:
environment:
# Telemetry (task completion tracking & predictions)
MOSAIC_TELEMETRY_ENABLED: ${MOSAIC_TELEMETRY_ENABLED:-false}
MOSAIC_TELEMETRY_SERVER_URL: ${MOSAIC_TELEMETRY_SERVER_URL:-http://telemetry-api:8000}
MOSAIC_TELEMETRY_API_KEY: ${MOSAIC_TELEMETRY_API_KEY:-}
MOSAIC_TELEMETRY_INSTANCE_ID: ${MOSAIC_TELEMETRY_INSTANCE_ID:-}
MOSAIC_TELEMETRY_DRY_RUN: ${MOSAIC_TELEMETRY_DRY_RUN:-false}
```
Note that telemetry defaults to `false` in Docker Compose. Set `MOSAIC_TELEMETRY_ENABLED=true` in your `.env` to activate it.
An optional local telemetry API service is available (commented out in `docker/docker-compose.yml`). Uncomment it to run a self-contained development environment:
```yaml
# Uncomment in docker/docker-compose.yml
telemetry-api:
image: git.mosaicstack.dev/mosaic/telemetry-api:latest
container_name: mosaic-telemetry-api
restart: unless-stopped
environment:
HOST: 0.0.0.0
PORT: 8000
ports:
- "8001:8000"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
networks:
- mosaic-network
```
---
## 3. What Gets Tracked
### TaskCompletionEvent Schema
Every tracked event conforms to the `TaskCompletionEvent` interface. This is the core data structure submitted to the telemetry API:
| Field | Type | Description |
| --------------------------- | ------------------- | -------------------------------------------------------------- |
| `instance_id` | `string` | UUID of the Mosaic Stack instance that generated the event |
| `event_id` | `string` | Unique UUID for this event (auto-generated by the SDK) |
| `schema_version` | `string` | Schema version for forward compatibility (auto-set by the SDK) |
| `timestamp` | `string` | ISO 8601 timestamp of event creation (auto-set by the SDK) |
| `task_duration_ms` | `number` | How long the task took in milliseconds |
| `task_type` | `TaskType` | Type of task performed (see enum below) |
| `complexity` | `Complexity` | Complexity level of the task |
| `harness` | `Harness` | The coding harness or tool used |
| `model` | `string` | AI model name (e.g., `"claude-sonnet-4-5"`) |
| `provider` | `Provider` | AI model provider |
| `estimated_input_tokens` | `number` | Pre-task estimated input tokens (from predictions) |
| `estimated_output_tokens` | `number` | Pre-task estimated output tokens (from predictions) |
| `actual_input_tokens` | `number` | Actual input tokens consumed |
| `actual_output_tokens` | `number` | Actual output tokens generated |
| `estimated_cost_usd_micros` | `number` | Pre-task estimated cost in microdollars (USD \* 1,000,000) |
| `actual_cost_usd_micros` | `number` | Actual cost in microdollars |
| `quality_gate_passed` | `boolean` | Whether all quality gates passed |
| `quality_gates_run` | `QualityGate[]` | List of quality gates that were executed |
| `quality_gates_failed` | `QualityGate[]` | List of quality gates that failed |
| `context_compactions` | `number` | Number of context window compactions during the task |
| `context_rotations` | `number` | Number of context window rotations during the task |
| `context_utilization_final` | `number` | Final context window utilization (0.0 to 1.0) |
| `outcome` | `Outcome` | Task outcome |
| `retry_count` | `number` | Number of retries before completion |
| `language` | `string?` | Primary programming language (optional) |
| `repo_size_category` | `RepoSizeCategory?` | Repository size category (optional) |
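The microdollar convention used by the `*_cost_usd_micros` fields can be captured with two small helpers (hypothetical names; the SDK does not necessarily export these):

```typescript
// Helper sketch for the microdollar cost convention (USD * 1,000,000).
// These names are illustrative, not part of the telemetry SDK's API.
const MICROS_PER_USD = 1_000_000;

function usdToMicros(usd: number): number {
  // Round to avoid floating-point drift when converting dollar amounts.
  return Math.round(usd * MICROS_PER_USD);
}

function microsToUsd(micros: number): number {
  return micros / MICROS_PER_USD;
}
```

For example, a task costing $0.0525 is stored as `52500` microdollars. Storing integers avoids floating-point rounding issues when costs are summed across many events.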
### Enum Values
**TaskType:**
`planning`, `implementation`, `code_review`, `testing`, `debugging`, `refactoring`, `documentation`, `configuration`, `security_audit`, `unknown`
**Complexity:**
`low`, `medium`, `high`, `critical`
**Harness:**
`claude_code`, `opencode`, `kilo_code`, `aider`, `api_direct`, `ollama_local`, `custom`, `unknown`
**Provider:**
`anthropic`, `openai`, `openrouter`, `ollama`, `google`, `mistral`, `custom`, `unknown`
**QualityGate:**
`build`, `lint`, `test`, `coverage`, `typecheck`, `security`
**Outcome:**
`success`, `failure`, `partial`, `timeout`
**RepoSizeCategory:**
`tiny`, `small`, `medium`, `large`, `huge`
### API Service: LLM Call Tracking
The NestJS API tracks every LLM service call (chat, streaming chat, and embeddings) via `LlmTelemetryTrackerService` at `apps/api/src/llm/llm-telemetry-tracker.service.ts`.
Tracked operations:
- **`chat`** -- Synchronous chat completions
- **`chatStream`** -- Streaming chat completions
- **`embed`** -- Embedding generation
For each call, the tracker captures:
- Model name and provider type
- Input and output token counts
- Duration in milliseconds
- Success or failure outcome
- Calculated cost from the built-in cost table (`apps/api/src/llm/llm-cost-table.ts`)
- Task type inferred from calling context (e.g., `"brain"` maps to `planning`, `"review"` maps to `code_review`)
The cost table uses longest-prefix matching on model names and covers all major Anthropic and OpenAI models. Ollama/local models are treated as zero-cost.
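Longest-prefix matching can be sketched like this. The table entries, prices, and function name below are illustrative assumptions, not the actual contents of `llm-cost-table.ts`:

```typescript
// Illustrative sketch of longest-prefix model-name matching.
// Prices are placeholders, not the real cost table.
interface ModelCost {
  inputPerMTokUsd: number;
  outputPerMTokUsd: number;
}

const COST_TABLE: Record<string, ModelCost> = {
  "claude-sonnet-4-5": { inputPerMTokUsd: 3, outputPerMTokUsd: 15 },
  "gpt-4o-mini": { inputPerMTokUsd: 0.15, outputPerMTokUsd: 0.6 },
  "gpt-4o": { inputPerMTokUsd: 2.5, outputPerMTokUsd: 10 },
};

function lookupCost(model: string): ModelCost | null {
  let best: string | null = null;
  for (const prefix of Object.keys(COST_TABLE)) {
    // Prefer the longest matching prefix so "gpt-4o-mini-..." does not
    // fall through to the "gpt-4o" entry.
    if (model.startsWith(prefix) && (best === null || prefix.length > best.length)) {
      best = prefix;
    }
  }
  return best ? COST_TABLE[best] : null; // null => unknown model (treated as zero-cost)
}
```

This is why dated model variants (e.g. `gpt-4o-mini-2024-07-18`) resolve to the correct base entry without needing an exhaustive table.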
### Coordinator: Agent Task Dispatch Tracking
The FastAPI coordinator tracks agent task completions in `apps/coordinator/src/mosaic_telemetry.py` and `apps/coordinator/src/coordinator.py`.
After each agent task dispatch (success or failure), the coordinator emits a `TaskCompletionEvent` capturing:
- Task duration from start to finish
- Agent model, provider, and harness (resolved from the `assigned_agent` field)
- Task outcome (`success`, `failure`, `partial`, `timeout`)
- Quality gate results (build, lint, test, etc.)
- Retry count for the issue
- Complexity level from issue metadata
The coordinator uses the `build_task_event()` helper function which provides sensible defaults for the coordinator context (Claude Code harness, Anthropic provider, TypeScript language).
### Event Lifecycle
```
1. Application code calls trackTaskCompletion() or client.track()
|
v
2. Event is added to in-memory queue (max 1,000 events)
|
v
3. Background timer fires every 5 minutes (submitIntervalMs)
|
v
4. Queue is drained in batches of up to 100 events (batchSize)
|
v
5. Each batch is POSTed to the telemetry API
|
v
6. API validates, stores, and acknowledges each event
```
If the telemetry API is unreachable, events remain in the queue and are retried on the next interval (up to 3 retries per submission). Telemetry errors are logged but never propagated to calling code.
---
## 4. Prediction System
### How Predictions Work
The Mosaic Telemetry API aggregates historical task completion data across all contributing instances. From this data, it generates statistical predictions for new tasks based on their characteristics (task type, model, provider, complexity).
Predictions include percentile distributions (p10, p25, median, p75, p90) for token usage and cost, plus quality metrics (gate pass rate, success rate).
### Querying Predictions via API
The API exposes a prediction endpoint at:
```
GET /api/telemetry/estimate?taskType=<taskType>&model=<model>&provider=<provider>&complexity=<complexity>
```
**Authentication:** Requires a valid session (Bearer token via `AuthGuard`).
**Query Parameters (all required):**
| Parameter | Type | Example | Description |
| ------------ | ------------ | ------------------- | --------------------- |
| `taskType` | `TaskType` | `implementation` | Task type to estimate |
| `model` | `string` | `claude-sonnet-4-5` | Model name |
| `provider` | `Provider` | `anthropic` | Provider name |
| `complexity` | `Complexity` | `medium` | Complexity level |
**Example Request:**
```bash
curl -X GET \
'http://localhost:3001/api/telemetry/estimate?taskType=implementation&model=claude-sonnet-4-5&provider=anthropic&complexity=medium' \
-H 'Authorization: Bearer YOUR_SESSION_TOKEN'
```
**Response:**
```json
{
"data": {
"prediction": {
"input_tokens": {
"p10": 500,
"p25": 1200,
"median": 2500,
"p75": 5000,
"p90": 10000
},
"output_tokens": {
"p10": 200,
"p25": 800,
"median": 1500,
"p75": 3000,
"p90": 6000
},
"cost_usd_micros": {
"median": 30000
},
"duration_ms": {
"median": 5000
},
"correction_factors": {
"input": 1.0,
"output": 1.0
},
"quality": {
"gate_pass_rate": 0.85,
"success_rate": 0.92
}
},
"metadata": {
"sample_size": 150,
"fallback_level": 0,
"confidence": "high",
"last_updated": "2026-02-15T10:00:00Z",
"cache_hit": true
}
}
}
```
If no prediction data is available, the response returns `{ "data": null }`.
### Confidence Levels
The prediction system reports a confidence level based on sample size and data freshness:
| Confidence | Meaning |
| ---------- | -------------------------------------------------------------- |
| `high` | Substantial sample size, recent data, all dimensions matched |
| `medium` | Moderate sample, some dimension fallback |
| `low` | Small sample or significant fallback from requested dimensions |
| `none` | No data available for this combination |
### Fallback Behavior
When exact matches are unavailable, the prediction system falls back through progressively broader aggregations:
1. **Exact match** -- task_type + model + provider + complexity
2. **Drop complexity** -- task_type + model + provider
3. **Drop model** -- task_type + provider
4. **Global** -- task_type only
The `fallback_level` field in metadata indicates which level was used (0 = exact match).
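The fallback ladder can be sketched as an ordered list of lookup keys, each dropping one dimension. The key format is illustrative, not the telemetry API's actual storage scheme:

```typescript
// Sketch of the documented fallback ladder: each level drops one dimension.
// The "a|b|c" key format is an illustration, not a real storage key.
interface PredictionQuery {
  taskType: string;
  model: string;
  provider: string;
  complexity: string;
}

function fallbackKeys(q: PredictionQuery): string[] {
  return [
    `${q.taskType}|${q.model}|${q.provider}|${q.complexity}`, // level 0: exact
    `${q.taskType}|${q.model}|${q.provider}`,                 // level 1: drop complexity
    `${q.taskType}|${q.provider}`,                            // level 2: drop model
    `${q.taskType}`,                                          // level 3: global
  ];
}

function resolve<T>(
  q: PredictionQuery,
  store: Map<string, T>,
): { value: T; fallbackLevel: number } | null {
  const keys = fallbackKeys(q);
  for (let level = 0; level < keys.length; level++) {
    const hit = store.get(keys[level]);
    if (hit !== undefined) return { value: hit, fallbackLevel: level };
  }
  return null; // no data at any level => confidence "none"
}
```

The first level with data wins, and that level is what surfaces as `fallback_level` in the response metadata.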
### Cache Strategy
Predictions are cached in-memory by the SDK with a **6-hour TTL** (`predictionCacheTtlMs: 21_600_000`). The `PredictionService` pre-fetches common combinations on startup to warm the cache:
- **Models:** claude-sonnet-4-5, claude-opus-4, claude-haiku-4-5, gpt-4o, gpt-4o-mini
- **Task types:** implementation, planning, code_review
- **Complexities:** low, medium
This produces 30 pre-cached queries (5 models x 3 task types x 2 complexities). Subsequent requests for these combinations are served from cache without any HTTP call.
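The warm-up enumeration is a simple Cartesian product of the lists above. A sketch (the function name is hypothetical; the model/task/complexity lists mirror the documented warm set):

```typescript
// Sketch of the startup cache-warm enumeration:
// 5 models x 3 task types x 2 complexities = 30 prediction queries.
const WARM_MODELS = [
  "claude-sonnet-4-5",
  "claude-opus-4",
  "claude-haiku-4-5",
  "gpt-4o",
  "gpt-4o-mini",
];
const WARM_TASK_TYPES = ["implementation", "planning", "code_review"];
const WARM_COMPLEXITIES = ["low", "medium"];

function warmQueries(): { model: string; taskType: string; complexity: string }[] {
  const queries: { model: string; taskType: string; complexity: string }[] = [];
  for (const model of WARM_MODELS)
    for (const taskType of WARM_TASK_TYPES)
      for (const complexity of WARM_COMPLEXITIES)
        queries.push({ model, taskType, complexity });
  return queries;
}
```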
---
## 5. SDK Reference
### JavaScript: @mosaicstack/telemetry-client
**Registry:** Gitea npm registry at `git.mosaicstack.dev`
**Version:** 0.1.0
**Installation:**
```bash
pnpm add @mosaicstack/telemetry-client
```
**Key Exports:**
```typescript
// Client
import { TelemetryClient, EventBuilder, resolveConfig } from "@mosaicstack/telemetry-client";
// Types
import type {
TelemetryConfig,
TaskCompletionEvent,
EventBuilderParams,
PredictionQuery,
PredictionResponse,
PredictionData,
PredictionMetadata,
TokenDistribution,
} from "@mosaicstack/telemetry-client";
// Enums
import {
TaskType,
Complexity,
Harness,
Provider,
QualityGate,
Outcome,
RepoSizeCategory,
} from "@mosaicstack/telemetry-client";
```
**TelemetryClient API:**
| Method | Description |
| ------------------------------------------------------------------- | ------------------------------------------------------------ |
| `constructor(config: TelemetryConfig)` | Create a new client with the given configuration |
| `start(): void` | Start background batch submission (idempotent) |
| `stop(): Promise<void>` | Stop background submission, flush remaining events |
| `track(event: TaskCompletionEvent): void` | Queue an event for batch submission (never throws) |
| `getPrediction(query: PredictionQuery): PredictionResponse \| null` | Get a cached prediction (returns null if not cached/expired) |
| `refreshPredictions(queries: PredictionQuery[]): Promise<void>` | Force-refresh predictions from the server |
| `eventBuilder: EventBuilder` | Get the EventBuilder for constructing events |
| `queueSize: number` | Number of events currently queued |
| `isRunning: boolean` | Whether the client is currently running |
**TelemetryConfig Options:**
| Option | Type | Default | Description |
| ---------------------- | ------------------------ | ------------------- | ---------------------------------- |
| `serverUrl` | `string` | (required) | Base URL of the telemetry server |
| `apiKey` | `string` | (required) | 64-char hex API key |
| `instanceId` | `string` | (required) | UUID for this instance |
| `enabled` | `boolean` | `true` | Enable/disable telemetry |
| `submitIntervalMs` | `number` | `300_000` (5 min) | Interval between batch submissions |
| `maxQueueSize` | `number` | `1000` | Maximum queued events |
| `batchSize` | `number` | `100` | Maximum events per batch |
| `requestTimeoutMs` | `number` | `10_000` (10 sec) | HTTP request timeout |
| `predictionCacheTtlMs` | `number` | `21_600_000` (6 hr) | Prediction cache TTL |
| `dryRun` | `boolean` | `false` | Log events instead of sending |
| `maxRetries` | `number` | `3` | Retries per submission |
| `onError` | `(error: Error) => void` | noop | Error callback |
**EventBuilder Usage:**
```typescript
const event = client.eventBuilder.build({
task_duration_ms: 1500,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.LOW,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5",
provider: Provider.ANTHROPIC,
estimated_input_tokens: 0,
estimated_output_tokens: 0,
actual_input_tokens: 200,
actual_output_tokens: 500,
estimated_cost_usd_micros: 0,
actual_cost_usd_micros: 8100,
quality_gate_passed: true,
quality_gates_run: [QualityGate.LINT, QualityGate.TEST],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0.3,
outcome: Outcome.SUCCESS,
retry_count: 0,
language: "typescript",
});
client.track(event);
```
### Python: mosaicstack-telemetry
**Registry:** Gitea PyPI registry at `git.mosaicstack.dev`
**Version:** 0.1.0
**Installation:**
```bash
pip install mosaicstack-telemetry
```
**Key Imports:**
```python
from mosaicstack_telemetry import (
TelemetryClient,
TelemetryConfig,
EventBuilder,
TaskType,
Complexity,
Harness,
Provider,
QualityGate,
Outcome,
)
```
**Python Client Usage:**
```python
# Create config (reads MOSAIC_TELEMETRY_* env vars automatically)
config = TelemetryConfig()
errors = config.validate()  # collect any configuration errors before starting
# Create and start client
client = TelemetryClient(config)
await client.start_async()
# Build and track an event
builder = EventBuilder(instance_id=config.instance_id)
event = (
builder
.task_type(TaskType.IMPLEMENTATION)
.complexity_level(Complexity.MEDIUM)
.harness_type(Harness.CLAUDE_CODE)
.model("claude-sonnet-4-5")
.provider(Provider.ANTHROPIC)
.duration_ms(5000)
.outcome_value(Outcome.SUCCESS)
.tokens(
estimated_in=0,
estimated_out=0,
actual_in=3000,
actual_out=1500,
)
.cost(estimated=0, actual=52500)
.quality(
passed=True,
gates_run=[QualityGate.BUILD, QualityGate.LINT, QualityGate.TEST],
gates_failed=[],
)
.context(compactions=0, rotations=0, utilization=0.4)
.retry_count(0)
.language("typescript")
.build()
)
client.track(event)
# Shutdown (flushes remaining events)
await client.stop_async()
```
---
## 6. Development Guide
### Testing Locally with Dry-Run Mode
The fastest way to develop with telemetry is to use dry-run mode. This logs event payloads to the console without needing a running telemetry API:
```bash
# In your .env
MOSAIC_TELEMETRY_ENABLED=true
MOSAIC_TELEMETRY_DRY_RUN=true
MOSAIC_TELEMETRY_SERVER_URL=http://localhost:8000
MOSAIC_TELEMETRY_API_KEY=0000000000000000000000000000000000000000000000000000000000000000
MOSAIC_TELEMETRY_INSTANCE_ID=00000000-0000-0000-0000-000000000000
```
Start the API server and trigger LLM operations. You will see telemetry event payloads logged in the console output.
### Adding New Tracking Points
To add telemetry tracking to a new service in the NestJS API:
**Step 1:** Inject `MosaicTelemetryService` into your service. Because `MosaicTelemetryModule` is global, no module import is needed:
```typescript
import { Injectable } from "@nestjs/common";
import { MosaicTelemetryService } from "../mosaic-telemetry/mosaic-telemetry.service";
import { TaskType, Complexity, Harness, Provider, Outcome } from "@mosaicstack/telemetry-client";
@Injectable()
export class MyService {
constructor(private readonly telemetry: MosaicTelemetryService) {}
}
```
**Step 2:** Build and track events after task completion:
```typescript
async performTask(): Promise<void> {
const start = Date.now();
  // ... perform the task, capturing inputTokens, outputTokens, and costMicros ...
const duration = Date.now() - start;
const builder = this.telemetry.eventBuilder;
if (builder) {
const event = builder.build({
task_duration_ms: duration,
task_type: TaskType.IMPLEMENTATION,
complexity: Complexity.MEDIUM,
harness: Harness.API_DIRECT,
model: "claude-sonnet-4-5",
provider: Provider.ANTHROPIC,
estimated_input_tokens: 0,
estimated_output_tokens: 0,
actual_input_tokens: inputTokens,
actual_output_tokens: outputTokens,
estimated_cost_usd_micros: 0,
actual_cost_usd_micros: costMicros,
quality_gate_passed: true,
quality_gates_run: [],
quality_gates_failed: [],
context_compactions: 0,
context_rotations: 0,
context_utilization_final: 0,
outcome: Outcome.SUCCESS,
retry_count: 0,
});
this.telemetry.trackTaskCompletion(event);
}
}
```
**Step 3:** For LLM-specific tracking, use `LlmTelemetryTrackerService` instead, which handles cost calculation and task type inference automatically:
```typescript
import { LlmTelemetryTrackerService } from "../llm/llm-telemetry-tracker.service";
@Injectable()
export class MyLlmService {
constructor(private readonly telemetryTracker: LlmTelemetryTrackerService) {}
async chat(): Promise<void> {
const start = Date.now();
// ... call LLM ...
this.telemetryTracker.trackLlmCompletion({
model: "claude-sonnet-4-5",
providerType: "claude",
operation: "chat",
durationMs: Date.now() - start,
inputTokens: 150,
outputTokens: 300,
callingContext: "brain", // Used for task type inference
success: true,
});
}
}
```
### Adding Tracking in the Coordinator (Python)
Use the `build_task_event()` helper from `src/mosaic_telemetry.py`:
```python
from src.mosaic_telemetry import build_task_event, get_telemetry_client
client = get_telemetry_client(app)
if client is not None:
event = build_task_event(
instance_id=instance_id,
task_type=TaskType.IMPLEMENTATION,
complexity=Complexity.MEDIUM,
outcome=Outcome.SUCCESS,
duration_ms=5000,
model="claude-sonnet-4-5",
provider=Provider.ANTHROPIC,
harness=Harness.CLAUDE_CODE,
actual_input_tokens=3000,
actual_output_tokens=1500,
actual_cost_micros=52500,
)
client.track(event)
```
### Troubleshooting
**Telemetry events not appearing:**
1. Check that `MOSAIC_TELEMETRY_ENABLED=true` is set
2. Verify all three required variables are set: `SERVER_URL`, `API_KEY`, `INSTANCE_ID`
3. Look for warning logs: `"Mosaic Telemetry is enabled but missing configuration"` indicates a missing variable
4. Try dry-run mode to confirm events are being generated
**Console shows "Mosaic Telemetry is disabled":**
This is the expected message when `MOSAIC_TELEMETRY_ENABLED=false`. If you intended telemetry to be active, set it to `true`.
**Events queuing but not submitting:**
- Check that the telemetry API server at `MOSAIC_TELEMETRY_SERVER_URL` is reachable
- Verify the API key is a valid 64-character hex string
- The default submission interval is 5 minutes; wait at least one interval or call `stop()` to force a flush
**Prediction endpoint returns null:**
- Predictions require sufficient historical data in the telemetry API
- Check the `metadata.confidence` field; `"none"` means no data exists for this combination
- Predictions are cached for 6 hours; new data takes time to appear
- The `PredictionService` logs startup refresh status; check logs for errors
**"Telemetry client error" in logs:**
- These are non-fatal. The SDK never blocks application logic.
- Common causes: network timeout, invalid API key, server-side validation failure
- Check the telemetry API logs for corresponding errors
