diff --git a/apps/api/src/federation/command.service.spec.ts b/apps/api/src/federation/command.service.spec.ts index 3d4f774..42e5dd7 100644 --- a/apps/api/src/federation/command.service.spec.ts +++ b/apps/api/src/federation/command.service.spec.ts @@ -4,6 +4,7 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; import { Test, TestingModule } from "@nestjs/testing"; +import { ModuleRef } from "@nestjs/core"; import { HttpService } from "@nestjs/axios"; import { CommandService } from "./command.service"; import { PrismaService } from "../prisma/prisma.service"; @@ -16,6 +17,7 @@ import { } from "@prisma/client"; import { of } from "rxjs"; import type { CommandMessage, CommandResponse } from "./types/message.types"; +import { UnknownCommandTypeError } from "./errors/command.errors"; describe("CommandService", () => { let service: CommandService; @@ -23,6 +25,7 @@ describe("CommandService", () => { let federationService: FederationService; let signatureService: SignatureService; let httpService: HttpService; + let moduleRef: ModuleRef; const mockWorkspaceId = "workspace-123"; const mockConnectionId = "connection-123"; @@ -77,6 +80,7 @@ describe("CommandService", () => { federationService = module.get(FederationService); signatureService = module.get(SignatureService); httpService = module.get(HttpService); + moduleRef = module.get(ModuleRef); }); describe("sendCommand", () => { @@ -238,12 +242,75 @@ describe("CommandService", () => { }); describe("handleIncomingCommand", () => { - it("should process a valid incoming command", async () => { + it("should process a valid incoming agent command", async () => { const commandMessage: CommandMessage = { messageId: "cmd-123", instanceId: mockInstanceId, - commandType: "spawn_agent", - payload: { agentType: "task_executor" }, + commandType: "agent.spawn", + payload: { agentType: "task_executor", taskId: "task-123" }, + timestamp: Date.now(), + signature: "signature-123", + }; + + const mockConnection = { + id: mockConnectionId, + remoteInstanceId: mockInstanceId, + status: FederationConnectionStatus.ACTIVE, + }; + + const mockIdentity = { + instanceId: "local-instance", + displayName: "Local Instance", + }; + + const mockFederationAgentService = { + handleAgentCommand: vi.fn().mockResolvedValue({ + success: true, + data: { agentId: "agent-123", status: "spawning", spawnedAt: new Date().toISOString() }, + }), + }; + + vi.spyOn(signatureService, "validateTimestamp").mockReturnValue(true); + vi.spyOn(prisma.federationConnection, "findFirst").mockResolvedValue(mockConnection as never); + vi.spyOn(signatureService, "verifyMessage").mockResolvedValue({ + valid: true, + error: null, + } as never); + vi.spyOn(federationService, "getInstanceIdentity").mockResolvedValue(mockIdentity as never); + vi.spyOn(signatureService, "signMessage").mockResolvedValue("response-signature"); + vi.spyOn(moduleRef, "get").mockReturnValue(mockFederationAgentService as never); + + const response = await service.handleIncomingCommand(commandMessage); + + expect(response).toMatchObject({ + correlationId: "cmd-123", + instanceId: "local-instance", + success: true, + }); + + expect(signatureService.validateTimestamp).toHaveBeenCalledWith(commandMessage.timestamp); + expect(signatureService.verifyMessage).toHaveBeenCalledWith( + expect.objectContaining({ + messageId: "cmd-123", + instanceId: mockInstanceId, + commandType: "agent.spawn", + }), + "signature-123", + mockInstanceId + ); + expect(mockFederationAgentService.handleAgentCommand).toHaveBeenCalledWith( + mockInstanceId, + "agent.spawn", + commandMessage.payload + ); + }); + + it("should handle unknown command types and return error response", async () => { + const commandMessage: CommandMessage = { + messageId: "cmd-123", + instanceId: mockInstanceId, + commandType: "unknown.command", + payload: {}, timestamp: Date.now(), signature: "signature-123", }; @@ -273,18 +340,125 @@ describe("CommandService", () => { expect(response).toMatchObject({ correlationId: "cmd-123", instanceId: "local-instance", - success: true, + success: false, + error: "Unknown command type: unknown.command", }); + }); - expect(signatureService.validateTimestamp).toHaveBeenCalledWith(commandMessage.timestamp); - expect(signatureService.verifyMessage).toHaveBeenCalledWith( - expect.objectContaining({ - messageId: "cmd-123", - instanceId: mockInstanceId, - commandType: "spawn_agent", + it("should handle business logic errors from agent service and return error response", async () => { + const commandMessage: CommandMessage = { + messageId: "cmd-123", + instanceId: mockInstanceId, + commandType: "agent.spawn", + payload: { agentType: "invalid_type" }, + timestamp: Date.now(), + signature: "signature-123", + }; + + const mockConnection = { + id: mockConnectionId, + remoteInstanceId: mockInstanceId, + status: FederationConnectionStatus.ACTIVE, + }; + + const mockIdentity = { + instanceId: "local-instance", + displayName: "Local Instance", + }; + + vi.spyOn(signatureService, "validateTimestamp").mockReturnValue(true); + vi.spyOn(prisma.federationConnection, "findFirst").mockResolvedValue(mockConnection as never); + vi.spyOn(signatureService, "verifyMessage").mockResolvedValue({ + valid: true, + error: null, + } as never); + vi.spyOn(federationService, "getInstanceIdentity").mockResolvedValue(mockIdentity as never); + vi.spyOn(signatureService, "signMessage").mockResolvedValue("response-signature"); + + // Mock FederationAgentService to return error response + const mockFederationAgentService = { + handleAgentCommand: vi.fn().mockResolvedValue({ + success: false, + error: "Invalid agent type: invalid_type", }), - "signature-123", - mockInstanceId + }; + + vi.spyOn(moduleRef, "get").mockReturnValue(mockFederationAgentService as never); + + const response = await service.handleIncomingCommand(commandMessage); + + expect(response).toMatchObject({ + correlationId: "cmd-123", + instanceId: "local-instance", + success: false, + error: "Invalid agent type: invalid_type", + }); + }); + + it("should let system errors propagate (database connection failure)", async () => { + const commandMessage: CommandMessage = { + messageId: "cmd-123", + instanceId: mockInstanceId, + commandType: "agent.spawn", + payload: { agentType: "task_executor" }, + timestamp: Date.now(), + signature: "signature-123", + }; + + vi.spyOn(signatureService, "validateTimestamp").mockReturnValue(true); + + // Simulate database connection failure (system error) + const dbError = new Error("Connection pool exhausted"); + dbError.name = "PoolExhaustedError"; + vi.spyOn(prisma.federationConnection, "findFirst").mockRejectedValue(dbError); + + // System errors should propagate + await expect(service.handleIncomingCommand(commandMessage)).rejects.toThrow( + "Connection pool exhausted" + ); + }); + + it("should let system errors propagate from agent service (not wrapped)", async () => { + const commandMessage: CommandMessage = { + messageId: "cmd-123", + instanceId: mockInstanceId, + commandType: "agent.spawn", + payload: { agentType: "task_executor" }, + timestamp: Date.now(), + signature: "signature-123", + }; + + const mockConnection = { + id: mockConnectionId, + remoteInstanceId: mockInstanceId, + status: FederationConnectionStatus.ACTIVE, + }; + + const mockIdentity = { + instanceId: "local-instance", + displayName: "Local Instance", + }; + + // Simulate a system error (not a CommandProcessingError) from agent service + const systemError = new Error("Database connection failed"); + systemError.name = "DatabaseError"; + + const mockFederationAgentService = { + handleAgentCommand: vi.fn().mockRejectedValue(systemError), + }; + + vi.spyOn(signatureService, "validateTimestamp").mockReturnValue(true); + vi.spyOn(prisma.federationConnection, "findFirst").mockResolvedValue(mockConnection as never); + vi.spyOn(signatureService, "verifyMessage").mockResolvedValue({ + valid: true, + error: null, + } as never); + vi.spyOn(federationService, "getInstanceIdentity").mockResolvedValue(mockIdentity as never); + vi.spyOn(moduleRef, "get").mockReturnValue(mockFederationAgentService as never); + + // System errors should propagate (not caught by business logic handler) + await expect(service.handleIncomingCommand(commandMessage)).rejects.toThrow( + "Database connection failed" ); }); diff --git a/apps/api/src/federation/command.service.ts b/apps/api/src/federation/command.service.ts index 6f5a075..626c1af 100644 --- a/apps/api/src/federation/command.service.ts +++ b/apps/api/src/federation/command.service.ts @@ -18,6 +18,7 @@ import { FederationMessageStatus, } from "@prisma/client"; import type { CommandMessage, CommandResponse, CommandMessageDetails } from "./types/message.types"; +import { CommandProcessingError, UnknownCommandTypeError } from "./errors/command.errors"; @Injectable() export class CommandService { @@ -184,13 +185,30 @@ export class CommandService { responseData = agentResponse.data; errorMessage = agentResponse.error; } else { - // Other command types can be added here - responseData = { message: "Command received and processed" }; + // Unknown command type - throw business logic error + throw new UnknownCommandTypeError(commandMessage.commandType); } } catch (error) { - success = false; - errorMessage = error instanceof Error ? error.message : "Command processing failed"; - this.logger.error(`Command processing failed: ${errorMessage}`); + // Only catch expected business logic errors + // System errors (OOM, DB failures, network issues) should propagate + if (error instanceof CommandProcessingError) { + success = false; + errorMessage = error.message; + this.logger.warn(`Command processing failed (business logic): ${errorMessage}`, { + commandType: commandMessage.commandType, + instanceId: commandMessage.instanceId, + messageId: commandMessage.messageId, + }); + } else { + // System error - log and re-throw to preserve stack trace + this.logger.error(`System error during command processing: ${String(error)}`, { + commandType: commandMessage.commandType, + instanceId: commandMessage.instanceId, + messageId: commandMessage.messageId, + error: error instanceof Error ? error.stack : String(error), + }); + throw error; + } } // Get local instance identity diff --git a/apps/api/src/federation/errors/command.errors.ts b/apps/api/src/federation/errors/command.errors.ts new file mode 100644 index 0000000..fd770ca --- /dev/null +++ b/apps/api/src/federation/errors/command.errors.ts @@ -0,0 +1,49 @@ +/** + * Command Processing Errors + * + * Custom error classes for expected business logic errors in command processing. + * These errors should be caught and returned as error responses. + * All other errors (system errors like OOM, DB failures) should propagate. + */ + +/** + * Base class for command processing errors that should be caught + * and converted to error responses + */ +export class CommandProcessingError extends Error { + constructor(message: string) { + super(message); + this.name = "CommandProcessingError"; + Error.captureStackTrace(this, this.constructor); + } +} + +/** + * Error thrown when an unknown or unsupported command type is received + */ +export class UnknownCommandTypeError extends CommandProcessingError { + constructor(commandType: string) { + super(`Unknown command type: ${commandType}`); + this.name = "UnknownCommandTypeError"; + } +} + +/** + * Error thrown when command payload validation fails + */ +export class InvalidCommandPayloadError extends CommandProcessingError { + constructor(message: string) { + super(message); + this.name = "InvalidCommandPayloadError"; + } +} + +/** + * Error thrown when agent command execution fails due to business logic + */ +export class AgentCommandError extends CommandProcessingError { + constructor(message: string) { + super(message); + this.name = "AgentCommandError"; + } +} diff --git a/apps/api/src/federation/federation-agent.service.spec.ts b/apps/api/src/federation/federation-agent.service.spec.ts index 3f4ba62..2e8871e 100644 --- a/apps/api/src/federation/federation-agent.service.spec.ts +++ b/apps/api/src/federation/federation-agent.service.spec.ts @@ -419,13 +419,12 @@ describe("FederationAgentService", () => { }); }); - it("should return error for unknown command type", async () => { + it("should throw UnknownCommandTypeError for unknown command type", async () => { prisma.federationConnection.findFirst.mockResolvedValue(mockConnection as never); - const result = await service.handleAgentCommand("remote-instance-1", "agent.unknown", {}); - - expect(result.success).toBe(false); - expect(result.error).toContain("Unknown agent command type: agent.unknown"); + await expect( + service.handleAgentCommand("remote-instance-1", "agent.unknown", {}) + ).rejects.toThrow("Unknown command type: agent.unknown"); }); it("should throw error if connection not found", async () => { @@ -436,7 +435,7 @@ describe("FederationAgentService", () => { ).rejects.toThrow("No connection found for remote instance"); }); - it("should handle orchestrator errors", async () => { + it("should throw AgentCommandError for orchestrator errors", async () => { const spawnPayload: SpawnAgentCommandPayload = { taskId: mockTaskId, agentType: "worker", @@ -453,14 +452,9 @@ describe("FederationAgentService", () => { throwError(() => new Error("Orchestrator connection failed")) as never ); - const result = await service.handleAgentCommand( - "remote-instance-1", - "agent.spawn", - spawnPayload - ); - - expect(result.success).toBe(false); - expect(result.error).toContain("Orchestrator connection failed"); + await expect( + service.handleAgentCommand("remote-instance-1", "agent.spawn", spawnPayload) + ).rejects.toThrow("Failed to spawn agent: Orchestrator connection failed"); }); }); }); diff --git a/apps/api/src/federation/federation-agent.service.ts b/apps/api/src/federation/federation-agent.service.ts index 5f055f2..d82a9e6 100644 --- a/apps/api/src/federation/federation-agent.service.ts +++ b/apps/api/src/federation/federation-agent.service.ts @@ -22,6 +22,7 @@ import type { AgentStatusResponseData, KillAgentResponseData, } from "./types/federation-agent.types"; +import { AgentCommandError, UnknownCommandTypeError } from "./errors/command.errors"; /** * Agent command response structure @@ -222,26 +223,19 @@ export class FederationAgentService { } // Route command to appropriate handler - try { - switch (commandType) { - case "agent.spawn": - return await this.handleSpawnCommand(payload as unknown as SpawnAgentCommandPayload); + switch (commandType) { + case "agent.spawn": + return await this.handleSpawnCommand(payload as unknown as SpawnAgentCommandPayload); - case "agent.status": - return await this.handleStatusCommand(payload as unknown as AgentStatusCommandPayload); + case "agent.status": + return await this.handleStatusCommand(payload as unknown as AgentStatusCommandPayload); - case "agent.kill": - return await this.handleKillCommand(payload as unknown as KillAgentCommandPayload); + case "agent.kill": + return await this.handleKillCommand(payload as unknown as KillAgentCommandPayload); - default: - throw new Error(`Unknown agent command type: ${commandType}`); - } - } catch (error) { - this.logger.error(`Error handling agent command: ${String(error)}`); - return { - success: false, - error: error instanceof Error ? error.message : "Unknown error", - }; + default: + // Unknown command type - throw business logic error + throw new UnknownCommandTypeError(commandType); } } @@ -285,8 +279,10 @@ export class FederationAgentService { data: responseData, }; } catch (error) { - this.logger.error(`Failed to spawn agent: ${String(error)}`); - throw error; + // Wrap orchestrator errors as business logic errors + const errorMessage = error instanceof Error ? error.message : "Failed to spawn agent"; + this.logger.error(`Failed to spawn agent: ${errorMessage}`); + throw new AgentCommandError(`Failed to spawn agent: ${errorMessage}`); } } @@ -314,8 +310,10 @@ export class FederationAgentService { data: responseData, }; } catch (error) { - this.logger.error(`Failed to get agent status: ${String(error)}`); - throw error; + // Wrap orchestrator errors as business logic errors + const errorMessage = error instanceof Error ? error.message : "Failed to get agent status"; + this.logger.error(`Failed to get agent status: ${errorMessage}`); + throw new AgentCommandError(`Failed to get agent status: ${errorMessage}`); } } @@ -347,8 +345,10 @@ export class FederationAgentService { data: responseData, }; } catch (error) { - this.logger.error(`Failed to kill agent: ${String(error)}`); - throw error; + // Wrap orchestrator errors as business logic errors + const errorMessage = error instanceof Error ? error.message : "Failed to kill agent"; + this.logger.error(`Failed to kill agent: ${errorMessage}`); + throw new AgentCommandError(`Failed to kill agent: ${errorMessage}`); } } } diff --git a/docs/scratchpads/281-fix-broad-exception-catching.md b/docs/scratchpads/281-fix-broad-exception-catching.md new file mode 100644 index 0000000..346de1c --- /dev/null +++ b/docs/scratchpads/281-fix-broad-exception-catching.md @@ -0,0 +1,49 @@ +# Issue #281: Fix broad exception catching hiding system errors + +## Objective + +Fix broad try-catch blocks in command.service.ts that catch ALL errors including system failures (OOM, DB failures, etc.), making debugging impossible. + +## Location + +apps/api/src/federation/command.service.ts:168-194 + +## Problem + +The current implementation catches all errors in a broad try-catch block, which masks critical system errors as business logic failures. This makes debugging impossible and can hide serious issues like: + +- Out of memory errors +- Database connection failures +- Network failures +- Module loading failures + +## Approach + +1. Define specific error types for expected business logic errors +2. Only catch expected errors (e.g., module not found, command validation failures) +3. Let system errors (OOM, DB failures, network issues) propagate naturally +4. Add structured logging for business logic errors +5. Add comprehensive tests for both business and system error scenarios + +## Implementation Plan + +- [ ] Create custom error classes for expected business errors +- [ ] Update handleIncomingCommand to only catch expected errors +- [ ] Add structured logging for security events +- [ ] Write tests for business logic errors (should be caught) +- [ ] Write tests for system errors (should propagate) +- [ ] Verify all tests pass +- [ ] Run quality gates (lint, typecheck, build) + +## Testing + +- Test business logic errors are caught and handled gracefully +- Test system errors propagate correctly +- Test error logging includes appropriate context +- Maintain 85%+ coverage + +## Notes + +- This is a P0 security issue - proper error handling is critical for production debugging +- Follow patterns from other federation services +- Ensure backward compatibility with existing error handling flows diff --git a/docs/scratchpads/282-add-http-timeouts.md b/docs/scratchpads/282-add-http-timeouts.md new file mode 100644 index 0000000..097b6d3 --- /dev/null +++ b/docs/scratchpads/282-add-http-timeouts.md @@ -0,0 +1,50 @@ +# Issue #282: Add HTTP request timeouts (DoS risk) + +## Objective + +Add 10-second timeout to all HTTP requests to prevent DoS attacks via slowloris and resource exhaustion. + +## Security Impact + +- DoS via slowloris attack (attacker sends data very slowly) +- Resource exhaustion from hung connections +- API becomes unresponsive +- P0 security vulnerability + +## Current Status + +✅ HttpModule is already configured with 10-second timeout in federation.module.ts:29 + +- All HTTP requests via HttpService automatically use this timeout +- No code changes needed in command.service.ts, query.service.ts, or event.service.ts + +## Approach + +1. Verify timeout is properly configured at module level +2. Add explicit test to verify timeout enforcement +3. Add tests for timeout scenarios +4. Document timeout configuration +5. Verify all federation HTTP requests use the configured HttpService + +## Implementation Plan + +- [ ] Review federation.module.ts timeout configuration +- [ ] Add test for HTTP timeout enforcement +- [ ] Add test for timeout error handling +- [ ] Verify query.service.ts uses timeout +- [ ] Verify event.service.ts uses timeout +- [ ] Verify command.service.ts uses timeout +- [ ] Run quality gates (lint, typecheck, build, tests) + +## Testing + +- Test HTTP request times out after 10 seconds +- Test timeout errors are handled gracefully +- Test all federation services respect timeout +- Maintain 85%+ coverage + +## Notes + +- Timeout already configured via HttpModule.register({ timeout: 10000 }) +- Need to add explicit tests to verify timeout works +- This is a verification and testing issue, not an implementation issue