Compare commits

...

12 Commits

Author SHA1 Message Date
5d66e00710 feat(api): ContainerLifecycleService for per-user OpenClaw (MS22-P1d)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
2026-03-01 09:23:43 -06:00
d3c8b8cadd feat(api): internal agent config endpoint (MS22-P1c) (#609)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-01 15:14:06 +00:00
a3a0d7afca chore(orchestrator): add MS22 PRD, mark P1a+P1b done (#608)
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-01 15:05:35 +00:00
ab2b68c93c Merge pull request 'feat(api): agent fleet DB schema + migration (MS22-P1a)' (#607) from feat/ms22-p1a-schema into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
Reviewed-on: #607
2026-03-01 15:03:23 +00:00
c1ec0ad7ef Merge pull request 'feat(api): CryptoService for API key encryption (MS22-P1b)' (#606) from feat/ms22-p1b-crypto into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
Reviewed-on: #606
2026-03-01 15:02:50 +00:00
e5b772f7cb Merge pull request 'chore(orchestrator): MS22 Phase 1 task breakdown' (#605) from chore/ms22-p1-tasks into main
Reviewed-on: #605
2026-03-01 15:02:27 +00:00
7a46c81897 feat(api): add agent fleet Prisma schema (MS22-P1a)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
2026-03-01 08:42:10 -06:00
3688f89c37 feat(api): add CryptoService for secret encryption (MS22-P1b)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
2026-03-01 08:41:28 -06:00
e59e517d5c feat(api): add CryptoService for secret encryption (MS22-P1b) 2026-03-01 08:40:40 -06:00
fab833a710 chore(orchestrator): add MS22 Phase 1 task breakdown (11 tasks) 2026-03-01 08:36:19 -06:00
4294deda49 docs(design): MS22 DB-centric agent fleet architecture (#604)
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-01 14:35:14 +00:00
2fe858d61a chore(orchestrator): MS21 complete — UI-001-QA and TEST-004 done (#602)
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-03-01 14:16:11 +00:00
19 changed files with 2647 additions and 3 deletions

View File

@@ -59,6 +59,7 @@
"class-validator": "^0.14.3",
"cookie-parser": "^1.4.7",
"discord.js": "^14.25.1",
"dockerode": "^4.0.9",
"gray-matter": "^4.0.3",
"highlight.js": "^11.11.1",
"ioredis": "^5.9.2",
@@ -88,6 +89,7 @@
"@types/archiver": "^7.0.0",
"@types/bcryptjs": "^3.0.0",
"@types/cookie-parser": "^1.4.10",
"@types/dockerode": "^3.3.47",
"@types/express": "^5.0.1",
"@types/highlight.js": "^10.1.0",
"@types/node": "^22.13.4",

View File

@@ -0,0 +1,109 @@
-- CreateTable
-- Key/value settings table. "updatedAt" is NOT NULL with no database default,
-- so every writer must supply it.
-- NOTE(review): "encrypted" presumably flags that "value" holds ciphertext — confirm against the writer.
CREATE TABLE "SystemConfig" (
    "id"        TEXT         NOT NULL,
    "key"       TEXT         NOT NULL,
    "value"     TEXT         NOT NULL,
    "encrypted" BOOLEAN      NOT NULL DEFAULT false,
    "updatedAt" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "SystemConfig_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Username / password-hash accounts; rows default to active.
-- "updatedAt" has no database default and must be supplied by the writer.
CREATE TABLE "BreakglassUser" (
    "id"           TEXT         NOT NULL,
    "username"     TEXT         NOT NULL,
    "passwordHash" TEXT         NOT NULL,
    "isActive"     BOOLEAN      NOT NULL DEFAULT true,
    "createdAt"    TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"    TIMESTAMP(3) NOT NULL,

    CONSTRAINT "BreakglassUser_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Per-user LLM provider definitions. "models" is a JSONB document that
-- defaults to an empty array; "baseUrl" and "apiKey" are optional.
-- No foreign key is declared on "userId" in this migration.
CREATE TABLE "LlmProvider" (
    "id"          TEXT         NOT NULL,
    "userId"      TEXT         NOT NULL,
    "name"        TEXT         NOT NULL,
    "displayName" TEXT         NOT NULL,
    "type"        TEXT         NOT NULL,
    "baseUrl"     TEXT,
    "apiKey"      TEXT,
    "apiType"     TEXT         NOT NULL DEFAULT 'openai-completions',
    "models"      JSONB        NOT NULL DEFAULT '[]',
    "isActive"    BOOLEAN      NOT NULL DEFAULT true,
    "createdAt"   TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"   TIMESTAMP(3) NOT NULL,

    CONSTRAINT "LlmProvider_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- One row of container bookkeeping per user. "containerId", "gatewayPort"
-- and "lastActiveAt" are nullable; "status" defaults to 'stopped'.
-- "idleTimeoutMin" defaults to 30 (NOTE(review): unit presumably minutes, per the name — confirm).
CREATE TABLE "UserContainer" (
    "id"             TEXT         NOT NULL,
    "userId"         TEXT         NOT NULL,
    "containerId"    TEXT,
    "containerName"  TEXT         NOT NULL,
    "gatewayPort"    INTEGER,
    "gatewayToken"   TEXT         NOT NULL,
    "status"         TEXT         NOT NULL DEFAULT 'stopped',
    "lastActiveAt"   TIMESTAMP(3),
    "idleTimeoutMin" INTEGER      NOT NULL DEFAULT 30,
    "config"         JSONB        NOT NULL DEFAULT '{}',
    "createdAt"      TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"      TIMESTAMP(3) NOT NULL,

    CONSTRAINT "UserContainer_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Bookkeeping for system-level containers. "primaryModel" is mandatory here;
-- "containerId" and "gatewayPort" are nullable; "status" defaults to 'stopped'.
CREATE TABLE "SystemContainer" (
    "id"           TEXT         NOT NULL,
    "name"         TEXT         NOT NULL,
    "role"         TEXT         NOT NULL,
    "containerId"  TEXT,
    "gatewayPort"  INTEGER,
    "gatewayToken" TEXT         NOT NULL,
    "status"       TEXT         NOT NULL DEFAULT 'stopped',
    "primaryModel" TEXT         NOT NULL,
    "isActive"     BOOLEAN      NOT NULL DEFAULT true,
    "createdAt"    TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"    TIMESTAMP(3) NOT NULL,

    CONSTRAINT "SystemContainer_pkey" PRIMARY KEY ("id")
);
-- CreateTable
-- Per-user agent preferences. "primaryModel" and "personality" are optional;
-- "fallbackModels" is a JSONB document defaulting to an empty array.
CREATE TABLE "UserAgentConfig" (
    "id"             TEXT         NOT NULL,
    "userId"         TEXT         NOT NULL,
    "primaryModel"   TEXT,
    "fallbackModels" JSONB        NOT NULL DEFAULT '[]',
    "personality"    TEXT,
    "createdAt"      TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt"      TIMESTAMP(3) NOT NULL,

    CONSTRAINT "UserAgentConfig_pkey" PRIMARY KEY ("id")
);
-- CreateIndex
-- Each configuration key may appear only once.
CREATE UNIQUE INDEX "SystemConfig_key_key"
    ON "SystemConfig" ("key");

-- CreateIndex
-- Usernames are unique.
CREATE UNIQUE INDEX "BreakglassUser_username_key"
    ON "BreakglassUser" ("username");

-- CreateIndex
-- NOTE(review): the unique index on ("userId", "name") below already leads
-- with "userId", so this single-column index may be redundant — confirm
-- before dropping it (the Prisma schema declares both).
CREATE INDEX "LlmProvider_userId_idx"
    ON "LlmProvider" ("userId");

-- CreateIndex
-- A user cannot have two providers with the same name.
CREATE UNIQUE INDEX "LlmProvider_userId_name_key"
    ON "LlmProvider" ("userId", "name");

-- CreateIndex
-- At most one container row per user.
CREATE UNIQUE INDEX "UserContainer_userId_key"
    ON "UserContainer" ("userId");

-- CreateIndex
-- System container names are unique.
CREATE UNIQUE INDEX "SystemContainer_name_key"
    ON "SystemContainer" ("name");

-- CreateIndex
-- At most one agent configuration row per user.
CREATE UNIQUE INDEX "UserAgentConfig_userId_key"
    ON "UserAgentConfig" ("userId");

View File

@@ -1625,3 +1625,81 @@ model ConversationArchive {
@@index([startedAt])
@@map("conversation_archives")
}
// ============================================
// AGENT FLEET MODULE
// ============================================
/// Key/value configuration entry; `key` is unique across the table.
/// NOTE(review): `encrypted` presumably marks `value` as ciphertext — confirm against the writer.
model SystemConfig {
  id        String   @id @default(cuid())
  key       String   @unique
  value     String
  encrypted Boolean  @default(false)
  updatedAt DateTime @updatedAt
}
/// Account identified by a unique `username` and a stored password hash.
/// New rows are active by default.
model BreakglassUser {
  id           String   @id @default(cuid())
  username     String   @unique
  passwordHash String
  isActive     Boolean  @default(true)
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt
}
/// LLM provider registered by a user. `name` is unique per user;
/// `models` is a JSON document that defaults to an empty array.
/// No relation to a user model is declared — `userId` is a plain string.
model LlmProvider {
  id          String   @id @default(cuid())
  userId      String
  name        String
  displayName String
  type        String
  baseUrl     String?
  apiKey      String?
  apiType     String   @default("openai-completions")
  models      Json     @default("[]")
  isActive    Boolean  @default(true)
  createdAt   DateTime @default(now())
  updatedAt   DateTime @updatedAt

  @@unique([userId, name])
  @@index([userId])
}
/// Container bookkeeping row, at most one per user (`userId` is unique).
/// `containerId`, `gatewayPort` and `lastActiveAt` stay null until set.
model UserContainer {
  id             String    @id @default(cuid())
  userId         String    @unique
  containerId    String?
  containerName  String
  gatewayPort    Int?
  gatewayToken   String
  status         String    @default("stopped")
  lastActiveAt   DateTime?
  idleTimeoutMin Int       @default(30)
  config         Json      @default("{}")
  createdAt      DateTime  @default(now())
  updatedAt      DateTime  @updatedAt
}
/// System-level container bookkeeping row, uniquely identified by `name`.
/// Unlike UserAgentConfig, `primaryModel` is required here.
model SystemContainer {
  id           String   @id @default(cuid())
  name         String   @unique
  role         String
  containerId  String?
  gatewayPort  Int?
  gatewayToken String
  status       String   @default("stopped")
  primaryModel String
  isActive     Boolean  @default(true)
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt
}
/// Per-user agent preferences, at most one row per user (`userId` is unique).
/// `primaryModel` and `personality` are optional; `fallbackModels` is a JSON
/// document defaulting to an empty array.
model UserAgentConfig {
  id             String   @id @default(cuid())
  userId         String   @unique
  primaryModel   String?
  fallbackModels Json     @default("[]")
  personality    String?
  createdAt      DateTime @default(now())
  updatedAt      DateTime @updatedAt
}

View File

@@ -0,0 +1,40 @@
import {
Controller,
ForbiddenException,
Get,
Param,
Req,
UnauthorizedException,
UseGuards,
} from "@nestjs/common";
import { AgentConfigService } from "./agent-config.service";
import { AgentConfigGuard, type AgentConfigRequest } from "./agent-config.guard";
/**
 * Serves assembled agent configuration to the containers that request it.
 *
 * Every route is protected by AgentConfigGuard, which validates the bearer
 * token and attaches the resolved container identity to the request as
 * `containerAuth`.
 */
@Controller("internal")
@UseGuards(AgentConfigGuard)
export class AgentConfigController {
  constructor(private readonly agentConfigService: AgentConfigService) {}

  /**
   * GET /api/internal/agent-config/:id
   * (the `/api` prefix is presumably a global prefix configured elsewhere — confirm)
   *
   * Auth: Bearer token, validated against UserContainer.gatewayToken or
   * SystemContainer.gatewayToken by the guard.
   *
   * @param id container record ID (cuid); must equal the ID the token resolved to,
   *           so a container can only fetch its own config
   * @returns the assembled openclaw.json document
   * @throws UnauthorizedException when the guard left no auth context on the request
   * @throws ForbiddenException when the token belongs to a different container
   */
  @Get("agent-config/:id")
  async getAgentConfig(
    @Param("id") id: string,
    @Req() request: AgentConfigRequest
  ): Promise<object> {
    const { containerAuth: auth } = request;

    if (!auth) {
      throw new UnauthorizedException("Missing container authentication context");
    }

    const tokenMatchesRequestedContainer = auth.id === id;
    if (!tokenMatchesRequestedContainer) {
      throw new ForbiddenException("Token is not authorized for the requested container");
    }

    return this.agentConfigService.generateConfigForContainer(auth.type, id);
  }
}

View File

@@ -0,0 +1,43 @@
import { CanActivate, ExecutionContext, Injectable, UnauthorizedException } from "@nestjs/common";
import type { Request } from "express";
import { AgentConfigService, type ContainerTokenValidation } from "./agent-config.service";
/**
 * Express request extended with the container identity that
 * AgentConfigGuard resolves from the bearer token. The field is optional
 * because it is only present after the guard has run successfully.
 */
export interface AgentConfigRequest extends Request {
  containerAuth?: ContainerTokenValidation;
}
/**
 * Guard that authenticates a container by its gateway bearer token.
 *
 * On success the resolved container identity is stored on the request as
 * `containerAuth` so downstream handlers can authorize against it.
 */
@Injectable()
export class AgentConfigGuard implements CanActivate {
  constructor(private readonly agentConfigService: AgentConfigService) {}

  /**
   * @returns true when the Authorization header carries a token that matches a known container
   * @throws UnauthorizedException when the header is missing/malformed or the token is unknown
   */
  async canActivate(context: ExecutionContext): Promise<boolean> {
    const request = context.switchToHttp().getRequest<AgentConfigRequest>();
    const token = this.extractBearerToken(request.headers.authorization);

    if (!token) {
      throw new UnauthorizedException("Missing Bearer token");
    }

    const containerAuth = await this.agentConfigService.validateContainerToken(token);
    if (!containerAuth) {
      throw new UnauthorizedException("Invalid container token");
    }

    request.containerAuth = containerAuth;
    return true;
  }

  /**
   * Pull the token out of an `Authorization: Bearer <token>` header value.
   *
   * The scheme is matched case-insensitively and may be separated from the
   * token by one or more spaces. Anything other than exactly one
   * whitespace-free token after the scheme is rejected, so trailing junk is
   * never silently ignored.
   *
   * @param headerValue raw header value; only the first entry of an array is considered
   * @returns the token, or null when the header is absent or not a well-formed Bearer credential
   */
  private extractBearerToken(headerValue: string | string[] | undefined): string | null {
    const normalizedHeader = Array.isArray(headerValue) ? headerValue[0] : headerValue;
    if (!normalizedHeader) {
      return null;
    }

    const match = /^Bearer +(\S+)$/i.exec(normalizedHeader.trim());
    return match?.[1] ?? null;
  }
}

View File

@@ -0,0 +1,14 @@
import { Module } from "@nestjs/common";
import { PrismaModule } from "../prisma/prisma.module";
import { CryptoModule } from "../crypto/crypto.module";
import { AgentConfigController } from "./agent-config.controller";
import { AgentConfigService } from "./agent-config.service";
import { AgentConfigGuard } from "./agent-config.guard";
/**
 * Wires the agent-config endpoint: the controller, its guard, and the
 * service they share. AgentConfigService is exported for use by other modules.
 */
@Module({
  imports: [PrismaModule, CryptoModule],
  controllers: [AgentConfigController],
  providers: [AgentConfigService, AgentConfigGuard],
  exports: [AgentConfigService],
})
export class AgentConfigModule {}

View File

@@ -0,0 +1,215 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { AgentConfigService } from "./agent-config.service";
import { PrismaService } from "../prisma/prisma.service";
import { CryptoService } from "../crypto/crypto.service";
describe("AgentConfigService", () => {
  let service: AgentConfigService;

  // Prisma delegates stubbed with only the methods the service calls.
  const mockPrismaService = {
    userAgentConfig: { findUnique: vi.fn() },
    llmProvider: { findMany: vi.fn() },
    userContainer: { findUnique: vi.fn(), findMany: vi.fn() },
    systemContainer: { findUnique: vi.fn(), findMany: vi.fn() },
  };

  // Fake crypto: a value counts as "encrypted" when prefixed with "enc:",
  // and decrypting simply strips that prefix.
  const mockCryptoService = {
    isEncrypted: vi.fn((value: string) => value.startsWith("enc:")),
    decrypt: vi.fn((value: string) => value.replace(/^enc:/, "")),
  };

  interface TokenRow {
    id: string;
    gatewayToken: string;
  }

  /** Stubs the three per-user lookups performed by generateUserConfig. */
  const stubUserLookups = (fixture: {
    userId: string;
    primaryModel: string;
    containerId: string;
    gatewayPort: number | null;
    providers: unknown[];
  }): void => {
    mockPrismaService.userAgentConfig.findUnique.mockResolvedValue({
      id: "cfg-1",
      userId: fixture.userId,
      primaryModel: fixture.primaryModel,
    });
    mockPrismaService.userContainer.findUnique.mockResolvedValue({
      id: fixture.containerId,
      userId: fixture.userId,
      gatewayPort: fixture.gatewayPort,
    });
    mockPrismaService.llmProvider.findMany.mockResolvedValue(fixture.providers);
  };

  /** Builds an active provider row for user-1 exposing a single model. */
  const providerRow = (fields: {
    name: string;
    displayName: string;
    type: string;
    baseUrl: string;
    apiKey: string;
    modelId: string;
  }) => ({
    id: "provider-1",
    userId: "user-1",
    name: fields.name,
    displayName: fields.displayName,
    type: fields.type,
    baseUrl: fields.baseUrl,
    apiKey: fields.apiKey,
    apiType: "openai-completions",
    models: [{ id: fields.modelId }],
    isActive: true,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  /** Stubs the container rows scanned by validateContainerToken. */
  const stubContainerTokens = (userRows: TokenRow[], systemRows: TokenRow[]): void => {
    mockPrismaService.userContainer.findMany.mockResolvedValue(userRows);
    mockPrismaService.systemContainer.findMany.mockResolvedValue(systemRows);
  };

  beforeEach(() => {
    vi.clearAllMocks();
    service = new AgentConfigService(
      mockPrismaService as unknown as PrismaService,
      mockCryptoService as unknown as CryptoService
    );
  });

  it("generateUserConfig returns valid openclaw.json structure", async () => {
    stubUserLookups({
      userId: "user-1",
      primaryModel: "my-zai/glm-5",
      containerId: "container-1",
      gatewayPort: 19001,
      providers: [
        providerRow({
          name: "my-zai",
          displayName: "Z.ai",
          type: "zai",
          baseUrl: "https://api.z.ai/v1",
          apiKey: "enc:secret-zai-key",
          modelId: "glm-5",
        }),
      ],
    });

    const result = await service.generateUserConfig("user-1");

    expect(result).toEqual({
      gateway: {
        mode: "local",
        port: 19001,
        bind: "lan",
        auth: { mode: "token" },
        http: {
          endpoints: {
            chatCompletions: { enabled: true },
          },
        },
      },
      agents: {
        defaults: {
          model: {
            primary: "my-zai/glm-5",
          },
        },
      },
      models: {
        providers: {
          "my-zai": {
            apiKey: "secret-zai-key",
            baseUrl: "https://api.z.ai/v1",
            models: {
              "glm-5": {},
            },
          },
        },
      },
    });
  });

  it("generateUserConfig decrypts API keys correctly", async () => {
    stubUserLookups({
      userId: "user-1",
      primaryModel: "openai-work/gpt-4.1",
      containerId: "container-1",
      gatewayPort: 18789,
      providers: [
        providerRow({
          name: "openai-work",
          displayName: "OpenAI Work",
          type: "openai",
          baseUrl: "https://api.openai.com/v1",
          apiKey: "enc:encrypted-openai-key",
          modelId: "gpt-4.1",
        }),
      ],
    });

    const result = await service.generateUserConfig("user-1");

    expect(mockCryptoService.decrypt).toHaveBeenCalledWith("enc:encrypted-openai-key");
    expect(result.models.providers["openai-work"]?.apiKey).toBe("encrypted-openai-key");
  });

  it("generateUserConfig handles user with no providers", async () => {
    stubUserLookups({
      userId: "user-2",
      primaryModel: "openai/gpt-4o-mini",
      containerId: "container-2",
      gatewayPort: null,
      providers: [],
    });

    const result = await service.generateUserConfig("user-2");

    expect(result.models.providers).toEqual({});
    // A null gatewayPort falls back to the service's default port.
    expect(result.gateway.port).toBe(18789);
  });

  it("validateContainerToken returns correct type for user container", async () => {
    stubContainerTokens([{ id: "user-container-1", gatewayToken: "enc:user-token-1" }], []);

    const result = await service.validateContainerToken("user-token-1");

    expect(result).toEqual({
      type: "user",
      id: "user-container-1",
    });
  });

  it("validateContainerToken returns correct type for system container", async () => {
    stubContainerTokens([], [{ id: "system-container-1", gatewayToken: "enc:system-token-1" }]);

    const result = await service.validateContainerToken("system-token-1");

    expect(result).toEqual({
      type: "system",
      id: "system-container-1",
    });
  });

  it("validateContainerToken returns null for invalid token", async () => {
    stubContainerTokens(
      [{ id: "user-container-1", gatewayToken: "enc:user-token-1" }],
      [{ id: "system-container-1", gatewayToken: "enc:system-token-1" }]
    );

    const result = await service.validateContainerToken("no-match");

    expect(result).toBeNull();
  });
});

View File

@@ -0,0 +1,288 @@
import { Injectable, NotFoundException } from "@nestjs/common";
import type { LlmProvider } from "@prisma/client";
import { timingSafeEqual } from "node:crypto";
import { PrismaService } from "../prisma/prisma.service";
import { CryptoService } from "../crypto/crypto.service";
/** Gateway port written into the config when the container row has no port recorded. */
const DEFAULT_GATEWAY_PORT = 18789;

/** Model used when neither the stored configuration nor any provider supplies one. */
const DEFAULT_PRIMARY_MODEL = "openai/gpt-4o-mini";
/** Which container table a token or record belongs to. */
type ContainerType = "user" | "system";

/** Result of a successful bearer-token lookup: the owning container's kind and record ID. */
export interface ContainerTokenValidation {
  type: ContainerType;
  id: string;
}

/** Model IDs mapped to (always empty) per-model option objects. */
type OpenClawModelMap = Record<string, Record<string, never>>;

/** One entry under `models.providers` in the generated config. */
interface OpenClawProviderConfig {
  apiKey?: string;
  baseUrl?: string;
  models: OpenClawModelMap;
}

/**
 * Shape of the openclaw.json document this service emits. Literal types
 * pin the values that are the same in every generated config.
 */
interface OpenClawConfig {
  gateway: {
    mode: "local";
    port: number;
    bind: "lan";
    auth: { mode: "token" };
    http: {
      endpoints: {
        chatCompletions: { enabled: true };
      };
    };
  };
  agents: {
    defaults: {
      model: {
        primary: string;
      };
    };
  };
  models: {
    providers: Record<string, OpenClawProviderConfig>;
  };
}
/**
 * Builds openclaw.json configuration documents for user and system
 * containers, and authenticates containers by their gateway bearer token.
 */
@Injectable()
export class AgentConfigService {
  constructor(
    private readonly prisma: PrismaService,
    private readonly crypto: CryptoService
  ) {}

  /**
   * Generate the complete openclaw.json for a user's container.
   *
   * The primary model is chosen in this order: the user's stored
   * `primaryModel` (when non-empty), the first model of the oldest active
   * provider, then DEFAULT_PRIMARY_MODEL.
   *
   * @param userId owner of the container
   * @throws NotFoundException when the user has no container row
   */
  async generateUserConfig(userId: string): Promise<OpenClawConfig> {
    const [userAgentConfig, providers, userContainer] = await Promise.all([
      this.prisma.userAgentConfig.findUnique({
        where: { userId },
      }),
      this.prisma.llmProvider.findMany({
        where: {
          userId,
          isActive: true,
        },
        // Oldest first, so the provider-derived fallback model is stable.
        orderBy: {
          createdAt: "asc",
        },
      }),
      this.prisma.userContainer.findUnique({
        where: { userId },
      }),
    ]);

    if (!userContainer) {
      throw new NotFoundException(`User container not found for user ${userId}`);
    }

    // Treat an empty string like "not configured", matching the truthiness
    // fallback generateSystemConfig applies to its own primaryModel.
    const configuredModel = userAgentConfig?.primaryModel;
    const primaryModel =
      typeof configuredModel === "string" && configuredModel !== ""
        ? configuredModel
        : (this.resolvePrimaryModelFromProviders(providers) ?? DEFAULT_PRIMARY_MODEL);

    return this.buildOpenClawConfig(primaryModel, userContainer.gatewayPort, providers);
  }

  /**
   * Generate the config for a system container. System containers are
   * emitted without any provider entries.
   *
   * @param containerId SystemContainer record ID
   * @throws NotFoundException when no such system container exists
   */
  async generateSystemConfig(containerId: string): Promise<OpenClawConfig> {
    const systemContainer = await this.prisma.systemContainer.findUnique({
      where: { id: containerId },
    });

    if (!systemContainer) {
      throw new NotFoundException(`System container ${containerId} not found`);
    }

    return this.buildOpenClawConfig(
      systemContainer.primaryModel || DEFAULT_PRIMARY_MODEL,
      systemContainer.gatewayPort,
      []
    );
  }

  /**
   * Generate the config for a container identified by kind and record ID.
   * User containers are resolved to their owning user first.
   *
   * @throws NotFoundException when the container record does not exist
   */
  async generateConfigForContainer(
    type: ContainerType,
    containerId: string
  ): Promise<OpenClawConfig> {
    if (type === "system") {
      return this.generateSystemConfig(containerId);
    }

    const userContainer = await this.prisma.userContainer.findUnique({
      where: { id: containerId },
      select: { userId: true },
    });

    if (!userContainer) {
      throw new NotFoundException(`User container ${containerId} not found`);
    }

    return this.generateUserConfig(userContainer.userId);
  }

  /**
   * Validate a container's bearer token.
   *
   * Loads every user and system container token, decrypts each, and compares
   * it with the presented token. User containers are checked before system
   * containers.
   *
   * @returns the matching container's kind and ID, or null when nothing matches
   */
  async validateContainerToken(token: string): Promise<ContainerTokenValidation | null> {
    if (!token) {
      return null;
    }

    const [userContainers, systemContainers] = await Promise.all([
      this.prisma.userContainer.findMany({
        select: {
          id: true,
          gatewayToken: true,
        },
      }),
      this.prisma.systemContainer.findMany({
        select: {
          id: true,
          gatewayToken: true,
        },
      }),
    ]);

    for (const container of userContainers) {
      const storedToken = this.decryptContainerToken(container.gatewayToken);
      if (storedToken && this.tokensEqual(storedToken, token)) {
        return { type: "user", id: container.id };
      }
    }

    for (const container of systemContainers) {
      const storedToken = this.decryptContainerToken(container.gatewayToken);
      if (storedToken && this.tokensEqual(storedToken, token)) {
        return { type: "system", id: container.id };
      }
    }

    return null;
  }

  /** Assemble the config document; a null gateway port falls back to DEFAULT_GATEWAY_PORT. */
  private buildOpenClawConfig(
    primaryModel: string,
    gatewayPort: number | null,
    providers: LlmProvider[]
  ): OpenClawConfig {
    return {
      gateway: {
        mode: "local",
        port: gatewayPort ?? DEFAULT_GATEWAY_PORT,
        bind: "lan",
        auth: { mode: "token" },
        http: {
          endpoints: {
            chatCompletions: { enabled: true },
          },
        },
      },
      agents: {
        defaults: {
          model: {
            primary: primaryModel,
          },
        },
      },
      models: {
        providers: this.buildProviderConfig(providers),
      },
    };
  }

  /**
   * Map provider rows to config entries keyed by provider name. `apiKey`
   * (decrypted when needed) and `baseUrl` are only included when present.
   */
  private buildProviderConfig(providers: LlmProvider[]): Record<string, OpenClawProviderConfig> {
    const providerConfig: Record<string, OpenClawProviderConfig> = {};

    for (const provider of providers) {
      const config: OpenClawProviderConfig = {
        models: this.extractModels(provider.models),
      };

      const apiKey = this.decryptIfNeeded(provider.apiKey);
      if (apiKey) {
        config.apiKey = apiKey;
      }

      if (provider.baseUrl) {
        config.baseUrl = provider.baseUrl;
      }

      providerConfig[provider.name] = config;
    }

    return providerConfig;
  }

  /**
   * Convert a provider's JSON `models` value into a model map. Accepts an
   * array whose entries are either model-ID strings or objects with a string
   * `id`; anything else (including a non-array value) is ignored.
   */
  private extractModels(models: unknown): OpenClawModelMap {
    const modelMap: OpenClawModelMap = {};

    if (!Array.isArray(models)) {
      return modelMap;
    }

    for (const modelEntry of models) {
      if (typeof modelEntry === "string") {
        modelMap[modelEntry] = {};
        continue;
      }

      if (this.hasModelId(modelEntry)) {
        modelMap[modelEntry.id] = {};
      }
    }

    return modelMap;
  }

  /**
   * Pick `<provider name>/<model id>` from the first provider (in the given
   * order) that exposes a non-empty model ID, or null when none does.
   */
  private resolvePrimaryModelFromProviders(providers: LlmProvider[]): string | null {
    for (const provider of providers) {
      const modelIds = Object.keys(this.extractModels(provider.models));
      const firstModelId = modelIds[0];
      if (firstModelId) {
        return `${provider.name}/${firstModelId}`;
      }
    }

    return null;
  }

  /**
   * Return the plaintext for a stored secret: decrypts values the crypto
   * service recognizes as encrypted and passes other values through.
   * Empty, null and undefined inputs yield undefined.
   */
  private decryptIfNeeded(value: string | null | undefined): string | undefined {
    if (!value) {
      return undefined;
    }

    if (this.crypto.isEncrypted(value)) {
      return this.crypto.decrypt(value);
    }

    return value;
  }

  /**
   * Decrypt a stored gateway token, returning null instead of throwing so
   * that one undecryptable row cannot fail token validation for every
   * other container.
   */
  private decryptContainerToken(value: string): string | null {
    try {
      return this.decryptIfNeeded(value) ?? null;
    } catch {
      return null;
    }
  }

  /**
   * Compare two tokens with timingSafeEqual. The length check comes first
   * because timingSafeEqual requires buffers of equal length.
   */
  private tokensEqual(left: string, right: string): boolean {
    const leftBuffer = Buffer.from(left, "utf8");
    const rightBuffer = Buffer.from(right, "utf8");

    if (leftBuffer.length !== rightBuffer.length) {
      return false;
    }

    return timingSafeEqual(leftBuffer, rightBuffer);
  }

  /** Type guard: true when the entry is a non-null object with a string `id`. */
  private hasModelId(modelEntry: unknown): modelEntry is { id: string } {
    if (typeof modelEntry !== "object" || modelEntry === null || !("id" in modelEntry)) {
      return false;
    }

    return typeof (modelEntry as { id?: unknown }).id === "string";
  }
}

View File

@@ -39,6 +39,7 @@ import { JobStepsModule } from "./job-steps/job-steps.module";
import { CoordinatorIntegrationModule } from "./coordinator-integration/coordinator-integration.module";
import { FederationModule } from "./federation/federation.module";
import { CredentialsModule } from "./credentials/credentials.module";
import { CryptoModule } from "./crypto/crypto.module";
import { MosaicTelemetryModule } from "./mosaic-telemetry";
import { SpeechModule } from "./speech/speech.module";
import { DashboardModule } from "./dashboard/dashboard.module";
@@ -50,6 +51,8 @@ import { TeamsModule } from "./teams/teams.module";
import { ImportModule } from "./import/import.module";
import { ConversationArchiveModule } from "./conversation-archive/conversation-archive.module";
import { RlsContextInterceptor } from "./common/interceptors/rls-context.interceptor";
import { AgentConfigModule } from "./agent-config/agent-config.module";
import { ContainerLifecycleModule } from "./container-lifecycle/container-lifecycle.module";
@Module({
imports: [
@@ -111,6 +114,7 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce
CoordinatorIntegrationModule,
FederationModule,
CredentialsModule,
CryptoModule,
MosaicTelemetryModule,
SpeechModule,
DashboardModule,
@@ -121,6 +125,8 @@ import { RlsContextInterceptor } from "./common/interceptors/rls-context.interce
TeamsModule,
ImportModule,
ConversationArchiveModule,
AgentConfigModule,
ContainerLifecycleModule,
],
controllers: [AppController, CsrfController],
providers: [

View File

@@ -0,0 +1,11 @@
import { Module } from "@nestjs/common";
import { PrismaModule } from "../prisma/prisma.module";
import { CryptoModule } from "../crypto/crypto.module";
import { ContainerLifecycleService } from "./container-lifecycle.service";
/**
 * Provides ContainerLifecycleService, backed by the Prisma and Crypto
 * modules, and exports it for use by other modules.
 */
@Module({
  imports: [PrismaModule, CryptoModule],
  providers: [ContainerLifecycleService],
  exports: [ContainerLifecycleService],
})
export class ContainerLifecycleModule {}

View File

@@ -0,0 +1,593 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { ConfigService } from "@nestjs/config";
import type { PrismaService } from "../prisma/prisma.service";
import type { CryptoService } from "../crypto/crypto.service";
/** In-memory stand-in for a UserContainer row, as stored by the Prisma mock in this spec. */
interface MockUserContainerRecord {
  id: string;
  userId: string;
  containerId: string | null;
  containerName: string;
  gatewayPort: number | null;
  gatewayToken: string;
  status: string;
  lastActiveAt: Date | null;
  idleTimeoutMin: number;
  config: Record<string, unknown>;
  createdAt: Date;
  updatedAt: Date;
}
// In-memory fake of the dockerode client. It is built inside vi.hoisted so
// that it already exists when the vi.mock("dockerode") factory runs.
const dockerMock = vi.hoisted(() => {
  interface MockDockerContainerState {
    id: string;
    name: string;
    running: boolean;
    port: number;
  }

  interface MockContainerHandle {
    inspect: ReturnType<typeof vi.fn>;
    start: ReturnType<typeof vi.fn>;
    stop: ReturnType<typeof vi.fn>;
  }

  const containers = new Map<string, MockDockerContainerState>();
  const handles = new Map<string, MockContainerHandle>();

  // Looks up a container's state, throwing a 404-shaped error when it is unknown.
  const requireContainer = (id: string): MockDockerContainerState => {
    const state = containers.get(id);
    if (!state) {
      throw { statusCode: 404 };
    }
    return state;
  };

  // Returns the cached handle for an id, creating it on first use so that
  // tests can assert on the same spies the service called.
  const ensureHandle = (id: string): MockContainerHandle => {
    let handle = handles.get(id);

    if (!handle) {
      handle = {
        inspect: vi.fn(async () => {
          const state = requireContainer(id);
          return {
            Id: state.id,
            State: {
              Running: state.running,
            },
            NetworkSettings: {
              Ports: {
                "18789/tcp": [{ HostPort: String(state.port) }],
              },
            },
          };
        }),
        start: vi.fn(async () => {
          requireContainer(id).running = true;
        }),
        stop: vi.fn(async () => {
          requireContainer(id).running = false;
        }),
      };
      handles.set(id, handle);
    }

    return handle;
  };

  // Lists known containers; only the first name filter is honoured, as a substring match.
  const listContainers = vi.fn(
    async (options?: { all?: boolean; filters?: { name?: string[] } }) => {
      const nameFilter = options?.filters?.name?.[0];
      const known = [...containers.values()];
      const matching = nameFilter
        ? known.filter((state) => state.name.includes(nameFilter))
        : known;

      return matching.map((state) => ({
        Id: state.id,
        Names: [`/${state.name}`],
      }));
    }
  );

  const getContainer = vi.fn((id: string) => ensureHandle(id));

  // Registers a stopped container; the port is read from the 18789/tcp host binding (0 when absent).
  const createContainer = vi.fn(
    async (options: {
      name?: string;
      HostConfig?: { PortBindings?: Record<string, Array<{ HostPort?: string }>> };
    }) => {
      const id = `ctr-${containers.size + 1}`;
      const hostPort = options.HostConfig?.PortBindings?.["18789/tcp"]?.[0]?.HostPort;

      containers.set(id, {
        id,
        name: options.name ?? id,
        running: false,
        port: hostPort ? Number.parseInt(hostPort, 10) : 0,
      });

      return ensureHandle(id);
    }
  );

  const dockerInstance = {
    listContainers,
    getContainer,
    createContainer,
  };

  const constructorSpy = vi.fn();

  // `new Docker(options)` records its options and always yields the shared fake instance.
  class DockerConstructorMock {
    constructor(options?: unknown) {
      constructorSpy(options);
      return dockerInstance;
    }
  }

  // Seeds a pre-existing container (stored as a copy) together with its handle.
  const registerContainer = (container: MockDockerContainerState) => {
    containers.set(container.id, { ...container });
    ensureHandle(container.id);
  };

  // Drops all state and call history between tests.
  const reset = () => {
    containers.clear();
    handles.clear();
    for (const spy of [constructorSpy, listContainers, getContainer, createContainer]) {
      spy.mockClear();
    }
  };

  return {
    DockerConstructorMock,
    constructorSpy,
    createContainer,
    handles,
    registerContainer,
    reset,
  };
});
// Replace the dockerode default export with the in-memory fake constructor,
// so the service under test (imported below) never talks to a real Docker daemon.
vi.mock("dockerode", () => ({
default: dockerMock.DockerConstructorMock,
}));
import { ContainerLifecycleService } from "./container-lifecycle.service";
/**
 * Minimal ConfigService stand-in: `get(key)` returns the matching entry of
 * `values`, or undefined for keys that were not provided.
 */
function createConfigMock(values: Record<string, string> = {}) {
  const get = vi.fn((key: string) => values[key]);
  return { get };
}
/**
 * CryptoService stand-in with a transparent scheme: "encrypting" prefixes the
 * value with "enc:", "decrypting" strips that prefix, and every generated
 * token is the fixed string "generated-token".
 */
function createCryptoMock() {
  const generateToken = vi.fn(() => "generated-token");
  const encrypt = vi.fn((value: string) => `enc:${value}`);
  const decrypt = vi.fn((value: string) => value.replace(/^enc:/, ""));
  const isEncrypted = vi.fn((value: string) => value.startsWith("enc:"));

  return { generateToken, encrypt, decrypt, isEncrypted };
}
/**
 * Mimics a Prisma `select` projection on a mock record.
 *
 * @param record the stored record
 * @param select field names mapped to booleans; only fields set to true are copied
 * @returns a shallow copy of the whole record when `select` is omitted,
 *          otherwise a new object holding just the selected fields
 */
function projectRecord(
  record: MockUserContainerRecord,
  select?: Record<string, boolean>
): Partial<MockUserContainerRecord> {
  if (select === undefined) {
    return { ...record };
  }

  const projection: Partial<MockUserContainerRecord> = {};
  // Loosely-typed views so fields can be copied by their string name.
  const target = projection as Record<string, unknown>;
  const source = record as unknown as Record<string, unknown>;

  for (const field of Object.keys(select)) {
    if (!select[field]) {
      continue;
    }
    target[field] = source[field];
  }

  return projection;
}
/**
 * Builds an in-memory fake of the `userContainer` Prisma delegate.
 *
 * Records are stored in a Map keyed by `userId`; the Map is returned next to
 * the fake so tests can inspect the stored state directly.
 *
 * @param initialRecords records to seed the store with (copied, not aliased)
 */
function createPrismaMock(initialRecords: MockUserContainerRecord[] = []) {
  const records = new Map<string, MockUserContainerRecord>(
    initialRecords.map((record): [string, MockUserContainerRecord] => [
      record.userId,
      { ...record },
    ])
  );

  // Merges `patch` into the record stored for `userId` and refreshes updatedAt.
  // Returns the stored result, or null when the user has no record.
  const applyPatch = (
    userId: string,
    patch: Partial<MockUserContainerRecord>
  ): MockUserContainerRecord | null => {
    const current = records.get(userId);
    if (!current) {
      return null;
    }

    const updated: MockUserContainerRecord = {
      ...current,
      ...patch,
      updatedAt: new Date(),
    };
    records.set(updated.userId, updated);
    return updated;
  };

  const userContainer = {
    // Looks up by userId when given, otherwise by record id.
    findUnique: vi.fn(
      async (args: {
        where: { userId?: string; id?: string };
        select?: Record<string, boolean>;
      }) => {
        const { userId, id } = args.where;
        let record: MockUserContainerRecord | undefined;

        if (userId) {
          record = records.get(userId);
        } else if (id) {
          record = [...records.values()].find((entry) => entry.id === id);
        }

        return record ? projectRecord(record, args.select) : null;
      }
    ),

    // Stores a new record, filling the same defaults the mock schema uses.
    create: vi.fn(
      async (args: {
        data: Partial<MockUserContainerRecord> & {
          userId: string;
          containerName: string;
          gatewayToken: string;
        };
      }) => {
        const { data } = args;
        const now = new Date();
        const next: MockUserContainerRecord = {
          id: data.id ?? `uc-${records.size + 1}`,
          userId: data.userId,
          containerId: data.containerId ?? null,
          containerName: data.containerName,
          gatewayPort: data.gatewayPort ?? null,
          gatewayToken: data.gatewayToken,
          status: data.status ?? "stopped",
          lastActiveAt: data.lastActiveAt ?? null,
          idleTimeoutMin: data.idleTimeoutMin ?? 30,
          config: data.config ?? {},
          createdAt: now,
          updatedAt: now,
        };
        records.set(next.userId, next);
        return { ...next };
      }
    ),

    // Throws when the record is missing.
    update: vi.fn(
      async (args: { where: { userId: string }; data: Partial<MockUserContainerRecord> }) => {
        const updated = applyPatch(args.where.userId, args.data);
        if (!updated) {
          throw new Error(`Record ${args.where.userId} not found`);
        }
        return { ...updated };
      }
    ),

    // Same merge as `update`, but reports a count instead of throwing.
    updateMany: vi.fn(
      async (args: { where: { userId: string }; data: Partial<MockUserContainerRecord> }) => {
        const updated = applyPatch(args.where.userId, args.data);
        return { count: updated ? 1 : 0 };
      }
    ),

    // Supports only the filters listed in the parameter type: exact status,
    // and `{ not: null }` on lastActiveAt / gatewayPort.
    findMany: vi.fn(
      async (args?: {
        where?: {
          status?: string;
          lastActiveAt?: { not: null };
          gatewayPort?: { not: null };
        };
        select?: Record<string, boolean>;
      }) => {
        const where = args?.where;
        const requireLastActive = where?.lastActiveAt?.not === null;
        const requirePort = where?.gatewayPort?.not === null;

        return [...records.values()]
          .filter((record) => !where?.status || record.status === where.status)
          .filter((record) => !requireLastActive || record.lastActiveAt !== null)
          .filter((record) => !requirePort || record.gatewayPort !== null)
          .map((record) => projectRecord(record, args?.select));
      }
    ),
  };

  return {
    prisma: {
      userContainer,
    },
    records,
  };
}
/**
 * Builds a complete mock UserContainer record for seeding the Prisma mock.
 *
 * Any field left undefined (or null) in `overrides` receives its default:
 * a stopped container for "user-default" with no container id or port,
 * a 30-minute idle timeout, an empty config, and matching timestamps.
 */
function createRecord(overrides: Partial<MockUserContainerRecord>): MockUserContainerRecord {
  const {
    id,
    userId,
    containerId,
    containerName,
    gatewayPort,
    gatewayToken,
    status,
    lastActiveAt,
    idleTimeoutMin,
    config,
    createdAt,
    updatedAt,
  } = overrides;

  // One shared instant so the default createdAt and updatedAt are the same Date.
  const now = new Date();

  return {
    id: id ?? "uc-default",
    userId: userId ?? "user-default",
    containerId: containerId ?? null,
    containerName: containerName ?? "mosaic-user-user-default",
    gatewayPort: gatewayPort ?? null,
    gatewayToken: gatewayToken ?? "enc:token-default",
    status: status ?? "stopped",
    lastActiveAt: lastActiveAt ?? null,
    idleTimeoutMin: idleTimeoutMin ?? 30,
    config: config ?? {},
    createdAt: createdAt ?? now,
    updatedAt: updatedAt ?? now,
  };
}
describe("ContainerLifecycleService", () => {
  /**
   * Wires a ContainerLifecycleService to fresh Prisma, crypto and config mocks.
   *
   * @param initialRecords rows to seed the Prisma mock with (none when omitted)
   * @returns the service under test and the mock's backing record map,
   *          which the tests read to assert on persisted state
   */
  const buildService = (initialRecords?: Parameters<typeof createPrismaMock>[0]) => {
    const { prisma, records } = createPrismaMock(initialRecords);
    const crypto = createCryptoMock();
    const config = createConfigMock();
    const service = new ContainerLifecycleService(
      prisma as unknown as PrismaService,
      crypto as unknown as CryptoService,
      config as unknown as ConfigService
    );
    return { service, records };
  };

  beforeEach(() => {
    dockerMock.reset();
  });

  it("ensureRunning creates container when none exists", async () => {
    const { service, records } = buildService();

    const result = await service.ensureRunning("user-1");

    expect(result).toEqual({
      url: "http://mosaic-user-user-1:19000",
      token: "generated-token",
    });

    // The DB row must reflect the freshly created container.
    const updatedRecord = records.get("user-1");
    expect(updatedRecord?.status).toBe("running");
    expect(updatedRecord?.containerId).toBe("ctr-1");
    expect(updatedRecord?.gatewayPort).toBe(19000);
    expect(updatedRecord?.gatewayToken).toBe("enc:generated-token");

    // The Docker create call must carry the expected name, image, volume, network and token.
    expect(dockerMock.createContainer).toHaveBeenCalledTimes(1);
    const [createCall] = dockerMock.createContainer.mock.calls[0] as [
      {
        name: string;
        Image: string;
        Env: string[];
        HostConfig: { Binds: string[]; NetworkMode: string };
      },
    ];
    expect(createCall.name).toBe("mosaic-user-user-1");
    expect(createCall.Image).toBe("alpine/openclaw:latest");
    expect(createCall.HostConfig.Binds).toEqual(["mosaic-user-user-1-state:/home/node/.openclaw"]);
    expect(createCall.HostConfig.NetworkMode).toBe("mosaic-internal");
    expect(createCall.Env).toContain("AGENT_TOKEN=generated-token");
  });

  it("ensureRunning starts existing stopped container", async () => {
    const { service, records } = buildService([
      createRecord({
        id: "uc-1",
        userId: "user-2",
        containerId: "ctr-stopped",
        containerName: "mosaic-user-user-2",
        gatewayToken: "enc:existing-token",
        status: "stopped",
      }),
    ]);
    dockerMock.registerContainer({
      id: "ctr-stopped",
      name: "mosaic-user-user-2",
      running: false,
      port: 19042,
    });

    const result = await service.ensureRunning("user-2");

    expect(result).toEqual({
      url: "http://mosaic-user-user-2:19042",
      token: "existing-token",
    });
    const handle = dockerMock.handles.get("ctr-stopped");
    expect(handle?.start).toHaveBeenCalledTimes(1);
    expect(records.get("user-2")?.status).toBe("running");
    expect(records.get("user-2")?.gatewayPort).toBe(19042);
  });

  it("ensureRunning returns existing running container", async () => {
    const { service } = buildService([
      createRecord({
        id: "uc-2",
        userId: "user-3",
        containerId: "ctr-running",
        containerName: "mosaic-user-user-3",
        gatewayPort: 19043,
        gatewayToken: "enc:running-token",
        status: "running",
      }),
    ]);
    dockerMock.registerContainer({
      id: "ctr-running",
      name: "mosaic-user-user-3",
      running: true,
      port: 19043,
    });

    const result = await service.ensureRunning("user-3");

    expect(result).toEqual({
      url: "http://mosaic-user-user-3:19043",
      token: "running-token",
    });
    // An already-running container must be neither re-created nor re-started.
    expect(dockerMock.createContainer).not.toHaveBeenCalled();
    const handle = dockerMock.handles.get("ctr-running");
    expect(handle?.start).not.toHaveBeenCalled();
  });

  it("stop gracefully stops container and updates DB", async () => {
    const { service, records } = buildService([
      createRecord({
        id: "uc-stop",
        userId: "user-stop",
        containerId: "ctr-stop",
        containerName: "mosaic-user-user-stop",
        gatewayPort: 19044,
        status: "running",
      }),
    ]);
    dockerMock.registerContainer({
      id: "ctr-stop",
      name: "mosaic-user-user-stop",
      running: true,
      port: 19044,
    });

    await service.stop("user-stop");

    const handle = dockerMock.handles.get("ctr-stop");
    expect(handle?.stop).toHaveBeenCalledWith({ t: 10 });
    const updatedRecord = records.get("user-stop");
    expect(updatedRecord?.status).toBe("stopped");
    expect(updatedRecord?.containerId).toBeNull();
    expect(updatedRecord?.gatewayPort).toBeNull();
  });

  it("reapIdle stops only containers past their idle timeout", async () => {
    const now = Date.now();
    const { service, records } = buildService([
      // Idle for 60 minutes with a 30-minute limit: must be reaped.
      createRecord({
        id: "uc-old",
        userId: "user-old",
        containerId: "ctr-old",
        containerName: "mosaic-user-user-old",
        gatewayPort: 19045,
        status: "running",
        lastActiveAt: new Date(now - 60 * 60 * 1000),
        idleTimeoutMin: 30,
      }),
      // Idle for 5 minutes with a 30-minute limit: must survive.
      createRecord({
        id: "uc-fresh",
        userId: "user-fresh",
        containerId: "ctr-fresh",
        containerName: "mosaic-user-user-fresh",
        gatewayPort: 19046,
        status: "running",
        lastActiveAt: new Date(now - 5 * 60 * 1000),
        idleTimeoutMin: 30,
      }),
    ]);
    dockerMock.registerContainer({
      id: "ctr-old",
      name: "mosaic-user-user-old",
      running: true,
      port: 19045,
    });
    dockerMock.registerContainer({
      id: "ctr-fresh",
      name: "mosaic-user-user-fresh",
      running: true,
      port: 19046,
    });

    const result = await service.reapIdle();

    expect(result).toEqual({
      stopped: ["user-old"],
    });
    expect(records.get("user-old")?.status).toBe("stopped");
    expect(records.get("user-fresh")?.status).toBe("running");
    const oldHandle = dockerMock.handles.get("ctr-old");
    const freshHandle = dockerMock.handles.get("ctr-fresh");
    expect(oldHandle?.stop).toHaveBeenCalledTimes(1);
    expect(freshHandle?.stop).not.toHaveBeenCalled();
  });

  it("touch updates lastActiveAt", async () => {
    const { service, records } = buildService([
      createRecord({
        id: "uc-touch",
        userId: "user-touch",
        containerName: "mosaic-user-user-touch",
        lastActiveAt: null,
      }),
    ]);

    await service.touch("user-touch");

    const updatedRecord = records.get("user-touch");
    expect(updatedRecord?.lastActiveAt).toBeInstanceOf(Date);
  });

  it("getStatus returns null for unknown user", async () => {
    const { service } = buildService();

    const status = await service.getStatus("missing-user");

    expect(status).toBeNull();
  });
});

View File

@@ -0,0 +1,532 @@
import { Injectable, Logger } from "@nestjs/common";
import { ConfigService } from "@nestjs/config";
import Docker from "dockerode";
import { PrismaService } from "../prisma/prisma.service";
import { CryptoService } from "../crypto/crypto.service";
// Docker socket used when the DOCKER_HOST config value is unset or blank.
const DEFAULT_DOCKER_SOCKET_PATH = "/var/run/docker.sock";
// Port used for tcp:// and http(s):// Docker hosts whose URL carries no explicit port.
const DEFAULT_DOCKER_TCP_PORT = 2375;
// Fallback for the OPENCLAW_IMAGE config value.
const DEFAULT_OPENCLAW_IMAGE = "alpine/openclaw:latest";
// Fallback for the OPENCLAW_NETWORK config value (used as the container's NetworkMode).
const DEFAULT_OPENCLAW_NETWORK = "mosaic-internal";
// Fallback for OPENCLAW_PORT_RANGE_START: first host port tried when allocating a gateway port.
const DEFAULT_OPENCLAW_PORT_RANGE_START = 19000;
// Fallback for the MOSAIC_API_URL config value, injected into containers as an env var.
const DEFAULT_MOSAIC_API_URL = "http://mosaic-api:3000/api";
// Docker "port/protocol" key that is exposed and host-bound at create time and
// looked up again in inspect output to find the published host port.
const OPENCLAW_GATEWAY_PORT_KEY = "18789/tcp";
// Path inside the container where the per-user state volume is mounted.
const OPENCLAW_STATE_PATH = "/home/node/.openclaw";
// Passed as the `t` option to the container stop call.
const CONTAINER_STOP_TIMEOUT_SECONDS = 10;
// Minimal container API this service relies on (inspect/start/stop).
// Objects returned by the Docker client are cast to this shape.
interface ContainerHandle {
inspect(): Promise<DockerInspect>;
start(): Promise<void>;
// `t` is forwarded to Docker's stop call as its timeout option.
stop(options?: { t?: number }): Promise<void>;
}
// Subset of a container inspect payload that this service reads.
// Every field is optional, so consumers must null-check each level.
interface DockerInspect {
Id?: string;
State?: {
Running?: boolean;
Health?: {
// Compared against "unhealthy" by the health check.
Status?: string;
};
};
NetworkSettings?: {
// Keyed by "port/protocol"; first source consulted for the gateway host port.
Ports?: Record<string, { HostPort?: string }[] | null>;
};
HostConfig?: {
// Keyed by "port/protocol"; fallback source for the gateway host port.
PortBindings?: Record<string, { HostPort?: string }[] | null>;
};
}
// Fields of a userContainer row that this service consumes.
interface UserContainerRecord {
id: string;
userId: string;
// Null while no Docker container is associated with the row.
containerId: string | null;
containerName: string;
// Null while no gateway port is recorded for the row.
gatewayPort: number | null;
// Stored token; decrypted before use when it is in encrypted form.
gatewayToken: string;
status: string;
lastActiveAt: Date | null;
// Idle limit in minutes, compared against lastActiveAt by the idle reaper.
idleTimeoutMin: number;
}
// Minimal fields needed to locate a Docker container: tried by ID first, then by name.
interface ContainerLookup {
containerId: string | null;
containerName: string;
}
// Manages the per-user OpenClaw Docker containers: creation, start/stop,
// idle reaping and health checks. Container state is mirrored in the
// userContainer table through Prisma.
@Injectable()
export class ContainerLifecycleService {
  // Highest valid TCP port number; port allocation never goes past it.
  private static readonly MAX_TCP_PORT = 65535;

  private readonly logger = new Logger(ContainerLifecycleService.name);
  private readonly docker: Docker;

  // Builds the Docker client from the optional DOCKER_HOST config value.
  constructor(
    private readonly prisma: PrismaService,
    private readonly crypto: CryptoService,
    private readonly config: ConfigService
  ) {
    const dockerHost = this.config.get<string>("DOCKER_HOST");
    this.docker = this.createDockerClient(dockerHost);
  }

  // Ensure a user's container is running. Creates if needed, starts if stopped.
  // Returns the container's internal URL and gateway token.
  // Throws when Docker does not report a container ID or a gateway port.
  async ensureRunning(userId: string): Promise<{ url: string; token: string }> {
    const containerRecord = await this.getOrCreateContainerRecord(userId);
    const token = this.getGatewayToken(containerRecord.gatewayToken);
    const existingContainer = await this.resolveContainer(containerRecord);

    let container: ContainerHandle;
    if (existingContainer) {
      container = existingContainer;
      const inspect = await container.inspect();
      if (!inspect.State?.Running) {
        await container.start();
      }
    } else {
      const port = await this.findAvailableGatewayPort();
      container = await this.createContainer(containerRecord, token, port);
      await container.start();
    }

    // Inspect again after start so ID and port reflect the live container.
    const inspect = await container.inspect();
    const containerId = inspect.Id;
    if (!containerId) {
      throw new Error(
        `Docker inspect did not return container ID for ${containerRecord.containerName}`
      );
    }

    const gatewayPort = this.extractGatewayPort(inspect);
    if (!gatewayPort) {
      throw new Error(`Could not determine gateway port for ${containerRecord.containerName}`);
    }

    const now = new Date();
    await this.prisma.userContainer.update({
      where: { userId },
      data: {
        containerId,
        gatewayPort,
        status: "running",
        lastActiveAt: now,
      },
    });

    // NOTE(review): the URL pairs the container name with the published host
    // port — confirm the gateway is reachable on that port from inside the network.
    return {
      url: `http://${containerRecord.containerName}:${String(gatewayPort)}`,
      token,
    };
  }

  // Stop a user's container and mark the DB row as stopped.
  // No-op when the user has no row. Docker "not found" (404) and
  // "already stopped" (304) responses are tolerated; other errors propagate.
  async stop(userId: string): Promise<void> {
    const containerRecord = await this.prisma.userContainer.findUnique({
      where: { userId },
    });
    if (!containerRecord) {
      return;
    }

    const container = await this.resolveContainer(containerRecord);
    if (container) {
      try {
        await container.stop({ t: CONTAINER_STOP_TIMEOUT_SECONDS });
      } catch (error) {
        if (!this.isDockerNotFound(error) && !this.isAlreadyStopped(error)) {
          throw error;
        }
      }
    }

    // NOTE(review): the container is stopped, not removed, yet its port is
    // released in the DB — confirm that port cannot be handed to another container.
    await this.prisma.userContainer.update({
      where: { userId },
      data: {
        status: "stopped",
        containerId: null,
        gatewayPort: null,
      },
    });
  }

  // Stop idle containers (called by cron/scheduler).
  // A container is idle once now - lastActiveAt reaches its idleTimeoutMin.
  // Failures for one user are logged and do not stop the sweep.
  // Returns the user IDs whose containers were stopped.
  async reapIdle(): Promise<{ stopped: string[] }> {
    const now = Date.now();
    const runningContainers = await this.prisma.userContainer.findMany({
      where: {
        status: "running",
        lastActiveAt: { not: null },
      },
      select: {
        userId: true,
        lastActiveAt: true,
        idleTimeoutMin: true,
      },
    });

    const stopped: string[] = [];
    for (const container of runningContainers) {
      const lastActiveAt = container.lastActiveAt;
      if (!lastActiveAt) {
        continue;
      }

      const idleLimitMs = container.idleTimeoutMin * 60 * 1000;
      if (now - lastActiveAt.getTime() < idleLimitMs) {
        continue;
      }

      try {
        await this.stop(container.userId);
        stopped.push(container.userId);
      } catch (error) {
        this.logger.warn(
          `Failed to stop idle container for user ${container.userId}: ${this.getErrorMessage(error)}`
        );
      }
    }

    return { stopped };
  }

  // Health check all containers whose DB status is "running".
  // A container is healthy when Docker reports it running and its health
  // status is not "unhealthy". Any Docker error for a single container,
  // including one raised while locating it, is reported for that container
  // only and never aborts the sweep.
  async healthCheckAll(): Promise<{ userId: string; healthy: boolean; error?: string }[]> {
    const runningContainers = await this.prisma.userContainer.findMany({
      where: {
        status: "running",
      },
      select: {
        userId: true,
        containerId: true,
        containerName: true,
      },
    });

    const results: { userId: string; healthy: boolean; error?: string }[] = [];
    for (const containerRecord of runningContainers) {
      try {
        // Lookup happens inside the try so a lookup failure is recorded per user.
        const container = await this.resolveContainer(containerRecord);
        if (!container) {
          results.push({
            userId: containerRecord.userId,
            healthy: false,
            error: "Container not found",
          });
          continue;
        }

        const inspect = await container.inspect();
        const isRunning = inspect.State?.Running === true;
        const healthState = inspect.State?.Health?.Status;
        const healthy = isRunning && healthState !== "unhealthy";
        if (healthy) {
          results.push({
            userId: containerRecord.userId,
            healthy: true,
          });
          continue;
        }

        results.push({
          userId: containerRecord.userId,
          healthy: false,
          error:
            healthState === "unhealthy" ? "Container healthcheck failed" : "Container not running",
        });
      } catch (error) {
        results.push({
          userId: containerRecord.userId,
          healthy: false,
          error: this.getErrorMessage(error),
        });
      }
    }

    return results;
  }

  // Restart a container with fresh config (for config updates):
  // stop it, then run the normal ensureRunning path.
  async restart(userId: string): Promise<void> {
    await this.stop(userId);
    await this.ensureRunning(userId);
  }

  // Update lastActiveAt timestamp (called on each chat request).
  // Uses updateMany, so a user without a row is silently ignored.
  async touch(userId: string): Promise<void> {
    await this.prisma.userContainer.updateMany({
      where: { userId },
      data: {
        lastActiveAt: new Date(),
      },
    });
  }

  // Get container status for a user, or null when the user has no row.
  // `port` and `lastActive` are only present when the stored values are non-null.
  async getStatus(
    userId: string
  ): Promise<{ status: string; port?: number; lastActive?: Date } | null> {
    const container = await this.prisma.userContainer.findUnique({
      where: { userId },
      select: {
        status: true,
        gatewayPort: true,
        lastActiveAt: true,
      },
    });
    if (!container) {
      return null;
    }

    const status: { status: string; port?: number; lastActive?: Date } = {
      status: container.status,
    };
    if (container.gatewayPort !== null) {
      status.port = container.gatewayPort;
    }
    if (container.lastActiveAt !== null) {
      status.lastActive = container.lastActiveAt;
    }
    return status;
  }

  // Build a Docker client from a DOCKER_HOST-style value:
  //   unset/blank         -> default unix socket
  //   unix://<path>       -> that socket path
  //   tcp://host[:port]   -> plain HTTP to host:port
  //   http(s)://host[:p]  -> HTTP or HTTPS to host:port
  //   anything else       -> treated as a socket path
  private createDockerClient(dockerHost?: string): Docker {
    if (!dockerHost || dockerHost.trim().length === 0) {
      return new Docker({ socketPath: DEFAULT_DOCKER_SOCKET_PATH });
    }

    if (dockerHost.startsWith("unix://")) {
      return new Docker({ socketPath: dockerHost.slice("unix://".length) });
    }

    if (dockerHost.startsWith("tcp://")) {
      // Swap the scheme so the URL parser extracts host and port for us.
      const parsed = new URL(dockerHost.replace("tcp://", "http://"));
      return new Docker({
        host: parsed.hostname,
        port: this.parseInteger(parsed.port, DEFAULT_DOCKER_TCP_PORT),
        protocol: "http",
      });
    }

    if (dockerHost.startsWith("http://") || dockerHost.startsWith("https://")) {
      const parsed = new URL(dockerHost);
      const protocol = parsed.protocol.replace(":", "");
      return new Docker({
        host: parsed.hostname,
        port: this.parseInteger(parsed.port, DEFAULT_DOCKER_TCP_PORT),
        protocol: protocol === "https" ? "https" : "http",
      });
    }

    return new Docker({ socketPath: dockerHost });
  }

  // Return the user's row, creating it (status "stopped", freshly generated
  // and encrypted gateway token) when none exists yet.
  private async getOrCreateContainerRecord(userId: string): Promise<UserContainerRecord> {
    const existingContainer = await this.prisma.userContainer.findUnique({
      where: { userId },
    });
    if (existingContainer) {
      return existingContainer;
    }

    const token = this.crypto.generateToken();
    const containerName = this.getContainerName(userId);
    return this.prisma.userContainer.create({
      data: {
        userId,
        containerName,
        gatewayToken: this.crypto.encrypt(token),
        status: "stopped",
      },
    });
  }

  // Docker container name for a user.
  private getContainerName(userId: string): string {
    return `mosaic-user-${userId}`;
  }

  // Name of the Docker volume that holds a user's container state.
  private getVolumeName(userId: string): string {
    return `mosaic-user-${userId}-state`;
  }

  // Image for user containers: OPENCLAW_IMAGE config value or the default.
  private getOpenClawImage(): string {
    return this.config.get<string>("OPENCLAW_IMAGE") ?? DEFAULT_OPENCLAW_IMAGE;
  }

  // Docker network for user containers: OPENCLAW_NETWORK config value or the default.
  private getOpenClawNetwork(): string {
    return this.config.get<string>("OPENCLAW_NETWORK") ?? DEFAULT_OPENCLAW_NETWORK;
  }

  // API base URL injected into containers: MOSAIC_API_URL config value or the default.
  private getMosaicApiUrl(): string {
    return this.config.get<string>("MOSAIC_API_URL") ?? DEFAULT_MOSAIC_API_URL;
  }

  // First port to try when allocating a gateway port:
  // OPENCLAW_PORT_RANGE_START config value, or the default when unset/unparsable.
  private getPortRangeStart(): number {
    return this.parseInteger(
      this.config.get<string>("OPENCLAW_PORT_RANGE_START"),
      DEFAULT_OPENCLAW_PORT_RANGE_START
    );
  }

  // Locate the Docker container for a record: by stored ID first (when it
  // still exists), then by container name. Returns null when neither matches.
  private async resolveContainer(record: ContainerLookup): Promise<ContainerHandle | null> {
    if (record.containerId) {
      const byId = this.docker.getContainer(record.containerId) as unknown as ContainerHandle;
      if (await this.containerExists(byId)) {
        return byId;
      }
    }

    const byName = await this.findContainerByName(record.containerName);
    if (byName) {
      return byName;
    }

    return null;
  }

  // Find a container (running or not) whose name is exactly `containerName`.
  // The list is pre-filtered by name, but only an exact match is accepted
  // here: a substring match would let "mosaic-user-user-1" resolve to
  // "mosaic-user-user-10", i.e. to another user's container.
  private async findContainerByName(containerName: string): Promise<ContainerHandle | null> {
    const containers = await this.docker.listContainers({
      all: true,
      filters: {
        name: [containerName],
      },
    });

    // Accept the name both with and without a leading "/".
    const slashPrefixedName = `/${containerName}`;
    const match = containers.find((container) => {
      const names = container.Names;
      return names.some((name) => name === slashPrefixedName || name === containerName);
    });
    if (!match?.Id) {
      return null;
    }

    return this.docker.getContainer(match.Id) as unknown as ContainerHandle;
  }

  // True when inspect succeeds, false on Docker 404; other errors propagate.
  private async containerExists(container: ContainerHandle): Promise<boolean> {
    try {
      await container.inspect();
      return true;
    } catch (error) {
      if (this.isDockerNotFound(error)) {
        return false;
      }
      throw error;
    }
  }

  // Create (but do not start) the user's container with the API URL, gateway
  // token and record ID in its environment, the state volume mounted, and the
  // gateway port bound to `gatewayPort` on the host.
  private async createContainer(
    containerRecord: UserContainerRecord,
    token: string,
    gatewayPort: number
  ): Promise<ContainerHandle> {
    const container = await this.docker.createContainer({
      name: containerRecord.containerName,
      Image: this.getOpenClawImage(),
      Env: [
        `MOSAIC_API_URL=${this.getMosaicApiUrl()}`,
        `AGENT_TOKEN=${token}`,
        `AGENT_ID=${containerRecord.id}`,
      ],
      ExposedPorts: {
        [OPENCLAW_GATEWAY_PORT_KEY]: {},
      },
      HostConfig: {
        Binds: [`${this.getVolumeName(containerRecord.userId)}:${OPENCLAW_STATE_PATH}`],
        PortBindings: {
          [OPENCLAW_GATEWAY_PORT_KEY]: [{ HostPort: String(gatewayPort) }],
        },
        NetworkMode: this.getOpenClawNetwork(),
      },
    });

    return container as unknown as ContainerHandle;
  }

  // Read the gateway's host port from inspect output: NetworkSettings.Ports
  // first, then HostConfig.PortBindings. Returns null when no usable port
  // is present (missing, unparsable, or 0).
  private extractGatewayPort(inspect: DockerInspect): number | null {
    const networkPort = inspect.NetworkSettings?.Ports?.[OPENCLAW_GATEWAY_PORT_KEY]?.[0]?.HostPort;
    if (networkPort) {
      return this.parseInteger(networkPort, 0) || null;
    }

    const hostPort = inspect.HostConfig?.PortBindings?.[OPENCLAW_GATEWAY_PORT_KEY]?.[0]?.HostPort;
    if (hostPort) {
      return this.parseInteger(hostPort, 0) || null;
    }

    return null;
  }

  // Pick the lowest port at or above the configured range start that no
  // userContainer row currently records as its gateway port.
  // Throws when every port up to the TCP maximum is taken.
  private async findAvailableGatewayPort(): Promise<number> {
    const usedPorts = await this.prisma.userContainer.findMany({
      where: {
        gatewayPort: { not: null },
      },
      select: {
        gatewayPort: true,
      },
    });

    const takenPorts = new Set<number>();
    for (const entry of usedPorts) {
      if (entry.gatewayPort !== null) {
        takenPorts.add(entry.gatewayPort);
      }
    }

    const rangeStart = this.getPortRangeStart();
    let candidate = rangeStart;
    while (takenPorts.has(candidate)) {
      candidate += 1;
    }

    // Fail loudly instead of handing Docker an impossible port number.
    if (candidate > ContainerLifecycleService.MAX_TCP_PORT) {
      throw new Error(
        `No gateway port available between ${String(rangeStart)} and ${String(ContainerLifecycleService.MAX_TCP_PORT)}`
      );
    }

    return candidate;
  }

  // Return the plaintext gateway token: decrypts values in encrypted form,
  // passes any other stored value through unchanged.
  private getGatewayToken(storedToken: string): string {
    if (this.crypto.isEncrypted(storedToken)) {
      return this.crypto.decrypt(storedToken);
    }

    return storedToken;
  }

  // Parse a base-10 integer, returning `fallback` for missing, empty or
  // non-numeric input.
  private parseInteger(value: string | undefined, fallback: number): number {
    if (!value) {
      return fallback;
    }

    const parsed = Number.parseInt(value, 10);
    return Number.isFinite(parsed) ? parsed : fallback;
  }

  // True when the error carries HTTP status 404.
  private isDockerNotFound(error: unknown): boolean {
    return this.getDockerStatusCode(error) === 404;
  }

  // True when the error carries HTTP status 304.
  private isAlreadyStopped(error: unknown): boolean {
    return this.getDockerStatusCode(error) === 304;
  }

  // Extract a numeric `statusCode` property from an unknown error value,
  // or null when the value has none.
  private getDockerStatusCode(error: unknown): number | null {
    if (typeof error !== "object" || error === null || !("statusCode" in error)) {
      return null;
    }

    const statusCode = error.statusCode;
    return typeof statusCode === "number" ? statusCode : null;
  }

  // Message of an Error instance, or "Unknown error" for any other thrown value.
  private getErrorMessage(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }

    return "Unknown error";
  }
}

View File

@@ -0,0 +1,10 @@
import { Module } from "@nestjs/common";
import { ConfigModule } from "@nestjs/config";
import { CryptoService } from "./crypto.service";
// Nest module that provides CryptoService and exports it to importing modules.
// ConfigModule is imported because CryptoService's constructor takes a ConfigService.
@Module({
imports: [ConfigModule],
providers: [CryptoService],
exports: [CryptoService],
})
export class CryptoModule {}

View File

@@ -0,0 +1,71 @@
import { describe, it, expect, beforeEach } from "vitest";
import { ConfigService } from "@nestjs/config";
import { CryptoService } from "./crypto.service";
/**
 * Minimal ConfigService stand-in for tests.
 *
 * Its `get` returns `secret` for the "MOSAIC_SECRET_KEY" key and `undefined`
 * for every other key.
 */
function createConfigService(secret?: string): ConfigService {
  const lookup = (key: string) => (key === "MOSAIC_SECRET_KEY" ? secret : undefined);
  const stub = { get: lookup };
  return stub as unknown as ConfigService;
}
describe("CryptoService", () => {
  let service: CryptoService;

  beforeEach(() => {
    service = new CryptoService(createConfigService("this-is-a-test-secret-key-with-32+chars"));
  });

  it("encrypt -> decrypt roundtrip", () => {
    const plaintext = "my-secret-api-key";
    const encrypted = service.encrypt(plaintext);
    const decrypted = service.decrypt(encrypted);
    expect(encrypted.startsWith("enc:")).toBe(true);
    expect(decrypted).toBe(plaintext);
  });

  it("decrypt rejects tampered ciphertext", () => {
    const encrypted = service.encrypt("sensitive-token");
    // Strip the "enc:" prefix, flip every bit of the last payload byte, re-encode.
    const payload = encrypted.slice(4);
    const bytes = Buffer.from(payload, "base64");
    bytes[bytes.length - 1] = bytes[bytes.length - 1]! ^ 0xff;
    const tampered = `enc:${bytes.toString("base64")}`;
    expect(() => service.decrypt(tampered)).toThrow();
  });

  it("decrypt rejects non-encrypted string", () => {
    expect(() => service.decrypt("plain-text-value")).toThrow();
  });

  it("isEncrypted detects prefix correctly", () => {
    expect(service.isEncrypted("enc:abc")).toBe(true);
    expect(service.isEncrypted("ENC:abc")).toBe(false);
    expect(service.isEncrypted("plain-text")).toBe(false);
  });

  it("generateToken returns 64-char hex string", () => {
    const token = service.generateToken();
    expect(token).toMatch(/^[0-9a-f]{64}$/);
  });

  it("different plaintexts produce different ciphertexts", () => {
    const encryptedA = service.encrypt("value-a");
    const encryptedB = service.encrypt("value-b");
    expect(encryptedA).not.toBe(encryptedB);
  });

  // Only encrypting the SAME plaintext twice exercises the random IV:
  // different plaintexts would differ even if the IV were fixed.
  it("same plaintext produces different ciphertexts (random IV)", () => {
    const plaintext = "same-value";
    const first = service.encrypt(plaintext);
    const second = service.encrypt(plaintext);
    expect(first).not.toBe(second);
    expect(service.decrypt(first)).toBe(plaintext);
    expect(service.decrypt(second)).toBe(plaintext);
  });

  it("missing MOSAIC_SECRET_KEY throws on construction", () => {
    expect(() => new CryptoService(createConfigService(undefined))).toThrow();
  });
});

View File

@@ -0,0 +1,82 @@
import { Injectable } from "@nestjs/common";
import { ConfigService } from "@nestjs/config";
import { createCipheriv, createDecipheriv, hkdfSync, randomBytes } from "crypto";
// Cipher name passed to createCipheriv / createDecipheriv.
const ALGORITHM = "aes-256-gcm";
// Marker prepended to every encrypted value; isEncrypted() checks for it.
const ENCRYPTED_PREFIX = "enc:";
// Size in bytes of the random IV generated for each encryption.
const IV_LENGTH = 12;
// Number of trailing payload bytes treated as the auth tag on decrypt.
const AUTH_TAG_LENGTH = 16;
// Size in bytes of the key derived with HKDF.
const DERIVED_KEY_LENGTH = 32;
// Fixed salt and info inputs for the HKDF-SHA256 key derivation.
const HKDF_SALT = "mosaic.crypto.v1";
const HKDF_INFO = "mosaic-db-secret-encryption";
/**
 * Symmetric encryption helper for secrets stored in the database.
 *
 * The key is derived once, at construction, from the MOSAIC_SECRET_KEY config
 * value via HKDF-SHA256. Encrypted values have the form
 * `enc:` + base64(iv || ciphertext || authTag).
 */
@Injectable()
export class CryptoService {
  private readonly key: Buffer;

  /**
   * @throws Error when MOSAIC_SECRET_KEY is missing or shorter than 32 characters
   */
  constructor(private readonly config: ConfigService) {
    const configuredSecret = this.config.get<string>("MOSAIC_SECRET_KEY");
    if (!configuredSecret) {
      throw new Error("MOSAIC_SECRET_KEY environment variable is required");
    }
    if (configuredSecret.length < 32) {
      throw new Error("MOSAIC_SECRET_KEY must be at least 32 characters");
    }

    const inputKeyMaterial = Buffer.from(configuredSecret, "utf8");
    const salt = Buffer.from(HKDF_SALT, "utf8");
    const info = Buffer.from(HKDF_INFO, "utf8");
    const derived = hkdfSync("sha256", inputKeyMaterial, salt, info, DERIVED_KEY_LENGTH);
    this.key = Buffer.from(derived);
  }

  /**
   * Encrypts a UTF-8 string under a freshly generated random IV.
   *
   * @returns the prefixed, base64-encoded payload (iv, ciphertext, auth tag)
   */
  encrypt(plaintext: string): string {
    const nonce = randomBytes(IV_LENGTH);
    const cipher = createCipheriv(ALGORITHM, this.key, nonce);
    const head = cipher.update(plaintext, "utf8");
    const tail = cipher.final();
    // The auth tag is read after final().
    const tag = cipher.getAuthTag();
    const encoded = Buffer.concat([nonce, Buffer.concat([head, tail]), tag]).toString("base64");
    return `${ENCRYPTED_PREFIX}${encoded}`;
  }

  /**
   * Reverses {@link encrypt}.
   *
   * @throws Error "Value is not encrypted" when the prefix is missing
   * @throws Error "Failed to decrypt value" for any other failure
   *         (payload too short, authentication failure, ...)
   */
  decrypt(encrypted: string): string {
    if (!this.isEncrypted(encrypted)) {
      throw new Error("Value is not encrypted");
    }

    const encoded = encrypted.slice(ENCRYPTED_PREFIX.length);
    try {
      const raw = Buffer.from(encoded, "base64");
      if (raw.length < IV_LENGTH + AUTH_TAG_LENGTH) {
        throw new Error("Encrypted payload is too short");
      }

      // Layout: [ iv | ciphertext | auth tag ]
      const tagOffset = raw.length - AUTH_TAG_LENGTH;
      const nonce = raw.subarray(0, IV_LENGTH);
      const tag = raw.subarray(tagOffset);
      const body = raw.subarray(IV_LENGTH, tagOffset);

      const decipher = createDecipheriv(ALGORITHM, this.key, nonce);
      decipher.setAuthTag(tag);
      const head = decipher.update(body);
      const tail = decipher.final();
      return Buffer.concat([head, tail]).toString("utf8");
    } catch {
      // Every failure inside the try is reported with the same message.
      throw new Error("Failed to decrypt value");
    }
  }

  /** True when `value` starts with the encrypted-value prefix (case-sensitive). */
  isEncrypted(value: string): boolean {
    return value.startsWith(ENCRYPTED_PREFIX);
  }

  /** Returns 32 random bytes encoded as a 64-character lowercase hex string. */
  generateToken(): string {
    const entropy = randomBytes(32);
    return entropy.toString("hex");
  }
}

114
docs/PRD-MS22.md Normal file
View File

@@ -0,0 +1,114 @@
# PRD: MS22 — Fleet Evolution (DB-Centric Agent Architecture)
## Metadata
- Owner: Jason Woltje
- Date: 2026-03-01
- Status: in-progress
- Design Doc: `docs/design/MS22-DB-CENTRIC-ARCHITECTURE.md`
## Problem Statement
Mosaic Stack needs a multi-user agent fleet where each user gets their own isolated OpenClaw instance with their own LLM provider credentials and agent config. The system must be Docker-first with minimal environment variables and all configuration managed through the WebUI.
## Objectives
1. **Minimal bootstrap** — 2 env vars (`DATABASE_URL`, `MOSAIC_SECRET_KEY`) to start the entire stack
2. **DB-centric config** — All runtime config in Postgres, managed via WebUI
3. **Per-user isolation** — Each user gets their own OpenClaw container with own API keys, memory, sessions
4. **Onboarding wizard** — First-boot experience: breakglass admin → OIDC → LLM provider → agent config
5. **Settings UI** — Runtime management of providers, agents, and auth config
6. **Mosaic as gatekeeper** — Users never talk to OpenClaw directly; Mosaic proxies all requests
7. **Zero cross-user access** — Full container, volume, and DB isolation between users
## Security Requirements
- User A cannot access User B's API keys, chat history, or agent memory
- All API keys stored encrypted (AES-256-GCM) in database
- Breakglass admin always works as OIDC fallback
- OIDC config stored in DB (not env vars) — configured via settings UI
- Container-to-container communication blocked by default
- Admin cannot decrypt other users' API keys
## Phase 0: Knowledge Layer — COMPLETE
- Findings API (pgvector, CRUD, similarity search)
- AgentMemory API (key/value store)
- ConversationArchive API (pgvector, ingest, search)
- OpenClaw mosaic skill
- Session log ingestion pipeline
## Phase 1: DB-Centric Agent Fleet
### Phase 1a: DB Schema — COMPLETE
- SystemConfig, BreakglassUser, LlmProvider, UserContainer, SystemContainer, UserAgentConfig tables
### Phase 1b: Encryption Service — COMPLETE
- CryptoService (AES-256-GCM using MOSAIC_SECRET_KEY)
### Phase 1c: Internal Config API
- `GET /api/internal/agent-config/:id` — assembles openclaw.json from DB
- Auth: bearer token (container's own gateway token)
- Returns complete openclaw.json with decrypted provider credentials
### Phase 1d: Container Lifecycle Manager
- Docker API integration via `dockerode` npm package
- Start/stop/health-check/reap user containers
- Auto-generate gateway tokens, assign ports
- Docker socket access required (`/var/run/docker.sock`)
### Phase 1e: Onboarding API
- First-boot detection (`SystemConfig.onboarding.completed`)
- `POST /api/onboarding/breakglass` — create admin user
- `POST /api/onboarding/oidc` — save OIDC provider config
- `POST /api/onboarding/provider` — add LLM provider + test connection
- `POST /api/onboarding/complete` — mark done
### Phase 1f: Onboarding Wizard UI
- Multi-step wizard component
- Skip-able OIDC step
- LLM provider connection test
### Phase 1g: Settings API
- CRUD: LLM providers (per-user scoped)
- CRUD: Agent config (model assignments, personalities)
- CRUD: OIDC config (admin only)
- Breakglass password reset (admin only)
### Phase 1h: Settings UI
- Settings/Providers page
- Settings/Agent Config page
- Settings/Auth page (OIDC + breakglass)
### Phase 1i: Chat Proxy
- Route WebUI chat to user's OpenClaw container
- SSE streaming pass-through
- Ensure container is running before proxying (auto-start)
### Phase 1j: Docker Compose + Entrypoint
- Simplified compose (core services only — user containers are dynamic)
- Entrypoint: fetch config from API, write openclaw.json, start gateway
- Health check integration
### Phase 1k: Idle Reaper
- Cron job to stop inactive user containers
- Configurable idle timeout (default 30min)
- Preserve state volumes
## Future Phases (out of scope)
- Phase 2: Agent fleet standup (predefined agent roles)
- Phase 3: WebUI chat + task management integration
- Phase 4: Multi-LLM provider management UI (advanced)
- Team workspaces (shared agent contexts) — explicitly out of scope

View File

@@ -25,12 +25,12 @@
| MS21-MIG-003 | not-started | phase-3 | Run migration on production database | #568 | api | — | MS21-MIG-001,MS21-TEST-003 | MS21-VER-001 | — | — | — | 5K | — | Needs deploy coordination; not automatable |
| MS21-MIG-004 | done | phase-3 | Import API endpoints (6/6 tests) | #568 | api | feat/ms21-import-api | MS21-DB-001 | — | codex | 2026-02-28 | 2026-02-28 | 20K | 24K | PR #567 merged, CI green. Review: 0 blockers, 4 should-fix, 1 medium sec (no audit log). |
| MS21-UI-001 | done | phase-4 | Settings/users page | #569 | web | feat/ms21-ui-users | MS21-API-001,MS21-API-002 | — | codex | 2026-02-28 | 2026-02-28 | 20K | ~30K | PR #573 merged. Review: 0 blockers, 4 should-fix → MS21-UI-001-QA |
| MS21-UI-001-QA | in-progress | phase-4 | QA: fix 4 review findings (pagination, error state, self-deactivate guard, tests) | #569 | web | fix/ms21-ui-001-qa | MS21-UI-001 | — | — | — | — | 15K | — | 0 blockers; merged per framework. Should-fix: pagination cap, error/empty collision, self-deactivate guard, no tests. |
| MS21-UI-001-QA | done | phase-4 | QA: fix 4 review findings (pagination, error state, self-deactivate guard, tests) | #569 | web | fix/ms21-ui-001-qa | MS21-UI-001 | — | — | — | — | 15K | — | 0 blockers; merged per framework. Should-fix: pagination cap, error/empty collision, self-deactivate guard, no tests. |
| MS21-UI-002 | done | phase-4 | User detail/edit and invite dialogs | #569 | web | feat/ms21-ui-users | MS21-UI-001 | — | — | — | — | 15K | — | |
| MS21-UI-003 | done | phase-4 | Settings/workspaces page (wire to real API) | #569 | web | feat/ms21-ui-workspaces | MS21-API-003 | — | codex | 2026-02-28 | 2026-02-28 | 15K | ~25K | PR #574 merged. Review: 0 critical, 1 low (raw errors in UI) |
| MS21-UI-004 | done | phase-4 | Workspace member management UI | #569 | web | feat/ms21-ui-workspaces | MS21-UI-003,MS21-API-003 | — | — | — | — | 15K | — | Components exist |
| MS21-UI-005 | done | phase-4 | Settings/teams page | #569 | web | feat/ms21-ui-teams | MS21-API-004 | — | — | — | — | 15K | — | |
| MS21-TEST-004 | in-progress | phase-4 | Frontend component tests | #569 | web | test/ms21-ui | MS21-UI-001,MS21-UI-002,MS21-UI-003,MS21-UI-004,MS21-UI-005 | — | — | — | — | 20K | — | |
| MS21-TEST-004 | done | phase-4 | Frontend component tests | #569 | web | test/ms21-ui | MS21-UI-001,MS21-UI-002,MS21-UI-003,MS21-UI-004,MS21-UI-005 | — | — | — | — | 20K | — | |
| MS21-RBAC-001 | done | phase-5 | Sidebar navigation role gating | #570 | web | feat/ms21-rbac | MS21-UI-001 | — | — | — | — | 10K | — | |
| MS21-RBAC-002 | done | phase-5 | Settings page access restriction | #570 | web | feat/ms21-rbac | MS21-RBAC-001 | — | — | — | — | 8K | — | |
| MS21-RBAC-003 | done | phase-5 | Action button permission gating | #570 | web | feat/ms21-rbac | MS21-RBAC-001 | — | — | — | — | 8K | — | |
@@ -71,3 +71,21 @@ Remaining estimate: ~143K tokens (Codex budget).
| MS22-SKILL-001 | done | p0-knowledge | OpenClaw mosaic skill (agents read/write findings/memory) | TASKS:P0 | stack | feat/ms22-openclaw-skill | MS22-API-001,MS22-API-002 | MS22-VER-P0 | — | — | — | 15K | — | Skill in ~/.agents/skills/mosaic/ |
| MS22-INGEST-001 | done | p0-knowledge | Session log ingestion pipeline (OpenClaw logs → ConvArchive) | TASKS:P0 | stack | feat/ms22-ingest | MS22-API-004 | MS22-VER-P0 | — | — | — | 20K | — | Script to batch-ingest existing logs |
| MS22-VER-P0 | done | p0-knowledge | Phase 0 verification: all modules deployed + smoke tested | TASKS:P0 | stack | — | MS22-TEST-001,MS22-SKILL-001,MS22-INGEST-001,MS22-API-003 | — | — | — | — | 5K | — | |
## MS22 Phase 1: DB-Centric Agent Fleet (reworked)
Design doc: `docs/design/MS22-DB-CENTRIC-ARCHITECTURE.md`
| Task ID | Status | Phase | Description | Issue | Scope | Branch | Depends On | Blocks | Assigned Worker | Started | Completed | Est Tokens | Act Tokens | Notes |
| -------- | ----------- | -------- | --------------------------------------------------------------------------------------------------------------------- | ----- | ------- | ---------------------------- | ---------- | --------------- | --------------- | ------- | --------- | ---------- | ---------- | ----- |
| MS22-P1a | done | phase-1a | Prisma schema: SystemConfig, BreakglassUser, LlmProvider, UserContainer, SystemContainer, UserAgentConfig + migration | — | api | feat/ms22-p1a-schema | — | P1b,P1c,P1d,P1e | — | — | — | 20K | — | |
| MS22-P1b | done | phase-1b | Encryption service (AES-256-GCM) for API keys and tokens | — | api | feat/ms22-p1b-crypto | — | P1c,P1e,P1g | — | — | — | 15K | — | |
| MS22-P1c | not-started | phase-1c | Internal config endpoint: assemble openclaw.json from DB | — | api | feat/ms22-p1c-config-api | P1a,P1b | P1i,P1j | — | — | — | 20K | — | |
| MS22-P1d | not-started | phase-1d | ContainerLifecycleService: Docker API (dockerode) start/stop/health/reap | — | api | feat/ms22-p1d-container-mgr | P1a | P1i,P1k | — | — | — | 25K | — | |
| MS22-P1e | not-started | phase-1e | Onboarding API: breakglass, OIDC, provider, agents, complete | — | api | feat/ms22-p1e-onboarding-api | P1a,P1b | P1f | — | — | — | 20K | — | |
| MS22-P1f | not-started | phase-1f | Onboarding wizard WebUI (multi-step form) | — | web | feat/ms22-p1f-onboarding-ui | P1e | — | — | — | — | 25K | — | |
| MS22-P1g | not-started | phase-1g | Settings API: CRUD providers, agent config, OIDC, breakglass | — | api | feat/ms22-p1g-settings-api | P1a,P1b | P1h | — | — | — | 20K | — | |
| MS22-P1h | not-started | phase-1h | Settings UI: Providers, Agent Config, Auth pages | — | web | feat/ms22-p1h-settings-ui | P1g | — | — | — | — | 25K | — | |
| MS22-P1i | not-started | phase-1i | Chat proxy: route WebUI chat to user's OpenClaw container (SSE) | — | api+web | feat/ms22-p1i-chat-proxy | P1c,P1d | — | — | — | — | 20K | — | |
| MS22-P1j | not-started | phase-1j | Docker entrypoint + health checks + core compose | — | docker | feat/ms22-p1j-docker | P1c | — | — | — | — | 10K | — | |
| MS22-P1k | not-started | phase-1k | Idle reaper cron: stop inactive user containers | — | api | feat/ms22-p1k-idle-reaper | P1d | — | — | — | — | 10K | — | |

View File

@@ -0,0 +1,413 @@
# MS22 Phase 1: DB-Centric Multi-User Agent Architecture
## Design Principles
1. **2 env vars to bootstrap** — `DATABASE_URL` + `MOSAIC_SECRET_KEY`
2. **DB-centric config** — All runtime config in Postgres, managed via WebUI
3. **Mosaic is the gatekeeper** — Users authenticate to Mosaic, never to OpenClaw directly
4. **Per-user agent isolation** — Each user gets their own OpenClaw container(s) with their own credentials
5. **Onboarding-first** — Breakglass user + wizard on first boot
6. **Generic product** — No hardcoded names, models, providers, or endpoints
## Architecture Overview
```
┌─────────────────────────────────────────────────────┐
│                    MOSAIC WEBUI                     │
│    (Auth: breakglass local + OIDC via settings)     │
└──────────────────────┬──────────────────────────────┘
┌─────────────────────────────────────────────────────┐
│                     MOSAIC API                      │
│                                                     │
│  ┌──────────────┐  ┌────────────────┐  ┌─────────┐  │
│  │ Onboarding   │  │ Container      │  │ Config  │  │
│  │ Wizard       │  │ Lifecycle Mgr  │  │ Store   │  │
│  └──────────────┘  └───────┬────────┘  └─────────┘  │
│                            │                        │
└────────────────────────────┼────────────────────────┘
                             │ Docker API
          ┌──────────────────┼──────────────────┐
          │                  │                  │
          ▼                  ▼                  ▼
   ┌─────────────┐    ┌─────────────┐    ┌─────────────┐
   │ OpenClaw    │    │ OpenClaw    │    │ OpenClaw    │
   │ User A      │    │ User B      │    │ System      │
   │             │    │             │    │ (admin)     │
   │ Claude Max  │    │ Z.ai key    │    │ Shared key  │
   │ own memory  │    │ own memory  │    │ monitoring  │
   └─────────────┘    └─────────────┘    └─────────────┘
    Scale to zero      Scale to zero      Always on
    after idle         after idle
```
## Container Lifecycle
### User containers (on-demand)
1. User logs in → Mosaic checks `UserContainer` table
2. No running container → Mosaic calls Docker API to create one
3. Injects user's encrypted API keys via config endpoint
4. Routes chat requests to user's container
5. Idle timeout (configurable, default 30min) → scale to zero
6. State volume persists (sessions, memory, auth tokens)
7. Next request → container restarts, picks up state from volume
### System containers (always-on, optional)
- Admin-provisioned for system tasks (monitoring, scheduled jobs)
- Use admin-configured shared API keys
- Not tied to any user
## Auth Layers
| Flow | Method |
| ------------------------------- | ---------------------------------------------------------------------- |
| User → Mosaic WebUI | Breakglass (local) or OIDC (configured in settings) |
| Mosaic API → OpenClaw container | Bearer token (auto-generated per container, stored encrypted in DB) |
| OpenClaw → LLM providers | User's own API keys (delivered via config endpoint, decrypted from DB) |
| Admin → System settings | RBAC (admin role required) |
| Internal config endpoint | Bearer token (container authenticates to fetch its config) |
## Database Schema
### System Tables
```prisma
// System-wide settings stored as key/value rows.
// `value` holds either plaintext or an encrypted payload; `encrypted`
// records which of the two a given row contains.
model SystemConfig {
  id        String   @id @default(cuid())
  key       String   @unique // "oidc.issuerUrl", "oidc.clientId", "onboarding.completed"
  value     String   // plaintext or encrypted (prefix: "enc:")
  encrypted Boolean  @default(false)
  updatedAt DateTime @updatedAt
}
// Local "breakglass" account with a username and password hash.
// `isActive` allows an account to be disabled without deleting the row.
model BreakglassUser {
  id           String   @id @default(cuid())
  username     String   @unique
  passwordHash String   // bcrypt
  isActive     Boolean  @default(true)
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt
}
```
### Provider Tables (per-user)
```prisma
// An LLM provider owned by a single user.
// `name` is unique per owner (see the composite unique constraint), so two
// different users may both have a provider called e.g. "work-openai".
model LlmProvider {
  id          String   @id @default(cuid())
  userId      String   // owner — each user manages their own providers
  name        String   // "my-zai", "work-openai", "local-ollama"
  displayName String   // "Z.ai", "OpenAI (Work)", "Local Ollama"
  type        String   // "zai" | "openai" | "anthropic" | "ollama" | "custom"
  baseUrl     String?  // null for built-in, URL for custom/ollama
  apiKey      String?  // encrypted
  apiType     String   @default("openai-completions")
  models      Json     @default("[]") // [{id, name, contextWindow, maxTokens}]
  isActive    Boolean  @default(true)
  createdAt   DateTime @default(now())
  updatedAt   DateTime @updatedAt

  @@unique([userId, name])
}
```
### Container Tables
```prisma
// Tracks the OpenClaw container belonging to one user (`userId` is unique,
// so there is at most one row per user).
// `containerId` and `gatewayPort` are null while the container is not running.
model UserContainer {
  id             String    @id @default(cuid())
  userId         String    @unique
  containerId    String?   // Docker container ID (null = not running)
  containerName  String    // "mosaic-user-{userId}"
  gatewayPort    Int?      // assigned port (null = not running)
  gatewayToken   String    // encrypted — auto-generated
  status         String    @default("stopped") // "running" | "stopped" | "starting" | "error"
  lastActiveAt   DateTime?
  idleTimeoutMin Int       @default(30)
  config         Json      @default("{}") // cached openclaw.json for this user
  createdAt      DateTime  @default(now())
  updatedAt      DateTime  @updatedAt
}
// A system-level OpenClaw container, identified by a unique `name` and not
// keyed to any user.
// NOTE(review): `providerId` is a plain string with no declared relation —
// confirm whether a foreign key to LlmProvider is intended.
model SystemContainer {
  id           String   @id @default(cuid())
  name         String   @unique // "mosaic-system-ops", "mosaic-system-monitor"
  role         String   // "operations" | "monitor" | "scheduler"
  containerId  String?
  gatewayPort  Int?
  gatewayToken String   // encrypted
  status       String   @default("stopped")
  providerId   String?  // references admin-level LlmProvider
  primaryModel String   // "zai/glm-5", etc.
  isActive     Boolean  @default(true)
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt
}
```
### User Agent Preferences
```prisma
// Per-user agent preferences (`userId` is unique, so one row per user).
// Every preference is optional; `fallbackModels` defaults to an empty list.
model UserAgentConfig {
  id             String   @id @default(cuid())
  userId         String   @unique
  primaryModel   String?  // user's preferred model
  fallbackModels Json     @default("[]")
  personality    String?  // custom SOUL.md content
  providerId     String?  // default provider for this user
  createdAt      DateTime @default(now())
  updatedAt      DateTime @updatedAt
}
```
## Internal Config Endpoint
`GET /api/internal/agent-config/:containerType/:id`
- Auth: Bearer token (container's own gateway token)
- Returns: Complete `openclaw.json` generated from DB
- For user containers: includes user's providers, model prefs, personality
- For system containers: includes admin provider config
Response assembles openclaw.json dynamically:
```json
{
"gateway": { "mode": "local", "port": 18789, "bind": "lan", "auth": { "mode": "token" } ... },
"agents": { "defaults": { "model": { "primary": "<from UserAgentConfig>" } } },
"models": { "providers": { "<from LlmProvider rows>": { ... } } }
}
```
## Container Lifecycle Manager
NestJS service that manages Docker containers:
```typescript
/**
 * Interface sketch of the service that manages users' OpenClaw containers.
 * Only signatures are shown here; method bodies are omitted.
 */
class ContainerLifecycleService {
  /**
   * Create and start a user's OpenClaw container.
   *
   * @param userId - the user whose container is needed
   * @returns the `url` and `token` for that user's container
   */
  async ensureRunning(userId: string): Promise<{ url: string; token: string }>;

  /**
   * Stop idle containers (called by cron/scheduler).
   *
   * @returns a number — presumably how many containers were stopped; confirm
   */
  async reapIdle(): Promise<number>;

  /**
   * Stop a specific user's container.
   *
   * @param userId - the user whose container is stopped
   */
  async stop(userId: string): Promise<void>;

  /**
   * Health check all running containers.
   *
   * @returns one `HealthStatus` entry per checked container (assumed — confirm)
   */
  async healthCheckAll(): Promise<HealthStatus[]>;

  /**
   * Restart container with updated config.
   *
   * @param userId - the user whose container is restarted
   */
  async restart(userId: string): Promise<void>;
}
```
Uses Docker Engine API (`/var/run/docker.sock` or TCP) via `dockerode` npm package.
## Onboarding Wizard
### First-Boot Detection
- API checks: `SystemConfig.get("onboarding.completed")` → null = first boot
- WebUI redirects to `/onboarding` if not completed
### Steps
**Step 1: Create Breakglass Admin**
- Username + password → bcrypt → `BreakglassUser` table
- This user always works, even if OIDC is misconfigured
**Step 2: Configure Authentication (optional)**
- OIDC: provider URL, client ID, client secret → encrypted in `SystemConfig`
- Skip = breakglass-only auth (can add OIDC later in settings)
**Step 3: Add Your First LLM Provider**
- Pick type → enter API key/endpoint → test connection → save to `LlmProvider`
- This becomes the admin's default provider
**Step 4: System Agents (optional)**
- Configure always-on system agents for monitoring/ops
- Or skip — users can just use their own personal agents
**Step 5: Complete**
- Sets `SystemConfig("onboarding.completed") = true`
- Redirects to dashboard
### Post-Onboarding: User Self-Service
- Each user adds their own LLM providers in profile settings
- Each user configures their preferred model, personality
- First chat request triggers container creation
## Docker Compose (final)
```yaml
# Core Mosaic stack: API, web UI and Postgres on one internal network.
services:
  mosaic-api:
    image: mosaic/api:latest
    environment:
      DATABASE_URL: ${DATABASE_URL}
      MOSAIC_SECRET_KEY: ${MOSAIC_SECRET_KEY}
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock # Docker API access
    networks:
      - internal

  mosaic-web:
    image: mosaic/web:latest
    environment:
      NEXT_PUBLIC_API_URL: http://mosaic-api:4000
    networks:
      - internal

  postgres:
    image: postgres:17
    environment:
      POSTGRES_DB: mosaic
      POSTGRES_USER: mosaic
      POSTGRES_PASSWORD: ${DATABASE_PASSWORD}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - internal

  # System agent (optional, admin-provisioned)
  # mosaic-system:
  #   image: alpine/openclaw:latest
  #   ... (managed by ContainerLifecycleService)

  # User containers are NOT in this file —
  # they are dynamically created by ContainerLifecycleService
  # via the Docker API at runtime.

networks:
  internal:
    # NOTE(review): the overlay driver requires Swarm mode, and standalone
    # containers started through the Docker API can only join an overlay
    # network that is marked attachable — confirm this matches the deployment.
    driver: overlay

volumes:
  postgres-data:
```
Note: User OpenClaw containers are **not** defined in docker-compose. They are
created dynamically by the `ContainerLifecycleService` when users start chatting.
## Entrypoint (for dynamically created containers)
```sh
#!/bin/sh
# Entrypoint for dynamically created OpenClaw containers: fetch this
# container's config from the Mosaic API, then start the gateway.
set -e

# Abort with a clear message if a required variable is unset or empty.
: "${MOSAIC_API_URL:?required}"
: "${AGENT_TOKEN:?required}"
: "${AGENT_ID:?required}"

# The fetched config carries provider API keys decrypted from the DB, so
# create it readable by the owner only.
umask 077

# Fetch config from Mosaic API.
# -f fails on HTTP errors (which aborts startup under `set -e`); -sS hides the
# progress meter but still prints the error, so a failed boot is diagnosable.
# NOTE(review): the endpoint is documented as
# /api/internal/agent-config/:containerType/:id — confirm this path matches.
curl -fsS "${MOSAIC_API_URL}/api/internal/agent-config/${AGENT_ID}" \
  -H "Authorization: Bearer ${AGENT_TOKEN}" \
  -o /tmp/openclaw.json

export OPENCLAW_CONFIG_PATH=/tmp/openclaw.json

# exec replaces the shell so the gateway receives container signals directly.
exec openclaw gateway run --bind lan --auth token
```
Container env vars (injected by ContainerLifecycleService):
- `MOSAIC_API_URL` — internal API URL
- `AGENT_TOKEN` — this container's bearer token (from DB)
- `AGENT_ID` — container ID for config lookup
## Config Update Strategy
When a user changes settings (model, provider, personality):
1. Mosaic API updates DB
2. API calls `ContainerLifecycleService.restart(userId)`
3. Container restarts, fetches fresh config from API
4. OpenClaw gateway starts with new config
5. State volume preserves sessions/memory across restarts
## Task Breakdown
| Task | Phase | Scope | Dependencies |
| -------- | -------------- | --------------------------------------------------------------------------------------------------------------------- | ------------ |
| MS22-P1a | Schema | Prisma models: SystemConfig, BreakglassUser, LlmProvider, UserContainer, SystemContainer, UserAgentConfig. Migration. | — |
| MS22-P1b | Crypto | Encryption service for API keys/tokens (AES-256-GCM using MOSAIC_SECRET_KEY) | P1a |
| MS22-P1c | Config API | Internal config endpoint: assembles openclaw.json from DB | P1a, P1b |
| MS22-P1d | Container Mgr | ContainerLifecycleService: Docker API integration (dockerode), start/stop/health/reap | P1a |
| MS22-P1e | Onboarding API | Onboarding endpoints: breakglass, OIDC, provider, agents, complete | P1a, P1b |
| MS22-P1f | Onboarding UI | Multi-step wizard in WebUI | P1e |
| MS22-P1g | Settings API | CRUD: providers, agent config, OIDC, breakglass | P1a, P1b |
| MS22-P1h | Settings UI | Settings pages: Providers, Agent Config, Auth | P1g |
| MS22-P1i | Chat Proxy | Route WebUI chat → user's OpenClaw container (SSE) | P1c, P1d |
| MS22-P1j | Docker | Entrypoint script, health checks, compose for core services | P1c |
| MS22-P1k | Idle Reaper | Cron service to stop idle user containers | P1d |
## Open Questions (Resolved)
1. ~~Config updates → restart?~~ **Yes.** Mosaic restarts the container, fresh config on boot.
2. ~~CLI alternative for breakglass?~~ **Yes.** Both WebUI wizard and CLI (`mosaic admin create-breakglass`).
3. ~~Config cache TTL?~~ **No TTL.** Config is fetched once at startup; changes trigger a restart.
## Security Isolation Model
### Core Principle: ZERO cross-user access
Every user is fully sandboxed. No exceptions.
### Container Isolation
- Each user gets their **own** OpenClaw container (separate process, PID namespace)
- Each container has its **own** Docker volume (sessions, memory, workspace)
- Containers run on an **internal-only** Docker network — no external exposure
- Users NEVER talk to OpenClaw directly — Mosaic proxies all requests
- Container gateway tokens are unique per-user and single-purpose
### Data Isolation (enforced at API + DB level)
| Data | Isolation | Enforcement |
| ---------------- | ------------------------- | --------------------------------------------------------------------------------- |
| LLM API keys | Per-user, encrypted | `LlmProvider.userId` — all queries scoped by authenticated user |
| Chat history | Per-user container volume | Separate Docker volume per user, not shared |
| Agent memory | Per-user container volume | Separate Docker volume per user |
| Agent config | Per-user | `UserAgentConfig.userId` — scoped queries |
| Container access | Per-user | `UserContainer.userId` — Mosaic validates user owns the container before proxying |
### API Enforcement
- **All user-facing endpoints** include `WHERE userId = authenticatedUser.id`
- **No admin endpoint** exposes another user's API keys (even to admins)
- **Chat proxy** validates: authenticated user → owns target container → forwards request
- **Config endpoint** validates: container token matches the container requesting config
- **Provider CRUD** is fully user-scoped — User A cannot list, read, or modify User B's providers
### What admins CAN see
- Container status (running/stopped) — not contents
- User list and roles
- System-level config (OIDC, system agents)
- Aggregate usage metrics (not individual conversations)
### What admins CANNOT see
- Other users' API keys (encrypted, no decrypt endpoint)
- Other users' chat history (in container volumes, not in Mosaic DB)
- Other users' agent memory/workspace contents
### Future: Team Workspaces (NOT in scope)
Team/shared workspaces are a potential future feature where users opt-in to
shared agent contexts. This requires explicit consent, shared-key management,
and a different isolation model. **Not designed here. Not built now.**
### Attack Surface Notes
- Docker socket access (`/var/run/docker.sock`) is required by Mosaic API for container management. This is a privileged operation — the Mosaic API container must be trusted.
- `MOSAIC_SECRET_KEY` is the root of trust for encryption. Rotation requires re-encrypting all secrets in DB.
- Container-to-container communication is blocked by default (no shared network between user containers unless explicitly configured).

7
pnpm-lock.yaml generated
View File

@@ -171,6 +171,9 @@ importers:
discord.js:
specifier: ^14.25.1
version: 14.25.1
dockerode:
specifier: ^4.0.9
version: 4.0.9
gray-matter:
specifier: ^4.0.3
version: 4.0.3
@@ -253,6 +256,9 @@ importers:
'@types/cookie-parser':
specifier: ^1.4.10
version: 1.4.10(@types/express@5.0.6)
'@types/dockerode':
specifier: ^3.3.47
version: 3.3.47
'@types/express':
specifier: ^5.0.1
version: 5.0.6
@@ -1604,7 +1610,6 @@ packages:
'@mosaicstack/telemetry-client@0.1.1':
resolution: {integrity: sha512-1udg6p4cs8rhQgQ2pKCfi7EpRlJieRRhA5CIqthRQ6HQZLgQ0wH+632jEulov3rlHSM1iplIQ+AAe5DWrvSkEA==, tarball: https://git.mosaicstack.dev/api/packages/mosaic/npm/%40mosaicstack%2Ftelemetry-client/-/0.1.1/telemetry-client-0.1.1.tgz}
engines: {node: '>=18'}
'@mrleebo/prisma-ast@0.13.1':
resolution: {integrity: sha512-XyroGQXcHrZdvmrGJvsA9KNeOOgGMg1Vg9OlheUsBOSKznLMDl+YChxbkboRHvtFYJEMRYmlV3uoo/njCw05iw==}