Compare commits
1 Commits
infra/fix-
...
fix/dashbo
| Author | SHA1 | Date | |
|---|---|---|---|
| a8930317ce |
@@ -337,44 +337,3 @@ steps:
|
||||
- security-trivy-api
|
||||
- security-trivy-orchestrator
|
||||
- security-trivy-web
|
||||
|
||||
# ─── Deploy to Docker Swarm (main only) ─────────────────────
|
||||
|
||||
# ─── Deploy to Docker Swarm via Portainer (main only) ─────────────────────
|
||||
|
||||
deploy-swarm:
|
||||
image: alpine:3
|
||||
environment:
|
||||
SSH_PRIVATE_KEY:
|
||||
from_secret: ssh_private_key
|
||||
SSH_KNOWN_HOSTS:
|
||||
from_secret: ssh_known_hosts
|
||||
PORTAINER_URL:
|
||||
from_secret: portainer_url
|
||||
PORTAINER_API_KEY:
|
||||
from_secret: portainer_api_key
|
||||
commands:
|
||||
- apk add --no-cache curl openssh-client
|
||||
- |
|
||||
set -e
|
||||
echo "🚀 Deploying to Docker Swarm..."
|
||||
|
||||
# Setup SSH for fallback
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_KNOWN_HOSTS" > ~/.ssh/known_hosts
|
||||
chmod 600 ~/.ssh/known_hosts
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519
|
||||
chmod 600 ~/.ssh/id_ed25519
|
||||
|
||||
# Force service updates (images are pulled from public registry)
|
||||
ssh -o StrictHostKeyChecking=no localadmin@10.1.1.45 \
|
||||
"docker service update --with-registry-auth --force mosaic-stack-api && \
|
||||
docker service update --with-registry-auth --force mosaic-stack-web && \
|
||||
docker service update --with-registry-auth --force mosaic-stack-orchestrator && \
|
||||
docker service update --with-registry-auth --force mosaic-stack-coordinator && \
|
||||
echo '✅ All services updated'"
|
||||
when:
|
||||
- branch: [main]
|
||||
event: [push, manual, tag]
|
||||
depends_on:
|
||||
- link-packages
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Controller, Get, Query, Res, UseGuards } from "@nestjs/common";
|
||||
import { Controller, Get, Res, UseGuards } from "@nestjs/common";
|
||||
import { AgentStatus } from "@prisma/client";
|
||||
import type { Response } from "express";
|
||||
import { AuthGuard } from "../auth/guards/auth.guard";
|
||||
@@ -6,7 +6,6 @@ import { PrismaService } from "../prisma/prisma.service";
|
||||
|
||||
const AGENT_POLL_INTERVAL_MS = 5_000;
|
||||
const SSE_HEARTBEAT_MS = 15_000;
|
||||
const DEFAULT_EVENTS_LIMIT = 25;
|
||||
|
||||
interface OrchestratorAgentDto {
|
||||
id: string;
|
||||
@@ -16,26 +15,6 @@ interface OrchestratorAgentDto {
|
||||
createdAt: Date;
|
||||
}
|
||||
|
||||
interface OrchestratorEventDto {
|
||||
type: string;
|
||||
timestamp: string;
|
||||
agentId?: string;
|
||||
taskId?: string;
|
||||
data?: Record<string, unknown>;
|
||||
}
|
||||
|
||||
interface OrchestratorHealthDto {
|
||||
status: "healthy" | "degraded" | "unhealthy";
|
||||
database: "connected" | "disconnected";
|
||||
agents: {
|
||||
total: number;
|
||||
working: number;
|
||||
idle: number;
|
||||
errored: number;
|
||||
};
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
@Controller("orchestrator")
|
||||
@UseGuards(AuthGuard)
|
||||
export class OrchestratorController {
|
||||
@@ -46,81 +25,6 @@ export class OrchestratorController {
|
||||
return this.fetchActiveAgents();
|
||||
}
|
||||
|
||||
@Get("events/recent")
|
||||
async getRecentEvents(
|
||||
@Query("limit") limit?: string
|
||||
): Promise<{ events: OrchestratorEventDto[] }> {
|
||||
const eventsLimit = limit ? parseInt(limit, 10) : DEFAULT_EVENTS_LIMIT;
|
||||
const safeLimit = Math.min(Math.max(eventsLimit, 1), 100);
|
||||
|
||||
// Fetch recent agent activity to derive events
|
||||
const agents = await this.prisma.agent.findMany({
|
||||
where: {
|
||||
status: {
|
||||
not: AgentStatus.TERMINATED,
|
||||
},
|
||||
},
|
||||
orderBy: {
|
||||
createdAt: "desc",
|
||||
},
|
||||
take: safeLimit,
|
||||
});
|
||||
|
||||
// Derive events from agent status changes
|
||||
const events: OrchestratorEventDto[] = agents.map((agent) => ({
|
||||
type: `agent:${agent.status.toLowerCase()}`,
|
||||
timestamp: agent.createdAt.toISOString(),
|
||||
agentId: agent.id,
|
||||
data: {
|
||||
name: agent.name,
|
||||
role: agent.role,
|
||||
model: agent.model,
|
||||
},
|
||||
}));
|
||||
|
||||
return { events };
|
||||
}
|
||||
|
||||
@Get("health")
|
||||
async getHealth(): Promise<OrchestratorHealthDto> {
|
||||
let databaseConnected = false;
|
||||
let agents: OrchestratorAgentDto[] = [];
|
||||
|
||||
try {
|
||||
// Check database connectivity
|
||||
await this.prisma.$queryRaw`SELECT 1`;
|
||||
databaseConnected = true;
|
||||
|
||||
// Get agent counts
|
||||
agents = await this.fetchActiveAgents();
|
||||
} catch {
|
||||
databaseConnected = false;
|
||||
}
|
||||
|
||||
const working = agents.filter((a) => a.status === AgentStatus.WORKING).length;
|
||||
const idle = agents.filter((a) => a.status === AgentStatus.IDLE).length;
|
||||
const errored = agents.filter((a) => a.status === AgentStatus.ERROR).length;
|
||||
|
||||
let status: OrchestratorHealthDto["status"] = "healthy";
|
||||
if (!databaseConnected) {
|
||||
status = "unhealthy";
|
||||
} else if (errored > 0) {
|
||||
status = "degraded";
|
||||
}
|
||||
|
||||
return {
|
||||
status,
|
||||
database: databaseConnected ? "connected" : "disconnected",
|
||||
agents: {
|
||||
total: agents.length,
|
||||
working,
|
||||
idle,
|
||||
errored,
|
||||
},
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
}
|
||||
|
||||
@Get("events")
|
||||
async streamEvents(@Res() res: Response): Promise<void> {
|
||||
res.setHeader("Content-Type", "text/event-stream");
|
||||
|
||||
@@ -601,21 +601,9 @@ class TestCoordinatorIntegration:
|
||||
coordinator = Coordinator(queue_manager=queue_manager, poll_interval=0.02)
|
||||
|
||||
task = asyncio.create_task(coordinator.start())
|
||||
|
||||
# Poll for completion with timeout instead of fixed sleep
|
||||
deadline = asyncio.get_event_loop().time() + 5.0 # 5 second timeout
|
||||
while asyncio.get_event_loop().time() < deadline:
|
||||
all_completed = True
|
||||
for i in range(157, 162):
|
||||
item = queue_manager.get_item(i)
|
||||
if item is None or item.status != QueueItemStatus.COMPLETED:
|
||||
all_completed = False
|
||||
break
|
||||
if all_completed:
|
||||
break
|
||||
await asyncio.sleep(0.05)
|
||||
|
||||
await asyncio.sleep(0.5) # Allow time for processing
|
||||
await coordinator.stop()
|
||||
|
||||
task.cancel()
|
||||
try:
|
||||
await task
|
||||
|
||||
@@ -16,21 +16,6 @@ interface Agent {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
function isWorking(status: string): boolean {
|
||||
const s = status.toLowerCase();
|
||||
return s === "running" || s === "working";
|
||||
}
|
||||
|
||||
function isIdle(status: string): boolean {
|
||||
const s = status.toLowerCase();
|
||||
return s === "idle" || s === "spawning" || s === "waiting" || s === "queued";
|
||||
}
|
||||
|
||||
function isErrored(status: string): boolean {
|
||||
const s = status.toLowerCase();
|
||||
return s === "failed" || s === "error";
|
||||
}
|
||||
|
||||
export function AgentStatusWidget({ id: _id, config: _config }: WidgetProps): React.JSX.Element {
|
||||
const [agents, setAgents] = useState<Agent[]>([]);
|
||||
const [isLoading, setIsLoading] = useState(true);
|
||||
@@ -89,20 +74,25 @@ export function AgentStatusWidget({ id: _id, config: _config }: WidgetProps): Re
|
||||
}, [fetchAgents]);
|
||||
|
||||
const getStatusIcon = (status: string): React.JSX.Element => {
|
||||
if (isWorking(status)) {
|
||||
return <Activity className="w-4 h-4 text-blue-500 animate-pulse" />;
|
||||
const statusLower = status.toLowerCase();
|
||||
switch (statusLower) {
|
||||
case "running":
|
||||
case "working":
|
||||
return <Activity className="w-4 h-4 text-blue-500 animate-pulse" />;
|
||||
case "spawning":
|
||||
case "queued":
|
||||
return <Clock className="w-4 h-4 text-yellow-500" />;
|
||||
case "completed":
|
||||
return <CheckCircle className="w-4 h-4 text-green-500" />;
|
||||
case "failed":
|
||||
case "error":
|
||||
return <AlertCircle className="w-4 h-4 text-red-500" />;
|
||||
case "terminated":
|
||||
case "killed":
|
||||
return <CheckCircle className="w-4 h-4 text-gray-500" />;
|
||||
default:
|
||||
return <Clock className="w-4 h-4 text-gray-400" />;
|
||||
}
|
||||
if (isIdle(status)) {
|
||||
return <Clock className="w-4 h-4 text-yellow-500" />;
|
||||
}
|
||||
if (isErrored(status)) {
|
||||
return <AlertCircle className="w-4 h-4 text-red-500" />;
|
||||
}
|
||||
const s = status.toLowerCase();
|
||||
if (s === "completed" || s === "terminated" || s === "killed") {
|
||||
return <CheckCircle className="w-4 h-4 text-gray-500" />;
|
||||
}
|
||||
return <Clock className="w-4 h-4 text-gray-400" />;
|
||||
};
|
||||
|
||||
const getStatusText = (status: string): string => {
|
||||
@@ -131,9 +121,9 @@ export function AgentStatusWidget({ id: _id, config: _config }: WidgetProps): Re
|
||||
|
||||
const stats = {
|
||||
total: agents.length,
|
||||
working: agents.filter((a) => isWorking(a.status)).length,
|
||||
idle: agents.filter((a) => isIdle(a.status)).length,
|
||||
error: agents.filter((a) => isErrored(a.status)).length,
|
||||
working: agents.filter((a) => a.status.toLowerCase() === "running").length,
|
||||
idle: agents.filter((a) => a.status.toLowerCase() === "spawning").length,
|
||||
error: agents.filter((a) => a.status.toLowerCase() === "failed").length,
|
||||
};
|
||||
|
||||
if (isLoading) {
|
||||
@@ -186,9 +176,9 @@ export function AgentStatusWidget({ id: _id, config: _config }: WidgetProps): Re
|
||||
<div
|
||||
key={agent.agentId}
|
||||
className={`p-3 rounded-lg border ${
|
||||
isErrored(agent.status)
|
||||
agent.status.toLowerCase() === "failed"
|
||||
? "bg-red-50 border-red-200"
|
||||
: isWorking(agent.status)
|
||||
: agent.status.toLowerCase() === "running"
|
||||
? "bg-blue-50 border-blue-200"
|
||||
: "bg-gray-50 border-gray-200"
|
||||
}`}
|
||||
|
||||
@@ -9,8 +9,6 @@
|
||||
# - OpenBao: Standalone container (see docker-compose.openbao.yml)
|
||||
# - Authentik: External OIDC provider
|
||||
# - Ollama: External AI inference
|
||||
# - PostgreSQL: Provided by the openbrain stack (openbrain_brain-db)
|
||||
# Deploy openbrain stack before this stack.
|
||||
#
|
||||
# Usage (Portainer):
|
||||
# 1. Stacks -> Add Stack -> Upload or paste
|
||||
@@ -38,75 +36,37 @@
|
||||
# Required vars use plain ${VAR} — the app validates at startup.
|
||||
#
|
||||
# ==============================================
|
||||
# DATABASE (openbrain_brain-db — external)
|
||||
# ==============================================
|
||||
#
|
||||
# This stack uses the PostgreSQL instance from the openbrain stack.
|
||||
# The openbrain stack must be deployed first and its brain-internal
|
||||
# overlay network must exist.
|
||||
#
|
||||
# Required env vars for DB access:
|
||||
# BRAIN_DB_ADMIN_USER — openbrain superuser (default: openbrain)
|
||||
# BRAIN_DB_ADMIN_PASSWORD — openbrain superuser password
|
||||
# (must match openbrain stack POSTGRES_PASSWORD)
|
||||
# POSTGRES_USER — mosaic application DB user (created by mosaic-db-init)
|
||||
# POSTGRES_PASSWORD — mosaic application DB password
|
||||
# POSTGRES_DB — mosaic application database name (default: mosaic)
|
||||
#
|
||||
# ==============================================
|
||||
|
||||
services:
|
||||
# ============================================
|
||||
# DATABASE INIT
|
||||
# CORE INFRASTRUCTURE
|
||||
# ============================================
|
||||
|
||||
# ======================
|
||||
# Mosaic Database Init
|
||||
# PostgreSQL Database
|
||||
# ======================
|
||||
# Creates the mosaic application user and database in the shared
|
||||
# openbrain PostgreSQL instance (openbrain_brain-db).
|
||||
# Runs once and exits. Idempotent — safe to run on every deploy.
|
||||
mosaic-db-init:
|
||||
image: postgres:17-alpine
|
||||
postgres:
|
||||
image: git.mosaicstack.dev/mosaic/stack-postgres:${IMAGE_TAG:-latest}
|
||||
environment:
|
||||
PGHOST: openbrain_brain-db
|
||||
PGPORT: 5432
|
||||
PGUSER: ${BRAIN_DB_ADMIN_USER:-openbrain}
|
||||
PGPASSWORD: ${BRAIN_DB_ADMIN_PASSWORD}
|
||||
MOSAIC_USER: ${POSTGRES_USER}
|
||||
MOSAIC_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
MOSAIC_DB: ${POSTGRES_DB:-mosaic}
|
||||
entrypoint: ["sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
until pg_isready -h openbrain_brain-db -p 5432 -U $${PGUSER}; do
|
||||
echo "Waiting for openbrain_brain-db..."
|
||||
sleep 2
|
||||
done
|
||||
echo "Database ready. Creating mosaic user and database..."
|
||||
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -tc "SELECT 1 FROM pg_roles WHERE rolname='$${MOSAIC_USER}'" | grep -q 1 || \
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -c "CREATE USER $${MOSAIC_USER} WITH PASSWORD '$${MOSAIC_PASSWORD}';"
|
||||
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -tc "SELECT 1 FROM pg_database WHERE datname='$${MOSAIC_DB}'" | grep -q 1 || \
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -c "CREATE DATABASE $${MOSAIC_DB} OWNER $${MOSAIC_USER} ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0;"
|
||||
|
||||
echo "Enabling required extensions in $${MOSAIC_DB}..."
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -d $${MOSAIC_DB} -c "CREATE EXTENSION IF NOT EXISTS vector;"
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -d $${MOSAIC_DB} -c "CREATE EXTENSION IF NOT EXISTS \"uuid-ossp\";"
|
||||
|
||||
echo "Mosaic database ready: $${MOSAIC_DB}"
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
POSTGRES_SHARED_BUFFERS: ${POSTGRES_SHARED_BUFFERS:-256MB}
|
||||
POSTGRES_EFFECTIVE_CACHE_SIZE: ${POSTGRES_EFFECTIVE_CACHE_SIZE:-1GB}
|
||||
POSTGRES_MAX_CONNECTIONS: ${POSTGRES_MAX_CONNECTIONS:-100}
|
||||
volumes:
|
||||
- postgres_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 30s
|
||||
networks:
|
||||
- openbrain-brain-internal
|
||||
- internal
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
delay: 5s
|
||||
max_attempts: 5
|
||||
|
||||
# ============================================
|
||||
# CORE INFRASTRUCTURE
|
||||
# ============================================
|
||||
|
||||
# ======================
|
||||
# Valkey Cache
|
||||
@@ -145,7 +105,7 @@ services:
|
||||
NODE_ENV: production
|
||||
PORT: ${API_PORT:-3001}
|
||||
API_HOST: ${API_HOST:-0.0.0.0}
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@openbrain_brain-db:5432/${POSTGRES_DB:-mosaic}
|
||||
DATABASE_URL: postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}
|
||||
VALKEY_URL: redis://valkey:6379
|
||||
# Auth (external Authentik)
|
||||
OIDC_ENABLED: ${OIDC_ENABLED:-false}
|
||||
@@ -203,7 +163,6 @@ services:
|
||||
networks:
|
||||
- internal
|
||||
- traefik-public
|
||||
- openbrain-brain-internal
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
@@ -348,36 +307,36 @@ services:
|
||||
# ======================
|
||||
# Synapse Database Init
|
||||
# ======================
|
||||
# Creates the 'synapse' database in the shared openbrain PostgreSQL instance.
|
||||
# Creates the 'synapse' database in the shared PostgreSQL instance.
|
||||
# Runs once and exits. Idempotent — safe to run on every deploy.
|
||||
synapse-db-init:
|
||||
image: postgres:17-alpine
|
||||
environment:
|
||||
PGHOST: openbrain_brain-db
|
||||
PGHOST: postgres
|
||||
PGPORT: 5432
|
||||
PGUSER: ${BRAIN_DB_ADMIN_USER:-openbrain}
|
||||
PGPASSWORD: ${BRAIN_DB_ADMIN_PASSWORD}
|
||||
PGUSER: ${POSTGRES_USER}
|
||||
PGPASSWORD: ${POSTGRES_PASSWORD}
|
||||
SYNAPSE_DB: ${SYNAPSE_POSTGRES_DB}
|
||||
SYNAPSE_USER: ${SYNAPSE_POSTGRES_USER}
|
||||
SYNAPSE_PASSWORD: ${SYNAPSE_POSTGRES_PASSWORD}
|
||||
entrypoint: ["sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
until pg_isready -h openbrain_brain-db -p 5432 -U $${PGUSER}; do
|
||||
echo "Waiting for openbrain_brain-db..."
|
||||
until pg_isready -h postgres -p 5432 -U $${PGUSER}; do
|
||||
echo "Waiting for PostgreSQL..."
|
||||
sleep 2
|
||||
done
|
||||
echo "Database ready. Creating Synapse user and database..."
|
||||
echo "PostgreSQL is ready. Creating Synapse database and user..."
|
||||
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -tc "SELECT 1 FROM pg_roles WHERE rolname='$${SYNAPSE_USER}'" | grep -q 1 || \
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -c "CREATE USER $${SYNAPSE_USER} WITH PASSWORD '$${SYNAPSE_PASSWORD}';"
|
||||
psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_roles WHERE rolname='$${SYNAPSE_USER}'" | grep -q 1 || \
|
||||
psql -h postgres -U $${PGUSER} -c "CREATE USER $${SYNAPSE_USER} WITH PASSWORD '$${SYNAPSE_PASSWORD}';"
|
||||
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -tc "SELECT 1 FROM pg_database WHERE datname='$${SYNAPSE_DB}'" | grep -q 1 || \
|
||||
psql -h openbrain_brain-db -U $${PGUSER} -c "CREATE DATABASE $${SYNAPSE_DB} OWNER $${SYNAPSE_USER} ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0;"
|
||||
psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_database WHERE datname='$${SYNAPSE_DB}'" | grep -q 1 || \
|
||||
psql -h postgres -U $${PGUSER} -c "CREATE DATABASE $${SYNAPSE_DB} OWNER $${SYNAPSE_USER} ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0;"
|
||||
|
||||
echo "Synapse database ready: $${SYNAPSE_DB}"
|
||||
networks:
|
||||
- openbrain-brain-internal
|
||||
- internal
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
@@ -492,6 +451,7 @@ services:
|
||||
# Volumes
|
||||
# ======================
|
||||
volumes:
|
||||
postgres_data:
|
||||
valkey_data:
|
||||
orchestrator_workspace:
|
||||
speaches_models:
|
||||
@@ -504,6 +464,3 @@ networks:
|
||||
driver: overlay
|
||||
traefik-public:
|
||||
external: true
|
||||
openbrain-brain-internal:
|
||||
external: true
|
||||
name: openbrain_brain-internal
|
||||
|
||||
Reference in New Issue
Block a user