Files
stack/docker-compose.swarm.portainer.yml
Jason Woltje 6fd8e85266
All checks were successful
ci/woodpecker/push/infra Pipeline was successful
ci/woodpecker/push/orchestrator Pipeline was successful
fix(orchestrator): make provider-aware Claude key startup requirements
2026-02-17 17:15:42 -06:00

449 lines
15 KiB
YAML

# ==============================================
# Mosaic Stack — Docker Swarm / Portainer
# ==============================================
#
# The canonical deployment file for Mosaic Stack on Docker Swarm.
# Includes all services except OpenBao (standalone) and external services.
#
# External services (not in this file):
# - OpenBao: Standalone container (see docker-compose.openbao.yml)
# - Authentik: External OIDC provider
# - Ollama: External AI inference
#
# Usage (Portainer):
# 1. Stacks -> Add Stack -> Upload or paste
# 2. Set environment variables (see .env.example for full reference)
# 3. Deploy
#
# Usage (CLI):
# docker stack deploy -c docker-compose.swarm.portainer.yml mosaic
#
# Host paths required for Matrix:
# /opt/mosaic/synapse/homeserver.yaml
# /opt/mosaic/synapse/element-config.json
# /opt/mosaic/synapse/media_store/ (auto-populated)
# /opt/mosaic/synapse/keys/ (auto-populated)
#
# ==============================================
# ENVIRONMENT VARIABLE CONVENTION
# ==============================================
#
# ${VAR} — REQUIRED. Must be set in Portainer env vars.
# ${VAR:-default} — OPTIONAL. Falls back to a sensible default.
# ${VAR:-} — OPTIONAL. Empty string is acceptable.
#
# NOTE: Portainer does not support ${VAR:?msg} syntax.
# Required vars use plain ${VAR} — the app validates at startup.
#
# ==============================================
services:
# ============================================
# CORE INFRASTRUCTURE
# ============================================
# ======================
# PostgreSQL Database
# ======================
postgres:
  # PostgreSQL database for the stack. User, password and database name are
  # required variables; the three tuning values fall back to defaults.
  image: "git.mosaicstack.dev/mosaic/stack-postgres:${IMAGE_TAG:-latest}"
  environment:
    POSTGRES_USER: "${POSTGRES_USER}"
    POSTGRES_PASSWORD: "${POSTGRES_PASSWORD}"
    POSTGRES_DB: "${POSTGRES_DB}"
    POSTGRES_SHARED_BUFFERS: "${POSTGRES_SHARED_BUFFERS:-256MB}"
    POSTGRES_EFFECTIVE_CACHE_SIZE: "${POSTGRES_EFFECTIVE_CACHE_SIZE:-1GB}"
    POSTGRES_MAX_CONNECTIONS: "${POSTGRES_MAX_CONNECTIONS:-100}"
  volumes:
    # The data directory is backed by the postgres_data named volume.
    - postgres_data:/var/lib/postgresql/data
  healthcheck:
    # Passes once pg_isready reports the server is accepting connections
    # for the configured user and database.
    test:
      - CMD-SHELL
      - "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 30s
  networks:
    # Only attached to the internal network; no Traefik labels are set.
    - internal
  deploy:
    restart_policy:
      condition: on-failure
# ======================
# Valkey Cache
# ======================
valkey:
  # Valkey server started with a memory cap, the noeviction policy and the
  # append-only file enabled. /data is backed by the valkey_data volume.
  image: "valkey/valkey:8-alpine"
  command:
    - "valkey-server"
    # Memory cap is configurable; defaults to 256mb.
    - "--maxmemory ${VALKEY_MAXMEMORY:-256mb}"
    - "--maxmemory-policy noeviction"
    - "--appendonly yes"
  volumes:
    - valkey_data:/data
  healthcheck:
    # Runs valkey-cli ping directly (exec form, no shell).
    test:
      - CMD
      - valkey-cli
      - ping
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 10s
  networks:
    - internal
  deploy:
    restart_policy:
      condition: on-failure
# ============================================
# MOSAIC APPLICATION
# ============================================
# ======================
# Mosaic API
# ======================
api:
  # Mosaic API. Reaches postgres and valkey by service name on the internal
  # network and is exposed through Traefik via the labels under deploy.
  image: "git.mosaicstack.dev/mosaic/stack-api:${IMAGE_TAG:-latest}"
  environment:
    NODE_ENV: production
    PORT: "${API_PORT:-3001}"
    API_HOST: "${API_HOST:-0.0.0.0}"
    DATABASE_URL: "postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}"
    VALKEY_URL: "redis://valkey:6379"
    # Auth (external Authentik)
    OIDC_ENABLED: "${OIDC_ENABLED:-false}"
    OIDC_ISSUER: "${OIDC_ISSUER}"
    OIDC_CLIENT_ID: "${OIDC_CLIENT_ID}"
    OIDC_CLIENT_SECRET: "${OIDC_CLIENT_SECRET}"
    OIDC_REDIRECT_URI: "${OIDC_REDIRECT_URI:-}"
    # REQUIRED, with no fallback on purpose: a default written into this
    # file would be a publicly known signing secret for every deployment
    # that forgot to set the variable.
    JWT_SECRET: "${JWT_SECRET}"
    JWT_EXPIRATION: "${JWT_EXPIRATION:-24h}"
    BETTER_AUTH_SECRET: "${BETTER_AUTH_SECRET}"
    CSRF_SECRET: "${CSRF_SECRET}"
    # External services
    OLLAMA_ENDPOINT: "${OLLAMA_ENDPOINT}"
    OPENBAO_ADDR: "${OPENBAO_ADDR}"
    ENCRYPTION_KEY: "${ENCRYPTION_KEY}"
    # Matrix bridge (optional — configure after Synapse is running)
    MATRIX_HOMESERVER_URL: "${MATRIX_HOMESERVER_URL:-http://synapse:8008}"
    MATRIX_ACCESS_TOKEN: "${MATRIX_ACCESS_TOKEN:-}"
    MATRIX_BOT_USER_ID: "${MATRIX_BOT_USER_ID:-}"
    MATRIX_CONTROL_ROOM_ID: "${MATRIX_CONTROL_ROOM_ID:-}"
    MATRIX_WORKSPACE_ID: "${MATRIX_WORKSPACE_ID:-}"
    MATRIX_SERVER_NAME: "${MATRIX_SERVER_NAME:-}"
    # Speech
    SPEECH_MAX_UPLOAD_SIZE: "${SPEECH_MAX_UPLOAD_SIZE:-25000000}"
    SPEECH_MAX_DURATION_SECONDS: "${SPEECH_MAX_DURATION_SECONDS:-600}"
    SPEECH_MAX_TEXT_LENGTH: "${SPEECH_MAX_TEXT_LENGTH:-4096}"
    # Telemetry (disabled by default)
    MOSAIC_TELEMETRY_ENABLED: "${MOSAIC_TELEMETRY_ENABLED:-false}"
    MOSAIC_TELEMETRY_SERVER_URL: "${MOSAIC_TELEMETRY_SERVER_URL:-}"
    MOSAIC_TELEMETRY_API_KEY: "${MOSAIC_TELEMETRY_API_KEY:-}"
    MOSAIC_TELEMETRY_INSTANCE_ID: "${MOSAIC_TELEMETRY_INSTANCE_ID:-}"
    MOSAIC_TELEMETRY_DRY_RUN: "${MOSAIC_TELEMETRY_DRY_RUN:-false}"
    # Frontend URLs (for CORS and auth redirects)
    NEXT_PUBLIC_APP_URL: "${NEXT_PUBLIC_APP_URL}"
    NEXT_PUBLIC_API_URL: "${NEXT_PUBLIC_API_URL}"
    TRUSTED_ORIGINS: "${TRUSTED_ORIGINS:-}"
  healthcheck:
    # Passes only when GET /health on the API port answers HTTP 200; any
    # other status exits 1.
    test:
      - CMD-SHELL
      - 'node -e "require(''http'').get(''http://localhost:${API_PORT:-3001}/health'', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  networks:
    - internal
    - traefik-public
  deploy:
    restart_policy:
      condition: on-failure
    # Traefik routing: requests for MOSAIC_API_DOMAIN are forwarded to the
    # API port. Entrypoint, TLS and cert resolver are overridable.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.mosaic-api.rule=Host(`${MOSAIC_API_DOMAIN}`)"
      - "traefik.http.routers.mosaic-api.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.mosaic-api.tls=${TRAEFIK_TLS_ENABLED:-true}"
      - "traefik.http.routers.mosaic-api.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
      - "traefik.http.services.mosaic-api.loadbalancer.server.port=${API_PORT:-3001}"
      - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
# ======================
# Mosaic Web
# ======================
web:
  # Mosaic web frontend. Attached only to traefik-public and routed by the
  # Traefik labels under deploy.
  image: "git.mosaicstack.dev/mosaic/stack-web:${IMAGE_TAG:-latest}"
  environment:
    NODE_ENV: production
    PORT: "${WEB_PORT:-3000}"
    NEXT_PUBLIC_API_URL: "${NEXT_PUBLIC_API_URL}"
  healthcheck:
    # Passes only when GET / on the web port answers HTTP 200.
    test:
      - CMD-SHELL
      - 'node -e "require(''http'').get(''http://localhost:${WEB_PORT:-3000}'', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  networks:
    - traefik-public
  deploy:
    restart_policy:
      condition: on-failure
    # Traefik routing: requests for MOSAIC_WEB_DOMAIN are forwarded to the
    # web port.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.mosaic-web.rule=Host(`${MOSAIC_WEB_DOMAIN}`)"
      - "traefik.http.routers.mosaic-web.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.mosaic-web.tls=${TRAEFIK_TLS_ENABLED:-true}"
      - "traefik.http.routers.mosaic-web.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
      - "traefik.http.services.mosaic-web.loadbalancer.server.port=${WEB_PORT:-3000}"
      - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
# ======================
# Mosaic Coordinator
# ======================
coordinator:
  # Mosaic Coordinator. Listens on 0.0.0.0:8000 and is attached only to the
  # internal network (no Traefik labels).
  image: "git.mosaicstack.dev/mosaic/stack-coordinator:${IMAGE_TAG:-latest}"
  environment:
    GITEA_WEBHOOK_SECRET: "${GITEA_WEBHOOK_SECRET}"
    GITEA_URL: "${GITEA_URL}"
    ANTHROPIC_API_KEY: "${ANTHROPIC_API_KEY}"
    LOG_LEVEL: "${LOG_LEVEL:-info}"
    HOST: "0.0.0.0"
    PORT: "8000"
    COORDINATOR_POLL_INTERVAL: "${COORDINATOR_POLL_INTERVAL:-5.0}"
    COORDINATOR_MAX_CONCURRENT_AGENTS: "${COORDINATOR_MAX_CONCURRENT_AGENTS:-10}"
    COORDINATOR_ENABLED: "${COORDINATOR_ENABLED:-true}"
    # Telemetry (disabled by default)
    MOSAIC_TELEMETRY_ENABLED: "${MOSAIC_TELEMETRY_ENABLED:-false}"
    MOSAIC_TELEMETRY_SERVER_URL: "${MOSAIC_TELEMETRY_SERVER_URL:-}"
    MOSAIC_TELEMETRY_API_KEY: "${MOSAIC_TELEMETRY_API_KEY:-}"
    MOSAIC_TELEMETRY_INSTANCE_ID: "${MOSAIC_TELEMETRY_INSTANCE_ID:-}"
    MOSAIC_TELEMETRY_DRY_RUN: "${MOSAIC_TELEMETRY_DRY_RUN:-false}"
  healthcheck:
    # Exec-form probe: opens /health on port 8000 with Python's urllib.
    test:
      - CMD
      - python
      - "-c"
      - "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 5s
  networks:
    - internal
  deploy:
    restart_policy:
      condition: on-failure
# ======================
# Mosaic Orchestrator
# ======================
orchestrator:
  # Mosaic Orchestrator. Runs as UID/GID 1000 with every capability dropped
  # except NET_BIND_SERVICE, mounts the host Docker socket, and keeps its
  # working files on the orchestrator_workspace volume.
  image: "git.mosaicstack.dev/mosaic/stack-orchestrator:${IMAGE_TAG:-latest}"
  user: "1000:1000"
  environment:
    NODE_ENV: production
    ORCHESTRATOR_PORT: "3001"
    AI_PROVIDER: "${AI_PROVIDER:-ollama}"
    VALKEY_URL: "redis://valkey:6379"
    # Claude API key: only needed when AI_PROVIDER=claude, so it uses the
    # optional form and may be left empty for other providers.
    CLAUDE_API_KEY: "${CLAUDE_API_KEY:-}"
    DOCKER_SOCKET: /var/run/docker.sock
    GIT_USER_NAME: "Mosaic Orchestrator"
    GIT_USER_EMAIL: "orchestrator@mosaicstack.dev"
    KILLSWITCH_ENABLED: "true"
    SANDBOX_ENABLED: "true"
  volumes:
    # Host Docker socket, mounted read-only at the path DOCKER_SOCKET names.
    - /var/run/docker.sock:/var/run/docker.sock:ro
    - orchestrator_workspace:/workspace
  healthcheck:
    # Passes only when GET /health on port 3001 answers HTTP 200.
    test:
      - CMD-SHELL
      - 'node -e "require(''http'').get(''http://localhost:3001/health'', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"'
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  networks:
    - internal
  cap_drop:
    - ALL
  cap_add:
    - NET_BIND_SERVICE
  # NOTE(review): service-level tmpfs may be ignored by docker stack deploy
  # in swarm mode. Confirm /tmp is actually mounted noexec on a running task.
  tmpfs:
    - /tmp:noexec,nosuid,size=100m
  deploy:
    restart_policy:
      condition: on-failure
# ============================================
# MATRIX (Synapse + Element Web)
# ============================================
# ======================
# Synapse Database Init
# ======================
# Creates the 'synapse' database in the shared PostgreSQL instance.
# Runs once and exits. Idempotent — safe to run on every deploy.
synapse-db-init:
  # One-shot job: waits for PostgreSQL, then creates the Synapse role and
  # database if they do not exist yet. Each CREATE is guarded by an
  # existence check, so running it again on every deploy is harmless.
  image: postgres:17-alpine
  environment:
    PGHOST: postgres
    PGPORT: "5432"
    # Connects with the main PostgreSQL credentials.
    PGUSER: "${POSTGRES_USER}"
    PGPASSWORD: "${POSTGRES_PASSWORD}"
    # Name, owner and password of the database to create for Synapse.
    SYNAPSE_DB: "${SYNAPSE_POSTGRES_DB}"
    SYNAPSE_USER: "${SYNAPSE_POSTGRES_USER}"
    SYNAPSE_PASSWORD: "${SYNAPSE_POSTGRES_PASSWORD}"
  entrypoint: ["sh", "-c"]
  command:
    - |
      # Stop at the first failing statement so the task exits non-zero and
      # the on-failure restart policy can retry it. Without this, a failed
      # CREATE would be followed by the final echo and a zero exit status.
      set -e
      until pg_isready -h postgres -p 5432 -U $${PGUSER}; do
        echo "Waiting for PostgreSQL..."
        sleep 2
      done
      echo "PostgreSQL is ready. Creating Synapse database and user..."
      psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_roles WHERE rolname='$${SYNAPSE_USER}'" | grep -q 1 || \
        psql -h postgres -U $${PGUSER} -c "CREATE USER $${SYNAPSE_USER} WITH PASSWORD '$${SYNAPSE_PASSWORD}';"
      psql -h postgres -U $${PGUSER} -tc "SELECT 1 FROM pg_database WHERE datname='$${SYNAPSE_DB}'" | grep -q 1 || \
        psql -h postgres -U $${PGUSER} -c "CREATE DATABASE $${SYNAPSE_DB} OWNER $${SYNAPSE_USER} ENCODING 'UTF8' LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0;"
      echo "Synapse database ready: $${SYNAPSE_DB}"
  networks:
    - internal
  deploy:
    # Retry only after a non-zero exit, at most 5 times, 5s apart. A
    # successful run is not restarted.
    restart_policy:
      condition: on-failure
      delay: 5s
      max_attempts: 5
# ======================
# Synapse (Matrix Homeserver)
# ======================
synapse:
  # Synapse Matrix homeserver. Reads its configuration from
  # /data/homeserver.yaml, where /data is the host path /opt/mosaic/synapse.
  image: "matrixdotorg/synapse:${SYNAPSE_IMAGE_TAG:-latest}"
  environment:
    SYNAPSE_CONFIG_DIR: /data
    SYNAPSE_CONFIG_PATH: /data/homeserver.yaml
  volumes:
    # Host bind mount; the path must exist on the node running the task.
    - /opt/mosaic/synapse:/data
  healthcheck:
    # Passes when curl gets a successful response from /health on 8008.
    test:
      - CMD-SHELL
      - "curl -fSs http://localhost:8008/health || exit 1"
    interval: 15s
    timeout: 5s
    retries: 5
    start_period: 30s
  networks:
    - internal
    - traefik-public
  deploy:
    # Restart after a failure, up to 10 times with a 10s pause in between.
    restart_policy:
      condition: on-failure
      delay: 10s
      max_attempts: 10
    # Traefik routing: requests for MATRIX_DOMAIN are forwarded to 8008.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.matrix.rule=Host(`${MATRIX_DOMAIN}`)"
      - "traefik.http.routers.matrix.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.matrix.tls=${TRAEFIK_TLS_ENABLED:-true}"
      - "traefik.http.routers.matrix.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
      - "traefik.http.services.matrix.loadbalancer.server.port=8008"
      - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
# ======================
# Element Web (Matrix Client)
# ======================
element-web:
  # Element Web client. Its config.json is a read-only bind mount of
  # /opt/mosaic/synapse/element-config.json from the host.
  image: "vectorim/element-web:${ELEMENT_IMAGE_TAG:-latest}"
  volumes:
    - /opt/mosaic/synapse/element-config.json:/app/config.json:ro
  healthcheck:
    # Single wget spider request against port 80; a failure exits 1.
    test:
      - CMD-SHELL
      - "wget --no-verbose --tries=1 --spider http://localhost:80 || exit 1"
    interval: 30s
    timeout: 5s
    retries: 3
    start_period: 10s
  networks:
    - internal
    - traefik-public
  deploy:
    restart_policy:
      condition: on-failure
      delay: 5s
    # Traefik routing: requests for ELEMENT_DOMAIN are forwarded to port 80.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.element.rule=Host(`${ELEMENT_DOMAIN}`)"
      - "traefik.http.routers.element.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.element.tls=${TRAEFIK_TLS_ENABLED:-true}"
      - "traefik.http.routers.element.tls.certresolver=${TRAEFIK_CERTRESOLVER:-}"
      - "traefik.http.services.element.loadbalancer.server.port=80"
      - "traefik.docker.network=${TRAEFIK_DOCKER_NETWORK:-traefik-public}"
# ============================================
# SPEECH SERVICES
# ============================================
# ======================
# Speaches (STT + basic TTS)
# ======================
speaches:
  # Speaches speech service (CPU image). The Whisper model is configurable
  # and the Hugging Face cache directory is kept on the speaches_models
  # volume.
  image: "ghcr.io/speaches-ai/speaches:latest-cpu"
  environment:
    WHISPER__MODEL: "${SPEACHES_WHISPER_MODEL:-Systran/faster-whisper-large-v3-turbo}"
  volumes:
    - speaches_models:/root/.cache/huggingface
  healthcheck:
    # Probes /health on port 8000; failures are ignored for the first 120s.
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8000/health || exit 1"
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 120s
  networks:
    - internal
  deploy:
    restart_policy:
      condition: on-failure
# ======================
# Kokoro TTS
# ======================
kokoro-tts:
  # Kokoro TTS (CPU image). No environment or volumes are configured; it is
  # reachable only on the internal network.
  image: "ghcr.io/remsky/kokoro-fastapi-cpu:latest"
  healthcheck:
    # Probes /health on port 8880; failures are ignored for the first 120s.
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8880/health || exit 1"
    interval: 30s
    timeout: 10s
    retries: 5
    start_period: 120s
  networks:
    - internal
  deploy:
    restart_policy:
      condition: on-failure
# ======================
# Volumes
# ======================
volumes:
  # Named volumes referenced by the services above. Each uses default
  # settings, written as an explicit empty mapping rather than a bare key.
  postgres_data: {}
  valkey_data: {}
  orchestrator_workspace: {}
  speaches_models: {}
# ======================
# Networks
# ======================
networks:
  # Overlay network defined by this stack for service-to-service traffic.
  internal:
    driver: overlay
  # Declared external: this stack expects the network to exist already and
  # does not create it. Services carrying Traefik labels attach to it.
  traefik-public:
    external: true