From 9901fba61e4ce15a64ec8e28aa7af738e5b6be5d Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sun, 22 Feb 2026 07:40:24 +0000 Subject: [PATCH] docs: add Coolify deployment guide and compose file (#444) Co-authored-by: Jason Woltje Co-committed-by: Jason Woltje --- docker-compose.coolify.yml | 280 +++++++++++++++++++++++++++++++++++++ docs/COOLIFY-DEPLOYMENT.md | 137 ++++++++++++++++++ 2 files changed, 417 insertions(+) create mode 100644 docker-compose.coolify.yml create mode 100644 docs/COOLIFY-DEPLOYMENT.md diff --git a/docker-compose.coolify.yml b/docker-compose.coolify.yml new file mode 100644 index 0000000..60f0cc5 --- /dev/null +++ b/docker-compose.coolify.yml @@ -0,0 +1,280 @@ +# ============================================== +# Mosaic Stack — Coolify Core Deployment +# ============================================== +# +# Core services only. For Matrix, speech, and other optional +# services, deploy them as separate Coolify services or extend +# this file. +# +# Usage (Coolify): +# 1. New Resource -> Docker Compose +# 2. Paste this file +# 3. Set environment variables in Coolify UI +# 4. Configure domains for web + api in Coolify UI +# 5. Deploy +# +# NOTE: Traefik labels are NOT included here. Coolify manages +# routing and TLS via its own proxy integration. Configure +# domains in the Coolify service settings. 
+# +# ============================================== + +services: + # ====================== + # PostgreSQL Database + # ====================== + postgres: + image: git.mosaicstack.dev/mosaic/stack-postgres:${IMAGE_TAG:-latest} + restart: unless-stopped + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} + - POSTGRES_DB=${POSTGRES_DB} + - POSTGRES_SHARED_BUFFERS=${POSTGRES_SHARED_BUFFERS:-256MB} + - POSTGRES_EFFECTIVE_CACHE_SIZE=${POSTGRES_EFFECTIVE_CACHE_SIZE:-1GB} + - POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-100} + volumes: + - postgres_data:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 30s + networks: + - internal + + # ====================== + # Valkey Cache + # ====================== + valkey: + image: valkey/valkey:8-alpine + restart: unless-stopped + command: + - valkey-server + - --maxmemory ${VALKEY_MAXMEMORY:-256mb} + - --maxmemory-policy noeviction + - --appendonly yes + volumes: + - valkey_data:/data + healthcheck: + test: ["CMD", "valkey-cli", "ping"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + networks: + - internal + + # ====================== + # Mosaic API + # ====================== + api: + image: git.mosaicstack.dev/mosaic/stack-api:${IMAGE_TAG:-latest} + restart: unless-stopped + environment: + # Coolify domain assignment (magic variable — tells Coolify this service gets a domain on port 3001; the port in the variable name is fixed, so keep API_PORT at 3001 or rename the variable to match) + - SERVICE_FQDN_API_3001 + - NODE_ENV=production + - PORT=${API_PORT:-3001} + - API_HOST=${API_HOST:-0.0.0.0} + # Database + - DATABASE_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB} + # Cache + - VALKEY_URL=redis://valkey:6379 + # Auth (external Authentik — optional) + - OIDC_ENABLED=${OIDC_ENABLED:-false} + - OIDC_ISSUER=${OIDC_ISSUER:-} + - OIDC_CLIENT_ID=${OIDC_CLIENT_ID:-} + - 
OIDC_CLIENT_SECRET=${OIDC_CLIENT_SECRET:-} + - OIDC_REDIRECT_URI=${OIDC_REDIRECT_URI:-} + # JWT + - JWT_SECRET=${JWT_SECRET} + - JWT_EXPIRATION=${JWT_EXPIRATION:-24h} + # Better Auth (BETTER_AUTH_URL is required in production — the API will not start without it; ":?" makes the deploy fail fast when it is unset or empty) + - BETTER_AUTH_SECRET=${BETTER_AUTH_SECRET} + - BETTER_AUTH_URL=${BETTER_AUTH_URL:?} + - CSRF_SECRET=${CSRF_SECRET} + - COOKIE_DOMAIN=${COOKIE_DOMAIN:-} + # Encryption + - ENCRYPTION_KEY=${ENCRYPTION_KEY} + # External services (optional — leave empty to disable) + - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-} + - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.2} + - OPENBAO_ADDR=${OPENBAO_ADDR:-} + # Knowledge module + - KNOWLEDGE_CACHE_ENABLED=${KNOWLEDGE_CACHE_ENABLED:-true} + - KNOWLEDGE_CACHE_TTL=${KNOWLEDGE_CACHE_TTL:-300} + - SEMANTIC_SEARCH_SIMILARITY_THRESHOLD=${SEMANTIC_SEARCH_SIMILARITY_THRESHOLD:-0.5} + # Rate limiting + - RATE_LIMIT_TTL=${RATE_LIMIT_TTL:-60} + - RATE_LIMIT_GLOBAL_LIMIT=${RATE_LIMIT_GLOBAL_LIMIT:-100} + - RATE_LIMIT_STORAGE=${RATE_LIMIT_STORAGE:-redis} + # Speech services (disabled — not in core stack) + - STT_ENABLED=${STT_ENABLED:-false} + - TTS_ENABLED=${TTS_ENABLED:-false} + # Matrix bridge (disabled — not in core stack) + - MATRIX_ACCESS_TOKEN=${MATRIX_ACCESS_TOKEN:-} + # Telemetry (disabled by default) + - MOSAIC_TELEMETRY_ENABLED=${MOSAIC_TELEMETRY_ENABLED:-false} + - MOSAIC_TELEMETRY_SERVER_URL=${MOSAIC_TELEMETRY_SERVER_URL:-} + - MOSAIC_TELEMETRY_API_KEY=${MOSAIC_TELEMETRY_API_KEY:-} + - MOSAIC_TELEMETRY_INSTANCE_ID=${MOSAIC_TELEMETRY_INSTANCE_ID:-} + - MOSAIC_TELEMETRY_DRY_RUN=${MOSAIC_TELEMETRY_DRY_RUN:-false} + # Frontend URLs (for CORS and auth redirects) + - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL} + - NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} + - TRUSTED_ORIGINS=${TRUSTED_ORIGINS:-} + depends_on: + postgres: + condition: service_healthy + valkey: + condition: service_healthy + healthcheck: + test: + [ + "CMD-SHELL", + 'node -e "require(''http'').get(''http://localhost:${API_PORT:-3001}/health'', (r) => {process.exit(r.statusCode === 200 ? 
0 : 1)})"', + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - internal + + # ====================== + # Mosaic Web + # ====================== + web: + image: git.mosaicstack.dev/mosaic/stack-web:${IMAGE_TAG:-latest} + restart: unless-stopped + environment: + # Coolify domain assignment (magic variable — tells Coolify this service gets a domain on port 3000; the port in the variable name is fixed, so keep WEB_PORT at 3000 or rename the variable to match) + - SERVICE_FQDN_WEB_3000 + - NODE_ENV=production + - PORT=${WEB_PORT:-3000} + - NEXT_PUBLIC_API_URL=${NEXT_PUBLIC_API_URL} + - NEXT_PUBLIC_APP_URL=${NEXT_PUBLIC_APP_URL} + - NEXT_PUBLIC_ORCHESTRATOR_URL=${NEXT_PUBLIC_ORCHESTRATOR_URL:-} + - NEXT_PUBLIC_AUTH_MODE=${NEXT_PUBLIC_AUTH_MODE:-real} + - ORCHESTRATOR_API_KEY=${ORCHESTRATOR_API_KEY:-} + depends_on: + api: + condition: service_healthy + healthcheck: + test: + [ + "CMD-SHELL", + 'node -e "require(''http'').get(''http://localhost:${WEB_PORT:-3000}'', (r) => {process.exit(r.statusCode === 200 ? 0 : 1)})"', + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - internal + + # ====================== + # Mosaic Coordinator + # ====================== + coordinator: + image: git.mosaicstack.dev/mosaic/stack-coordinator:${IMAGE_TAG:-latest} + restart: unless-stopped + environment: + - GITEA_WEBHOOK_SECRET=${GITEA_WEBHOOK_SECRET:-} + - GITEA_URL=${GITEA_URL:-} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + - LOG_LEVEL=${LOG_LEVEL:-info} + - HOST=0.0.0.0 + - PORT=8000 + - COORDINATOR_POLL_INTERVAL=${COORDINATOR_POLL_INTERVAL:-5.0} + - COORDINATOR_MAX_CONCURRENT_AGENTS=${COORDINATOR_MAX_CONCURRENT_AGENTS:-10} + - COORDINATOR_ENABLED=${COORDINATOR_ENABLED:-true} + # Telemetry + - MOSAIC_TELEMETRY_ENABLED=${MOSAIC_TELEMETRY_ENABLED:-false} + - MOSAIC_TELEMETRY_SERVER_URL=${MOSAIC_TELEMETRY_SERVER_URL:-} + - MOSAIC_TELEMETRY_API_KEY=${MOSAIC_TELEMETRY_API_KEY:-} + - MOSAIC_TELEMETRY_INSTANCE_ID=${MOSAIC_TELEMETRY_INSTANCE_ID:-} + - MOSAIC_TELEMETRY_DRY_RUN=${MOSAIC_TELEMETRY_DRY_RUN:-false} + healthcheck: + 
test: + [ + "CMD", + "python", + "-c", + "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')", + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + networks: + - internal + + # ====================== + # Mosaic Orchestrator + # ====================== + orchestrator: + image: git.mosaicstack.dev/mosaic/stack-orchestrator:${IMAGE_TAG:-latest} + restart: unless-stopped + user: "1000:1000" + environment: + - NODE_ENV=production + - ORCHESTRATOR_PORT=3001 + - AI_PROVIDER=${AI_PROVIDER:-ollama} + - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-} + - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.2} + - VALKEY_URL=redis://valkey:6379 + - VALKEY_HOST=valkey + - VALKEY_PORT=6379 + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} + - ORCHESTRATOR_API_KEY=${ORCHESTRATOR_API_KEY:-} + - DOCKER_SOCKET=/var/run/docker.sock + - GIT_USER_NAME=Mosaic Orchestrator + - GIT_USER_EMAIL=orchestrator@mosaicstack.dev + - KILLSWITCH_ENABLED=true + - SANDBOX_ENABLED=true + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro # NOTE(review): ":ro" does not restrict Docker API calls made over the socket; also confirm UID 1000 can open it on the host + - orchestrator_workspace:/workspace + depends_on: + valkey: + condition: service_healthy + api: + condition: service_healthy + healthcheck: + test: + [ + "CMD-SHELL", + 'node -e "require(''http'').get(''http://localhost:3001/health'', (r) => {process.exit(r.statusCode === 200 ? 
0 : 1)})"', + ] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + networks: + - internal + security_opt: + - no-new-privileges:true + cap_drop: + - ALL + cap_add: + - NET_BIND_SERVICE + tmpfs: + - /tmp:noexec,nosuid,size=100m + +# ====================== +# Volumes +# ====================== +volumes: + postgres_data: + valkey_data: + orchestrator_workspace: + +# ====================== +# Networks +# ====================== +networks: + internal: + driver: bridge diff --git a/docs/COOLIFY-DEPLOYMENT.md b/docs/COOLIFY-DEPLOYMENT.md new file mode 100644 index 0000000..efa82f8 --- /dev/null +++ b/docs/COOLIFY-DEPLOYMENT.md @@ -0,0 +1,137 @@ +# Mosaic Stack — Coolify Deployment + +## Overview + +Coolify deployment on VM `10.1.1.44` (Proxmox). Replaces the Docker Swarm deployment on w-docker0 (`10.1.1.45`). + +## Architecture + +``` +Internet → Cloudflare → Public IP (174.137.97.162) + → Main Traefik (10.1.1.43) — TCP TLS passthrough for *.woltje.com + → Coolify Traefik (10.1.1.44) — terminates TLS via Cloudflare DNS-01 wildcard certs + → Service containers +``` + +## Services (Core Stack) + +| Service | Image | Internal Port | External Domain | +| ------------ | ----------------------------------------------- | --------------- | ----------------------- | +| postgres | `git.mosaicstack.dev/mosaic/stack-postgres` | 5432 | — | +| valkey | `valkey/valkey:8-alpine` | 6379 | — | +| api | `git.mosaicstack.dev/mosaic/stack-api` | 3001 | `api.mosaic.woltje.com` | +| web | `git.mosaicstack.dev/mosaic/stack-web` | 3000 | `mosaic.woltje.com` | +| coordinator | `git.mosaicstack.dev/mosaic/stack-coordinator` | 8000 | — | +| orchestrator | `git.mosaicstack.dev/mosaic/stack-orchestrator` | 3001 (internal) | — | + +Matrix (synapse, element-web) and speech services (speaches, kokoro-tts) are NOT included in the core stack. Deploy separately if needed. + +## Compose File + +`docker-compose.coolify.yml` in the repo root. 
This is the Coolify-compatible version of the deployment compose. + +Key differences from the Swarm compose (`docker-compose.swarm.portainer.yml`): + +- No `deploy:` blocks (Swarm-only) +- No Traefik labels (Coolify manages routing) +- Bridge network instead of overlay +- `restart: unless-stopped` instead of Swarm restart policies +- `SERVICE_FQDN_*` magic environment variables for Coolify domain assignment +- List-style environment syntax (required for Coolify magic vars) + +## Coolify IDs + +| Resource | UUID | +| ----------- | -------------------------- | +| Project | `rs04g008kgkkw4s0wgsk40w4` | +| Environment | `gko8csc804g8og0oosc8ccs8` | +| Service | `ug0ssok4g44wocok8kws8gg8` | +| Server | `as8kcogk08skskkcsok888g4` | + +### Application UUIDs + +| App | UUID | +| ------------ | --------------------------- | +| postgres | `jcw0ogskkw040os48ggkgkc8` | +| valkey | `skssgwcggc0c8owoogcso8og` | +| api | `mc40cgwwo8okwwoko84408k4k` | +| web | `c48gcwgc40ok44scscowc8cc` | +| coordinator | `s8gwog4c44w08c8sgkcg04k8` | +| orchestrator | `uo4wkg88co0ckc4c4k44sowc` | + +## Coolify API + +- Base URL: `http://10.1.1.44:8000/api/v1` +- Auth: Bearer token from `credentials.json` → `coolify.app_token` + +### Patterns & Gotchas + +- **Compose must be base64-encoded** when sent via the `docker_compose_raw` field +- **`SERVICE_FQDN_*` magic vars**: Coolify reads these from the compose to auto-assign domains. Format: `SERVICE_FQDN_{NAME}_{PORT}` (e.g., `SERVICE_FQDN_API_3001`). Must use list-style env syntax (`- SERVICE_FQDN_API_3001`), NOT dict-style. +- **FQDN updates on sub-applications**: The Coolify REST API doesn't support updating FQDNs on compose service sub-apps. Workaround: update them directly in Coolify's PostgreSQL DB (`coolify-db` container, `service_applications` table). 
+- **Environment variable management**: Use `PATCH /api/v1/services/{uuid}/envs` with `{ "key": "VAR_NAME", "value": "val", "is_preview": false }` +- **Service start**: `POST /api/v1/services/{uuid}/start` +- **Coolify uses PostgreSQL** (not SQLite) for its internal database — container `coolify-db` + +### DB Access (for workarounds) + +```bash +ssh localadmin@10.1.1.44 +docker exec -it coolify-db psql -U coolify -d coolify + +-- Run inside psql: check service app FQDNs +SELECT name, fqdn FROM service_applications WHERE service_id = ( + SELECT id FROM services WHERE uuid = 'ug0ssok4g44wocok8kws8gg8' +); +``` + +## Environment Variables + +All env vars are set via the Coolify API and stored in `/data/coolify/services/{uuid}/.env` on the node. + +Critical vars that were missing initially: + +- `BETTER_AUTH_URL` — **Required** in production. API won't start without it. Set to `https://api.mosaic.woltje.com`. + +## Current State (2026-02-22) + +### Working + +- All 6 containers running and healthy +- API health endpoint responds at `https://api.mosaic.woltje.com/health` +- Database migrations completed +- Inter-service networking (api→postgres, api→valkey) confirmed via health checks + +### Issues + +1. **DNS: `mosaic.woltje.com` points to wrong server** + - Resolves to `10.1.1.45` (old Swarm node) instead of resolving through Cloudflare to the public IP (`174.137.97.162`) + - `api.mosaic.woltje.com` resolves correctly through Cloudflare + - Fix: Update Cloudflare DNS A record for `mosaic.woltje.com` + +2. **Coordinator: OTLP exporter noise** + - Trying to export traces to `localhost:4318`, where no collector is listening + - Container is healthy, errors are non-critical + - Fix: Set `MOSAIC_TELEMETRY_ENABLED=false` in Coolify env vars, or deploy an OTLP collector + +3. 
**Coolify managed lifecycle** + - CoolifyTask was failing when the service was started via the API/UI + - Containers were started manually via `docker compose up -d` from the service directory (`/data/coolify/services/{uuid}`) + - Coolify recognizes the containers (correct naming convention) but may not properly manage restarts/redeploys + - Needs investigation: check Coolify task logs, verify compose processing + +4. **Full connectivity verification needed** + - web→api communication untested (blocked by DNS issue) + - Orchestrator→valkey and orchestrator→api connectivity unverified + - Coordinator webhook endpoint untested + +## SSH Access + +```bash +ssh localadmin@10.1.1.44 +# Note: localadmin cannot sudo without TTY/password +# Use docker to access files: +docker run --rm -v /data/coolify/services:/srv alpine cat /srv/{uuid}/docker-compose.yml +# Use docker exec for Coolify DB: +docker exec -it coolify-db psql -U coolify -d coolify +```