feat: initial alpha scaffold — FastAPI + MCP + pgvector
Implements v0.0.1 of OpenBrain: - FastAPI REST API (capture, search, recent, stats) with Bearer auth - MCP server (streamable HTTP at /mcp) exposing all 4 tools - pgvector schema (vector(1024) for bge-m3) - asyncpg connection pool with lazy init + graceful close - Ollama embedding client with fallback (stores thought without vector if Ollama unreachable) - Woodpecker CI pipeline (lint + kaniko build + push to Gitea registry) - Portainer/Swarm deployment compose - Mosaic framework files: AGENTS.md, PRD.md, TASKS.md, scratchpad Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
127
src/brain.py
Normal file
127
src/brain.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Core brain operations — capture, search, recent, stats."""
|
||||
import json
|
||||
from src import db, embeddings
|
||||
from src.models import CaptureRequest, Thought, SearchRequest, SearchResult, Stats
|
||||
|
||||
|
||||
async def capture(req: CaptureRequest) -> Thought:
    """Store a thought, embedding it when the embedding service is available.

    If the embedder returns None (e.g. Ollama unreachable) the row is still
    inserted, just without a vector, so no data is lost.

    Args:
        req: Validated capture payload (content, source, metadata).

    Returns:
        The persisted Thought, including whether a vector was stored.
    """
    pool = await db.get_pool()
    embedding = await embeddings.embed(req.content)

    # Build one INSERT instead of two near-identical SQL branches: the
    # embedding column/value pair is included only when a vector exists.
    columns = ["content", "source", "metadata"]
    params: list = [req.content, req.source, json.dumps(req.metadata)]
    casts = {"metadata": "::jsonb"}
    if embedding is not None:
        columns.insert(1, "embedding")
        params.insert(1, f"[{','.join(str(v) for v in embedding)}]")
        casts["embedding"] = "::vector"
    placeholders = ", ".join(
        f"${i}{casts.get(col, '')}" for i, col in enumerate(columns, start=1)
    )
    query = f"""
        INSERT INTO thoughts ({", ".join(columns)})
        VALUES ({placeholders})
        RETURNING id::text, content, source, metadata, created_at, embedding IS NOT NULL AS embedded
    """

    async with pool.acquire() as conn:
        row = await conn.fetchrow(query, *params)

    return Thought(
        id=row["id"],
        content=row["content"],
        source=row["source"],
        # asyncpg may return jsonb as str depending on codec configuration.
        metadata=json.loads(row["metadata"]) if isinstance(row["metadata"], str) else row["metadata"],
        created_at=row["created_at"],
        embedded=row["embedded"],
    )
|
||||
|
||||
|
||||
async def search(req: SearchRequest) -> list[SearchResult]:
    """Semantic search over embedded thoughts.

    Returns an empty list when the query cannot be embedded (embedder down),
    so callers see "no results" rather than an error.

    Args:
        req: Query text, result limit, and optional source filter.

    Returns:
        Matches ordered by vector distance (closest first).
    """
    embedding = await embeddings.embed(req.query)
    if embedding is None:
        return []

    pool = await db.get_pool()
    vec = f"[{','.join(str(v) for v in embedding)}]"

    # One query with an optional source filter instead of two duplicated SQL
    # strings that differed only in a single WHERE clause.
    where = "embedding IS NOT NULL"
    params: list = [vec]
    if req.source:
        params.append(req.source)
        where += f" AND source = ${len(params)}"
    params.append(req.limit)
    query = f"""
        SELECT id::text, content, source, metadata, created_at,
               1 - (embedding <=> $1::vector) AS similarity
        FROM thoughts
        WHERE {where}
        ORDER BY embedding <=> $1::vector
        LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)

    return [
        SearchResult(
            id=r["id"],
            content=r["content"],
            source=r["source"],
            similarity=float(r["similarity"]),
            created_at=r["created_at"],
            # asyncpg may return jsonb as str depending on codec configuration.
            metadata=json.loads(r["metadata"]) if isinstance(r["metadata"], str) else r["metadata"],
        )
        for r in rows
    ]
|
||||
|
||||
|
||||
async def recent(limit: int = 20) -> list[Thought]:
    """Return the most recently captured thoughts, newest first."""
    query = """
        SELECT id::text, content, source, metadata, created_at,
               embedding IS NOT NULL AS embedded
        FROM thoughts
        ORDER BY created_at DESC
        LIMIT $1
    """
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        records = await conn.fetch(query, limit)

    thoughts: list[Thought] = []
    for record in records:
        meta = record["metadata"]
        if isinstance(meta, str):
            # jsonb may arrive as a raw string depending on codec setup.
            meta = json.loads(meta)
        thoughts.append(
            Thought(
                id=record["id"],
                content=record["content"],
                source=record["source"],
                metadata=meta,
                created_at=record["created_at"],
                embedded=record["embedded"],
            )
        )
    return thoughts
|
||||
|
||||
|
||||
async def stats() -> Stats:
    """Aggregate counts: total thoughts, embedded thoughts, per-source totals."""
    pool = await db.get_pool()
    async with pool.acquire() as conn:
        total_thoughts = await conn.fetchval("SELECT COUNT(*) FROM thoughts")
        with_embedding = await conn.fetchval(
            "SELECT COUNT(*) FROM thoughts WHERE embedding IS NOT NULL"
        )
        per_source = await conn.fetch(
            "SELECT source, COUNT(*) AS count FROM thoughts GROUP BY source ORDER BY count DESC"
        )
    return Stats(
        total_thoughts=total_thoughts,
        embedded_count=with_embedding,
        sources=[{"source": row["source"], "count": row["count"]} for row in per_source],
    )
|
||||
23
src/config.py
Normal file
23
src/config.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Runtime configuration, read from the environment (and a .env file).

    Each attribute maps to an environment variable of the same name via
    pydantic-settings.
    """

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Database — DSN for the pgvector-enabled Postgres instance.
    database_url: str = "postgresql://openbrain:openbrain@localhost:5432/openbrain"

    # Auth — bearer token compared against incoming Authorization headers.
    api_key: str  # Required — no default, must be set (import fails otherwise)

    # Ollama — embedding backend endpoint and model name.
    ollama_url: str = "http://localhost:11434"
    ollama_embedding_model: str = "bge-m3:latest"

    # Service — bind address, port, and log verbosity.
    host: str = "0.0.0.0"
    port: int = 8000
    log_level: str = "info"


# Module-level singleton; constructing it validates required settings
# (notably api_key) at import time.
settings = Settings()
|
||||
18
src/db.py
Normal file
18
src/db.py
Normal file
@@ -0,0 +1,18 @@
|
||||
import asyncio

import asyncpg

from src.config import settings
|
||||
|
||||
_pool: asyncpg.Pool | None = None
|
||||
|
||||
|
||||
# Serializes first-time pool creation (see get_pool).
_pool_lock = asyncio.Lock()


async def get_pool() -> asyncpg.Pool:
    """Return the shared connection pool, creating it lazily on first use.

    The original check-then-await was not atomic: two coroutines calling
    get_pool() concurrently at startup could each create a pool, leaking
    one. The lock plus re-check makes creation happen exactly once.
    """
    global _pool
    if _pool is None:
        async with _pool_lock:
            if _pool is None:  # re-check: another coroutine may have won
                _pool = await asyncpg.create_pool(
                    settings.database_url, min_size=2, max_size=10
                )
    return _pool
|
||||
|
||||
|
||||
async def close_pool() -> None:
    """Gracefully close the shared pool and forget it, so a later
    get_pool() call can create a fresh one."""
    global _pool
    pool = _pool
    if pool is None:
        return
    await pool.close()
    _pool = None
|
||||
16
src/embeddings.py
Normal file
16
src/embeddings.py
Normal file
@@ -0,0 +1,16 @@
|
||||
import httpx
|
||||
from src.config import settings
|
||||
|
||||
|
||||
async def embed(text: str) -> list[float] | None:
    """Generate an embedding for *text* via Ollama.

    Returns None on any expected failure — Ollama unreachable, HTTP error,
    or a malformed response body — so callers can degrade gracefully and
    store the thought without a vector. Unlike the previous bare
    ``except Exception``, unexpected programming errors now propagate
    instead of being silently swallowed.

    Args:
        text: The content to embed.

    Returns:
        The embedding vector, or None when it could not be produced.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{settings.ollama_url}/api/embeddings",
                json={"model": settings.ollama_embedding_model, "prompt": text},
            )
            response.raise_for_status()
            return response.json()["embedding"]
    except (httpx.HTTPError, KeyError, ValueError):
        # httpx.HTTPError: transport, timeout, and 4xx/5xx status failures.
        # KeyError/ValueError: body missing "embedding" or not valid JSON.
        return None
|
||||
126
src/main.py
Normal file
126
src/main.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""OpenBrain — FastAPI REST + MCP server (single process)."""
|
||||
import contextlib
|
||||
import logging
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Security
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from src import brain, db
|
||||
from src.config import settings
|
||||
from src.models import CaptureRequest, SearchRequest, SearchResult, Stats, Thought
|
||||
|
||||
# Root logging config; verbosity comes from settings.log_level (default "info").
logging.basicConfig(level=settings.log_level.upper())
logger = logging.getLogger("openbrain")

# ---------------------------------------------------------------------------
# Auth — single static bearer token, validated against settings.api_key
# ---------------------------------------------------------------------------
bearer = HTTPBearer()
|
||||
|
||||
|
||||
def require_api_key(credentials: HTTPAuthorizationCredentials = Security(bearer)) -> str:
    """FastAPI dependency: validate the Bearer token against the configured key.

    Uses secrets.compare_digest (on encoded bytes, so non-ASCII keys are
    handled) for a constant-time comparison — a plain ``!=`` can leak key
    prefixes through response timing.

    Raises:
        HTTPException: 401 when the presented token does not match.
    """
    import secrets  # local import keeps this security fix self-contained

    presented = credentials.credentials.encode("utf-8")
    expected = settings.api_key.encode("utf-8")
    if not secrets.compare_digest(presented, expected):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# MCP server — exposes the four tools below over streamable HTTP (mounted
# at /mcp further down); stateless so each request is self-contained.
# ---------------------------------------------------------------------------
mcp = FastMCP("openbrain", stateless_http=True)
|
||||
|
||||
|
||||
@mcp.tool()
async def capture(content: str, source: str = "unknown", metadata: dict | None = None) -> dict:
    """Store a thought or piece of information in your brain.

    Args:
        content: The text to remember
        source: Which agent or tool is capturing this (e.g. 'claude-code', 'codex')
        metadata: Optional key/value pairs (tags, project, etc.)
    """
    request = CaptureRequest(
        content=content,
        source=source,
        metadata=metadata if metadata is not None else {},
    )
    stored = await brain.capture(request)
    return stored.model_dump(mode="json")
|
||||
|
||||
|
||||
@mcp.tool()
async def search(query: str, limit: int = 10, source: str | None = None) -> list[dict]:
    """Search your brain by meaning (semantic search).

    Args:
        query: What you're looking for — describe it naturally
        limit: Max results to return (default 10)
        source: Optional — filter to a specific agent/tool
    """
    request = SearchRequest(query=query, limit=limit, source=source)
    hits = await brain.search(request)
    return [hit.model_dump(mode="json") for hit in hits]
|
||||
|
||||
|
||||
@mcp.tool()
async def recent(limit: int = 20) -> list[dict]:
    """Get recently captured thoughts.

    Args:
        limit: How many to return (default 20)
    """
    latest = await brain.recent(limit=limit)
    serialized: list[dict] = []
    for thought in latest:
        serialized.append(thought.model_dump(mode="json"))
    return serialized
|
||||
|
||||
|
||||
@mcp.tool()
async def stats() -> dict:
    """Get statistics about your brain — total thoughts, embedding coverage, sources."""
    snapshot = await brain.stats()
    return snapshot.model_dump(mode="json")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# FastAPI app
# ---------------------------------------------------------------------------
@contextlib.asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup/shutdown hooks: warm the DB pool, close it on exit.

    The shutdown half runs in a ``finally`` so the pool is closed even when
    an exception propagates through the lifespan (the original skipped
    cleanup in that case).
    """
    logger.info("OpenBrain starting up")
    await db.get_pool()  # Warm the connection pool
    try:
        yield
    finally:
        await db.close_pool()
        logger.info("OpenBrain shut down")
|
||||
|
||||
|
||||
app = FastAPI(
    title="OpenBrain",
    description="Self-hosted semantic brain — pgvector + MCP for any AI agent",
    version="0.0.1",
    lifespan=lifespan,
)

# Mount MCP server at /mcp (HTTP streamable transport).
# NOTE(review): the mounted MCP app is NOT guarded by require_api_key —
# only the /v1 REST routes are; confirm unauthenticated /mcp is intended.
app.mount("/mcp", mcp.streamable_http_app())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# REST endpoints (for direct API access and health checks)
# ---------------------------------------------------------------------------
@app.get("/health")
async def health() -> dict:
    """Unauthenticated liveness probe."""
    payload = {"status": "ok", "version": "0.0.1"}
    return payload
|
||||
|
||||
|
||||
@app.post("/v1/thoughts", response_model=Thought)
async def api_capture(req: CaptureRequest, _: str = Depends(require_api_key)) -> Thought:
    """REST: capture a thought (Bearer auth required)."""
    stored = await brain.capture(req)
    return stored
|
||||
|
||||
|
||||
@app.post("/v1/search", response_model=list[SearchResult])
async def api_search(req: SearchRequest, _: str = Depends(require_api_key)) -> list[SearchResult]:
    """REST: semantic search (Bearer auth required)."""
    hits = await brain.search(req)
    return hits
|
||||
|
||||
|
||||
@app.get("/v1/thoughts/recent", response_model=list[Thought])
async def api_recent(limit: int = 20, _: str = Depends(require_api_key)) -> list[Thought]:
    """REST: list recently captured thoughts (Bearer auth required)."""
    latest = await brain.recent(limit=limit)
    return latest
|
||||
|
||||
|
||||
@app.get("/v1/stats", response_model=Stats)
async def api_stats(_: str = Depends(require_api_key)) -> Stats:
    """REST: brain statistics (Bearer auth required)."""
    snapshot = await brain.stats()
    return snapshot
|
||||
39
src/models.py
Normal file
39
src/models.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class CaptureRequest(BaseModel):
    """Inbound payload for storing a thought."""

    content: str  # the text to remember
    source: str = "unknown"  # which agent/tool captured it
    # NOTE(review): a mutable default is safe on a pydantic model — pydantic
    # copies field defaults per instance — but confirm project convention.
    metadata: dict[str, Any] = {}
|
||||
|
||||
|
||||
class Thought(BaseModel):
    """A stored thought as returned by the API."""

    id: str  # DB id cast to text (id::text) by the query layer
    content: str
    source: str
    metadata: dict[str, Any]
    created_at: datetime
    embedded: bool  # True when a vector was stored alongside the content
|
||||
|
||||
|
||||
class SearchRequest(BaseModel):
    """Inbound payload for semantic search."""

    query: str  # natural-language description of what to find
    limit: int = 10  # max results to return
    source: str | None = None  # optional filter to a single agent/tool
|
||||
|
||||
|
||||
class SearchResult(BaseModel):
    """One semantic-search hit."""

    id: str
    content: str
    source: str
    similarity: float  # 1 - (embedding <=> query); higher means closer
    created_at: datetime
    metadata: dict[str, Any]
|
||||
|
||||
|
||||
class Stats(BaseModel):
    """Aggregate brain statistics."""

    total_thoughts: int
    embedded_count: int  # thoughts that have an embedding vector
    sources: list[dict[str, Any]]  # [{"source": ..., "count": ...}, ...]
|
||||
Reference in New Issue
Block a user