Initial project structure

This commit is contained in:
2026-02-07 22:36:37 -06:00
commit 0b29302f43
26 changed files with 3308 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
"""Mosaic Stack Telemetry — Python client SDK.
A lightweight client for reporting AI coding task-completion telemetry
and querying crowd-sourced predictions from a Mosaic Stack Telemetry server.
"""
from mosaicstack_telemetry.client import TelemetryClient
from mosaicstack_telemetry.config import TelemetryConfig
from mosaicstack_telemetry.event_builder import EventBuilder
from mosaicstack_telemetry.prediction_cache import PredictionCache
from mosaicstack_telemetry.queue import EventQueue
from mosaicstack_telemetry.types.common import (
BatchEventRequest,
BatchEventResponse,
BatchEventResult,
TelemetryError,
)
from mosaicstack_telemetry.types.events import (
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
RepoSizeCategory,
TaskCompletionEvent,
TaskType,
)
from mosaicstack_telemetry.types.predictions import (
CorrectionFactors,
PredictionData,
PredictionMetadata,
PredictionQuery,
PredictionResponse,
QualityPrediction,
TokenDistribution,
)
# Package version; keep in sync with the distribution metadata.
__version__ = "0.1.0"

# Explicit public API re-exported at the package root.
__all__ = [
    # Client
    "TelemetryClient",
    "TelemetryConfig",
    "EventBuilder",
    "EventQueue",
    "PredictionCache",
    # Types - Events
    "TaskCompletionEvent",
    "TaskType",
    "Complexity",
    "Harness",
    "Provider",
    "QualityGate",
    "Outcome",
    "RepoSizeCategory",
    # Types - Predictions
    "PredictionQuery",
    "PredictionResponse",
    "PredictionData",
    "PredictionMetadata",
    "TokenDistribution",
    "CorrectionFactors",
    "QualityPrediction",
    # Types - Common
    "BatchEventRequest",
    "BatchEventResponse",
    "BatchEventResult",
    "TelemetryError",
]

View File

@@ -0,0 +1,109 @@
"""Asynchronous submitter using asyncio.Task for periodic flushing."""
from __future__ import annotations
import asyncio
import logging
from typing import TYPE_CHECKING
import httpx
from mosaicstack_telemetry.submitter import submit_batch_async
if TYPE_CHECKING:
from mosaicstack_telemetry.config import TelemetryConfig
from mosaicstack_telemetry.queue import EventQueue
logger = logging.getLogger("mosaicstack_telemetry")
class AsyncSubmitter:
    """Periodic event submitter using asyncio.Task and httpx.AsyncClient.

    Mirrors SyncSubmitter, but drives the flush loop from the running
    asyncio event loop instead of a timer thread.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        # Shared AsyncClient: created by start(), closed by stop().
        self._client: httpx.AsyncClient | None = None
        self._task: asyncio.Task[None] | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    async def start(self) -> None:
        """Start the periodic flush loop. No-op if already running."""
        if self._running:
            return
        self._client = httpx.AsyncClient()
        self._running = True
        self._task = asyncio.create_task(self._loop())
        logger.info(
            "Async submitter started (interval=%.1fs)",
            self._config.submit_interval_seconds,
        )

    async def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush."""
        if not self._running:
            return
        self._running = False
        if self._task is not None:
            # Cancel the sleep/flush loop and wait for it to unwind.
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None
        # Final flush
        await self.flush()
        if self._client is not None:
            await self._client.aclose()
            self._client = None
        logger.info("Async submitter stopped")

    async def flush(self) -> None:
        """Flush all queued events immediately, one batch at a time.

        On a failed batch, the drained events are put back at the front of
        the queue and flushing stops until the next attempt.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break
            client = self._client
            if client is None:
                # Not started (or already stopped): use a throwaway client
                # for this single batch.
                client = httpx.AsyncClient()
                try:
                    result = await submit_batch_async(client, self._config, events)
                finally:
                    await client.aclose()
            else:
                result = await submit_batch_async(client, self._config, events)
            if result is None:
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break
            if result.rejected > 0:
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    async def _loop(self) -> None:
        """Periodic flush loop: sleep one interval, flush, repeat until stopped."""
        while self._running:
            try:
                await asyncio.sleep(self._config.submit_interval_seconds)
                if self._running:
                    await self.flush()
            except asyncio.CancelledError:
                break
            except Exception:
                # Keep the loop alive across transient flush errors.
                logger.exception("Error during periodic async flush")

View File

@@ -0,0 +1,118 @@
"""Synchronous submitter using threading.Timer for periodic flushing."""
from __future__ import annotations
import logging
import threading
from typing import TYPE_CHECKING
import httpx
from mosaicstack_telemetry.submitter import submit_batch_sync
if TYPE_CHECKING:
from mosaicstack_telemetry.config import TelemetryConfig
from mosaicstack_telemetry.queue import EventQueue
logger = logging.getLogger("mosaicstack_telemetry")
class SyncSubmitter:
    """Periodic event submitter using threading.Timer and httpx.Client.

    A one-shot daemon Timer is re-armed after every tick; the lock guards
    the running flag, the timer handle, and the shared client.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        self._client: httpx.Client | None = None
        self._timer: threading.Timer | None = None
        self._running = False
        self._lock = threading.Lock()

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    def start(self) -> None:
        """Start the periodic flush loop. No-op if already running."""
        with self._lock:
            if self._running:
                return
            self._client = httpx.Client()
            self._running = True
            self._schedule_next()
            logger.info("Sync submitter started (interval=%.1fs)", self._config.submit_interval_seconds)

    def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush."""
        with self._lock:
            if not self._running:
                return
            self._running = False
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None
        # Final flush outside the lock to avoid deadlock
        self.flush()
        with self._lock:
            if self._client is not None:
                self._client.close()
                self._client = None
        logger.info("Sync submitter stopped")

    def flush(self) -> None:
        """Flush all queued events immediately, one batch at a time.

        On a failed batch, the drained events are put back at the front of
        the queue and flushing stops until the next tick.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break
            # NOTE(review): the lock is held across the HTTP call, so stop()
            # blocks until the in-flight batch finishes.
            with self._lock:
                client = self._client
                if client is None:
                    # Create a temporary client for the flush
                    client = httpx.Client()
                    try:
                        result = submit_batch_sync(client, self._config, events)
                    finally:
                        client.close()
                else:
                    result = submit_batch_sync(client, self._config, events)
            if result is None:
                # Submission failed, re-queue events
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break
            if result.rejected > 0:
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    def _schedule_next(self) -> None:
        """Arm a one-shot timer for the next flush. Caller must hold the lock."""
        if not self._running:
            return
        self._timer = threading.Timer(self._config.submit_interval_seconds, self._tick)
        # Daemon timer so a forgotten stop() never blocks interpreter exit.
        self._timer.daemon = True
        self._timer.start()

    def _tick(self) -> None:
        """Timer callback: flush and reschedule."""
        if not self._running:
            return
        try:
            self.flush()
        except Exception:
            # Never let a flush error kill the periodic loop.
            logger.exception("Error during periodic flush")
        finally:
            with self._lock:
                if self._running:
                    self._schedule_next()

View File

@@ -0,0 +1,196 @@
"""Main TelemetryClient — the public entry point for the SDK."""
from __future__ import annotations
import logging
from typing import Any
import httpx
from mosaicstack_telemetry._async import AsyncSubmitter
from mosaicstack_telemetry._sync import SyncSubmitter
from mosaicstack_telemetry.config import TelemetryConfig
from mosaicstack_telemetry.prediction_cache import PredictionCache
from mosaicstack_telemetry.queue import EventQueue
from mosaicstack_telemetry.types.events import TaskCompletionEvent
from mosaicstack_telemetry.types.predictions import (
PredictionQuery,
PredictionResponse,
)
logger = logging.getLogger("mosaicstack_telemetry")
class TelemetryClient:
    """Main client for Mosaic Stack Telemetry.

    Supports both sync and async usage patterns:

    **Sync (threading-based):**

        client = TelemetryClient(config)
        client.start()
        client.track(event)
        client.stop()

    **Async (asyncio-based):**

        client = TelemetryClient(config)
        await client.start_async()
        client.track(event)
        await client.stop_async()

    **Context managers:**

        with TelemetryClient(config) as client:
            client.track(event)

        async with TelemetryClient(config) as client:
            client.track(event)
    """

    def __init__(self, config: TelemetryConfig) -> None:
        # Validation problems are logged, not raised: telemetry must never
        # break the host application.
        errors = config.validate()
        if errors and config.enabled:
            logger.warning("Telemetry config validation errors: %s", "; ".join(errors))
        self._config = config
        self._queue = EventQueue(max_size=config.max_queue_size)
        self._prediction_cache = PredictionCache(ttl_seconds=config.prediction_cache_ttl_seconds)
        # Only one of the two submitters is active at a time, depending on
        # whether start() or start_async() was called.
        self._sync_submitter: SyncSubmitter | None = None
        self._async_submitter: AsyncSubmitter | None = None

    def start(self) -> None:
        """Start background submission using threading.Timer loop."""
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping start")
            return
        self._sync_submitter = SyncSubmitter(self._config, self._queue)
        self._sync_submitter.start()

    async def start_async(self) -> None:
        """Start with asyncio.Task for async contexts."""
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping async start")
            return
        self._async_submitter = AsyncSubmitter(self._config, self._queue)
        await self._async_submitter.start()

    def stop(self) -> None:
        """Stop background submission, flush remaining events synchronously."""
        if self._sync_submitter is not None:
            self._sync_submitter.stop()
            self._sync_submitter = None

    async def stop_async(self) -> None:
        """Async stop and flush."""
        if self._async_submitter is not None:
            await self._async_submitter.stop()
            self._async_submitter = None

    def track(self, event: TaskCompletionEvent) -> None:
        """Queue an event for submission. Always synchronous. Never blocks or throws.

        If telemetry is disabled, the event is silently dropped.
        """
        try:
            if not self._config.enabled:
                return
            self._queue.put(event)
            logger.debug("Event queued: %s", event.event_id)
        except Exception:
            # Deliberate catch-all: track() must never raise into callers.
            logger.exception("Unexpected error in track()")

    def get_prediction(self, query: PredictionQuery) -> PredictionResponse | None:
        """Get a cached prediction. Returns None if not cached or expired."""
        return self._prediction_cache.get(query)

    def refresh_predictions_sync(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server synchronously and cache them."""
        if not queries:
            return
        url = f"{self._config.server_url}/v1/predictions/batch"
        body = {"queries": [q.model_dump(mode="json") for q in queries]}
        try:
            # NOTE(review): unlike event submission, no Authorization header
            # is sent here — confirm the predictions endpoint is unauthenticated.
            with httpx.Client() as client:
                response = client.post(
                    url,
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
            if response.status_code == 200:
                data = response.json()
                results = data.get("results", [])
                # zip pairs results with queries positionally; if the server
                # returns fewer results, trailing queries are left uncached.
                for query, result_data in zip(queries, results):
                    pred = PredictionResponse.model_validate(result_data)
                    self._prediction_cache.put(query, pred)
                logger.debug("Refreshed %d predictions", len(results))
            else:
                logger.warning(
                    "Prediction refresh failed with status %d",
                    response.status_code,
                )
        except Exception:
            # Best-effort: prediction refresh failures are logged, never raised.
            logger.exception("Error refreshing predictions")

    async def refresh_predictions(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server asynchronously and cache them.

        Async twin of refresh_predictions_sync; keep the two in sync.
        """
        if not queries:
            return
        url = f"{self._config.server_url}/v1/predictions/batch"
        body = {"queries": [q.model_dump(mode="json") for q in queries]}
        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
            if response.status_code == 200:
                data = response.json()
                results = data.get("results", [])
                for query, result_data in zip(queries, results):
                    pred = PredictionResponse.model_validate(result_data)
                    self._prediction_cache.put(query, pred)
                logger.debug("Refreshed %d predictions", len(results))
            else:
                logger.warning(
                    "Prediction refresh failed with status %d",
                    response.status_code,
                )
        except Exception:
            logger.exception("Error refreshing predictions")

    @property
    def queue_size(self) -> int:
        """Number of events currently in the queue."""
        return self._queue.size

    @property
    def is_running(self) -> bool:
        """Whether background submission is active."""
        if self._sync_submitter is not None:
            return self._sync_submitter.is_running
        if self._async_submitter is not None:
            return self._async_submitter.is_running
        return False

    # Sync context manager
    def __enter__(self) -> TelemetryClient:
        self.start()
        return self

    def __exit__(self, *exc: Any) -> None:
        self.stop()

    # Async context manager
    async def __aenter__(self) -> TelemetryClient:
        await self.start_async()
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.stop_async()

View File

@@ -0,0 +1,91 @@
"""Telemetry client configuration."""
from __future__ import annotations
import os
import re
from dataclasses import dataclass, field
# Server-issued API keys are 64 hex characters.
_HEX_64_RE = re.compile(r"^[0-9a-fA-F]{64}$")
# Canonical 8-4-4-4-12 UUID string form.
_UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)


@dataclass
class TelemetryConfig:
    """Configuration for the telemetry client.

    Values can be provided directly or loaded from environment variables:
    - MOSAIC_TELEMETRY_ENABLED -> enabled
    - MOSAIC_TELEMETRY_SERVER_URL -> server_url
    - MOSAIC_TELEMETRY_API_KEY -> api_key
    - MOSAIC_TELEMETRY_INSTANCE_ID -> instance_id

    MOSAIC_TELEMETRY_ENABLED always overrides the constructor argument;
    the other variables only fill in values that were left empty.
    """

    server_url: str = ""
    api_key: str = ""
    instance_id: str = ""
    enabled: bool = True
    submit_interval_seconds: float = 300.0
    max_queue_size: int = 1000
    batch_size: int = 100
    request_timeout_seconds: float = 10.0
    prediction_cache_ttl_seconds: float = 21600.0
    # When True, submitters log what they would send and skip network I/O.
    dry_run: bool = False
    max_retries: int = 3
    user_agent: str = field(default="mosaicstack-telemetry-python/0.1.0")

    def __post_init__(self) -> None:
        """Apply environment variable overrides and normalize server_url."""
        env_enabled = os.environ.get("MOSAIC_TELEMETRY_ENABLED")
        if env_enabled is not None:
            # Anything other than 1/true/yes (case-insensitive) disables.
            self.enabled = env_enabled.lower() in ("1", "true", "yes")
        env_url = os.environ.get("MOSAIC_TELEMETRY_SERVER_URL")
        if env_url and not self.server_url:
            self.server_url = env_url
        env_key = os.environ.get("MOSAIC_TELEMETRY_API_KEY")
        if env_key and not self.api_key:
            self.api_key = env_key
        env_instance = os.environ.get("MOSAIC_TELEMETRY_INSTANCE_ID")
        if env_instance and not self.instance_id:
            self.instance_id = env_instance
        # Strip trailing slashes: endpoint paths are appended with a
        # leading slash when URLs are built.
        self.server_url = self.server_url.rstrip("/")

    def validate(self) -> list[str]:
        """Validate configuration and return list of errors (empty if valid)."""
        errors: list[str] = []
        if not self.server_url:
            errors.append("server_url is required")
        elif not self.server_url.startswith(("http://", "https://")):
            errors.append("server_url must start with http:// or https://")
        if not self.api_key:
            errors.append("api_key is required")
        elif not _HEX_64_RE.match(self.api_key):
            errors.append("api_key must be a 64-character hex string")
        if not self.instance_id:
            errors.append("instance_id is required")
        elif not _UUID_RE.match(self.instance_id):
            errors.append("instance_id must be a valid UUID string")
        if self.submit_interval_seconds <= 0:
            errors.append("submit_interval_seconds must be positive")
        if self.max_queue_size <= 0:
            errors.append("max_queue_size must be positive")
        if self.batch_size <= 0 or self.batch_size > 100:
            errors.append("batch_size must be between 1 and 100")
        if self.request_timeout_seconds <= 0:
            errors.append("request_timeout_seconds must be positive")
        # A negative value would make the submitters' retry loops
        # (range(max_retries + 1)) run zero times — no request ever sent.
        if self.max_retries < 0:
            errors.append("max_retries must be non-negative")
        return errors

View File

@@ -0,0 +1,207 @@
"""Convenience builder for constructing TaskCompletionEvent instances."""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
from mosaicstack_telemetry.types.events import (
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
RepoSizeCategory,
TaskCompletionEvent,
TaskType,
)
class EventBuilder:
    """Fluent builder for TaskCompletionEvent.

    Provides a convenient way to construct events with sensible defaults
    and a chainable API.

    Example::

        event = (
            EventBuilder(instance_id="...")
            .task_type(TaskType.IMPLEMENTATION)
            .model("claude-sonnet-4-20250514")
            .provider(Provider.ANTHROPIC)
            .harness_type(Harness.CLAUDE_CODE)
            .complexity_level(Complexity.MEDIUM)
            .outcome_value(Outcome.SUCCESS)
            .duration_ms(45000)
            .tokens(estimated_in=1000, estimated_out=500, actual_in=1100, actual_out=480)
            .cost(estimated=50000, actual=48000)
            .quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST])
            .context(compactions=0, rotations=0, utilization=0.3)
            .build()
        )
    """

    def __init__(self, instance_id: str | UUID) -> None:
        # Accept either a UUID or its string form; normalize to UUID.
        self._instance_id = UUID(str(instance_id))
        self._event_id: UUID = uuid4()
        self._timestamp: datetime = datetime.now(timezone.utc)
        self._task_duration_ms: int = 0
        self._task_type: TaskType = TaskType.UNKNOWN
        self._complexity: Complexity = Complexity.MEDIUM
        self._harness: Harness = Harness.UNKNOWN
        self._model: str = "unknown"
        self._provider: Provider = Provider.UNKNOWN
        self._estimated_input_tokens: int = 0
        self._estimated_output_tokens: int = 0
        self._actual_input_tokens: int = 0
        self._actual_output_tokens: int = 0
        self._estimated_cost_usd_micros: int = 0
        self._actual_cost_usd_micros: int = 0
        self._quality_gate_passed: bool = False
        self._quality_gates_run: list[QualityGate] = []
        self._quality_gates_failed: list[QualityGate] = []
        self._context_compactions: int = 0
        self._context_rotations: int = 0
        self._context_utilization_final: float = 0.0
        # Outcome defaults to FAILURE; set outcome_value() explicitly.
        self._outcome: Outcome = Outcome.FAILURE
        self._retry_count: int = 0
        self._language: str | None = None
        self._repo_size_category: RepoSizeCategory | None = None

    def event_id(self, value: str | UUID) -> EventBuilder:
        """Set a specific event ID (default: auto-generated UUID)."""
        self._event_id = UUID(str(value))
        return self

    def timestamp(self, value: datetime) -> EventBuilder:
        """Set the event timestamp (default: now UTC)."""
        self._timestamp = value
        return self

    def task_type(self, value: TaskType) -> EventBuilder:
        """Set the task type."""
        self._task_type = value
        return self

    def complexity_level(self, value: Complexity) -> EventBuilder:
        """Set the complexity level."""
        self._complexity = value
        return self

    def harness_type(self, value: Harness) -> EventBuilder:
        """Set the harness type."""
        self._harness = value
        return self

    def model(self, value: str) -> EventBuilder:
        """Set the model name."""
        self._model = value
        return self

    def provider(self, value: Provider) -> EventBuilder:
        """Set the provider."""
        self._provider = value
        return self

    def duration_ms(self, value: int) -> EventBuilder:
        """Set the task duration in milliseconds."""
        self._task_duration_ms = value
        return self

    def tokens(
        self,
        *,
        estimated_in: int = 0,
        estimated_out: int = 0,
        actual_in: int = 0,
        actual_out: int = 0,
    ) -> EventBuilder:
        """Set token counts."""
        self._estimated_input_tokens = estimated_in
        self._estimated_output_tokens = estimated_out
        self._actual_input_tokens = actual_in
        self._actual_output_tokens = actual_out
        return self

    def cost(self, *, estimated: int = 0, actual: int = 0) -> EventBuilder:
        """Set cost in USD micros."""
        self._estimated_cost_usd_micros = estimated
        self._actual_cost_usd_micros = actual
        return self

    def quality(
        self,
        *,
        passed: bool,
        gates_run: list[QualityGate] | None = None,
        gates_failed: list[QualityGate] | None = None,
    ) -> EventBuilder:
        """Set quality gate results."""
        self._quality_gate_passed = passed
        self._quality_gates_run = gates_run or []
        self._quality_gates_failed = gates_failed or []
        return self

    def context(
        self,
        *,
        compactions: int = 0,
        rotations: int = 0,
        utilization: float = 0.0,
    ) -> EventBuilder:
        """Set context window metrics."""
        self._context_compactions = compactions
        self._context_rotations = rotations
        self._context_utilization_final = utilization
        return self

    def outcome_value(self, value: Outcome) -> EventBuilder:
        """Set the task outcome."""
        self._outcome = value
        return self

    def retry_count(self, value: int) -> EventBuilder:
        """Set the retry count."""
        self._retry_count = value
        return self

    def language(self, value: str | None) -> EventBuilder:
        """Set the programming language."""
        self._language = value
        return self

    def repo_size(self, value: RepoSizeCategory | None) -> EventBuilder:
        """Set the repository size category."""
        self._repo_size_category = value
        return self

    def build(self) -> TaskCompletionEvent:
        """Build and return the TaskCompletionEvent.

        Pydantic validation (field bounds) happens here, not in the setters.
        """
        return TaskCompletionEvent(
            instance_id=self._instance_id,
            event_id=self._event_id,
            timestamp=self._timestamp,
            task_duration_ms=self._task_duration_ms,
            task_type=self._task_type,
            complexity=self._complexity,
            harness=self._harness,
            model=self._model,
            provider=self._provider,
            estimated_input_tokens=self._estimated_input_tokens,
            estimated_output_tokens=self._estimated_output_tokens,
            actual_input_tokens=self._actual_input_tokens,
            actual_output_tokens=self._actual_output_tokens,
            estimated_cost_usd_micros=self._estimated_cost_usd_micros,
            actual_cost_usd_micros=self._actual_cost_usd_micros,
            quality_gate_passed=self._quality_gate_passed,
            quality_gates_run=self._quality_gates_run,
            quality_gates_failed=self._quality_gates_failed,
            context_compactions=self._context_compactions,
            context_rotations=self._context_rotations,
            context_utilization_final=self._context_utilization_final,
            outcome=self._outcome,
            retry_count=self._retry_count,
            language=self._language,
            repo_size_category=self._repo_size_category,
        )

View File

@@ -0,0 +1,56 @@
"""Thread-safe prediction cache with TTL."""
from __future__ import annotations
import logging
import threading
import time
from mosaicstack_telemetry.types.predictions import PredictionQuery, PredictionResponse
logger = logging.getLogger("mosaicstack_telemetry")
def _cache_key(query: PredictionQuery) -> str:
"""Generate a deterministic cache key from a prediction query."""
return f"{query.task_type.value}:{query.model}:{query.provider.value}:{query.complexity.value}"
class PredictionCache:
    """Thread-safe, TTL-bounded cache mapping prediction queries to responses."""

    def __init__(self, ttl_seconds: float = 21600.0) -> None:
        # Entries map key -> (response, absolute expiry on the monotonic clock).
        self._ttl = ttl_seconds
        self._store: dict[str, tuple[PredictionResponse, float]] = {}
        self._lock = threading.Lock()

    @staticmethod
    def _key_for(query: PredictionQuery) -> str:
        # Same four dimensions the server predicts over.
        return f"{query.task_type.value}:{query.model}:{query.provider.value}:{query.complexity.value}"

    def get(self, query: PredictionQuery) -> PredictionResponse | None:
        """Return the cached response for *query*, or None if absent or expired."""
        key = self._key_for(query)
        now = time.monotonic()
        with self._lock:
            try:
                response, expires_at = self._store[key]
            except KeyError:
                return None
            if now > expires_at:
                # Expired entries are evicted lazily, on lookup.
                del self._store[key]
                return None
            return response

    def put(self, query: PredictionQuery, response: PredictionResponse) -> None:
        """Cache *response* for *query*, valid for the configured TTL."""
        entry = (response, time.monotonic() + self._ttl)
        with self._lock:
            self._store[self._key_for(query)] = entry

    def clear(self) -> None:
        """Drop every cached prediction."""
        with self._lock:
            self._store.clear()

    @property
    def size(self) -> int:
        """Entry count, including lazily-expired entries not yet evicted."""
        with self._lock:
            return len(self._store)

View File

@@ -0,0 +1,70 @@
"""Thread-safe bounded event queue."""
from __future__ import annotations
import logging
import threading
from collections import deque
from mosaicstack_telemetry.types.events import TaskCompletionEvent
logger = logging.getLogger("mosaicstack_telemetry")
class EventQueue:
    """Thread-safe bounded FIFO queue for telemetry events.

    When the queue is full, the oldest events are evicted (FIFO eviction)
    to make room for new ones.
    """

    def __init__(self, max_size: int = 1000) -> None:
        self._max_size = max_size
        # maxlen makes the deque drop its oldest element automatically on append.
        self._deque: deque[TaskCompletionEvent] = deque(maxlen=max_size)
        self._lock = threading.Lock()

    def put(self, event: TaskCompletionEvent) -> None:
        """Add an event to the queue. Never blocks.

        If the queue is full, the oldest event is silently evicted.
        """
        with self._lock:
            if len(self._deque) >= self._max_size:
                # The bounded deque will discard its head; surface that fact.
                logger.warning(
                    "Event queue full (%d items), evicting oldest event",
                    self._max_size,
                )
            self._deque.append(event)

    def drain(self, max_items: int) -> list[TaskCompletionEvent]:
        """Remove and return up to max_items events from the front of the queue."""
        with self._lock:
            take = min(max_items, len(self._deque))
            return [self._deque.popleft() for _ in range(take)]

    def put_back(self, events: list[TaskCompletionEvent]) -> None:
        """Put events back at the front of the queue (for retry scenarios).

        Events go back to the left (front) so they drain first next time.
        If adding them all would exceed max_size, only as many as fit are
        re-added; the rest are dropped.
        """
        with self._lock:
            room = self._max_size - len(self._deque)
            # extendleft inserts one by one, so reverse to preserve order.
            self._deque.extendleft(reversed(events[:room]))

    @property
    def size(self) -> int:
        """Current number of events in the queue."""
        with self._lock:
            return len(self._deque)

    @property
    def is_empty(self) -> bool:
        """Whether the queue is empty."""
        with self._lock:
            return not self._deque

View File

@@ -0,0 +1,205 @@
"""Batch submission logic with retry and backoff."""
from __future__ import annotations
import logging
import random
import time
from typing import TYPE_CHECKING
import httpx
from mosaicstack_telemetry.types.common import BatchEventRequest, BatchEventResponse
from mosaicstack_telemetry.types.events import TaskCompletionEvent
if TYPE_CHECKING:
from mosaicstack_telemetry.config import TelemetryConfig
logger = logging.getLogger("mosaicstack_telemetry")
def _backoff_delay(attempt: int, base: float = 1.0, maximum: float = 60.0) -> float:
"""Calculate exponential backoff with jitter."""
delay = min(base * (2**attempt), maximum)
jitter = random.uniform(0, delay * 0.5) # noqa: S311
return delay + jitter
def submit_batch_sync(
    client: httpx.Client,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events synchronously with retry logic.

    Retries timeouts, network errors, 429s, and unexpected statuses with
    exponential backoff; gives up immediately on 403 (auth cannot heal).

    Returns the BatchEventResponse on success, or None if all retries failed.
    """
    url = f"{config.server_url}/v1/events/batch"
    request_body = BatchEventRequest(events=events)
    for attempt in range(config.max_retries + 1):
        try:
            if config.dry_run:
                # Dry-run short-circuits before any network I/O and reports
                # the whole batch as accepted.
                logger.info(
                    "[DRY RUN] Would submit batch of %d events to %s",
                    len(events),
                    url,
                )
                return BatchEventResponse(
                    accepted=len(events),
                    rejected=0,
                    results=[],
                )
            response = client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )
            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())
            if response.status_code == 429:
                # Honor the server's Retry-After when it is a delta-seconds
                # value. Per RFC 7231 it may also be an HTTP-date, which
                # float() cannot parse — fall back to backoff instead of
                # letting the ValueError escape and lose the batch.
                retry_after = response.headers.get("Retry-After")
                try:
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                time.sleep(delay)
                continue
            if response.status_code == 403:
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None
            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )
        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )
        # Generic backoff for unexpected statuses and network failures
        # (429 already slept and used `continue` above).
        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            time.sleep(delay)
    logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events))
    return None
async def submit_batch_async(
    client: httpx.AsyncClient,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events asynchronously with retry logic.

    Async twin of submit_batch_sync: retries timeouts, network errors,
    429s, and unexpected statuses with exponential backoff; gives up
    immediately on 403.

    Returns the BatchEventResponse on success, or None if all retries failed.
    """
    import asyncio
    url = f"{config.server_url}/v1/events/batch"
    request_body = BatchEventRequest(events=events)
    for attempt in range(config.max_retries + 1):
        try:
            if config.dry_run:
                # Dry-run short-circuits before any network I/O and reports
                # the whole batch as accepted.
                logger.info(
                    "[DRY RUN] Would submit batch of %d events to %s",
                    len(events),
                    url,
                )
                return BatchEventResponse(
                    accepted=len(events),
                    rejected=0,
                    results=[],
                )
            response = await client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )
            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())
            if response.status_code == 429:
                # Honor the server's Retry-After when it is a delta-seconds
                # value. Per RFC 7231 it may also be an HTTP-date, which
                # float() cannot parse — fall back to backoff instead of
                # letting the ValueError escape and lose the batch.
                retry_after = response.headers.get("Retry-After")
                try:
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                await asyncio.sleep(delay)
                continue
            if response.status_code == 403:
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None
            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )
        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )
        # Generic backoff for unexpected statuses and network failures
        # (429 already slept and used `continue` above).
        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            await asyncio.sleep(delay)
    logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events))
    return None

View File

@@ -0,0 +1,49 @@
"""Mosaic Stack Telemetry type definitions."""
from mosaicstack_telemetry.types.common import (
BatchEventRequest,
BatchEventResponse,
BatchEventResult,
TelemetryError,
)
from mosaicstack_telemetry.types.events import (
Complexity,
Harness,
Outcome,
Provider,
QualityGate,
RepoSizeCategory,
TaskCompletionEvent,
TaskType,
)
from mosaicstack_telemetry.types.predictions import (
CorrectionFactors,
PredictionData,
PredictionMetadata,
PredictionQuery,
PredictionResponse,
QualityPrediction,
TokenDistribution,
)
# Public API of the types subpackage, sorted alphabetically.
__all__ = [
    "BatchEventRequest",
    "BatchEventResponse",
    "BatchEventResult",
    "Complexity",
    "CorrectionFactors",
    "Harness",
    "Outcome",
    "PredictionData",
    "PredictionMetadata",
    "PredictionQuery",
    "PredictionResponse",
    "Provider",
    "QualityGate",
    "QualityPrediction",
    "RepoSizeCategory",
    "TaskCompletionEvent",
    "TaskType",
    "TelemetryError",
    "TokenDistribution",
]

View File

@@ -0,0 +1,35 @@
"""Common types shared across the SDK."""
from __future__ import annotations
from uuid import UUID
from pydantic import BaseModel, Field
from mosaicstack_telemetry.types.events import TaskCompletionEvent
# Root of the SDK's exception hierarchy; catch this to handle any SDK error.
class TelemetryError(Exception):
    """Base exception for telemetry client errors."""
class BatchEventRequest(BaseModel):
    """Request body for batch event submission."""

    # 1-100 events per batch; matches the batch_size bound in TelemetryConfig.
    events: list[TaskCompletionEvent] = Field(min_length=1, max_length=100)
class BatchEventResult(BaseModel):
    """Result for a single event in a batch submission."""

    event_id: UUID
    status: str  # "accepted" or "rejected"
    # Human-readable rejection reason; None when the event was accepted.
    error: str | None = None
class BatchEventResponse(BaseModel):
    """Response from the batch event submission endpoint."""

    # Counts of events accepted/rejected in this batch.
    accepted: int
    rejected: int
    # Per-event outcomes; may be empty (e.g. the dry-run path builds it empty).
    results: list[BatchEventResult]

View File

@@ -0,0 +1,122 @@
"""Task completion event types and enums."""
from __future__ import annotations
from datetime import datetime, timezone
from enum import Enum
from uuid import UUID, uuid4
from pydantic import BaseModel, Field
class TaskType(str, Enum):
    """Type of task being performed.

    str-valued so members serialize directly in JSON payloads.
    """

    PLANNING = "planning"
    IMPLEMENTATION = "implementation"
    CODE_REVIEW = "code_review"
    TESTING = "testing"
    DEBUGGING = "debugging"
    REFACTORING = "refactoring"
    DOCUMENTATION = "documentation"
    CONFIGURATION = "configuration"
    SECURITY_AUDIT = "security_audit"
    # Fallback used by EventBuilder when no task type is set.
    UNKNOWN = "unknown"
class Complexity(str, Enum):
    """Task complexity level, declared from least to most complex."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
class Harness(str, Enum):
    """AI coding harness used."""

    CLAUDE_CODE = "claude_code"
    OPENCODE = "opencode"
    KILO_CODE = "kilo_code"
    AIDER = "aider"
    API_DIRECT = "api_direct"
    OLLAMA_LOCAL = "ollama_local"
    CUSTOM = "custom"
    # Fallback used by EventBuilder when no harness is set.
    UNKNOWN = "unknown"
class Provider(str, Enum):
    """AI model provider."""

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    OPENROUTER = "openrouter"
    OLLAMA = "ollama"
    GOOGLE = "google"
    MISTRAL = "mistral"
    CUSTOM = "custom"
    # Fallback used by EventBuilder when no provider is set.
    UNKNOWN = "unknown"
class QualityGate(str, Enum):
    """Quality gate type reported in quality_gates_run/failed."""

    BUILD = "build"
    LINT = "lint"
    TEST = "test"
    COVERAGE = "coverage"
    TYPECHECK = "typecheck"
    SECURITY = "security"
class Outcome(str, Enum):
    """Task outcome. EventBuilder defaults to FAILURE until set explicitly."""

    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"
    TIMEOUT = "timeout"
class RepoSizeCategory(str, Enum):
    """Repository size category, declared from smallest to largest."""

    TINY = "tiny"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    HUGE = "huge"
class TaskCompletionEvent(BaseModel):
    """A single task completion telemetry event.

    Field bounds reject obviously invalid values at construction time;
    validation errors surface when the model is built (e.g. in
    EventBuilder.build()), not when the server receives the event.
    """

    instance_id: UUID
    event_id: UUID = Field(default_factory=uuid4)
    schema_version: str = "1.0"
    # Timezone-aware UTC by default; never naive.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    # Capped at 24 hours (86_400_000 ms).
    task_duration_ms: int = Field(ge=0, le=86_400_000)
    task_type: TaskType
    complexity: Complexity
    harness: Harness
    model: str = Field(min_length=1, max_length=100)
    provider: Provider
    estimated_input_tokens: int = Field(ge=0, le=10_000_000)
    estimated_output_tokens: int = Field(ge=0, le=10_000_000)
    actual_input_tokens: int = Field(ge=0, le=10_000_000)
    actual_output_tokens: int = Field(ge=0, le=10_000_000)
    # Costs in integer micro-USD (per the *_usd_micros naming), capped at 100 USD.
    estimated_cost_usd_micros: int = Field(ge=0, le=100_000_000)
    actual_cost_usd_micros: int = Field(ge=0, le=100_000_000)
    quality_gate_passed: bool
    quality_gates_run: list[QualityGate] = Field(default_factory=list)
    quality_gates_failed: list[QualityGate] = Field(default_factory=list)
    context_compactions: int = Field(ge=0, le=100)
    context_rotations: int = Field(ge=0, le=50)
    # Fraction of the context window in use at task end, 0.0-1.0.
    context_utilization_final: float = Field(ge=0.0, le=1.0)
    outcome: Outcome
    retry_count: int = Field(ge=0, le=20)
    language: str | None = Field(default=None, max_length=30)
    repo_size_category: RepoSizeCategory | None = None

View File

@@ -0,0 +1,72 @@
"""Prediction request and response types."""
from __future__ import annotations
from datetime import datetime
from pydantic import BaseModel
from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType
class TokenDistribution(BaseModel):
    """Token usage distribution percentiles (median is p50)."""

    p10: int
    p25: int
    median: int
    p75: int
    p90: int
class CorrectionFactors(BaseModel):
    """Correction factors for estimated vs actual tokens.

    NOTE(review): presumably multipliers to apply to estimates (actual ≈
    estimate × factor) — confirm direction against the server docs.
    """

    input: float
    output: float
class QualityPrediction(BaseModel):
    """Quality gate prediction data."""

    # Crowd-sourced rates; presumably fractions in [0, 1] — not enforced here.
    gate_pass_rate: float
    success_rate: float
class PredictionData(BaseModel):
    """Full prediction data for a task type/model/provider/complexity combination."""

    input_tokens: TokenDistribution
    output_tokens: TokenDistribution
    # Percentile-keyed maps — presumably the same labels as TokenDistribution
    # (p10/p25/median/p75/p90); confirm against the server schema.
    cost_usd_micros: dict[str, int]
    duration_ms: dict[str, int]
    correction_factors: CorrectionFactors
    quality: QualityPrediction
class PredictionMetadata(BaseModel):
    """Metadata about how a prediction was generated."""

    # Number of events the prediction was computed from.
    sample_size: int
    # NOTE(review): higher levels presumably mean broader dimension fallback
    # (see dimensions_matched / fallback_note) — confirm with server docs.
    fallback_level: int
    confidence: str  # "none", "low", "medium", "high"
    last_updated: datetime | None = None
    dimensions_matched: dict[str, str | None] | None = None
    fallback_note: str | None = None
    # True when the server answered from its own cache.
    cache_hit: bool = False
class PredictionResponse(BaseModel):
    """Response from the prediction endpoint."""

    # None when the server has no data for the queried combination;
    # metadata is always present.
    prediction: PredictionData | None = None
    metadata: PredictionMetadata
class PredictionQuery(BaseModel):
    """Query parameters for a prediction request.

    These four dimensions also form the PredictionCache key.
    """

    task_type: TaskType
    model: str
    provider: Provider
    complexity: Complexity