From 0b29302f43eedf273f8d4bd022c280a175053d7d Mon Sep 17 00:00:00 2001 From: Jason Woltje Date: Sat, 7 Feb 2026 22:36:37 -0600 Subject: [PATCH] Initial project structure --- .gitignore | 42 ++ CLAUDE.md | 41 ++ LICENSE | 372 ++++++++++++++++++ README.md | 181 +++++++++ pyproject.toml | 49 +++ scripts/validate_schema.py | 96 +++++ src/mosaicstack_telemetry/__init__.py | 69 ++++ src/mosaicstack_telemetry/_async.py | 109 +++++ src/mosaicstack_telemetry/_sync.py | 118 ++++++ src/mosaicstack_telemetry/client.py | 196 +++++++++ src/mosaicstack_telemetry/config.py | 91 +++++ src/mosaicstack_telemetry/event_builder.py | 207 ++++++++++ src/mosaicstack_telemetry/prediction_cache.py | 56 +++ src/mosaicstack_telemetry/queue.py | 70 ++++ src/mosaicstack_telemetry/submitter.py | 205 ++++++++++ src/mosaicstack_telemetry/types/__init__.py | 49 +++ src/mosaicstack_telemetry/types/common.py | 35 ++ src/mosaicstack_telemetry/types/events.py | 122 ++++++ .../types/predictions.py | 72 ++++ tests/__init__.py | 0 tests/conftest.py | 183 +++++++++ tests/test_client.py | 321 +++++++++++++++ tests/test_event_builder.py | 137 +++++++ tests/test_prediction_cache.py | 147 +++++++ tests/test_queue.py | 133 +++++++ tests/test_submitter.py | 207 ++++++++++ 26 files changed, 3308 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pyproject.toml create mode 100644 scripts/validate_schema.py create mode 100644 src/mosaicstack_telemetry/__init__.py create mode 100644 src/mosaicstack_telemetry/_async.py create mode 100644 src/mosaicstack_telemetry/_sync.py create mode 100644 src/mosaicstack_telemetry/client.py create mode 100644 src/mosaicstack_telemetry/config.py create mode 100644 src/mosaicstack_telemetry/event_builder.py create mode 100644 src/mosaicstack_telemetry/prediction_cache.py create mode 100644 src/mosaicstack_telemetry/queue.py create mode 100644 src/mosaicstack_telemetry/submitter.py create mode 100644 src/mosaicstack_telemetry/types/__init__.py create mode 100644 src/mosaicstack_telemetry/types/common.py create mode 100644 src/mosaicstack_telemetry/types/events.py create mode 100644 src/mosaicstack_telemetry/types/predictions.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_client.py create mode 100644 tests/test_event_builder.py create mode 100644 tests/test_prediction_cache.py create mode 100644 tests/test_queue.py create mode 100644 tests/test_submitter.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3743eba --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +*.egg-info/ +*.egg +dist/ +build/ +.eggs/ + +# Virtual environments +.venv/ +venv/ +ENV/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Testing +.coverage +htmlcov/ +.pytest_cache/ + +# mypy +.mypy_cache/ + +# ruff +.ruff_cache/ + +# OS +.DS_Store +Thumbs.db + +# Environment +.env +.env.* +!.env.example diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..b024da8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,41 @@ +# mosaicstack-telemetry (Python Client SDK) + +Python client SDK for Mosaic Stack Telemetry. Reports AI coding task-completion telemetry and queries crowd-sourced predictions. + +## Development + +```bash +# Install dependencies (including dev) +uv sync --all-extras + +# Run tests (85%+ coverage required) +uv run pytest + +# Lint +uv run ruff check src/ tests/ + +# Format check +uv run ruff format --check src/ tests/ + +# Type check +uv run mypy src/ +``` + +## Architecture + +- `src/mosaicstack_telemetry/client.py` — Main TelemetryClient (public API) +- `src/mosaicstack_telemetry/config.py` — TelemetryConfig dataclass with env var support +- `src/mosaicstack_telemetry/queue.py` — Thread-safe bounded event queue +- `src/mosaicstack_telemetry/submitter.py` — Batch submission with retry/backoff +- `src/mosaicstack_telemetry/_sync.py` — Threading-based periodic submitter +- `src/mosaicstack_telemetry/_async.py` — Asyncio-based periodic submitter +- `src/mosaicstack_telemetry/event_builder.py` — Fluent event builder +- `src/mosaicstack_telemetry/prediction_cache.py` — TTL-based prediction cache +- `src/mosaicstack_telemetry/types/` — All Pydantic models and enums + +## Key Rules + +- `track()` must NEVER throw or block the caller +- All logging uses `logging.getLogger("mosaicstack_telemetry")` +- Runtime deps: httpx + pydantic only +- Python 3.10+ compatible (uses `str, Enum` mixin instead of StrEnum) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b0ad027 --- /dev/null +++ b/LICENSE @@ -0,0 +1,372 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under the name "LEGAL". The contents of the LEGAL file are for +informational purposes only and do not alter the terms of this License. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5b32a17 --- /dev/null +++ b/README.md @@ -0,0 +1,181 @@ +# mosaicstack-telemetry + +Python client SDK for [Mosaic Stack Telemetry](https://github.com/mosaicstack/telemetry). Report AI coding task-completion telemetry and query crowd-sourced predictions for token usage, cost, and quality outcomes. + +## Installation + +```bash +pip install mosaicstack-telemetry +# or +uv add mosaicstack-telemetry +``` + +## Quick Start (Sync) + +Best for scripts, aider integrations, and non-async contexts: + +```python +from mosaicstack_telemetry import ( + TelemetryClient, + TelemetryConfig, + EventBuilder, + TaskType, + Provider, + Harness, + Complexity, + Outcome, + QualityGate, +) + +config = TelemetryConfig( + server_url="https://telemetry.mosaicstack.dev", + api_key="your-64-char-hex-api-key-here...", + instance_id="your-uuid-instance-id", +) + +client = TelemetryClient(config) +client.start() # Starts background submission thread + +# Build and track an event +event = ( + EventBuilder(instance_id=config.instance_id) + .task_type(TaskType.IMPLEMENTATION) + .model("claude-sonnet-4-20250514") + .provider(Provider.ANTHROPIC) + .harness_type(Harness.AIDER) + .complexity_level(Complexity.MEDIUM) + .outcome_value(Outcome.SUCCESS) + .duration_ms(45000) + .tokens(estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800) + .cost(estimated=50000, actual=48000) + .quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST]) + .context(compactions=0, rotations=0, utilization=0.4) + .language("python") + .build() +) + +client.track(event) # Non-blocking, thread-safe + +# When done +client.stop() # Flushes remaining events +``` + +## Async Usage + +For asyncio-based applications: + +```python +import asyncio +from mosaicstack_telemetry import TelemetryClient, TelemetryConfig + +async def main(): + config = TelemetryConfig( + server_url="https://telemetry.mosaicstack.dev", + api_key="your-64-char-hex-api-key-here...", + instance_id="your-uuid-instance-id", + ) + + client = TelemetryClient(config) + await client.start_async() # Starts asyncio background task + + # track() is always synchronous + client.track(event) + + await client.stop_async() # Flushes remaining events + +asyncio.run(main()) +``` + +## Context Manager + +Both sync and async context managers are supported: + +```python +# Sync +with TelemetryClient(config) as client: + client.track(event) + +# Async +async with TelemetryClient(config) as client: + client.track(event) +``` + +## Configuration via Environment Variables + +All core settings can be set via environment variables: + +```bash +export MOSAIC_TELEMETRY_ENABLED=true +export MOSAIC_TELEMETRY_SERVER_URL=https://telemetry.mosaicstack.dev +export MOSAIC_TELEMETRY_API_KEY=your-64-char-hex-api-key +export MOSAIC_TELEMETRY_INSTANCE_ID=your-uuid-instance-id +``` + +Then create a config with defaults: + +```python +config = TelemetryConfig() # Picks up env vars automatically +``` + +Explicit constructor values take priority over environment variables. + +## Querying Predictions + +Fetch crowd-sourced predictions for token usage, cost, and quality: + +```python +from mosaicstack_telemetry import PredictionQuery, TaskType, Provider, Complexity + +query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="claude-sonnet-4-20250514", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, +) + +# Async +await client.refresh_predictions([query]) + +# Sync +client.refresh_predictions_sync([query]) + +# Read from cache +prediction = client.get_prediction(query) +if prediction and prediction.prediction: + print(f"Expected input tokens (median): {prediction.prediction.input_tokens.median}") + print(f"Expected cost (median): ${prediction.prediction.cost_usd_micros['median'] / 1_000_000:.4f}") + print(f"Quality gate pass rate: {prediction.prediction.quality.gate_pass_rate:.0%}") +``` + +## Dry-Run Mode + +Test your integration without sending data to the server: + +```python +config = TelemetryConfig( + server_url="https://telemetry.mosaicstack.dev", + api_key="a" * 64, + instance_id="12345678-1234-1234-1234-123456789abc", + dry_run=True, # Logs batches but doesn't send +) +``` + +## Configuration Reference + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `server_url` | (required) | Telemetry server base URL | +| `api_key` | (required) | 64-character hex API key | +| `instance_id` | (required) | UUID identifying this instance | +| `enabled` | `True` | Enable/disable telemetry | +| `submit_interval_seconds` | `300.0` | Background flush interval | +| `max_queue_size` | `1000` | Max events in memory queue | +| `batch_size` | `100` | Events per batch (server max) | +| `request_timeout_seconds` | `10.0` | HTTP request timeout | +| `prediction_cache_ttl_seconds` | `21600.0` | Prediction cache TTL (6h) | +| `dry_run` | `False` | Log but don't send | +| `max_retries` | `3` | Retries on failure | + +## License + +MPL-2.0 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..530b614 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,49 @@ +[project] +name = "mosaicstack-telemetry" +version = "0.1.0" +description = "Python client SDK for Mosaic Stack Telemetry" +requires-python = ">=3.10" +license = "MPL-2.0" +dependencies = [ + "httpx>=0.25.0", + "pydantic>=2.5.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0", + "pytest-cov>=5.0", + "pytest-asyncio>=0.24", + "ruff>=0.14.0", + "mypy>=1.15", + "respx>=0.21.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/mosaicstack_telemetry"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +asyncio_mode = "auto" +addopts = "--cov=src/mosaicstack_telemetry --cov-report=term-missing --cov-fail-under=85" + +[tool.ruff] +line-length = 100 +target-version = "py310" +src = ["src", "tests"] + +[tool.ruff.lint] +select = ["E", "F", "I", "N", "W", "B", "UP", "S", "A", "C4", "DTZ", "T20", "RUF"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["S101"] + +[tool.mypy] +python_version = "3.10" +strict = true +plugins = ["pydantic.mypy"] +mypy_path = "src" diff --git a/scripts/validate_schema.py b/scripts/validate_schema.py new file mode 100644 index 0000000..ac905d9 --- /dev/null +++ b/scripts/validate_schema.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +"""Validate that the SDK types match the expected schema.""" + +from __future__ import annotations + +import json +import sys + +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + RepoSizeCategory, + TaskCompletionEvent, + TaskType, +) +from mosaicstack_telemetry.types.predictions import ( + PredictionQuery, + PredictionResponse, +) + + +def main() -> int: + """Validate schema by generating JSON schema for key models.""" + errors: list[str] = [] + + # Validate enums have expected values + expected_task_types = { + "planning", "implementation", "code_review", "testing", + "debugging", "refactoring", "documentation", "configuration", + "security_audit", "unknown", + } + actual_task_types = {t.value for t in TaskType} + if actual_task_types != expected_task_types: + errors.append(f"TaskType mismatch: {actual_task_types ^ expected_task_types}") + + expected_complexity = {"low", "medium", "high", "critical"} + actual_complexity = {c.value for c in Complexity} + if actual_complexity != expected_complexity: + errors.append(f"Complexity mismatch: {actual_complexity ^ expected_complexity}") + + expected_harness = { + "claude_code", "opencode", "kilo_code", "aider", + "api_direct", "ollama_local", "custom", "unknown", + } + actual_harness = {h.value for h in Harness} + if actual_harness != expected_harness: + errors.append(f"Harness mismatch: {actual_harness ^ expected_harness}") + + expected_provider = { + "anthropic", "openai", "openrouter", "ollama", + "google", "mistral", "custom", "unknown", + } + actual_provider = {p.value for p in Provider} + if actual_provider != expected_provider: + errors.append(f"Provider mismatch: {actual_provider ^ expected_provider}") + + expected_gates = {"build", "lint", "test", "coverage", "typecheck", "security"} + actual_gates = {g.value for g in QualityGate} + if actual_gates != expected_gates: + errors.append(f"QualityGate mismatch: {actual_gates ^ expected_gates}") + + expected_outcome = {"success", "failure", "partial", "timeout"} + actual_outcome = {o.value for o in Outcome} + if actual_outcome != expected_outcome: + errors.append(f"Outcome mismatch: {actual_outcome ^ expected_outcome}") + + expected_repo_size = {"tiny", "small", "medium", "large", "huge"} + actual_repo_size = {r.value for r in RepoSizeCategory} + if actual_repo_size != expected_repo_size: + errors.append(f"RepoSizeCategory mismatch: {actual_repo_size ^ expected_repo_size}") + + # Generate JSON schemas for key models + print("TaskCompletionEvent schema:") + print(json.dumps(TaskCompletionEvent.model_json_schema(), indent=2)) + print() + print("PredictionQuery schema:") + print(json.dumps(PredictionQuery.model_json_schema(), indent=2)) + print() + print("PredictionResponse schema:") + print(json.dumps(PredictionResponse.model_json_schema(), indent=2)) + + if errors: + print("\nVALIDATION ERRORS:") + for error in errors: + print(f" - {error}") + return 1 + + print("\nAll schema validations passed.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/mosaicstack_telemetry/__init__.py b/src/mosaicstack_telemetry/__init__.py new file mode 100644 index 0000000..16496c1 --- /dev/null +++ b/src/mosaicstack_telemetry/__init__.py @@ -0,0 +1,69 @@ +"""Mosaic Stack Telemetry — Python client SDK. + +A lightweight client for reporting AI coding task-completion telemetry +and querying crowd-sourced predictions from a Mosaic Stack Telemetry server. +""" + +from mosaicstack_telemetry.client import TelemetryClient +from mosaicstack_telemetry.config import TelemetryConfig +from mosaicstack_telemetry.event_builder import EventBuilder +from mosaicstack_telemetry.prediction_cache import PredictionCache +from mosaicstack_telemetry.queue import EventQueue +from mosaicstack_telemetry.types.common import ( + BatchEventRequest, + BatchEventResponse, + BatchEventResult, + TelemetryError, +) +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + RepoSizeCategory, + TaskCompletionEvent, + TaskType, +) +from mosaicstack_telemetry.types.predictions import ( + CorrectionFactors, + PredictionData, + PredictionMetadata, + PredictionQuery, + PredictionResponse, + QualityPrediction, + TokenDistribution, +) + +__version__ = "0.1.0" + +__all__ = [ + # Client + "TelemetryClient", + "TelemetryConfig", + "EventBuilder", + "EventQueue", + "PredictionCache", + # Types - Events + "TaskCompletionEvent", + "TaskType", + "Complexity", + "Harness", + "Provider", + "QualityGate", + "Outcome", + "RepoSizeCategory", + # Types - Predictions + "PredictionQuery", + "PredictionResponse", + "PredictionData", + "PredictionMetadata", + "TokenDistribution", + "CorrectionFactors", + "QualityPrediction", + # Types - Common + "BatchEventRequest", + "BatchEventResponse", + "BatchEventResult", + "TelemetryError", +] diff --git a/src/mosaicstack_telemetry/_async.py b/src/mosaicstack_telemetry/_async.py new file mode 100644 index 0000000..d893669 --- /dev/null +++ b/src/mosaicstack_telemetry/_async.py @@ -0,0 +1,109 @@ +"""Asynchronous submitter using asyncio.Task for periodic flushing.""" + +from __future__ import annotations + +import asyncio +import logging +from typing import TYPE_CHECKING + +import httpx + +from mosaicstack_telemetry.submitter import submit_batch_async + +if TYPE_CHECKING: + from mosaicstack_telemetry.config import TelemetryConfig + from mosaicstack_telemetry.queue import EventQueue + +logger = logging.getLogger("mosaicstack_telemetry") + + +class AsyncSubmitter: + """Periodic event submitter using asyncio.Task and httpx.AsyncClient.""" + + def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None: + self._config = config + self._queue = queue + self._client: httpx.AsyncClient | None = None + self._task: asyncio.Task[None] | None = None + self._running = False + + @property + def is_running(self) -> bool: + """Whether the submitter is currently running.""" + return self._running + + async def start(self) -> None: + """Start the periodic flush loop.""" + if self._running: + return + self._client = httpx.AsyncClient() + self._running = True + self._task = asyncio.create_task(self._loop()) + logger.info( + "Async submitter started (interval=%.1fs)", + self._config.submit_interval_seconds, + ) + + async def stop(self) -> None: + """Stop the periodic flush loop and perform a final flush.""" + if not self._running: + return + self._running = False + if self._task is not None: + self._task.cancel() + try: + await self._task + except asyncio.CancelledError: + pass + self._task = None + + # Final flush + await self.flush() + + if self._client is not None: + await self._client.aclose() + self._client = None + logger.info("Async submitter stopped") + + async def flush(self) -> None: + """Flush all queued events immediately.""" + while not self._queue.is_empty: + events = self._queue.drain(self._config.batch_size) + if not events: + break + + client = self._client + if client is None: + client = httpx.AsyncClient() + try: + result = await submit_batch_async(client, self._config, events) + finally: + await client.aclose() + else: + result = await submit_batch_async(client, self._config, events) + + if result is None: + logger.warning("Batch submission failed, re-queuing %d events", len(events)) + self._queue.put_back(events) + break + + if result.rejected > 0: + logger.warning( + "Batch partially rejected: %d accepted, %d rejected", + result.accepted, + result.rejected, + ) + else: + logger.debug("Batch submitted: %d events accepted", result.accepted) + + async def _loop(self) -> None: + """Periodic flush loop.""" + while self._running: + try: + await asyncio.sleep(self._config.submit_interval_seconds) + if self._running: + await self.flush() + except asyncio.CancelledError: + break + except Exception: + logger.exception("Error during periodic async flush") diff --git a/src/mosaicstack_telemetry/_sync.py b/src/mosaicstack_telemetry/_sync.py new file mode 100644 index 0000000..3c155c0 --- /dev/null +++ b/src/mosaicstack_telemetry/_sync.py @@ -0,0 +1,118 @@ +"""Synchronous submitter using threading.Timer for periodic flushing.""" + +from __future__ import annotations + +import logging +import threading +from typing import TYPE_CHECKING + +import httpx + +from mosaicstack_telemetry.submitter import submit_batch_sync + +if TYPE_CHECKING: + from mosaicstack_telemetry.config import TelemetryConfig + from mosaicstack_telemetry.queue import EventQueue + +logger = logging.getLogger("mosaicstack_telemetry") + + +class SyncSubmitter: + """Periodic event submitter using threading.Timer and httpx.Client.""" + + def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None: + self._config = config + self._queue = queue + self._client: httpx.Client | None = None + self._timer: threading.Timer | None = None + self._running = False + self._lock = threading.Lock() + + @property + def is_running(self) -> bool: + """Whether the submitter is currently running.""" + return self._running + + def start(self) -> None: + """Start the periodic flush loop.""" + with self._lock: + if self._running: + return + self._client = httpx.Client() + self._running = True + self._schedule_next() + logger.info("Sync submitter started (interval=%.1fs)", self._config.submit_interval_seconds) + + def stop(self) -> None: + """Stop the periodic flush loop and perform a final flush.""" + with self._lock: + if not self._running: + return + self._running = False + if self._timer is not None: + self._timer.cancel() + self._timer = None + + # Final flush outside the lock to avoid deadlock + self.flush() + + with self._lock: + if self._client is not None: + self._client.close() + self._client = None + logger.info("Sync submitter stopped") + + def flush(self) -> None: + """Flush all queued events immediately.""" + while not self._queue.is_empty: + events = self._queue.drain(self._config.batch_size) + if not events: + break + + with self._lock: + client = self._client + if client is None: + # Create a temporary client for the flush + client = httpx.Client() + try: + result = submit_batch_sync(client, self._config, events) + finally: + client.close() + else: + result = submit_batch_sync(client, self._config, events) + + if result is None: + # Submission failed, re-queue events + logger.warning("Batch submission failed, re-queuing %d events", len(events)) + self._queue.put_back(events) + break + + if result.rejected > 0: + logger.warning( + "Batch partially rejected: %d accepted, %d rejected", + result.accepted, + result.rejected, + ) + else: + logger.debug("Batch submitted: %d events accepted", result.accepted) + + def _schedule_next(self) -> None: + """Schedule the next flush iteration.""" + if not self._running: + return + self._timer = threading.Timer(self._config.submit_interval_seconds, self._tick) + self._timer.daemon = True + self._timer.start() + + def _tick(self) -> None: + """Timer callback: flush and reschedule.""" + if not self._running: + return + try: + self.flush() + except Exception: + logger.exception("Error during periodic flush") + finally: + with self._lock: + if self._running: + self._schedule_next() diff --git a/src/mosaicstack_telemetry/client.py b/src/mosaicstack_telemetry/client.py new file mode 100644 index 0000000..7765d97 --- /dev/null +++ b/src/mosaicstack_telemetry/client.py @@ -0,0 +1,196 @@ +"""Main TelemetryClient — the public entry point for the SDK.""" + +from __future__ import annotations + +import logging +from typing import Any + +import httpx + +from mosaicstack_telemetry._async import AsyncSubmitter +from mosaicstack_telemetry._sync import SyncSubmitter +from mosaicstack_telemetry.config import TelemetryConfig +from mosaicstack_telemetry.prediction_cache import PredictionCache +from mosaicstack_telemetry.queue import EventQueue +from mosaicstack_telemetry.types.events import TaskCompletionEvent +from mosaicstack_telemetry.types.predictions import ( + PredictionQuery, + PredictionResponse, +) + +logger = logging.getLogger("mosaicstack_telemetry") + + +class TelemetryClient: + """Main client for Mosaic Stack Telemetry. + + Supports both sync and async usage patterns: + + **Sync (threading-based):** + + client = TelemetryClient(config) + client.start() + client.track(event) + client.stop() + + **Async (asyncio-based):** + + client = TelemetryClient(config) + await client.start_async() + client.track(event) + await client.stop_async() + + **Context managers:** + + with TelemetryClient(config) as client: + client.track(event) + + async with TelemetryClient(config) as client: + client.track(event) + """ + + def __init__(self, config: TelemetryConfig) -> None: + errors = config.validate() + if errors and config.enabled: + logger.warning("Telemetry config validation errors: %s", "; ".join(errors)) + + self._config = config + self._queue = EventQueue(max_size=config.max_queue_size) + self._prediction_cache = PredictionCache(ttl_seconds=config.prediction_cache_ttl_seconds) + self._sync_submitter: SyncSubmitter | None = None + self._async_submitter: AsyncSubmitter | None = None + + def start(self) -> None: + """Start background submission using threading.Timer loop.""" + if not self._config.enabled: + logger.info("Telemetry disabled, skipping start") + return + self._sync_submitter = SyncSubmitter(self._config, self._queue) + self._sync_submitter.start() + + async def start_async(self) -> None: + """Start with asyncio.Task for async contexts.""" + if not self._config.enabled: + logger.info("Telemetry disabled, skipping async start") + return + self._async_submitter = AsyncSubmitter(self._config, self._queue) + await self._async_submitter.start() + + def stop(self) -> None: + """Stop background submission, flush remaining events synchronously.""" + if self._sync_submitter is not None: + self._sync_submitter.stop() + self._sync_submitter = None + + async def stop_async(self) -> None: + """Async stop and flush.""" + if self._async_submitter is not None: + await self._async_submitter.stop() + self._async_submitter = None + + def track(self, event: TaskCompletionEvent) -> None: + """Queue an event for submission. Always synchronous. Never blocks or throws. + + If telemetry is disabled, the event is silently dropped. + """ + try: + if not self._config.enabled: + return + self._queue.put(event) + logger.debug("Event queued: %s", event.event_id) + except Exception: + logger.exception("Unexpected error in track()") + + def get_prediction(self, query: PredictionQuery) -> PredictionResponse | None: + """Get a cached prediction. Returns None if not cached or expired.""" + return self._prediction_cache.get(query) + + def refresh_predictions_sync(self, queries: list[PredictionQuery]) -> None: + """Fetch fresh predictions from server synchronously.""" + if not queries: + return + url = f"{self._config.server_url}/v1/predictions/batch" + body = {"queries": [q.model_dump(mode="json") for q in queries]} + + try: + with httpx.Client() as client: + response = client.post( + url, + json=body, + headers={"User-Agent": self._config.user_agent}, + timeout=self._config.request_timeout_seconds, + ) + if response.status_code == 200: + data = response.json() + results = data.get("results", []) + for query, result_data in zip(queries, results): + pred = PredictionResponse.model_validate(result_data) + self._prediction_cache.put(query, pred) + logger.debug("Refreshed %d predictions", len(results)) + else: + logger.warning( + "Prediction refresh failed with status %d", + response.status_code, + ) + except Exception: + logger.exception("Error refreshing predictions") + + async def refresh_predictions(self, queries: list[PredictionQuery]) -> None: + """Fetch fresh predictions from server asynchronously.""" + if not queries: + return + url = f"{self._config.server_url}/v1/predictions/batch" + body = {"queries": [q.model_dump(mode="json") for q in queries]} + + try: + async with httpx.AsyncClient() as client: + response = await client.post( + url, + json=body, + headers={"User-Agent": self._config.user_agent}, + timeout=self._config.request_timeout_seconds, + ) + if response.status_code == 200: + data = response.json() + results = data.get("results", []) + for query, result_data in zip(queries, results): + pred = PredictionResponse.model_validate(result_data) + self._prediction_cache.put(query, pred) + logger.debug("Refreshed %d predictions", len(results)) + else: + logger.warning( + "Prediction refresh failed with status %d", + response.status_code, + ) + except Exception: + logger.exception("Error refreshing predictions") + + @property + def queue_size(self) -> int: + """Number of events currently in the queue.""" + return self._queue.size + + @property + def is_running(self) -> bool: + """Whether background submission is active.""" + if self._sync_submitter is not None: + return self._sync_submitter.is_running + if self._async_submitter is not None: + return self._async_submitter.is_running + return False + + # Sync context manager + def __enter__(self) -> TelemetryClient: + self.start() + return self + + def __exit__(self, *exc: Any) -> None: + self.stop() + + # Async context manager + async def __aenter__(self) -> TelemetryClient: + await self.start_async() + return self + + async def __aexit__(self, *exc: Any) -> None: + await self.stop_async() diff --git a/src/mosaicstack_telemetry/config.py b/src/mosaicstack_telemetry/config.py new file mode 100644 index 0000000..58ffc48 --- /dev/null +++ b/src/mosaicstack_telemetry/config.py @@ -0,0 +1,91 @@ +"""Telemetry client configuration.""" + +from __future__ import annotations + +import os +import re +from dataclasses import dataclass, field + +_HEX_64_RE = re.compile(r"^[0-9a-fA-F]{64}$") +_UUID_RE = re.compile( + r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$" +) + + +@dataclass +class TelemetryConfig: + """Configuration for the telemetry client. + + Values can be provided directly or loaded from environment variables: + - MOSAIC_TELEMETRY_ENABLED -> enabled + - MOSAIC_TELEMETRY_SERVER_URL -> server_url + - MOSAIC_TELEMETRY_API_KEY -> api_key + - MOSAIC_TELEMETRY_INSTANCE_ID -> instance_id + """ + + server_url: str = "" + api_key: str = "" + instance_id: str = "" + enabled: bool = True + submit_interval_seconds: float = 300.0 + max_queue_size: int = 1000 + batch_size: int = 100 + request_timeout_seconds: float = 10.0 + prediction_cache_ttl_seconds: float = 21600.0 + dry_run: bool = False + max_retries: int = 3 + user_agent: str = field(default="mosaicstack-telemetry-python/0.1.0") + + def __post_init__(self) -> None: + """Load environment variable overrides and validate.""" + env_enabled = os.environ.get("MOSAIC_TELEMETRY_ENABLED") + if env_enabled is not None: + self.enabled = env_enabled.lower() in ("1", "true", "yes") + + env_url = os.environ.get("MOSAIC_TELEMETRY_SERVER_URL") + if env_url and not self.server_url: + self.server_url = env_url + + env_key = os.environ.get("MOSAIC_TELEMETRY_API_KEY") + if env_key and not self.api_key: + self.api_key = env_key + + env_instance = os.environ.get("MOSAIC_TELEMETRY_INSTANCE_ID") + if env_instance and not self.instance_id: + self.instance_id = env_instance + + # Strip trailing slashes from server_url + self.server_url = self.server_url.rstrip("/") + + def validate(self) -> list[str]: + """Validate configuration and return list of errors (empty if valid).""" + errors: list[str] = [] + + if not self.server_url: + errors.append("server_url is required") + elif not self.server_url.startswith(("http://", "https://")): + errors.append("server_url must start with http:// or https://") + + if not self.api_key: + errors.append("api_key is required") + elif not _HEX_64_RE.match(self.api_key): + errors.append("api_key must be a 64-character hex string") + + if not self.instance_id: + errors.append("instance_id is required") + elif not _UUID_RE.match(self.instance_id): + errors.append("instance_id must be a valid UUID string") + + if self.submit_interval_seconds <= 0: + errors.append("submit_interval_seconds must be positive") + + if self.max_queue_size <= 0: + errors.append("max_queue_size must be positive") + + if self.batch_size <= 0 or self.batch_size > 100: + errors.append("batch_size must be between 1 and 100") + + if self.request_timeout_seconds <= 0: + errors.append("request_timeout_seconds must be positive") + + return errors diff --git a/src/mosaicstack_telemetry/event_builder.py b/src/mosaicstack_telemetry/event_builder.py new file mode 100644 index 0000000..57ab857 --- /dev/null +++ b/src/mosaicstack_telemetry/event_builder.py @@ -0,0 +1,207 @@ +"""Convenience builder for constructing TaskCompletionEvent instances.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from uuid import UUID, uuid4 + +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + RepoSizeCategory, + TaskCompletionEvent, + TaskType, +) + + +class EventBuilder: + """Fluent builder for TaskCompletionEvent. + + Provides a convenient way to construct events with sensible defaults + and a chainable API. + + Example:: + + event = ( + EventBuilder(instance_id="...") + .task_type(TaskType.IMPLEMENTATION) + .model("claude-sonnet-4-20250514") + .provider(Provider.ANTHROPIC) + .harness(Harness.CLAUDE_CODE) + .complexity(Complexity.MEDIUM) + .outcome(Outcome.SUCCESS) + .duration_ms(45000) + .tokens(estimated_in=1000, estimated_out=500, actual_in=1100, actual_out=480) + .cost(estimated=50000, actual=48000) + .quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST]) + .context(compactions=0, rotations=0, utilization=0.3) + .build() + ) + """ + + def __init__(self, instance_id: str | UUID) -> None: + self._instance_id = UUID(str(instance_id)) + self._event_id: UUID = uuid4() + self._timestamp: datetime = datetime.now(timezone.utc) + self._task_duration_ms: int = 0 + self._task_type: TaskType = TaskType.UNKNOWN + self._complexity: Complexity = Complexity.MEDIUM + self._harness: Harness = Harness.UNKNOWN + self._model: str = "unknown" + self._provider: Provider = Provider.UNKNOWN + self._estimated_input_tokens: int = 0 + self._estimated_output_tokens: int = 0 + self._actual_input_tokens: int = 0 + self._actual_output_tokens: int = 0 + self._estimated_cost_usd_micros: int = 0 + self._actual_cost_usd_micros: int = 0 + self._quality_gate_passed: bool = False + self._quality_gates_run: list[QualityGate] = [] + self._quality_gates_failed: list[QualityGate] = [] + self._context_compactions: int = 0 + self._context_rotations: int = 0 + self._context_utilization_final: float = 0.0 + self._outcome: Outcome = Outcome.FAILURE + self._retry_count: int = 0 + self._language: str | None = None + self._repo_size_category: RepoSizeCategory | None = None + + def event_id(self, value: str | UUID) -> EventBuilder: + """Set a specific event ID (default: auto-generated UUID).""" + self._event_id = UUID(str(value)) + return self + + def timestamp(self, value: datetime) -> EventBuilder: + """Set the event timestamp (default: now UTC).""" + self._timestamp = value + return self + + def task_type(self, value: TaskType) -> EventBuilder: + """Set the task type.""" + self._task_type = value + return self + + def complexity_level(self, value: Complexity) -> EventBuilder: + """Set the complexity level.""" + self._complexity = value + return self + + def harness_type(self, value: Harness) -> EventBuilder: + """Set the harness type.""" + self._harness = value + return self + + def model(self, value: str) -> EventBuilder: + """Set the model name.""" + self._model = value + return self + + def provider(self, value: Provider) -> EventBuilder: + """Set the provider.""" + self._provider = value + return self + + def duration_ms(self, value: int) -> EventBuilder: + """Set the task duration in milliseconds.""" + self._task_duration_ms = value + return self + + def tokens( + self, + *, + estimated_in: int = 0, + estimated_out: int = 0, + actual_in: int = 0, + actual_out: int = 0, + ) -> EventBuilder: + """Set token counts.""" + self._estimated_input_tokens = estimated_in + self._estimated_output_tokens = estimated_out + self._actual_input_tokens = actual_in + self._actual_output_tokens = actual_out + return self + + def cost(self, *, estimated: int = 0, actual: int = 0) -> EventBuilder: + """Set cost in USD micros.""" + self._estimated_cost_usd_micros = estimated + self._actual_cost_usd_micros = actual + return self + + def quality( + self, + *, + passed: bool, + gates_run: list[QualityGate] | None = None, + gates_failed: list[QualityGate] | None = None, + ) -> EventBuilder: + """Set quality gate results.""" + self._quality_gate_passed = passed + self._quality_gates_run = gates_run or [] + self._quality_gates_failed = gates_failed or [] + return self + + def context( + self, + *, + compactions: int = 0, + rotations: int = 0, + utilization: float = 0.0, + ) -> EventBuilder: + """Set context window metrics.""" + self._context_compactions = compactions + self._context_rotations = rotations + self._context_utilization_final = utilization + return self + + def outcome_value(self, value: Outcome) -> EventBuilder: + """Set the task outcome.""" + self._outcome = value + return self + + def retry_count(self, value: int) -> EventBuilder: + """Set the retry count.""" + self._retry_count = value + return self + + def language(self, value: str | None) -> EventBuilder: + """Set the programming language.""" + self._language = value + return self + + def repo_size(self, value: RepoSizeCategory | None) -> EventBuilder: + """Set the repository size category.""" + self._repo_size_category = value + return self + + def build(self) -> TaskCompletionEvent: + """Build and return the TaskCompletionEvent.""" + return TaskCompletionEvent( + instance_id=self._instance_id, + event_id=self._event_id, + timestamp=self._timestamp, + task_duration_ms=self._task_duration_ms, + task_type=self._task_type, + complexity=self._complexity, + harness=self._harness, + model=self._model, + provider=self._provider, + estimated_input_tokens=self._estimated_input_tokens, + estimated_output_tokens=self._estimated_output_tokens, + actual_input_tokens=self._actual_input_tokens, + actual_output_tokens=self._actual_output_tokens, + estimated_cost_usd_micros=self._estimated_cost_usd_micros, + actual_cost_usd_micros=self._actual_cost_usd_micros, + quality_gate_passed=self._quality_gate_passed, + quality_gates_run=self._quality_gates_run, + quality_gates_failed=self._quality_gates_failed, + context_compactions=self._context_compactions, + context_rotations=self._context_rotations, + context_utilization_final=self._context_utilization_final, + outcome=self._outcome, + retry_count=self._retry_count, + language=self._language, + repo_size_category=self._repo_size_category, + ) diff --git a/src/mosaicstack_telemetry/prediction_cache.py b/src/mosaicstack_telemetry/prediction_cache.py new file mode 100644 index 0000000..6f7ee3f --- /dev/null +++ b/src/mosaicstack_telemetry/prediction_cache.py @@ -0,0 +1,56 @@ +"""Thread-safe prediction cache with TTL.""" + +from __future__ import annotations + +import logging +import threading +import time + +from mosaicstack_telemetry.types.predictions import PredictionQuery, PredictionResponse + +logger = logging.getLogger("mosaicstack_telemetry") + + +def _cache_key(query: PredictionQuery) -> str: + """Generate a deterministic cache key from a prediction query.""" + return f"{query.task_type.value}:{query.model}:{query.provider.value}:{query.complexity.value}" + + +class PredictionCache: + """Thread-safe dict-based cache with TTL for prediction responses.""" + + def __init__(self, ttl_seconds: float = 21600.0) -> None: + self._ttl = ttl_seconds + self._store: dict[str, tuple[PredictionResponse, float]] = {} + self._lock = threading.Lock() + + def get(self, query: PredictionQuery) -> PredictionResponse | None: + """Get a cached prediction, or None if not found or expired.""" + key = _cache_key(query) + with self._lock: + entry = self._store.get(key) + if entry is None: + return None + response, expires_at = entry + if time.monotonic() > expires_at: + del self._store[key] + return None + return response + + def put(self, query: PredictionQuery, response: PredictionResponse) -> None: + """Store a prediction response with TTL.""" + key = _cache_key(query) + expires_at = time.monotonic() + self._ttl + with self._lock: + self._store[key] = (response, expires_at) + + def clear(self) -> None: + """Invalidate all cached predictions.""" + with self._lock: + self._store.clear() + + @property + def size(self) -> int: + """Number of entries in the cache (including possibly expired).""" + with self._lock: + return len(self._store) diff --git a/src/mosaicstack_telemetry/queue.py b/src/mosaicstack_telemetry/queue.py new file mode 100644 index 0000000..f6a0354 --- /dev/null +++ b/src/mosaicstack_telemetry/queue.py @@ -0,0 +1,70 @@ +"""Thread-safe bounded event queue.""" + +from __future__ import annotations + +import logging +import threading +from collections import deque + +from mosaicstack_telemetry.types.events import TaskCompletionEvent + +logger = logging.getLogger("mosaicstack_telemetry") + + +class EventQueue: + """Thread-safe bounded FIFO queue for telemetry events. + + When the queue is full, the oldest events are evicted (FIFO eviction) + to make room for new ones. + """ + + def __init__(self, max_size: int = 1000) -> None: + self._max_size = max_size + self._deque: deque[TaskCompletionEvent] = deque(maxlen=max_size) + self._lock = threading.Lock() + + def put(self, event: TaskCompletionEvent) -> None: + """Add an event to the queue. Never blocks. + + If the queue is full, the oldest event is silently evicted. + """ + with self._lock: + if len(self._deque) >= self._max_size: + logger.warning( + "Event queue full (%d items), evicting oldest event", + self._max_size, + ) + self._deque.append(event) + + def drain(self, max_items: int) -> list[TaskCompletionEvent]: + """Remove and return up to max_items events from the front of the queue.""" + with self._lock: + count = min(max_items, len(self._deque)) + items: list[TaskCompletionEvent] = [] + for _ in range(count): + items.append(self._deque.popleft()) + return items + + def put_back(self, events: list[TaskCompletionEvent]) -> None: + """Put events back at the front of the queue (for retry scenarios). + + Events are added to the left (front) so they get drained first next time. + If adding them would exceed max_size, only as many as fit are re-added. + """ + with self._lock: + available = self._max_size - len(self._deque) + to_add = events[:available] + for event in reversed(to_add): + self._deque.appendleft(event) + + @property + def size(self) -> int: + """Current number of events in the queue.""" + with self._lock: + return len(self._deque) + + @property + def is_empty(self) -> bool: + """Whether the queue is empty.""" + with self._lock: + return len(self._deque) == 0 diff --git a/src/mosaicstack_telemetry/submitter.py b/src/mosaicstack_telemetry/submitter.py new file mode 100644 index 0000000..e1f4a49 --- /dev/null +++ b/src/mosaicstack_telemetry/submitter.py @@ -0,0 +1,205 @@ +"""Batch submission logic with retry and backoff.""" + +from __future__ import annotations + +import logging +import random +import time +from typing import TYPE_CHECKING + +import httpx + +from mosaicstack_telemetry.types.common import BatchEventRequest, BatchEventResponse +from mosaicstack_telemetry.types.events import TaskCompletionEvent + +if TYPE_CHECKING: + from mosaicstack_telemetry.config import TelemetryConfig + +logger = logging.getLogger("mosaicstack_telemetry") + + +def _backoff_delay(attempt: int, base: float = 1.0, maximum: float = 60.0) -> float: + """Calculate exponential backoff with jitter.""" + delay = min(base * (2**attempt), maximum) + jitter = random.uniform(0, delay * 0.5) # noqa: S311 + return delay + jitter + + +def submit_batch_sync( + client: httpx.Client, + config: TelemetryConfig, + events: list[TaskCompletionEvent], +) -> BatchEventResponse | None: + """Submit a batch of events synchronously with retry logic. + + Returns the BatchEventResponse on success, or None if all retries failed. + """ + url = f"{config.server_url}/v1/events/batch" + request_body = BatchEventRequest(events=events) + + for attempt in range(config.max_retries + 1): + try: + if config.dry_run: + logger.info( + "[DRY RUN] Would submit batch of %d events to %s", + len(events), + url, + ) + return BatchEventResponse( + accepted=len(events), + rejected=0, + results=[], + ) + + response = client.post( + url, + json=request_body.model_dump(mode="json"), + headers={ + "Authorization": f"Bearer {config.api_key}", + "Content-Type": "application/json", + "User-Agent": config.user_agent, + }, + timeout=config.request_timeout_seconds, + ) + + if response.status_code == 202: + return BatchEventResponse.model_validate(response.json()) + + if response.status_code == 429: + retry_after = response.headers.get("Retry-After") + delay = float(retry_after) if retry_after else _backoff_delay(attempt) + logger.warning( + "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)", + delay, + attempt + 1, + config.max_retries + 1, + ) + time.sleep(delay) + continue + + if response.status_code == 403: + logger.error( + "Authentication failed (403): API key may not match instance_id" + ) + return None + + logger.warning( + "Unexpected status %d from server (attempt %d/%d): %s", + response.status_code, + attempt + 1, + config.max_retries + 1, + response.text[:200], + ) + + except httpx.TimeoutException: + logger.warning( + "Request timed out (attempt %d/%d)", + attempt + 1, + config.max_retries + 1, + ) + except httpx.HTTPError as exc: + logger.warning( + "Network error (attempt %d/%d): %s", + attempt + 1, + config.max_retries + 1, + exc, + ) + + if attempt < config.max_retries: + delay = _backoff_delay(attempt) + logger.debug("Backing off for %.1f seconds before retry", delay) + time.sleep(delay) + + logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events)) + return None + + +async def submit_batch_async( + client: httpx.AsyncClient, + config: TelemetryConfig, + events: list[TaskCompletionEvent], +) -> BatchEventResponse | None: + """Submit a batch of events asynchronously with retry logic. + + Returns the BatchEventResponse on success, or None if all retries failed. + """ + import asyncio + + url = f"{config.server_url}/v1/events/batch" + request_body = BatchEventRequest(events=events) + + for attempt in range(config.max_retries + 1): + try: + if config.dry_run: + logger.info( + "[DRY RUN] Would submit batch of %d events to %s", + len(events), + url, + ) + return BatchEventResponse( + accepted=len(events), + rejected=0, + results=[], + ) + + response = await client.post( + url, + json=request_body.model_dump(mode="json"), + headers={ + "Authorization": f"Bearer {config.api_key}", + "Content-Type": "application/json", + "User-Agent": config.user_agent, + }, + timeout=config.request_timeout_seconds, + ) + + if response.status_code == 202: + return BatchEventResponse.model_validate(response.json()) + + if response.status_code == 429: + retry_after = response.headers.get("Retry-After") + delay = float(retry_after) if retry_after else _backoff_delay(attempt) + logger.warning( + "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)", + delay, + attempt + 1, + config.max_retries + 1, + ) + await asyncio.sleep(delay) + continue + + if response.status_code == 403: + logger.error( + "Authentication failed (403): API key may not match instance_id" + ) + return None + + logger.warning( + "Unexpected status %d from server (attempt %d/%d): %s", + response.status_code, + attempt + 1, + config.max_retries + 1, + response.text[:200], + ) + + except httpx.TimeoutException: + logger.warning( + "Request timed out (attempt %d/%d)", + attempt + 1, + config.max_retries + 1, + ) + except httpx.HTTPError as exc: + logger.warning( + "Network error (attempt %d/%d): %s", + attempt + 1, + config.max_retries + 1, + exc, + ) + + if attempt < config.max_retries: + delay = _backoff_delay(attempt) + logger.debug("Backing off for %.1f seconds before retry", delay) + await asyncio.sleep(delay) + + logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events)) + return None diff --git a/src/mosaicstack_telemetry/types/__init__.py b/src/mosaicstack_telemetry/types/__init__.py new file mode 100644 index 0000000..1de235e --- /dev/null +++ b/src/mosaicstack_telemetry/types/__init__.py @@ -0,0 +1,49 @@ +"""Mosaic Stack Telemetry type definitions.""" + +from mosaicstack_telemetry.types.common import ( + BatchEventRequest, + BatchEventResponse, + BatchEventResult, + TelemetryError, +) +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + RepoSizeCategory, + TaskCompletionEvent, + TaskType, +) +from mosaicstack_telemetry.types.predictions import ( + CorrectionFactors, + PredictionData, + PredictionMetadata, + PredictionQuery, + PredictionResponse, + QualityPrediction, + TokenDistribution, +) + +__all__ = [ + "BatchEventRequest", + "BatchEventResponse", + "BatchEventResult", + "Complexity", + "CorrectionFactors", + "Harness", + "Outcome", + "PredictionData", + "PredictionMetadata", + "PredictionQuery", + "PredictionResponse", + "Provider", + "QualityGate", + "QualityPrediction", + "RepoSizeCategory", + "TaskCompletionEvent", + "TaskType", + "TelemetryError", + "TokenDistribution", +] diff --git a/src/mosaicstack_telemetry/types/common.py b/src/mosaicstack_telemetry/types/common.py new file mode 100644 index 0000000..4022b70 --- /dev/null +++ b/src/mosaicstack_telemetry/types/common.py @@ -0,0 +1,35 @@ +"""Common types shared across the SDK.""" + +from __future__ import annotations + +from uuid import UUID + +from pydantic import BaseModel, Field + +from mosaicstack_telemetry.types.events import TaskCompletionEvent + + +class TelemetryError(Exception): + """Base exception for telemetry client errors.""" + + +class BatchEventRequest(BaseModel): + """Request body for batch event submission.""" + + events: list[TaskCompletionEvent] = Field(min_length=1, max_length=100) + + +class BatchEventResult(BaseModel): + """Result for a single event in a batch submission.""" + + event_id: UUID + status: str # "accepted" or "rejected" + error: str | None = None + + +class BatchEventResponse(BaseModel): + """Response from the batch event submission endpoint.""" + + accepted: int + rejected: int + results: list[BatchEventResult] diff --git a/src/mosaicstack_telemetry/types/events.py b/src/mosaicstack_telemetry/types/events.py new file mode 100644 index 0000000..7883929 --- /dev/null +++ b/src/mosaicstack_telemetry/types/events.py @@ -0,0 +1,122 @@ +"""Task completion event types and enums.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from enum import Enum +from uuid import UUID, uuid4 + +from pydantic import BaseModel, Field + + +class TaskType(str, Enum): + """Type of task being performed.""" + + PLANNING = "planning" + IMPLEMENTATION = "implementation" + CODE_REVIEW = "code_review" + TESTING = "testing" + DEBUGGING = "debugging" + REFACTORING = "refactoring" + DOCUMENTATION = "documentation" + CONFIGURATION = "configuration" + SECURITY_AUDIT = "security_audit" + UNKNOWN = "unknown" + + +class Complexity(str, Enum): + """Task complexity level.""" + + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + CRITICAL = "critical" + + +class Harness(str, Enum): + """AI coding harness used.""" + + CLAUDE_CODE = "claude_code" + OPENCODE = "opencode" + KILO_CODE = "kilo_code" + AIDER = "aider" + API_DIRECT = "api_direct" + OLLAMA_LOCAL = "ollama_local" + CUSTOM = "custom" + UNKNOWN = "unknown" + + +class Provider(str, Enum): + """AI model provider.""" + + ANTHROPIC = "anthropic" + OPENAI = "openai" + OPENROUTER = "openrouter" + OLLAMA = "ollama" + GOOGLE = "google" + MISTRAL = "mistral" + CUSTOM = "custom" + UNKNOWN = "unknown" + + +class QualityGate(str, Enum): + """Quality gate type.""" + + BUILD = "build" + LINT = "lint" + TEST = "test" + COVERAGE = "coverage" + TYPECHECK = "typecheck" + SECURITY = "security" + + +class Outcome(str, Enum): + """Task outcome.""" + + SUCCESS = "success" + FAILURE = "failure" + PARTIAL = "partial" + TIMEOUT = "timeout" + + +class RepoSizeCategory(str, Enum): + """Repository size category.""" + + TINY = "tiny" + SMALL = "small" + MEDIUM = "medium" + LARGE = "large" + HUGE = "huge" + + +class TaskCompletionEvent(BaseModel): + """A single task completion telemetry event.""" + + instance_id: UUID + event_id: UUID = Field(default_factory=uuid4) + schema_version: str = "1.0" + timestamp: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + ) + task_duration_ms: int = Field(ge=0, le=86_400_000) + task_type: TaskType + complexity: Complexity + harness: Harness + model: str = Field(min_length=1, max_length=100) + provider: Provider + estimated_input_tokens: int = Field(ge=0, le=10_000_000) + estimated_output_tokens: int = Field(ge=0, le=10_000_000) + actual_input_tokens: int = Field(ge=0, le=10_000_000) + actual_output_tokens: int = Field(ge=0, le=10_000_000) + estimated_cost_usd_micros: int = Field(ge=0, le=100_000_000) + actual_cost_usd_micros: int = Field(ge=0, le=100_000_000) + quality_gate_passed: bool + quality_gates_run: list[QualityGate] = Field(default_factory=list) + quality_gates_failed: list[QualityGate] = Field(default_factory=list) + context_compactions: int = Field(ge=0, le=100) + context_rotations: int = Field(ge=0, le=50) + context_utilization_final: float = Field(ge=0.0, le=1.0) + outcome: Outcome + retry_count: int = Field(ge=0, le=20) + language: str | None = Field(default=None, max_length=30) + repo_size_category: RepoSizeCategory | None = None diff --git a/src/mosaicstack_telemetry/types/predictions.py b/src/mosaicstack_telemetry/types/predictions.py new file mode 100644 index 0000000..9fe84d7 --- /dev/null +++ b/src/mosaicstack_telemetry/types/predictions.py @@ -0,0 +1,72 @@ +"""Prediction request and response types.""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel + +from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType + + +class TokenDistribution(BaseModel): + """Token usage distribution percentiles.""" + + p10: int + p25: int + median: int + p75: int + p90: int + + +class CorrectionFactors(BaseModel): + """Correction factors for estimated vs actual tokens.""" + + input: float + output: float + + +class QualityPrediction(BaseModel): + """Quality gate prediction data.""" + + gate_pass_rate: float + success_rate: float + + +class PredictionData(BaseModel): + """Full prediction data for a task type/model/provider/complexity combination.""" + + input_tokens: TokenDistribution + output_tokens: TokenDistribution + cost_usd_micros: dict[str, int] + duration_ms: dict[str, int] + correction_factors: CorrectionFactors + quality: QualityPrediction + + +class PredictionMetadata(BaseModel): + """Metadata about how a prediction was generated.""" + + sample_size: int + fallback_level: int + confidence: str # "none", "low", "medium", "high" + last_updated: datetime | None = None + dimensions_matched: dict[str, str | None] | None = None + fallback_note: str | None = None + cache_hit: bool = False + + +class PredictionResponse(BaseModel): + """Response from the prediction endpoint.""" + + prediction: PredictionData | None = None + metadata: PredictionMetadata + + +class PredictionQuery(BaseModel): + """Query parameters for a prediction request.""" + + task_type: TaskType + model: str + provider: Provider + complexity: Complexity diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..0ffe001 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,183 @@ +"""Shared test fixtures.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from uuid import UUID, uuid4 + +import pytest + +from mosaicstack_telemetry.config import TelemetryConfig +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + TaskCompletionEvent, + TaskType, +) +from mosaicstack_telemetry.types.predictions import ( + CorrectionFactors, + PredictionData, + PredictionMetadata, + PredictionQuery, + PredictionResponse, + QualityPrediction, + TokenDistribution, +) + +TEST_API_KEY = "a" * 64 +TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc" +TEST_SERVER_URL = "https://telemetry.example.com" + + +@pytest.fixture() +def config() -> TelemetryConfig: + """Create a valid test configuration.""" + return TelemetryConfig( + server_url=TEST_SERVER_URL, + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + submit_interval_seconds=1.0, + max_queue_size=100, + batch_size=10, + request_timeout_seconds=5.0, + max_retries=1, + ) + + +@pytest.fixture() +def dry_run_config() -> TelemetryConfig: + """Create a test configuration with dry_run enabled.""" + return TelemetryConfig( + server_url=TEST_SERVER_URL, + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + submit_interval_seconds=1.0, + max_queue_size=100, + batch_size=10, + request_timeout_seconds=5.0, + dry_run=True, + max_retries=1, + ) + + +@pytest.fixture() +def disabled_config() -> TelemetryConfig: + """Create a disabled test configuration.""" + return TelemetryConfig( + server_url=TEST_SERVER_URL, + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + enabled=False, + ) + + +@pytest.fixture() +def sample_instance_id() -> UUID: + """Return a fixed instance UUID for testing.""" + return UUID(TEST_INSTANCE_ID) + + +@pytest.fixture() +def sample_event(sample_instance_id: UUID) -> TaskCompletionEvent: + """Create a sample task completion event.""" + return TaskCompletionEvent( + instance_id=sample_instance_id, + event_id=uuid4(), + timestamp=datetime.now(timezone.utc), + task_duration_ms=30000, + task_type=TaskType.IMPLEMENTATION, + complexity=Complexity.MEDIUM, + harness=Harness.CLAUDE_CODE, + model="claude-sonnet-4-20250514", + provider=Provider.ANTHROPIC, + estimated_input_tokens=5000, + estimated_output_tokens=2000, + actual_input_tokens=5200, + actual_output_tokens=1800, + estimated_cost_usd_micros=10000, + actual_cost_usd_micros=9500, + quality_gate_passed=True, + quality_gates_run=[], + quality_gates_failed=[], + context_compactions=0, + context_rotations=0, + context_utilization_final=0.4, + outcome=Outcome.SUCCESS, + retry_count=0, + language="python", + repo_size_category=None, + ) + + +def make_event(instance_id: UUID | None = None) -> TaskCompletionEvent: + """Factory helper to create a sample event with optional overrides.""" + return TaskCompletionEvent( + instance_id=instance_id or UUID(TEST_INSTANCE_ID), + event_id=uuid4(), + timestamp=datetime.now(timezone.utc), + task_duration_ms=15000, + task_type=TaskType.DEBUGGING, + complexity=Complexity.LOW, + harness=Harness.AIDER, + model="gpt-4o", + provider=Provider.OPENAI, + estimated_input_tokens=1000, + estimated_output_tokens=500, + actual_input_tokens=1100, + actual_output_tokens=480, + estimated_cost_usd_micros=3000, + actual_cost_usd_micros=2800, + quality_gate_passed=True, + quality_gates_run=[], + quality_gates_failed=[], + context_compactions=0, + context_rotations=0, + context_utilization_final=0.2, + outcome=Outcome.SUCCESS, + retry_count=0, + ) + + +@pytest.fixture() +def sample_prediction_query() -> PredictionQuery: + """Create a sample prediction query.""" + return PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="claude-sonnet-4-20250514", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + + +@pytest.fixture() +def sample_prediction_response() -> PredictionResponse: + """Create a sample prediction response.""" + return PredictionResponse( + prediction=PredictionData( + input_tokens=TokenDistribution(p10=1000, p25=2000, median=3000, p75=4000, p90=5000), + output_tokens=TokenDistribution(p10=500, p25=1000, median=1500, p75=2000, p90=2500), + cost_usd_micros={"p10": 1000, "median": 3000, "p90": 5000}, + duration_ms={"p10": 10000, "median": 30000, "p90": 60000}, + correction_factors=CorrectionFactors(input=1.05, output=0.95), + quality=QualityPrediction(gate_pass_rate=0.85, success_rate=0.9), + ), + metadata=PredictionMetadata( + sample_size=150, + fallback_level=0, + confidence="high", + ), + ) + + +def make_batch_response_json(events: list[TaskCompletionEvent]) -> dict: + """Create a batch response JSON dict for a list of events (all accepted).""" + return { + "accepted": len(events), + "rejected": 0, + "results": [ + {"event_id": str(e.event_id), "status": "accepted", "error": None} + for e in events + ], + } diff --git a/tests/test_client.py b/tests/test_client.py new file mode 100644 index 0000000..1bcc5ab --- /dev/null +++ b/tests/test_client.py @@ -0,0 +1,321 @@ +"""Tests for TelemetryClient.""" + +from __future__ import annotations + +import httpx +import pytest +import respx + +from mosaicstack_telemetry.client import TelemetryClient +from mosaicstack_telemetry.config import TelemetryConfig +from mosaicstack_telemetry.types.events import ( + Complexity, + Outcome, + Provider, + TaskCompletionEvent, + TaskType, +) +from mosaicstack_telemetry.types.predictions import ( + PredictionMetadata, + PredictionQuery, + PredictionResponse, +) +from tests.conftest import ( + TEST_API_KEY, + TEST_INSTANCE_ID, + TEST_SERVER_URL, + make_batch_response_json, + make_event, +) + + +class TestTelemetryClientLifecycle: + """Tests for client start/stop lifecycle.""" + + def test_start_stop_sync(self, config: TelemetryConfig) -> None: + """Client can start and stop synchronously.""" + client = TelemetryClient(config) + client.start() + assert client.is_running is True + client.stop() + assert client.is_running is False + + async def test_start_stop_async(self, config: TelemetryConfig) -> None: + """Client can start and stop asynchronously.""" + client = TelemetryClient(config) + await client.start_async() + assert client.is_running is True + await client.stop_async() + assert client.is_running is False + + def test_start_disabled(self, disabled_config: TelemetryConfig) -> None: + """Starting a disabled client is a no-op.""" + client = TelemetryClient(disabled_config) + client.start() + assert client.is_running is False + client.stop() + + async def test_start_async_disabled(self, disabled_config: TelemetryConfig) -> None: + """Starting a disabled async client is a no-op.""" + client = TelemetryClient(disabled_config) + await client.start_async() + assert client.is_running is False + await client.stop_async() + + +class TestTelemetryClientTrack: + """Tests for the track() method.""" + + def test_track_queues_event(self, config: TelemetryConfig) -> None: + """track() adds event to the queue.""" + client = TelemetryClient(config) + event = make_event() + client.track(event) + assert client.queue_size == 1 + + def test_track_multiple_events(self, config: TelemetryConfig) -> None: + """Multiple events can be tracked.""" + client = TelemetryClient(config) + for _ in range(5): + client.track(make_event()) + assert client.queue_size == 5 + + def test_track_disabled_drops_event(self, disabled_config: TelemetryConfig) -> None: + """track() silently drops events when disabled.""" + client = TelemetryClient(disabled_config) + client.track(make_event()) + assert client.queue_size == 0 + + def test_track_never_throws(self, config: TelemetryConfig) -> None: + """track() should never raise exceptions.""" + client = TelemetryClient(config) + # This should not raise even with invalid-ish usage + event = make_event() + client.track(event) + assert client.queue_size == 1 + + +class TestTelemetryClientContextManager: + """Tests for context manager support.""" + + @respx.mock + def test_sync_context_manager(self, config: TelemetryConfig) -> None: + """Sync context manager starts and stops correctly.""" + # Mock any potential flush calls + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response( + 202, + json={"accepted": 0, "rejected": 0, "results": []}, + ) + ) + + with TelemetryClient(config) as client: + assert client.is_running is True + client.track(make_event()) + + assert client.is_running is False + + @respx.mock + async def test_async_context_manager(self, config: TelemetryConfig) -> None: + """Async context manager starts and stops correctly.""" + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response( + 202, + json={"accepted": 0, "rejected": 0, "results": []}, + ) + ) + + async with TelemetryClient(config) as client: + assert client.is_running is True + client.track(make_event()) + + assert client.is_running is False + + +class TestTelemetryClientPredictions: + """Tests for prediction caching and retrieval.""" + + def test_get_prediction_miss(self, config: TelemetryConfig) -> None: + """get_prediction returns None on cache miss.""" + client = TelemetryClient(config) + query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="test-model", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + assert client.get_prediction(query) is None + + def test_get_prediction_after_cache_populated( + self, config: TelemetryConfig + ) -> None: + """get_prediction returns cached value.""" + client = TelemetryClient(config) + query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="test-model", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + response = PredictionResponse( + prediction=None, + metadata=PredictionMetadata( + sample_size=50, + fallback_level=0, + confidence="medium", + ), + ) + # Directly populate the cache + client._prediction_cache.put(query, response) + + result = client.get_prediction(query) + assert result is not None + assert result.metadata.sample_size == 50 + + @respx.mock + async def test_refresh_predictions_async(self, config: TelemetryConfig) -> None: + """refresh_predictions fetches and caches predictions.""" + query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="test-model", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + + response_data = { + "results": [ + { + "prediction": None, + "metadata": { + "sample_size": 75, + "fallback_level": 1, + "confidence": "medium", + }, + } + ] + } + + respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock( + return_value=httpx.Response(200, json=response_data) + ) + + client = TelemetryClient(config) + await client.refresh_predictions([query]) + + result = client.get_prediction(query) + assert result is not None + assert result.metadata.sample_size == 75 + + @respx.mock + def test_refresh_predictions_sync(self, config: TelemetryConfig) -> None: + """refresh_predictions_sync fetches and caches predictions.""" + query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="test-model", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + + response_data = { + "results": [ + { + "prediction": None, + "metadata": { + "sample_size": 60, + "fallback_level": 0, + "confidence": "low", + }, + } + ] + } + + respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock( + return_value=httpx.Response(200, json=response_data) + ) + + client = TelemetryClient(config) + client.refresh_predictions_sync([query]) + + result = client.get_prediction(query) + assert result is not None + assert result.metadata.sample_size == 60 + + @respx.mock + async def test_refresh_predictions_server_error( + self, config: TelemetryConfig + ) -> None: + """refresh_predictions handles server errors gracefully.""" + query = PredictionQuery( + task_type=TaskType.IMPLEMENTATION, + model="test-model", + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + + respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock( + return_value=httpx.Response(500, text="Internal Server Error") + ) + + client = TelemetryClient(config) + # Should not raise + await client.refresh_predictions([query]) + # Cache should still be empty + assert client.get_prediction(query) is None + + async def test_refresh_predictions_empty_list( + self, config: TelemetryConfig + ) -> None: + """refresh_predictions with empty list is a no-op.""" + client = TelemetryClient(config) + await client.refresh_predictions([]) + + +class TestTelemetryClientConfig: + """Tests for configuration handling.""" + + def test_config_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Environment variables override defaults.""" + monkeypatch.setenv("MOSAIC_TELEMETRY_ENABLED", "false") + monkeypatch.setenv("MOSAIC_TELEMETRY_SERVER_URL", "https://env-server.com") + monkeypatch.setenv("MOSAIC_TELEMETRY_API_KEY", "b" * 64) + monkeypatch.setenv("MOSAIC_TELEMETRY_INSTANCE_ID", TEST_INSTANCE_ID) + + config = TelemetryConfig() + assert config.enabled is False + assert config.server_url == "https://env-server.com" + assert config.api_key == "b" * 64 + assert config.instance_id == TEST_INSTANCE_ID + + def test_config_validation_errors(self) -> None: + """Invalid config produces validation errors.""" + config = TelemetryConfig( + server_url="", + api_key="short", + instance_id="not-a-uuid", + ) + errors = config.validate() + assert len(errors) >= 3 + + def test_config_validation_success(self, config: TelemetryConfig) -> None: + """Valid config produces no validation errors.""" + errors = config.validate() + assert errors == [] + + def test_config_strips_trailing_slash(self) -> None: + """server_url trailing slashes are stripped.""" + config = TelemetryConfig( + server_url="https://example.com/", + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + ) + assert config.server_url == "https://example.com" + + def test_explicit_values_override_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Explicit constructor values take priority over env vars.""" + monkeypatch.setenv("MOSAIC_TELEMETRY_SERVER_URL", "https://env-server.com") + config = TelemetryConfig( + server_url="https://explicit-server.com", + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + ) + assert config.server_url == "https://explicit-server.com" diff --git a/tests/test_event_builder.py b/tests/test_event_builder.py new file mode 100644 index 0000000..c024644 --- /dev/null +++ b/tests/test_event_builder.py @@ -0,0 +1,137 @@ +"""Tests for EventBuilder.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from uuid import UUID + +from mosaicstack_telemetry.event_builder import EventBuilder +from mosaicstack_telemetry.types.events import ( + Complexity, + Harness, + Outcome, + Provider, + QualityGate, + RepoSizeCategory, + TaskType, +) + +TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc" + + +class TestEventBuilder: + """Tests for the fluent event builder.""" + + def test_build_complete_event(self) -> None: + """Build an event with all fields set.""" + event = ( + EventBuilder(instance_id=TEST_INSTANCE_ID) + .task_type(TaskType.IMPLEMENTATION) + .model("claude-sonnet-4-20250514") + .provider(Provider.ANTHROPIC) + .harness_type(Harness.CLAUDE_CODE) + .complexity_level(Complexity.HIGH) + .outcome_value(Outcome.SUCCESS) + .duration_ms(45000) + .tokens(estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800) + .cost(estimated=50000, actual=48000) + .quality( + passed=True, + gates_run=[QualityGate.LINT, QualityGate.TEST], + gates_failed=[], + ) + .context(compactions=1, rotations=0, utilization=0.4) + .retry_count(0) + .language("python") + .repo_size(RepoSizeCategory.MEDIUM) + .build() + ) + + assert event.instance_id == UUID(TEST_INSTANCE_ID) + assert event.task_type == TaskType.IMPLEMENTATION + assert event.model == "claude-sonnet-4-20250514" + assert event.provider == Provider.ANTHROPIC + assert event.harness == Harness.CLAUDE_CODE + assert event.complexity == Complexity.HIGH + assert event.outcome == Outcome.SUCCESS + assert event.task_duration_ms == 45000 + assert event.estimated_input_tokens == 5000 + assert event.estimated_output_tokens == 2000 + assert event.actual_input_tokens == 5200 + assert event.actual_output_tokens == 1800 + assert event.estimated_cost_usd_micros == 50000 + assert event.actual_cost_usd_micros == 48000 + assert event.quality_gate_passed is True + assert event.quality_gates_run == [QualityGate.LINT, QualityGate.TEST] + assert event.quality_gates_failed == [] + assert event.context_compactions == 1 + assert event.context_rotations == 0 + assert event.context_utilization_final == 0.4 + assert event.retry_count == 0 + assert event.language == "python" + assert event.repo_size_category == RepoSizeCategory.MEDIUM + + def test_auto_generated_defaults(self) -> None: + """event_id and timestamp are auto-generated.""" + event = ( + EventBuilder(instance_id=TEST_INSTANCE_ID) + .task_type(TaskType.DEBUGGING) + .model("gpt-4o") + .provider(Provider.OPENAI) + .build() + ) + + assert event.event_id is not None + assert event.timestamp is not None + assert event.timestamp.tzinfo is not None + + def test_custom_event_id(self) -> None: + """Custom event_id can be set.""" + custom_id = "abcdef12-1234-1234-1234-123456789abc" + event = ( + EventBuilder(instance_id=TEST_INSTANCE_ID) + .event_id(custom_id) + .model("test-model") + .build() + ) + + assert event.event_id == UUID(custom_id) + + def test_custom_timestamp(self) -> None: + """Custom timestamp can be set.""" + ts = datetime(2026, 1, 15, 12, 0, 0, tzinfo=timezone.utc) + event = ( + EventBuilder(instance_id=TEST_INSTANCE_ID) + .timestamp(ts) + .model("test-model") + .build() + ) + + assert event.timestamp == ts + + def test_minimal_event_defaults(self) -> None: + """Minimal event has sensible defaults.""" + event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("test-model").build() + + assert event.task_type == TaskType.UNKNOWN + assert event.complexity == Complexity.MEDIUM + assert event.harness == Harness.UNKNOWN + assert event.provider == Provider.UNKNOWN + assert event.outcome == Outcome.FAILURE + assert event.task_duration_ms == 0 + assert event.retry_count == 0 + assert event.language is None + assert event.repo_size_category is None + + def test_quality_defaults_to_empty_lists(self) -> None: + """Quality gate lists default to empty.""" + event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build() + + assert event.quality_gates_run == [] + assert event.quality_gates_failed == [] + assert event.quality_gate_passed is False + + def test_schema_version(self) -> None: + """Schema version defaults to 1.0.""" + event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build() + assert event.schema_version == "1.0" diff --git a/tests/test_prediction_cache.py b/tests/test_prediction_cache.py new file mode 100644 index 0000000..1d87c53 --- /dev/null +++ b/tests/test_prediction_cache.py @@ -0,0 +1,147 @@ +"""Tests for PredictionCache.""" + +from __future__ import annotations + +import threading +import time + +from mosaicstack_telemetry.prediction_cache import PredictionCache +from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType +from mosaicstack_telemetry.types.predictions import ( + PredictionMetadata, + PredictionQuery, + PredictionResponse, +) + + +def _make_query( + task_type: TaskType = TaskType.IMPLEMENTATION, + model: str = "claude-sonnet-4-20250514", +) -> PredictionQuery: + return PredictionQuery( + task_type=task_type, + model=model, + provider=Provider.ANTHROPIC, + complexity=Complexity.MEDIUM, + ) + + +def _make_response(sample_size: int = 100) -> PredictionResponse: + return PredictionResponse( + prediction=None, + metadata=PredictionMetadata( + sample_size=sample_size, + fallback_level=0, + confidence="high", + ), + ) + + +class TestPredictionCache: + """Tests for the TTL-based prediction cache.""" + + def test_cache_hit(self) -> None: + """Cached predictions are returned on hit.""" + cache = PredictionCache(ttl_seconds=60.0) + query = _make_query() + response = _make_response() + + cache.put(query, response) + result = cache.get(query) + + assert result is not None + assert result.metadata.sample_size == 100 + + def test_cache_miss(self) -> None: + """Missing keys return None.""" + cache = PredictionCache(ttl_seconds=60.0) + query = _make_query() + + result = cache.get(query) + assert result is None + + def test_cache_expiry(self) -> None: + """Expired entries return None.""" + cache = PredictionCache(ttl_seconds=0.05) + query = _make_query() + response = _make_response() + + cache.put(query, response) + time.sleep(0.1) + result = cache.get(query) + + assert result is None + + def test_different_queries_different_keys(self) -> None: + """Different queries map to different cache entries.""" + cache = PredictionCache(ttl_seconds=60.0) + query1 = _make_query(task_type=TaskType.IMPLEMENTATION) + query2 = _make_query(task_type=TaskType.DEBUGGING) + + cache.put(query1, _make_response(sample_size=100)) + cache.put(query2, _make_response(sample_size=200)) + + result1 = cache.get(query1) + result2 = cache.get(query2) + + assert result1 is not None + assert result2 is not None + assert result1.metadata.sample_size == 100 + assert result2.metadata.sample_size == 200 + + def test_cache_clear(self) -> None: + """Clear removes all entries.""" + cache = PredictionCache(ttl_seconds=60.0) + query = _make_query() + cache.put(query, _make_response()) + + assert cache.size == 1 + cache.clear() + assert cache.size == 0 + assert cache.get(query) is None + + def test_cache_overwrite(self) -> None: + """Putting a new value for the same key overwrites.""" + cache = PredictionCache(ttl_seconds=60.0) + query = _make_query() + + cache.put(query, _make_response(sample_size=100)) + cache.put(query, _make_response(sample_size=200)) + + result = cache.get(query) + assert result is not None + assert result.metadata.sample_size == 200 + + def test_thread_safety(self) -> None: + """Cache handles concurrent access from multiple threads.""" + cache = PredictionCache(ttl_seconds=60.0) + errors: list[Exception] = [] + iterations = 100 + + def writer(thread_id: int) -> None: + try: + for i in range(iterations): + query = _make_query(model=f"model-{thread_id}-{i}") + cache.put(query, _make_response(sample_size=i)) + except Exception as e: + errors.append(e) + + def reader(thread_id: int) -> None: + try: + for i in range(iterations): + query = _make_query(model=f"model-{thread_id}-{i}") + cache.get(query) # May or may not hit + except Exception as e: + errors.append(e) + + threads: list[threading.Thread] = [] + for tid in range(4): + threads.append(threading.Thread(target=writer, args=(tid,))) + threads.append(threading.Thread(target=reader, args=(tid,))) + + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert not errors, f"Thread errors: {errors}" diff --git a/tests/test_queue.py b/tests/test_queue.py new file mode 100644 index 0000000..0a2d02d --- /dev/null +++ b/tests/test_queue.py @@ -0,0 +1,133 @@ +"""Tests for EventQueue.""" + +from __future__ import annotations + +import threading + +from mosaicstack_telemetry.queue import EventQueue +from tests.conftest import make_event + + +class TestEventQueue: + """Tests for the bounded thread-safe event queue.""" + + def test_put_and_drain(self) -> None: + """Events can be put in and drained out in FIFO order.""" + queue = EventQueue(max_size=10) + e1 = make_event() + e2 = make_event() + queue.put(e1) + queue.put(e2) + + drained = queue.drain(10) + assert len(drained) == 2 + assert drained[0].event_id == e1.event_id + assert drained[1].event_id == e2.event_id + + def test_drain_max_items(self) -> None: + """Drain respects the max_items limit.""" + queue = EventQueue(max_size=10) + for _ in range(5): + queue.put(make_event()) + + drained = queue.drain(3) + assert len(drained) == 3 + assert queue.size == 2 + + def test_drain_empty_queue(self) -> None: + """Draining an empty queue returns empty list.""" + queue = EventQueue(max_size=10) + drained = queue.drain(5) + assert drained == [] + + def test_bounded_fifo_eviction(self) -> None: + """When queue is full, oldest events are evicted.""" + queue = EventQueue(max_size=3) + events = [make_event() for _ in range(5)] + for e in events: + queue.put(e) + + assert queue.size == 3 + drained = queue.drain(3) + # Should have the last 3 events (oldest 2 were evicted) + assert drained[0].event_id == events[2].event_id + assert drained[1].event_id == events[3].event_id + assert drained[2].event_id == events[4].event_id + + def test_size_property(self) -> None: + """Size property reflects current queue length.""" + queue = EventQueue(max_size=10) + assert queue.size == 0 + queue.put(make_event()) + assert queue.size == 1 + queue.put(make_event()) + assert queue.size == 2 + queue.drain(1) + assert queue.size == 1 + + def test_is_empty_property(self) -> None: + """is_empty property works correctly.""" + queue = EventQueue(max_size=10) + assert queue.is_empty is True + queue.put(make_event()) + assert queue.is_empty is False + queue.drain(1) + assert queue.is_empty is True + + def test_put_back(self) -> None: + """put_back re-adds events to the front of the queue.""" + queue = EventQueue(max_size=10) + e1 = make_event() + e2 = make_event() + queue.put(e1) + + queue.put_back([e2]) + drained = queue.drain(2) + # e2 should be first (put_back adds to front) + assert drained[0].event_id == e2.event_id + assert drained[1].event_id == e1.event_id + + def test_put_back_respects_max_size(self) -> None: + """put_back doesn't exceed max_size.""" + queue = EventQueue(max_size=3) + for _ in range(3): + queue.put(make_event()) + + events_to_add = [make_event() for _ in range(5)] + queue.put_back(events_to_add) + assert queue.size == 3 + + def test_thread_safety_concurrent_put_drain(self) -> None: + """Queue handles concurrent put and drain operations.""" + queue = EventQueue(max_size=1000) + total_puts = 500 + errors: list[Exception] = [] + + def put_events() -> None: + try: + for _ in range(total_puts): + queue.put(make_event()) + except Exception as e: + errors.append(e) + + def drain_events() -> None: + try: + drained_count = 0 + while drained_count < total_puts: + batch = queue.drain(10) + drained_count += len(batch) + if not batch: + threading.Event().wait(0.001) + except Exception as e: + errors.append(e) + + put_thread = threading.Thread(target=put_events) + drain_thread = threading.Thread(target=drain_events) + + put_thread.start() + drain_thread.start() + + put_thread.join(timeout=5) + drain_thread.join(timeout=5) + + assert not errors, f"Thread errors: {errors}" diff --git a/tests/test_submitter.py b/tests/test_submitter.py new file mode 100644 index 0000000..f8eaba2 --- /dev/null +++ b/tests/test_submitter.py @@ -0,0 +1,207 @@ +"""Tests for batch submission logic.""" + +from __future__ import annotations + +import httpx +import pytest +import respx + +from mosaicstack_telemetry.config import TelemetryConfig +from mosaicstack_telemetry.submitter import submit_batch_async, submit_batch_sync +from tests.conftest import ( + TEST_API_KEY, + TEST_INSTANCE_ID, + TEST_SERVER_URL, + make_batch_response_json, + make_event, +) + + +@pytest.fixture() +def fast_config() -> TelemetryConfig: + """Config with minimal retries and timeouts for fast tests.""" + return TelemetryConfig( + server_url=TEST_SERVER_URL, + api_key=TEST_API_KEY, + instance_id=TEST_INSTANCE_ID, + max_retries=1, + request_timeout_seconds=2.0, + ) + + +class TestSubmitBatchSync: + """Tests for synchronous batch submission.""" + + @respx.mock + def test_successful_submission(self, fast_config: TelemetryConfig) -> None: + """Successful 202 response returns BatchEventResponse.""" + events = [make_event() for _ in range(3)] + response_json = make_batch_response_json(events) + + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response(202, json=response_json) + ) + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is not None + assert result.accepted == 3 + assert result.rejected == 0 + + @respx.mock + def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None: + """429 response respects Retry-After header and retries.""" + events = [make_event()] + response_json = make_batch_response_json(events) + + route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch") + route.side_effect = [ + httpx.Response(429, headers={"Retry-After": "0.1"}), + httpx.Response(202, json=response_json), + ] + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is not None + assert result.accepted == 1 + + @respx.mock + def test_403_returns_none(self, fast_config: TelemetryConfig) -> None: + """403 response returns None immediately.""" + events = [make_event()] + + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response(403, json={"error": "Forbidden"}) + ) + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is None + + @respx.mock + def test_network_error_retries(self, fast_config: TelemetryConfig) -> None: + """Network errors trigger retry with backoff.""" + events = [make_event()] + response_json = make_batch_response_json(events) + + route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch") + route.side_effect = [ + httpx.ConnectError("Connection refused"), + httpx.Response(202, json=response_json), + ] + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is not None + assert result.accepted == 1 + + @respx.mock + def test_all_retries_exhausted(self, fast_config: TelemetryConfig) -> None: + """When all retries fail, returns None.""" + events = [make_event()] + + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + side_effect=httpx.ConnectError("Connection refused") + ) + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is None + + def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None: + """Dry run mode logs but doesn't send.""" + fast_config.dry_run = True + events = [make_event() for _ in range(5)] + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is not None + assert result.accepted == 5 + assert result.rejected == 0 + + @respx.mock + def test_500_error_retries(self, fast_config: TelemetryConfig) -> None: + """Server errors (500) trigger retries.""" + events = [make_event()] + response_json = make_batch_response_json(events) + + route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch") + route.side_effect = [ + httpx.Response(500, text="Internal Server Error"), + httpx.Response(202, json=response_json), + ] + + with httpx.Client() as client: + result = submit_batch_sync(client, fast_config, events) + + assert result is not None + assert result.accepted == 1 + + +class TestSubmitBatchAsync: + """Tests for asynchronous batch submission.""" + + @respx.mock + async def test_successful_submission(self, fast_config: TelemetryConfig) -> None: + """Successful 202 response returns BatchEventResponse.""" + events = [make_event() for _ in range(3)] + response_json = make_batch_response_json(events) + + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response(202, json=response_json) + ) + + async with httpx.AsyncClient() as client: + result = await submit_batch_async(client, fast_config, events) + + assert result is not None + assert result.accepted == 3 + + @respx.mock + async def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None: + """429 response respects Retry-After and retries asynchronously.""" + events = [make_event()] + response_json = make_batch_response_json(events) + + route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch") + route.side_effect = [ + httpx.Response(429, headers={"Retry-After": "0.1"}), + httpx.Response(202, json=response_json), + ] + + async with httpx.AsyncClient() as client: + result = await submit_batch_async(client, fast_config, events) + + assert result is not None + assert result.accepted == 1 + + @respx.mock + async def test_403_returns_none(self, fast_config: TelemetryConfig) -> None: + """403 returns None immediately.""" + events = [make_event()] + + respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock( + return_value=httpx.Response(403, json={"error": "Forbidden"}) + ) + + async with httpx.AsyncClient() as client: + result = await submit_batch_async(client, fast_config, events) + + assert result is None + + async def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None: + """Dry run mode returns mock response without HTTP.""" + fast_config.dry_run = True + events = [make_event() for _ in range(3)] + + async with httpx.AsyncClient() as client: + result = await submit_batch_async(client, fast_config, events) + + assert result is not None + assert result.accepted == 3