Initial project structure
This commit is contained in:
42
.gitignore
vendored
Normal file
42
.gitignore
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
*.egg-info/
|
||||
*.egg
|
||||
dist/
|
||||
build/
|
||||
.eggs/
|
||||
|
||||
# Virtual environments
|
||||
.venv/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Testing
|
||||
.coverage
|
||||
htmlcov/
|
||||
.pytest_cache/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Environment
|
||||
.env
|
||||
.env.*
|
||||
!.env.example
|
||||
41
CLAUDE.md
Normal file
41
CLAUDE.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# mosaicstack-telemetry (Python Client SDK)
|
||||
|
||||
Python client SDK for Mosaic Stack Telemetry. Reports AI coding task-completion telemetry and queries crowd-sourced predictions.
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Install dependencies (including dev)
|
||||
uv sync --all-extras
|
||||
|
||||
# Run tests (85%+ coverage required)
|
||||
uv run pytest
|
||||
|
||||
# Lint
|
||||
uv run ruff check src/ tests/
|
||||
|
||||
# Format check
|
||||
uv run ruff format --check src/ tests/
|
||||
|
||||
# Type check
|
||||
uv run mypy src/
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
- `src/mosaicstack_telemetry/client.py` — Main TelemetryClient (public API)
|
||||
- `src/mosaicstack_telemetry/config.py` — TelemetryConfig dataclass with env var support
|
||||
- `src/mosaicstack_telemetry/queue.py` — Thread-safe bounded event queue
|
||||
- `src/mosaicstack_telemetry/submitter.py` — Batch submission with retry/backoff
|
||||
- `src/mosaicstack_telemetry/_sync.py` — Threading-based periodic submitter
|
||||
- `src/mosaicstack_telemetry/_async.py` — Asyncio-based periodic submitter
|
||||
- `src/mosaicstack_telemetry/event_builder.py` — Fluent event builder
|
||||
- `src/mosaicstack_telemetry/prediction_cache.py` — TTL-based prediction cache
|
||||
- `src/mosaicstack_telemetry/types/` — All Pydantic models and enums
|
||||
|
||||
## Key Rules
|
||||
|
||||
- `track()` must NEVER throw or block the caller
|
||||
- All logging uses `logging.getLogger("mosaicstack_telemetry")`
|
||||
- Runtime deps: httpx + pydantic only
|
||||
- Python 3.10+ compatible (uses `str, Enum` mixin instead of StrEnum)
|
||||
372
LICENSE
Normal file
372
LICENSE
Normal file
@@ -0,0 +1,372 @@
|
||||
Mozilla Public License Version 2.0
|
||||
==================================
|
||||
|
||||
1. Definitions
|
||||
--------------
|
||||
|
||||
1.1. "Contributor"
|
||||
means each individual or legal entity that creates, contributes to
|
||||
the creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
means the combination of the Contributions of others (if any) used
|
||||
by a Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
means Source Code Form to which the initial Contributor has attached
|
||||
the notice in Exhibit A, the Executable Form of such Source Code
|
||||
Form, and Modifications of such Source Code Form, in each case
|
||||
including portions thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
(a) that the initial Contributor has attached the notice described
|
||||
in Exhibit B to the Covered Software; or
|
||||
|
||||
(b) that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the
|
||||
terms of a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
means having the right to grant, to the maximum extent possible,
|
||||
whether at the time of the initial grant or subsequently, any and
|
||||
all of the rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
means any of the following:
|
||||
|
||||
(a) any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered
|
||||
Software; or
|
||||
|
||||
(b) any new file in Source Code Form that contains any Covered
|
||||
Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the
|
||||
License, by the making, using, selling, offering for sale, having
|
||||
made, import, or transfer of either its Contributions or its
|
||||
Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
means either the GNU General Public License, Version 2.0, the GNU
|
||||
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||
Public License, Version 3.0, or any later versions of those
|
||||
licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that
|
||||
controls, is controlled by, or is under common control with You. For
|
||||
purposes of this definition, "control" means (a) the power, direct
|
||||
or indirect, to cause the direction or management of such entity,
|
||||
whether by contract or otherwise, or (b) ownership of more than
|
||||
fifty percent (50%) of the outstanding shares or beneficial
|
||||
ownership of such entity.
|
||||
|
||||
2. License Grants and Conditions
|
||||
--------------------------------
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
(a) under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||
for sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
(a) for any code that a Contributor has removed from Covered Software;
|
||||
or
|
||||
|
||||
(b) for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights
|
||||
to grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||
in Section 2.1.
|
||||
|
||||
3. Responsibilities
|
||||
-------------------
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
(a) such Covered Software must also be made available in Source Code
|
||||
Form, as described in Section 3.1, and You must inform recipients of
|
||||
the Executable Form how they can obtain a copy of such Source Code
|
||||
Form by reasonable means in a timely manner, at a charge no more
|
||||
than the cost of distribution to the recipient; and
|
||||
|
||||
(b) You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter
|
||||
the recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty,
|
||||
or limitations of liability) contained within the Source Code Form of
|
||||
the Covered Software, except that You may alter any license notices to
|
||||
the extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
---------------------------------------------------
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this
|
||||
License with respect to some or all of the Covered Software due to
|
||||
statute, judicial order, or regulation then You must: (a) comply with
|
||||
the terms of this License to the maximum extent possible; and (b)
|
||||
describe the limitations and the code they affect. Such description must
|
||||
be placed in a text file included with all distributions of the Covered
|
||||
Software under the name "LEGAL". The contents of the LEGAL file are for
|
||||
informational purposes only and do not alter the terms of this License.
|
||||
|
||||
5. Termination
|
||||
--------------
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically
|
||||
if You fail to comply with any of its terms. However, if You become
|
||||
compliant, then the rights granted under this License from a particular
|
||||
Contributor are reinstated (a) provisionally, unless and until such
|
||||
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||
ongoing basis, if such Contributor fails to notify You of the
|
||||
non-compliance by some reasonable means prior to 60 days after You have
|
||||
come back into compliance. Moreover, Your grants from a particular
|
||||
Contributor are reinstated on an ongoing basis if such Contributor
|
||||
notifies You of the non-compliance by some reasonable means, this is the
|
||||
first time You have received notice of non-compliance with this License
|
||||
from such Contributor, and You become compliant prior to 30 days after
|
||||
Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||
end user license agreements (excluding distributors and resellers) which
|
||||
have been validly granted by You or Your distributors under this License
|
||||
prior to termination shall survive termination.
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 6. Disclaimer of Warranty *
|
||||
* ------------------------- *
|
||||
* *
|
||||
* Covered Software is provided under this License on an "as is" *
|
||||
* basis, without warranty of any kind, either expressed, implied, or *
|
||||
* statutory, including, without limitation, warranties that the *
|
||||
* Covered Software is free of defects, merchantable, fit for a *
|
||||
* particular purpose or non-infringing. The entire risk as to the *
|
||||
* quality and performance of the Covered Software is with You. *
|
||||
* Should any Covered Software prove defective in any respect, You *
|
||||
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||
* essential part of this License. No use of any Covered Software is *
|
||||
* authorized under this License except under this disclaimer. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
************************************************************************
|
||||
* *
|
||||
* 7. Limitation of Liability *
|
||||
* -------------------------- *
|
||||
* *
|
||||
* Under no circumstances and under no legal theory, whether tort *
|
||||
* (including negligence), contract, or otherwise, shall any *
|
||||
* Contributor, or anyone who distributes Covered Software as *
|
||||
* permitted above, be liable to You for any direct, indirect, *
|
||||
* special, incidental, or consequential damages of any character *
|
||||
* including, without limitation, damages for lost profits, loss of *
|
||||
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||
* and all other commercial damages or losses, even if such party *
|
||||
* shall have been informed of the possibility of such damages. This *
|
||||
* limitation of liability shall not apply to liability for death or *
|
||||
* personal injury resulting from such party's negligence to the *
|
||||
* extent applicable law prohibits such limitation. Some *
|
||||
* jurisdictions do not allow the exclusion or limitation of *
|
||||
* incidental or consequential damages, so this exclusion and *
|
||||
* limitation may not apply to You. *
|
||||
* *
|
||||
************************************************************************
|
||||
|
||||
8. Litigation
|
||||
-------------
|
||||
|
||||
Any litigation relating to this License may be brought only in the
|
||||
courts of a jurisdiction where the defendant maintains its principal
|
||||
place of business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions.
|
||||
Nothing in this Section shall prevent a party's ability to bring
|
||||
cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
----------------
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides
|
||||
that the language of a contract shall be construed against the drafter
|
||||
shall not be used to construe this License against a Contributor.
|
||||
|
||||
10. Versions of the License
|
||||
---------------------------
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses
|
||||
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
-------------------------------------------
|
||||
|
||||
This Source Code Form is subject to the terms of the Mozilla Public
|
||||
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular
|
||||
file, then You may include the notice in a location (such as a LICENSE
|
||||
file in a relevant directory) where a recipient would be likely to look
|
||||
for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||
---------------------------------------------------------
|
||||
|
||||
This Source Code Form is "Incompatible With Secondary Licenses", as
|
||||
defined by the Mozilla Public License, v. 2.0.
|
||||
181
README.md
Normal file
181
README.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# mosaicstack-telemetry
|
||||
|
||||
Python client SDK for [Mosaic Stack Telemetry](https://github.com/mosaicstack/telemetry). Report AI coding task-completion telemetry and query crowd-sourced predictions for token usage, cost, and quality outcomes.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install mosaicstack-telemetry
|
||||
# or
|
||||
uv add mosaicstack-telemetry
|
||||
```
|
||||
|
||||
## Quick Start (Sync)
|
||||
|
||||
Best for scripts, aider integrations, and non-async contexts:
|
||||
|
||||
```python
|
||||
from mosaicstack_telemetry import (
|
||||
TelemetryClient,
|
||||
TelemetryConfig,
|
||||
EventBuilder,
|
||||
TaskType,
|
||||
Provider,
|
||||
Harness,
|
||||
Complexity,
|
||||
Outcome,
|
||||
QualityGate,
|
||||
)
|
||||
|
||||
config = TelemetryConfig(
|
||||
server_url="https://telemetry.mosaicstack.dev",
|
||||
api_key="your-64-char-hex-api-key-here...",
|
||||
instance_id="your-uuid-instance-id",
|
||||
)
|
||||
|
||||
client = TelemetryClient(config)
|
||||
client.start() # Starts background submission thread
|
||||
|
||||
# Build and track an event
|
||||
event = (
|
||||
EventBuilder(instance_id=config.instance_id)
|
||||
.task_type(TaskType.IMPLEMENTATION)
|
||||
.model("claude-sonnet-4-20250514")
|
||||
.provider(Provider.ANTHROPIC)
|
||||
.harness_type(Harness.AIDER)
|
||||
.complexity_level(Complexity.MEDIUM)
|
||||
.outcome_value(Outcome.SUCCESS)
|
||||
.duration_ms(45000)
|
||||
.tokens(estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800)
|
||||
.cost(estimated=50000, actual=48000)
|
||||
.quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST])
|
||||
.context(compactions=0, rotations=0, utilization=0.4)
|
||||
.language("python")
|
||||
.build()
|
||||
)
|
||||
|
||||
client.track(event) # Non-blocking, thread-safe
|
||||
|
||||
# When done
|
||||
client.stop() # Flushes remaining events
|
||||
```
|
||||
|
||||
## Async Usage
|
||||
|
||||
For asyncio-based applications:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from mosaicstack_telemetry import TelemetryClient, TelemetryConfig
|
||||
|
||||
async def main():
|
||||
config = TelemetryConfig(
|
||||
server_url="https://telemetry.mosaicstack.dev",
|
||||
api_key="your-64-char-hex-api-key-here...",
|
||||
instance_id="your-uuid-instance-id",
|
||||
)
|
||||
|
||||
client = TelemetryClient(config)
|
||||
await client.start_async() # Starts asyncio background task
|
||||
|
||||
# track() is always synchronous
|
||||
client.track(event)
|
||||
|
||||
await client.stop_async() # Flushes remaining events
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
## Context Manager
|
||||
|
||||
Both sync and async context managers are supported:
|
||||
|
||||
```python
|
||||
# Sync
|
||||
with TelemetryClient(config) as client:
|
||||
client.track(event)
|
||||
|
||||
# Async
|
||||
async with TelemetryClient(config) as client:
|
||||
client.track(event)
|
||||
```
|
||||
|
||||
## Configuration via Environment Variables
|
||||
|
||||
All core settings can be set via environment variables:
|
||||
|
||||
```bash
|
||||
export MOSAIC_TELEMETRY_ENABLED=true
|
||||
export MOSAIC_TELEMETRY_SERVER_URL=https://telemetry.mosaicstack.dev
|
||||
export MOSAIC_TELEMETRY_API_KEY=your-64-char-hex-api-key
|
||||
export MOSAIC_TELEMETRY_INSTANCE_ID=your-uuid-instance-id
|
||||
```
|
||||
|
||||
Then create a config with defaults:
|
||||
|
||||
```python
|
||||
config = TelemetryConfig() # Picks up env vars automatically
|
||||
```
|
||||
|
||||
Explicit constructor values take priority over environment variables.
|
||||
|
||||
## Querying Predictions
|
||||
|
||||
Fetch crowd-sourced predictions for token usage, cost, and quality:
|
||||
|
||||
```python
|
||||
from mosaicstack_telemetry import PredictionQuery, TaskType, Provider, Complexity
|
||||
|
||||
query = PredictionQuery(
|
||||
task_type=TaskType.IMPLEMENTATION,
|
||||
model="claude-sonnet-4-20250514",
|
||||
provider=Provider.ANTHROPIC,
|
||||
complexity=Complexity.MEDIUM,
|
||||
)
|
||||
|
||||
# Async
|
||||
await client.refresh_predictions([query])
|
||||
|
||||
# Sync
|
||||
client.refresh_predictions_sync([query])
|
||||
|
||||
# Read from cache
|
||||
prediction = client.get_prediction(query)
|
||||
if prediction and prediction.prediction:
|
||||
print(f"Expected input tokens (median): {prediction.prediction.input_tokens.median}")
|
||||
print(f"Expected cost (median): ${prediction.prediction.cost_usd_micros['median'] / 1_000_000:.4f}")
|
||||
print(f"Quality gate pass rate: {prediction.prediction.quality.gate_pass_rate:.0%}")
|
||||
```
|
||||
|
||||
## Dry-Run Mode
|
||||
|
||||
Test your integration without sending data to the server:
|
||||
|
||||
```python
|
||||
config = TelemetryConfig(
|
||||
server_url="https://telemetry.mosaicstack.dev",
|
||||
api_key="a" * 64,
|
||||
instance_id="12345678-1234-1234-1234-123456789abc",
|
||||
dry_run=True, # Logs batches but doesn't send
|
||||
)
|
||||
```
|
||||
|
||||
## Configuration Reference
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `server_url` | (required) | Telemetry server base URL |
|
||||
| `api_key` | (required) | 64-character hex API key |
|
||||
| `instance_id` | (required) | UUID identifying this instance |
|
||||
| `enabled` | `True` | Enable/disable telemetry |
|
||||
| `submit_interval_seconds` | `300.0` | Background flush interval |
|
||||
| `max_queue_size` | `1000` | Max events in memory queue |
|
||||
| `batch_size` | `100` | Events per batch (server max) |
|
||||
| `request_timeout_seconds` | `10.0` | HTTP request timeout |
|
||||
| `prediction_cache_ttl_seconds` | `21600.0` | Prediction cache TTL (6h) |
|
||||
| `dry_run` | `False` | Log but don't send |
|
||||
| `max_retries` | `3` | Retries on failure |
|
||||
|
||||
## License
|
||||
|
||||
MPL-2.0
|
||||
49
pyproject.toml
Normal file
49
pyproject.toml
Normal file
@@ -0,0 +1,49 @@
|
||||
[project]
|
||||
name = "mosaicstack-telemetry"
|
||||
version = "0.1.0"
|
||||
description = "Python client SDK for Mosaic Stack Telemetry"
|
||||
requires-python = ">=3.10"
|
||||
license = "MPL-2.0"
|
||||
dependencies = [
|
||||
"httpx>=0.25.0",
|
||||
"pydantic>=2.5.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"pytest>=8.0",
|
||||
"pytest-cov>=5.0",
|
||||
"pytest-asyncio>=0.24",
|
||||
"ruff>=0.14.0",
|
||||
"mypy>=1.15",
|
||||
"respx>=0.21.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/mosaicstack_telemetry"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
asyncio_mode = "auto"
|
||||
addopts = "--cov=src/mosaicstack_telemetry --cov-report=term-missing --cov-fail-under=85"
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
target-version = "py310"
|
||||
src = ["src", "tests"]
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "N", "W", "B", "UP", "S", "A", "C4", "DTZ", "T20", "RUF"]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
"tests/**" = ["S101"]
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.10"
|
||||
strict = true
|
||||
plugins = ["pydantic.mypy"]
|
||||
mypy_path = "src"
|
||||
96
scripts/validate_schema.py
Normal file
96
scripts/validate_schema.py
Normal file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Validate that the SDK types match the expected schema."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
RepoSizeCategory,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Validate schema by generating JSON schema for key models."""
|
||||
errors: list[str] = []
|
||||
|
||||
# Validate enums have expected values
|
||||
expected_task_types = {
|
||||
"planning", "implementation", "code_review", "testing",
|
||||
"debugging", "refactoring", "documentation", "configuration",
|
||||
"security_audit", "unknown",
|
||||
}
|
||||
actual_task_types = {t.value for t in TaskType}
|
||||
if actual_task_types != expected_task_types:
|
||||
errors.append(f"TaskType mismatch: {actual_task_types ^ expected_task_types}")
|
||||
|
||||
expected_complexity = {"low", "medium", "high", "critical"}
|
||||
actual_complexity = {c.value for c in Complexity}
|
||||
if actual_complexity != expected_complexity:
|
||||
errors.append(f"Complexity mismatch: {actual_complexity ^ expected_complexity}")
|
||||
|
||||
expected_harness = {
|
||||
"claude_code", "opencode", "kilo_code", "aider",
|
||||
"api_direct", "ollama_local", "custom", "unknown",
|
||||
}
|
||||
actual_harness = {h.value for h in Harness}
|
||||
if actual_harness != expected_harness:
|
||||
errors.append(f"Harness mismatch: {actual_harness ^ expected_harness}")
|
||||
|
||||
expected_provider = {
|
||||
"anthropic", "openai", "openrouter", "ollama",
|
||||
"google", "mistral", "custom", "unknown",
|
||||
}
|
||||
actual_provider = {p.value for p in Provider}
|
||||
if actual_provider != expected_provider:
|
||||
errors.append(f"Provider mismatch: {actual_provider ^ expected_provider}")
|
||||
|
||||
expected_gates = {"build", "lint", "test", "coverage", "typecheck", "security"}
|
||||
actual_gates = {g.value for g in QualityGate}
|
||||
if actual_gates != expected_gates:
|
||||
errors.append(f"QualityGate mismatch: {actual_gates ^ expected_gates}")
|
||||
|
||||
expected_outcome = {"success", "failure", "partial", "timeout"}
|
||||
actual_outcome = {o.value for o in Outcome}
|
||||
if actual_outcome != expected_outcome:
|
||||
errors.append(f"Outcome mismatch: {actual_outcome ^ expected_outcome}")
|
||||
|
||||
expected_repo_size = {"tiny", "small", "medium", "large", "huge"}
|
||||
actual_repo_size = {r.value for r in RepoSizeCategory}
|
||||
if actual_repo_size != expected_repo_size:
|
||||
errors.append(f"RepoSizeCategory mismatch: {actual_repo_size ^ expected_repo_size}")
|
||||
|
||||
# Generate JSON schemas for key models
|
||||
print("TaskCompletionEvent schema:")
|
||||
print(json.dumps(TaskCompletionEvent.model_json_schema(), indent=2))
|
||||
print()
|
||||
print("PredictionQuery schema:")
|
||||
print(json.dumps(PredictionQuery.model_json_schema(), indent=2))
|
||||
print()
|
||||
print("PredictionResponse schema:")
|
||||
print(json.dumps(PredictionResponse.model_json_schema(), indent=2))
|
||||
|
||||
if errors:
|
||||
print("\nVALIDATION ERRORS:")
|
||||
for error in errors:
|
||||
print(f" - {error}")
|
||||
return 1
|
||||
|
||||
print("\nAll schema validations passed.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
69
src/mosaicstack_telemetry/__init__.py
Normal file
69
src/mosaicstack_telemetry/__init__.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Mosaic Stack Telemetry — Python client SDK.
|
||||
|
||||
A lightweight client for reporting AI coding task-completion telemetry
|
||||
and querying crowd-sourced predictions from a Mosaic Stack Telemetry server.
|
||||
"""
|
||||
|
||||
from mosaicstack_telemetry.client import TelemetryClient
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.event_builder import EventBuilder
|
||||
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||
from mosaicstack_telemetry.queue import EventQueue
|
||||
from mosaicstack_telemetry.types.common import (
|
||||
BatchEventRequest,
|
||||
BatchEventResponse,
|
||||
BatchEventResult,
|
||||
TelemetryError,
|
||||
)
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
RepoSizeCategory,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
CorrectionFactors,
|
||||
PredictionData,
|
||||
PredictionMetadata,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
QualityPrediction,
|
||||
TokenDistribution,
|
||||
)
|
||||
|
||||
# Package version. NOTE(review): the same version string is embedded in
# TelemetryConfig's default user_agent ("mosaicstack-telemetry-python/0.1.0")
# in config.py — keep the two in sync when bumping.
__version__ = "0.1.0"

# Explicit public API re-exported at package level.
__all__ = [
    # Client
    "TelemetryClient",
    "TelemetryConfig",
    "EventBuilder",
    "EventQueue",
    "PredictionCache",
    # Types - Events
    "TaskCompletionEvent",
    "TaskType",
    "Complexity",
    "Harness",
    "Provider",
    "QualityGate",
    "Outcome",
    "RepoSizeCategory",
    # Types - Predictions
    "PredictionQuery",
    "PredictionResponse",
    "PredictionData",
    "PredictionMetadata",
    "TokenDistribution",
    "CorrectionFactors",
    "QualityPrediction",
    # Types - Common
    "BatchEventRequest",
    "BatchEventResponse",
    "BatchEventResult",
    "TelemetryError",
]
|
||||
109
src/mosaicstack_telemetry/_async.py
Normal file
109
src/mosaicstack_telemetry/_async.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""Asynchronous submitter using asyncio.Task for periodic flushing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
|
||||
from mosaicstack_telemetry.submitter import submit_batch_async
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.queue import EventQueue
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
class AsyncSubmitter:
    """Periodic event submitter using asyncio.Task and httpx.AsyncClient.

    Drains the shared EventQueue in batches on a fixed interval and posts
    them via submit_batch_async. Failed batches are put back at the front
    of the queue for the next cycle.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        # Created on start(), closed on stop(); None while not running.
        self._client: httpx.AsyncClient | None = None
        # Background flush loop; None while not running.
        self._task: asyncio.Task[None] | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    async def start(self) -> None:
        """Start the periodic flush loop. Idempotent: a second call is a no-op."""
        if self._running:
            return
        self._client = httpx.AsyncClient()
        self._running = True
        self._task = asyncio.create_task(self._loop())
        logger.info(
            "Async submitter started (interval=%.1fs)",
            self._config.submit_interval_seconds,
        )

    async def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush.

        The loop task is cancelled *before* the final flush so the two
        cannot drain the queue concurrently; the client is closed last.
        """
        if not self._running:
            return
        self._running = False
        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass  # expected on cancellation
            self._task = None

        # Final flush
        await self.flush()

        if self._client is not None:
            await self._client.aclose()
            self._client = None
        logger.info("Async submitter stopped")

    async def flush(self) -> None:
        """Flush all queued events immediately.

        Drains the queue batch-by-batch; stops early (and re-queues the
        in-flight batch) as soon as one submission fails outright.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break

            client = self._client
            if client is None:
                # flush() may be called before start()/after stop(); use a
                # throwaway client so the batch is still delivered.
                client = httpx.AsyncClient()
                try:
                    result = await submit_batch_async(client, self._config, events)
                finally:
                    await client.aclose()
            else:
                result = await submit_batch_async(client, self._config, events)

            if result is None:
                # Server unreachable after retries: return events to the
                # front of the queue and give up until the next interval.
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break

            if result.rejected > 0:
                # Partial rejection: rejected events are NOT re-queued.
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    async def _loop(self) -> None:
        """Periodic flush loop: sleep one interval, then flush, until stopped."""
        while self._running:
            try:
                await asyncio.sleep(self._config.submit_interval_seconds)
                # Re-check: stop() may have flipped the flag during the sleep.
                if self._running:
                    await self.flush()
            except asyncio.CancelledError:
                break
            except Exception:
                # Never let a transient error kill the loop.
                logger.exception("Error during periodic async flush")
|
||||
118
src/mosaicstack_telemetry/_sync.py
Normal file
118
src/mosaicstack_telemetry/_sync.py
Normal file
@@ -0,0 +1,118 @@
|
||||
"""Synchronous submitter using threading.Timer for periodic flushing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
|
||||
from mosaicstack_telemetry.submitter import submit_batch_sync
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.queue import EventQueue
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
class SyncSubmitter:
    """Periodic event submitter using threading.Timer and httpx.Client.

    Drains the shared EventQueue in batches on a fixed interval and posts
    them via submit_batch_sync. Failed batches are put back at the front
    of the queue for the next cycle.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        # Created on start(), closed on stop(); None while not running.
        self._client: httpx.Client | None = None
        # One-shot timer, re-armed after every tick; None while not running.
        self._timer: threading.Timer | None = None
        self._running = False
        # Guards _client/_timer/_running across callers and the timer thread.
        self._lock = threading.Lock()

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    def start(self) -> None:
        """Start the periodic flush loop. Idempotent: a second call is a no-op."""
        with self._lock:
            if self._running:
                return
            self._client = httpx.Client()
            self._running = True
            self._schedule_next()
        logger.info("Sync submitter started (interval=%.1fs)", self._config.submit_interval_seconds)

    def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush."""
        with self._lock:
            if not self._running:
                return
            self._running = False
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None

        # Final flush outside the lock to avoid deadlock
        self.flush()

        with self._lock:
            if self._client is not None:
                self._client.close()
                self._client = None
        logger.info("Sync submitter stopped")

    def flush(self) -> None:
        """Flush all queued events immediately.

        Drains the queue batch-by-batch; stops early (and re-queues the
        in-flight batch) as soon as one submission fails outright.

        NOTE(review): the HTTP submission below runs while holding
        self._lock, so start()/stop() from another thread can block behind
        a slow request (including submit_batch_sync's retry backoff) —
        consider narrowing the critical section to just reading _client.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break

            with self._lock:
                client = self._client
                if client is None:
                    # Create a temporary client for the flush
                    # (flush() may be called before start()/after stop()).
                    client = httpx.Client()
                    try:
                        result = submit_batch_sync(client, self._config, events)
                    finally:
                        client.close()
                else:
                    result = submit_batch_sync(client, self._config, events)

            if result is None:
                # Submission failed, re-queue events
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break

            if result.rejected > 0:
                # Partial rejection: rejected events are NOT re-queued.
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    def _schedule_next(self) -> None:
        """Schedule the next flush iteration (caller must hold or own _lock state)."""
        if not self._running:
            return
        self._timer = threading.Timer(self._config.submit_interval_seconds, self._tick)
        # Daemon timer so a forgotten stop() never blocks interpreter exit.
        self._timer.daemon = True
        self._timer.start()

    def _tick(self) -> None:
        """Timer callback: flush and reschedule."""
        if not self._running:
            return
        try:
            self.flush()
        except Exception:
            # Never let a transient error break the re-arm chain.
            logger.exception("Error during periodic flush")
        finally:
            with self._lock:
                if self._running:
                    self._schedule_next()
|
||||
196
src/mosaicstack_telemetry/client.py
Normal file
196
src/mosaicstack_telemetry/client.py
Normal file
@@ -0,0 +1,196 @@
|
||||
"""Main TelemetryClient — the public entry point for the SDK."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from mosaicstack_telemetry._async import AsyncSubmitter
|
||||
from mosaicstack_telemetry._sync import SyncSubmitter
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||
from mosaicstack_telemetry.queue import EventQueue
|
||||
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
)
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
class TelemetryClient:
    """Main client for Mosaic Stack Telemetry.

    Supports both sync and async usage patterns:

    **Sync (threading-based):**

        client = TelemetryClient(config)
        client.start()
        client.track(event)
        client.stop()

    **Async (asyncio-based):**

        client = TelemetryClient(config)
        await client.start_async()
        client.track(event)
        await client.stop_async()

    **Context managers:**

        with TelemetryClient(config) as client:
            client.track(event)

        async with TelemetryClient(config) as client:
            client.track(event)
    """

    def __init__(self, config: TelemetryConfig) -> None:
        """Create a client from *config*.

        Validation errors are logged rather than raised: telemetry must
        never take down the host application.
        """
        errors = config.validate()
        if errors and config.enabled:
            logger.warning("Telemetry config validation errors: %s", "; ".join(errors))

        self._config = config
        self._queue = EventQueue(max_size=config.max_queue_size)
        self._prediction_cache = PredictionCache(ttl_seconds=config.prediction_cache_ttl_seconds)
        self._sync_submitter: SyncSubmitter | None = None
        self._async_submitter: AsyncSubmitter | None = None

    def start(self) -> None:
        """Start background submission using threading.Timer loop."""
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping start")
            return
        self._sync_submitter = SyncSubmitter(self._config, self._queue)
        self._sync_submitter.start()

    async def start_async(self) -> None:
        """Start with asyncio.Task for async contexts."""
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping async start")
            return
        self._async_submitter = AsyncSubmitter(self._config, self._queue)
        await self._async_submitter.start()

    def stop(self) -> None:
        """Stop background submission, flush remaining events synchronously."""
        if self._sync_submitter is not None:
            self._sync_submitter.stop()
            self._sync_submitter = None

    async def stop_async(self) -> None:
        """Async stop and flush."""
        if self._async_submitter is not None:
            await self._async_submitter.stop()
            self._async_submitter = None

    def track(self, event: TaskCompletionEvent) -> None:
        """Queue an event for submission. Always synchronous. Never blocks or throws.

        If telemetry is disabled, the event is silently dropped.
        """
        try:
            if not self._config.enabled:
                return
            self._queue.put(event)
            logger.debug("Event queued: %s", event.event_id)
        except Exception:
            # Defensive: track() is called from host-app code paths and must
            # never propagate an error.
            logger.exception("Unexpected error in track()")

    def get_prediction(self, query: PredictionQuery) -> PredictionResponse | None:
        """Get a cached prediction. Returns None if not cached or expired."""
        return self._prediction_cache.get(query)

    def _prediction_request(self, queries: list[PredictionQuery]) -> tuple[str, dict[str, Any]]:
        """Build the (url, json_body) pair for a batch prediction request.

        Shared by the sync and async refresh paths so the wire format is
        defined in exactly one place.
        """
        url = f"{self._config.server_url}/v1/predictions/batch"
        body = {"queries": [q.model_dump(mode="json") for q in queries]}
        return url, body

    def _handle_prediction_response(
        self,
        queries: list[PredictionQuery],
        response: httpx.Response,
    ) -> None:
        """Cache predictions from a 200 response; log any other status.

        Results are matched to *queries* positionally, in server order.
        Validation errors propagate to the caller's except block.
        """
        if response.status_code == 200:
            results = response.json().get("results", [])
            for query, result_data in zip(queries, results):
                pred = PredictionResponse.model_validate(result_data)
                self._prediction_cache.put(query, pred)
            logger.debug("Refreshed %d predictions", len(results))
        else:
            logger.warning(
                "Prediction refresh failed with status %d",
                response.status_code,
            )

    def refresh_predictions_sync(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server synchronously."""
        if not queries:
            return
        url, body = self._prediction_request(queries)

        try:
            with httpx.Client() as client:
                response = client.post(
                    url,
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
                self._handle_prediction_response(queries, response)
        except Exception:
            logger.exception("Error refreshing predictions")

    async def refresh_predictions(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server asynchronously."""
        if not queries:
            return
        url, body = self._prediction_request(queries)

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
                self._handle_prediction_response(queries, response)
        except Exception:
            logger.exception("Error refreshing predictions")

    @property
    def queue_size(self) -> int:
        """Number of events currently in the queue."""
        return self._queue.size

    @property
    def is_running(self) -> bool:
        """Whether background submission is active."""
        if self._sync_submitter is not None:
            return self._sync_submitter.is_running
        if self._async_submitter is not None:
            return self._async_submitter.is_running
        return False

    # Sync context manager
    def __enter__(self) -> TelemetryClient:
        self.start()
        return self

    def __exit__(self, *exc: Any) -> None:
        self.stop()

    # Async context manager
    async def __aenter__(self) -> TelemetryClient:
        await self.start_async()
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.stop_async()
|
||||
91
src/mosaicstack_telemetry/config.py
Normal file
91
src/mosaicstack_telemetry/config.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Telemetry client configuration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
_HEX_64_RE = re.compile(r"^[0-9a-fA-F]{64}$")
|
||||
_UUID_RE = re.compile(
|
||||
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
class TelemetryConfig:
    """Configuration for the telemetry client.

    Values can be provided directly or loaded from environment variables:
    - MOSAIC_TELEMETRY_ENABLED -> enabled
    - MOSAIC_TELEMETRY_SERVER_URL -> server_url
    - MOSAIC_TELEMETRY_API_KEY -> api_key
    - MOSAIC_TELEMETRY_INSTANCE_ID -> instance_id
    """

    server_url: str = ""
    api_key: str = ""
    instance_id: str = ""
    enabled: bool = True
    submit_interval_seconds: float = 300.0
    max_queue_size: int = 1000
    batch_size: int = 100
    request_timeout_seconds: float = 10.0
    prediction_cache_ttl_seconds: float = 21600.0
    dry_run: bool = False
    max_retries: int = 3
    user_agent: str = "mosaicstack-telemetry-python/0.1.0"

    def __post_init__(self) -> None:
        """Apply environment-variable overrides, then normalize the URL."""
        # The enabled flag, when present in the environment, overrides any
        # explicitly passed value.
        flag = os.environ.get("MOSAIC_TELEMETRY_ENABLED")
        if flag is not None:
            self.enabled = flag.lower() in ("1", "true", "yes")

        # String settings fall back to the environment only when unset.
        for attr, var in (
            ("server_url", "MOSAIC_TELEMETRY_SERVER_URL"),
            ("api_key", "MOSAIC_TELEMETRY_API_KEY"),
            ("instance_id", "MOSAIC_TELEMETRY_INSTANCE_ID"),
        ):
            env_value = os.environ.get(var)
            if env_value and not getattr(self, attr):
                setattr(self, attr, env_value)

        # Normalize: drop trailing slashes so path joins stay predictable.
        self.server_url = self.server_url.rstrip("/")

    def validate(self) -> list[str]:
        """Validate configuration and return list of errors (empty if valid)."""
        problems: list[str] = []

        def _require(present: bool, missing_msg: str, valid: bool, invalid_msg: str) -> None:
            # Report "missing" first; only check the format when a value exists.
            if not present:
                problems.append(missing_msg)
            elif not valid:
                problems.append(invalid_msg)

        _require(
            bool(self.server_url),
            "server_url is required",
            self.server_url.startswith(("http://", "https://")),
            "server_url must start with http:// or https://",
        )
        _require(
            bool(self.api_key),
            "api_key is required",
            _HEX_64_RE.match(self.api_key) is not None,
            "api_key must be a 64-character hex string",
        )
        _require(
            bool(self.instance_id),
            "instance_id is required",
            _UUID_RE.match(self.instance_id) is not None,
            "instance_id must be a valid UUID string",
        )

        for value, message in (
            (self.submit_interval_seconds, "submit_interval_seconds must be positive"),
            (self.max_queue_size, "max_queue_size must be positive"),
        ):
            if value <= 0:
                problems.append(message)

        if not 1 <= self.batch_size <= 100:
            problems.append("batch_size must be between 1 and 100")

        if self.request_timeout_seconds <= 0:
            problems.append("request_timeout_seconds must be positive")

        return problems
|
||||
207
src/mosaicstack_telemetry/event_builder.py
Normal file
207
src/mosaicstack_telemetry/event_builder.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Convenience builder for constructing TaskCompletionEvent instances."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
RepoSizeCategory,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
|
||||
|
||||
class EventBuilder:
    """Fluent builder for TaskCompletionEvent.

    Provides a convenient way to construct events with sensible defaults
    and a chainable API. Every setter returns ``self``. Defaults are
    conservative: zero counts/costs, ``Outcome.FAILURE``, and
    ``UNKNOWN``/``MEDIUM`` enum values.

    Example::

        event = (
            EventBuilder(instance_id="...")
            .task_type(TaskType.IMPLEMENTATION)
            .model("claude-sonnet-4-20250514")
            .provider(Provider.ANTHROPIC)
            .harness_type(Harness.CLAUDE_CODE)
            .complexity_level(Complexity.MEDIUM)
            .outcome_value(Outcome.SUCCESS)
            .duration_ms(45000)
            .tokens(estimated_in=1000, estimated_out=500, actual_in=1100, actual_out=480)
            .cost(estimated=50000, actual=48000)
            .quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST])
            .context(compactions=0, rotations=0, utilization=0.3)
            .build()
        )
    """

    def __init__(self, instance_id: str | UUID) -> None:
        # Accept either form; normalize to UUID (raises ValueError if invalid).
        self._instance_id = UUID(str(instance_id))
        self._event_id: UUID = uuid4()
        self._timestamp: datetime = datetime.now(timezone.utc)
        self._task_duration_ms: int = 0
        self._task_type: TaskType = TaskType.UNKNOWN
        self._complexity: Complexity = Complexity.MEDIUM
        self._harness: Harness = Harness.UNKNOWN
        self._model: str = "unknown"
        self._provider: Provider = Provider.UNKNOWN
        self._estimated_input_tokens: int = 0
        self._estimated_output_tokens: int = 0
        self._actual_input_tokens: int = 0
        self._actual_output_tokens: int = 0
        self._estimated_cost_usd_micros: int = 0
        self._actual_cost_usd_micros: int = 0
        self._quality_gate_passed: bool = False
        self._quality_gates_run: list[QualityGate] = []
        self._quality_gates_failed: list[QualityGate] = []
        self._context_compactions: int = 0
        self._context_rotations: int = 0
        self._context_utilization_final: float = 0.0
        self._outcome: Outcome = Outcome.FAILURE
        self._retry_count: int = 0
        self._language: str | None = None
        self._repo_size_category: RepoSizeCategory | None = None

    def event_id(self, value: str | UUID) -> EventBuilder:
        """Set a specific event ID (default: auto-generated UUID)."""
        self._event_id = UUID(str(value))
        return self

    def timestamp(self, value: datetime) -> EventBuilder:
        """Set the event timestamp (default: now UTC)."""
        self._timestamp = value
        return self

    def task_type(self, value: TaskType) -> EventBuilder:
        """Set the task type."""
        self._task_type = value
        return self

    def complexity_level(self, value: Complexity) -> EventBuilder:
        """Set the complexity level."""
        self._complexity = value
        return self

    def harness_type(self, value: Harness) -> EventBuilder:
        """Set the harness type."""
        self._harness = value
        return self

    def model(self, value: str) -> EventBuilder:
        """Set the model name."""
        self._model = value
        return self

    def provider(self, value: Provider) -> EventBuilder:
        """Set the provider."""
        self._provider = value
        return self

    def duration_ms(self, value: int) -> EventBuilder:
        """Set the task duration in milliseconds."""
        self._task_duration_ms = value
        return self

    def tokens(
        self,
        *,
        estimated_in: int = 0,
        estimated_out: int = 0,
        actual_in: int = 0,
        actual_out: int = 0,
    ) -> EventBuilder:
        """Set token counts."""
        self._estimated_input_tokens = estimated_in
        self._estimated_output_tokens = estimated_out
        self._actual_input_tokens = actual_in
        self._actual_output_tokens = actual_out
        return self

    def cost(self, *, estimated: int = 0, actual: int = 0) -> EventBuilder:
        """Set cost in USD micros."""
        self._estimated_cost_usd_micros = estimated
        self._actual_cost_usd_micros = actual
        return self

    def quality(
        self,
        *,
        passed: bool,
        gates_run: list[QualityGate] | None = None,
        gates_failed: list[QualityGate] | None = None,
    ) -> EventBuilder:
        """Set quality gate results."""
        self._quality_gate_passed = passed
        self._quality_gates_run = gates_run or []
        self._quality_gates_failed = gates_failed or []
        return self

    def context(
        self,
        *,
        compactions: int = 0,
        rotations: int = 0,
        utilization: float = 0.0,
    ) -> EventBuilder:
        """Set context window metrics."""
        self._context_compactions = compactions
        self._context_rotations = rotations
        self._context_utilization_final = utilization
        return self

    def outcome_value(self, value: Outcome) -> EventBuilder:
        """Set the task outcome."""
        self._outcome = value
        return self

    def retry_count(self, value: int) -> EventBuilder:
        """Set the retry count."""
        self._retry_count = value
        return self

    def language(self, value: str | None) -> EventBuilder:
        """Set the programming language."""
        self._language = value
        return self

    def repo_size(self, value: RepoSizeCategory | None) -> EventBuilder:
        """Set the repository size category."""
        self._repo_size_category = value
        return self

    def build(self) -> TaskCompletionEvent:
        """Build and return the TaskCompletionEvent."""
        return TaskCompletionEvent(
            instance_id=self._instance_id,
            event_id=self._event_id,
            timestamp=self._timestamp,
            task_duration_ms=self._task_duration_ms,
            task_type=self._task_type,
            complexity=self._complexity,
            harness=self._harness,
            model=self._model,
            provider=self._provider,
            estimated_input_tokens=self._estimated_input_tokens,
            estimated_output_tokens=self._estimated_output_tokens,
            actual_input_tokens=self._actual_input_tokens,
            actual_output_tokens=self._actual_output_tokens,
            estimated_cost_usd_micros=self._estimated_cost_usd_micros,
            actual_cost_usd_micros=self._actual_cost_usd_micros,
            quality_gate_passed=self._quality_gate_passed,
            quality_gates_run=self._quality_gates_run,
            quality_gates_failed=self._quality_gates_failed,
            context_compactions=self._context_compactions,
            context_rotations=self._context_rotations,
            context_utilization_final=self._context_utilization_final,
            outcome=self._outcome,
            retry_count=self._retry_count,
            language=self._language,
            repo_size_category=self._repo_size_category,
        )
|
||||
56
src/mosaicstack_telemetry/prediction_cache.py
Normal file
56
src/mosaicstack_telemetry/prediction_cache.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Thread-safe prediction cache with TTL."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
|
||||
from mosaicstack_telemetry.types.predictions import PredictionQuery, PredictionResponse
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
def _cache_key(query: PredictionQuery) -> str:
    """Build the deterministic cache key for *query* (colon-joined fields)."""
    parts = (
        query.task_type.value,
        query.model,
        query.provider.value,
        query.complexity.value,
    )
    return ":".join(parts)
|
||||
|
||||
|
||||
class PredictionCache:
    """Thread-safe dict-based cache with TTL for prediction responses.

    Entries are expired lazily: an expired entry is removed only when a
    lookup touches it, so ``size`` may count stale entries.
    """

    def __init__(self, ttl_seconds: float = 21600.0) -> None:
        self._ttl = ttl_seconds
        self._store: dict[str, tuple[PredictionResponse, float]] = {}
        self._lock = threading.Lock()

    def get(self, query: PredictionQuery) -> PredictionResponse | None:
        """Return the cached prediction for *query*, or None if absent/expired."""
        key = _cache_key(query)
        with self._lock:
            cached = self._store.get(key)
            if cached is None:
                return None
            response, deadline = cached
            if time.monotonic() <= deadline:
                return response
            # Expired — evict on the way out.
            del self._store[key]
            return None

    def put(self, query: PredictionQuery, response: PredictionResponse) -> None:
        """Store *response* for *query*, valid for the configured TTL."""
        entry = (response, time.monotonic() + self._ttl)
        with self._lock:
            self._store[_cache_key(query)] = entry

    def clear(self) -> None:
        """Invalidate all cached predictions."""
        with self._lock:
            self._store.clear()

    @property
    def size(self) -> int:
        """Number of entries in the cache (including possibly expired)."""
        with self._lock:
            return len(self._store)
|
||||
70
src/mosaicstack_telemetry/queue.py
Normal file
70
src/mosaicstack_telemetry/queue.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Thread-safe bounded event queue."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from collections import deque
|
||||
|
||||
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
class EventQueue:
    """Thread-safe bounded FIFO queue for telemetry events.

    When the queue is full, the oldest events are evicted (FIFO eviction)
    to make room for new ones.
    """

    def __init__(self, max_size: int = 1000) -> None:
        self._max_size = max_size
        # maxlen makes append() drop the leftmost (oldest) item when full.
        self._deque: deque[TaskCompletionEvent] = deque(maxlen=max_size)
        self._lock = threading.Lock()

    def put(self, event: TaskCompletionEvent) -> None:
        """Add an event to the queue. Never blocks.

        If the queue is full, the oldest event is silently evicted.
        """
        with self._lock:
            if len(self._deque) >= self._max_size:
                logger.warning(
                    "Event queue full (%d items), evicting oldest event",
                    self._max_size,
                )
            self._deque.append(event)

    def drain(self, max_items: int) -> list[TaskCompletionEvent]:
        """Remove and return up to *max_items* events from the front."""
        with self._lock:
            take = min(max_items, len(self._deque))
            return [self._deque.popleft() for _ in range(take)]

    def put_back(self, events: list[TaskCompletionEvent]) -> None:
        """Put events back at the front of the queue (for retry scenarios).

        Events are added to the left (front) so they get drained first next
        time. If adding them would exceed max_size, only as many as fit are
        re-added.
        """
        with self._lock:
            room = self._max_size - len(self._deque)
            # extendleft reverses its input, so reverse first to keep order.
            self._deque.extendleft(reversed(events[:room]))

    @property
    def size(self) -> int:
        """Current number of events in the queue."""
        with self._lock:
            return len(self._deque)

    @property
    def is_empty(self) -> bool:
        """Whether the queue is empty."""
        with self._lock:
            return len(self._deque) == 0
|
||||
205
src/mosaicstack_telemetry/submitter.py
Normal file
205
src/mosaicstack_telemetry/submitter.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Batch submission logic with retry and backoff."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import httpx
|
||||
|
||||
from mosaicstack_telemetry.types.common import BatchEventRequest, BatchEventResponse
|
||||
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
|
||||
logger = logging.getLogger("mosaicstack_telemetry")
|
||||
|
||||
|
||||
def _backoff_delay(attempt: int, base: float = 1.0, maximum: float = 60.0) -> float:
    """Return an exponential-backoff delay with random jitter.

    The base delay doubles with each *attempt* and is capped at *maximum*;
    up to half of the capped delay is then added as random jitter to avoid
    synchronized retries.
    """
    capped = min(base * 2**attempt, maximum)
    return capped + random.uniform(0.0, capped / 2)  # noqa: S311
|
||||
|
||||
|
||||
def submit_batch_sync(
    client: httpx.Client,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events synchronously with retry logic.

    Retries on timeouts, network errors, 429 rate limits, and unexpected
    status codes using exponential backoff with jitter. A 403 aborts
    immediately since retrying with bad credentials cannot succeed.

    Args:
        client: Shared httpx client used for the POST.
        config: Telemetry configuration (URL, credentials, retry policy).
        events: Events to submit as a single batch.

    Returns:
        The BatchEventResponse on success, or None if all retries failed
        or authentication failed.
    """
    url = f"{config.server_url}/v1/events/batch"

    if config.dry_run:
        # Dry-run short-circuits before request validation or network I/O
        # and pretends every event was accepted.
        logger.info(
            "[DRY RUN] Would submit batch of %d events to %s",
            len(events),
            url,
        )
        return BatchEventResponse(accepted=len(events), rejected=0, results=[])

    request_body = BatchEventRequest(events=events)

    for attempt in range(config.max_retries + 1):
        try:
            response = client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )

            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())

            if response.status_code == 429:
                retry_after = response.headers.get("Retry-After")
                try:
                    # Retry-After may be an HTTP-date (RFC 9110) rather than
                    # seconds; fall back to computed backoff instead of
                    # letting float() raise an uncaught ValueError.
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                time.sleep(delay)
                continue

            if response.status_code == 403:
                # Auth errors are not retryable; bail out immediately.
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None

            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )

        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )

        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            time.sleep(delay)

    logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events))
    return None
|
||||
|
||||
|
||||
async def submit_batch_async(
    client: httpx.AsyncClient,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events asynchronously with retry logic.

    Mirrors submit_batch_sync: retries timeouts, network errors, 429s and
    unexpected statuses with exponential backoff; aborts on 403.

    Returns:
        The BatchEventResponse on success, or None if all retries failed
        or authentication failed.
    """
    import asyncio

    url = f"{config.server_url}/v1/events/batch"

    if config.dry_run:
        # Dry-run short-circuits before request validation or network I/O
        # and pretends every event was accepted.
        logger.info(
            "[DRY RUN] Would submit batch of %d events to %s",
            len(events),
            url,
        )
        return BatchEventResponse(accepted=len(events), rejected=0, results=[])

    request_body = BatchEventRequest(events=events)

    for attempt in range(config.max_retries + 1):
        try:
            response = await client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )

            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())

            if response.status_code == 429:
                retry_after = response.headers.get("Retry-After")
                try:
                    # Retry-After may be an HTTP-date (RFC 9110) rather than
                    # seconds; fall back to computed backoff instead of
                    # letting float() raise an uncaught ValueError.
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                await asyncio.sleep(delay)
                continue

            if response.status_code == 403:
                # Auth errors are not retryable; bail out immediately.
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None

            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )

        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )

        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            await asyncio.sleep(delay)

    logger.error("All %d attempts failed for batch of %d events", config.max_retries + 1, len(events))
    return None
|
||||
49
src/mosaicstack_telemetry/types/__init__.py
Normal file
49
src/mosaicstack_telemetry/types/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""Mosaic Stack Telemetry type definitions."""
|
||||
|
||||
from mosaicstack_telemetry.types.common import (
|
||||
BatchEventRequest,
|
||||
BatchEventResponse,
|
||||
BatchEventResult,
|
||||
TelemetryError,
|
||||
)
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
RepoSizeCategory,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
CorrectionFactors,
|
||||
PredictionData,
|
||||
PredictionMetadata,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
QualityPrediction,
|
||||
TokenDistribution,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"BatchEventRequest",
|
||||
"BatchEventResponse",
|
||||
"BatchEventResult",
|
||||
"Complexity",
|
||||
"CorrectionFactors",
|
||||
"Harness",
|
||||
"Outcome",
|
||||
"PredictionData",
|
||||
"PredictionMetadata",
|
||||
"PredictionQuery",
|
||||
"PredictionResponse",
|
||||
"Provider",
|
||||
"QualityGate",
|
||||
"QualityPrediction",
|
||||
"RepoSizeCategory",
|
||||
"TaskCompletionEvent",
|
||||
"TaskType",
|
||||
"TelemetryError",
|
||||
"TokenDistribution",
|
||||
]
|
||||
35
src/mosaicstack_telemetry/types/common.py
Normal file
35
src/mosaicstack_telemetry/types/common.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""Common types shared across the SDK."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||
|
||||
|
||||
class TelemetryError(Exception):
    """Base exception for telemetry client errors.

    All SDK-raised exceptions should derive from this class so callers can
    catch a single root type.
    """
|
||||
|
||||
|
||||
class BatchEventRequest(BaseModel):
    """Request body for batch event submission."""

    # A batch must contain between 1 and 100 events; validated client-side
    # so oversized/empty batches fail fast before any network I/O.
    events: list[TaskCompletionEvent] = Field(min_length=1, max_length=100)
|
||||
|
||||
|
||||
class BatchEventResult(BaseModel):
    """Result for a single event in a batch submission."""

    # ID of the event this result refers to.
    event_id: UUID
    status: str  # "accepted" or "rejected"
    # Populated only when the event was rejected.
    error: str | None = None
|
||||
|
||||
|
||||
class BatchEventResponse(BaseModel):
    """Response from the batch event submission endpoint."""

    # Counts of events accepted/rejected in this batch.
    accepted: int
    rejected: int
    # Per-event outcomes; may be empty (e.g. in client dry-run responses).
    results: list[BatchEventResult]
|
||||
122
src/mosaicstack_telemetry/types/events.py
Normal file
122
src/mosaicstack_telemetry/types/events.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Task completion event types and enums."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class TaskType(str, Enum):
    """Type of task being performed.

    Inherits ``str`` so members serialize directly as their wire values.
    """

    PLANNING = "planning"
    IMPLEMENTATION = "implementation"
    CODE_REVIEW = "code_review"
    TESTING = "testing"
    DEBUGGING = "debugging"
    REFACTORING = "refactoring"
    DOCUMENTATION = "documentation"
    CONFIGURATION = "configuration"
    SECURITY_AUDIT = "security_audit"
    # Catch-all for tasks that fit no other category.
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class Complexity(str, Enum):
    """Task complexity level, from LOW to CRITICAL."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
||||
|
||||
|
||||
class Harness(str, Enum):
    """AI coding harness used to run the task."""

    CLAUDE_CODE = "claude_code"
    OPENCODE = "opencode"
    KILO_CODE = "kilo_code"
    AIDER = "aider"
    # Direct API usage without a harness.
    API_DIRECT = "api_direct"
    OLLAMA_LOCAL = "ollama_local"
    CUSTOM = "custom"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class Provider(str, Enum):
    """AI model provider that served the task."""

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    OPENROUTER = "openrouter"
    OLLAMA = "ollama"
    GOOGLE = "google"
    MISTRAL = "mistral"
    CUSTOM = "custom"
    UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class QualityGate(str, Enum):
    """Quality gate type that can be run against a task's output."""

    BUILD = "build"
    LINT = "lint"
    TEST = "test"
    COVERAGE = "coverage"
    TYPECHECK = "typecheck"
    SECURITY = "security"
|
||||
|
||||
|
||||
class Outcome(str, Enum):
    """Final outcome of a task."""

    SUCCESS = "success"
    FAILURE = "failure"
    # Some but not all of the task was completed.
    PARTIAL = "partial"
    TIMEOUT = "timeout"
|
||||
|
||||
|
||||
class RepoSizeCategory(str, Enum):
    """Coarse repository size bucket, from TINY to HUGE.

    NOTE(review): bucket thresholds are not defined here — presumably
    assigned by the caller; confirm against the reporting side.
    """

    TINY = "tiny"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    HUGE = "huge"
|
||||
|
||||
|
||||
class TaskCompletionEvent(BaseModel):
    """A single task-completion telemetry event."""

    # Identity: which installation produced the event, plus a unique event id.
    instance_id: UUID
    event_id: UUID = Field(default_factory=uuid4)
    schema_version: str = "1.0"
    # Timezone-aware UTC timestamp; avoids naive-datetime ambiguity.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )
    # Wall-clock duration, capped at 24h (86_400_000 ms).
    task_duration_ms: int = Field(ge=0, le=86_400_000)
    # Task classification.
    task_type: TaskType
    complexity: Complexity
    harness: Harness
    model: str = Field(min_length=1, max_length=100)
    provider: Provider
    # Token usage: estimates vs. actuals, each capped at 10M tokens.
    estimated_input_tokens: int = Field(ge=0, le=10_000_000)
    estimated_output_tokens: int = Field(ge=0, le=10_000_000)
    actual_input_tokens: int = Field(ge=0, le=10_000_000)
    actual_output_tokens: int = Field(ge=0, le=10_000_000)
    # Costs in micro-USD (1_000_000 micros == $1); cap is $100 per task.
    estimated_cost_usd_micros: int = Field(ge=0, le=100_000_000)
    actual_cost_usd_micros: int = Field(ge=0, le=100_000_000)
    # Quality gates: overall pass flag plus which gates ran and which failed.
    quality_gate_passed: bool
    quality_gates_run: list[QualityGate] = Field(default_factory=list)
    quality_gates_failed: list[QualityGate] = Field(default_factory=list)
    # Context-window management counters and final utilization fraction.
    context_compactions: int = Field(ge=0, le=100)
    context_rotations: int = Field(ge=0, le=50)
    context_utilization_final: float = Field(ge=0.0, le=1.0)
    # Result.
    outcome: Outcome
    retry_count: int = Field(ge=0, le=20)
    # Optional environment descriptors.
    language: str | None = Field(default=None, max_length=30)
    repo_size_category: RepoSizeCategory | None = None
|
||||
72
src/mosaicstack_telemetry/types/predictions.py
Normal file
72
src/mosaicstack_telemetry/types/predictions.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Prediction request and response types."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType
|
||||
|
||||
|
||||
class TokenDistribution(BaseModel):
    """Token usage distribution percentiles (p10 through p90)."""

    p10: int
    p25: int
    # median == p50.
    median: int
    p75: int
    p90: int
|
||||
|
||||
|
||||
class CorrectionFactors(BaseModel):
    """Correction factors for estimated vs actual tokens.

    NOTE(review): presumably multiplied into estimates (actual ≈ estimate
    × factor) — confirm against the server's definition.
    """

    input: float
    output: float
|
||||
|
||||
|
||||
class QualityPrediction(BaseModel):
    """Quality gate prediction data (rates expressed as fractions 0-1)."""

    gate_pass_rate: float
    success_rate: float
|
||||
|
||||
|
||||
class PredictionData(BaseModel):
    """Full prediction data for a task type/model/provider/complexity combination."""

    input_tokens: TokenDistribution
    output_tokens: TokenDistribution
    # Percentile-name -> value maps (e.g. {"p10": ..., "median": ..., "p90": ...}).
    cost_usd_micros: dict[str, int]
    duration_ms: dict[str, int]
    correction_factors: CorrectionFactors
    quality: QualityPrediction
|
||||
|
||||
|
||||
class PredictionMetadata(BaseModel):
    """Metadata about how a prediction was generated."""

    # Number of telemetry samples backing the prediction.
    sample_size: int
    # 0 means an exact dimensional match; higher values mean broader fallbacks.
    fallback_level: int
    confidence: str  # "none", "low", "medium", "high"
    last_updated: datetime | None = None
    # Which query dimensions were actually matched, when the server reports it.
    dimensions_matched: dict[str, str | None] | None = None
    fallback_note: str | None = None
    # Whether this response was served from a cache.
    cache_hit: bool = False
|
||||
|
||||
|
||||
class PredictionResponse(BaseModel):
    """Response from the prediction endpoint.

    ``prediction`` is None when no data is available; ``metadata`` is
    always present and explains how (or why not) the prediction was built.
    """

    prediction: PredictionData | None = None
    metadata: PredictionMetadata
|
||||
|
||||
|
||||
class PredictionQuery(BaseModel):
    """Query parameters for a prediction request.

    The four fields together identify the prediction bucket being asked for.
    """

    task_type: TaskType
    model: str
    provider: Provider
    complexity: Complexity
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
183
tests/conftest.py
Normal file
183
tests/conftest.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""Shared test fixtures."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
CorrectionFactors,
|
||||
PredictionData,
|
||||
PredictionMetadata,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
QualityPrediction,
|
||||
TokenDistribution,
|
||||
)
|
||||
|
||||
# Shared test credentials: a 64-char key, a fixed instance UUID, and a
# dummy server base URL used by respx route mocks.
TEST_API_KEY = "a" * 64
TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
TEST_SERVER_URL = "https://telemetry.example.com"
|
||||
|
||||
|
||||
@pytest.fixture()
def config() -> TelemetryConfig:
    """Create a valid test configuration.

    Uses short intervals/timeouts and a single retry so tests stay fast.
    """
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        submit_interval_seconds=1.0,
        max_queue_size=100,
        batch_size=10,
        request_timeout_seconds=5.0,
        max_retries=1,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def dry_run_config() -> TelemetryConfig:
    """Create a test configuration with dry_run enabled.

    Identical to ``config`` except that dry_run short-circuits submissions.
    """
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        submit_interval_seconds=1.0,
        max_queue_size=100,
        batch_size=10,
        request_timeout_seconds=5.0,
        dry_run=True,
        max_retries=1,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def disabled_config() -> TelemetryConfig:
    """Create a disabled test configuration (telemetry off entirely)."""
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        enabled=False,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_instance_id() -> UUID:
    """Return a fixed instance UUID for testing (parsed from TEST_INSTANCE_ID)."""
    return UUID(TEST_INSTANCE_ID)
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_event(sample_instance_id: UUID) -> TaskCompletionEvent:
    """Create a sample task completion event.

    A fully-populated, successful Claude Code implementation task; values
    are arbitrary but within the model's field bounds.
    """
    return TaskCompletionEvent(
        instance_id=sample_instance_id,
        event_id=uuid4(),
        timestamp=datetime.now(timezone.utc),
        task_duration_ms=30000,
        task_type=TaskType.IMPLEMENTATION,
        complexity=Complexity.MEDIUM,
        harness=Harness.CLAUDE_CODE,
        model="claude-sonnet-4-20250514",
        provider=Provider.ANTHROPIC,
        estimated_input_tokens=5000,
        estimated_output_tokens=2000,
        actual_input_tokens=5200,
        actual_output_tokens=1800,
        estimated_cost_usd_micros=10000,
        actual_cost_usd_micros=9500,
        quality_gate_passed=True,
        quality_gates_run=[],
        quality_gates_failed=[],
        context_compactions=0,
        context_rotations=0,
        context_utilization_final=0.4,
        outcome=Outcome.SUCCESS,
        retry_count=0,
        language="python",
        repo_size_category=None,
    )
|
||||
|
||||
|
||||
def make_event(instance_id: UUID | None = None) -> TaskCompletionEvent:
    """Factory helper to create a sample event with optional overrides.

    Unlike the ``sample_event`` fixture, this is a plain function so tests
    can build as many distinct events as needed; each call gets a fresh
    event_id and timestamp.
    """
    return TaskCompletionEvent(
        instance_id=instance_id or UUID(TEST_INSTANCE_ID),
        event_id=uuid4(),
        timestamp=datetime.now(timezone.utc),
        task_duration_ms=15000,
        task_type=TaskType.DEBUGGING,
        complexity=Complexity.LOW,
        harness=Harness.AIDER,
        model="gpt-4o",
        provider=Provider.OPENAI,
        estimated_input_tokens=1000,
        estimated_output_tokens=500,
        actual_input_tokens=1100,
        actual_output_tokens=480,
        estimated_cost_usd_micros=3000,
        actual_cost_usd_micros=2800,
        quality_gate_passed=True,
        quality_gates_run=[],
        quality_gates_failed=[],
        context_compactions=0,
        context_rotations=0,
        context_utilization_final=0.2,
        outcome=Outcome.SUCCESS,
        retry_count=0,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_prediction_query() -> PredictionQuery:
    """Create a sample prediction query for a medium implementation task."""
    return PredictionQuery(
        task_type=TaskType.IMPLEMENTATION,
        model="claude-sonnet-4-20250514",
        provider=Provider.ANTHROPIC,
        complexity=Complexity.MEDIUM,
    )
|
||||
|
||||
|
||||
@pytest.fixture()
def sample_prediction_response() -> PredictionResponse:
    """Create a sample prediction response.

    A high-confidence, exact-match (fallback_level=0) prediction with a
    fully populated PredictionData payload.
    """
    return PredictionResponse(
        prediction=PredictionData(
            input_tokens=TokenDistribution(p10=1000, p25=2000, median=3000, p75=4000, p90=5000),
            output_tokens=TokenDistribution(p10=500, p25=1000, median=1500, p75=2000, p90=2500),
            cost_usd_micros={"p10": 1000, "median": 3000, "p90": 5000},
            duration_ms={"p10": 10000, "median": 30000, "p90": 60000},
            correction_factors=CorrectionFactors(input=1.05, output=0.95),
            quality=QualityPrediction(gate_pass_rate=0.85, success_rate=0.9),
        ),
        metadata=PredictionMetadata(
            sample_size=150,
            fallback_level=0,
            confidence="high",
        ),
    )
|
||||
|
||||
|
||||
def make_batch_response_json(events: list[TaskCompletionEvent]) -> dict:
    """Create a batch response JSON dict for a list of events (all accepted)."""
    accepted_results = [
        {"event_id": str(evt.event_id), "status": "accepted", "error": None}
        for evt in events
    ]
    return {
        "accepted": len(events),
        "rejected": 0,
        "results": accepted_results,
    }
|
||||
321
tests/test_client.py
Normal file
321
tests/test_client.py
Normal file
@@ -0,0 +1,321 @@
|
||||
"""Tests for TelemetryClient."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import respx
|
||||
|
||||
from mosaicstack_telemetry.client import TelemetryClient
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Outcome,
|
||||
Provider,
|
||||
TaskCompletionEvent,
|
||||
TaskType,
|
||||
)
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
PredictionMetadata,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
)
|
||||
from tests.conftest import (
|
||||
TEST_API_KEY,
|
||||
TEST_INSTANCE_ID,
|
||||
TEST_SERVER_URL,
|
||||
make_batch_response_json,
|
||||
make_event,
|
||||
)
|
||||
|
||||
|
||||
class TestTelemetryClientLifecycle:
    """Tests for client start/stop lifecycle."""

    def test_start_stop_sync(self, config: TelemetryConfig) -> None:
        """Client can start and stop synchronously."""
        client = TelemetryClient(config)
        client.start()
        assert client.is_running is True
        client.stop()
        assert client.is_running is False

    async def test_start_stop_async(self, config: TelemetryConfig) -> None:
        """Client can start and stop asynchronously."""
        client = TelemetryClient(config)
        await client.start_async()
        assert client.is_running is True
        await client.stop_async()
        assert client.is_running is False

    def test_start_disabled(self, disabled_config: TelemetryConfig) -> None:
        """Starting a disabled client is a no-op."""
        client = TelemetryClient(disabled_config)
        client.start()
        assert client.is_running is False
        # stop() on a never-started client must not raise.
        client.stop()

    async def test_start_async_disabled(self, disabled_config: TelemetryConfig) -> None:
        """Starting a disabled async client is a no-op."""
        client = TelemetryClient(disabled_config)
        await client.start_async()
        assert client.is_running is False
        # stop_async() on a never-started client must not raise.
        await client.stop_async()
|
||||
|
||||
|
||||
class TestTelemetryClientTrack:
    """Tests for the track() method."""

    def test_track_queues_event(self, config: TelemetryConfig) -> None:
        """track() adds event to the queue."""
        client = TelemetryClient(config)
        event = make_event()
        client.track(event)
        assert client.queue_size == 1

    def test_track_multiple_events(self, config: TelemetryConfig) -> None:
        """Multiple events can be tracked."""
        client = TelemetryClient(config)
        for _ in range(5):
            client.track(make_event())
        assert client.queue_size == 5

    def test_track_disabled_drops_event(self, disabled_config: TelemetryConfig) -> None:
        """track() silently drops events when disabled."""
        client = TelemetryClient(disabled_config)
        client.track(make_event())
        assert client.queue_size == 0

    def test_track_never_throws(self, config: TelemetryConfig) -> None:
        """track() should never raise exceptions."""
        client = TelemetryClient(config)
        # This should not raise even with invalid-ish usage
        event = make_event()
        client.track(event)
        assert client.queue_size == 1
|
||||
|
||||
|
||||
class TestTelemetryClientContextManager:
    """Tests for context manager support."""

    @respx.mock
    def test_sync_context_manager(self, config: TelemetryConfig) -> None:
        """Sync context manager starts and stops correctly."""
        # Mock any potential flush calls so __exit__'s final flush never
        # hits the real network.
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(
                202,
                json={"accepted": 0, "rejected": 0, "results": []},
            )
        )

        with TelemetryClient(config) as client:
            assert client.is_running is True
            client.track(make_event())

        assert client.is_running is False

    @respx.mock
    async def test_async_context_manager(self, config: TelemetryConfig) -> None:
        """Async context manager starts and stops correctly."""
        # Same network stub as the sync variant, for the async exit path.
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(
                202,
                json={"accepted": 0, "rejected": 0, "results": []},
            )
        )

        async with TelemetryClient(config) as client:
            assert client.is_running is True
            client.track(make_event())

        assert client.is_running is False
|
||||
|
||||
|
||||
class TestTelemetryClientPredictions:
    """Tests for prediction caching and retrieval."""

    def test_get_prediction_miss(self, config: TelemetryConfig) -> None:
        """get_prediction returns None on cache miss."""
        client = TelemetryClient(config)
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )
        assert client.get_prediction(query) is None

    def test_get_prediction_after_cache_populated(
        self, config: TelemetryConfig
    ) -> None:
        """get_prediction returns cached value."""
        client = TelemetryClient(config)
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )
        response = PredictionResponse(
            prediction=None,
            metadata=PredictionMetadata(
                sample_size=50,
                fallback_level=0,
                confidence="medium",
            ),
        )
        # Directly populate the cache (reaches into a private attribute to
        # avoid going through the network path).
        client._prediction_cache.put(query, response)

        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 50

    @respx.mock
    async def test_refresh_predictions_async(self, config: TelemetryConfig) -> None:
        """refresh_predictions fetches and caches predictions."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        # Server reply: one result per query, in query order.
        response_data = {
            "results": [
                {
                    "prediction": None,
                    "metadata": {
                        "sample_size": 75,
                        "fallback_level": 1,
                        "confidence": "medium",
                    },
                }
            ]
        }

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(200, json=response_data)
        )

        client = TelemetryClient(config)
        await client.refresh_predictions([query])

        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 75

    @respx.mock
    def test_refresh_predictions_sync(self, config: TelemetryConfig) -> None:
        """refresh_predictions_sync fetches and caches predictions."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        response_data = {
            "results": [
                {
                    "prediction": None,
                    "metadata": {
                        "sample_size": 60,
                        "fallback_level": 0,
                        "confidence": "low",
                    },
                }
            ]
        }

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(200, json=response_data)
        )

        client = TelemetryClient(config)
        client.refresh_predictions_sync([query])

        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 60

    @respx.mock
    async def test_refresh_predictions_server_error(
        self, config: TelemetryConfig
    ) -> None:
        """refresh_predictions handles server errors gracefully."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(500, text="Internal Server Error")
        )

        client = TelemetryClient(config)
        # Should not raise
        await client.refresh_predictions([query])
        # Cache should still be empty
        assert client.get_prediction(query) is None

    async def test_refresh_predictions_empty_list(
        self, config: TelemetryConfig
    ) -> None:
        """refresh_predictions with empty list is a no-op."""
        client = TelemetryClient(config)
        # No respx mock installed: an empty refresh must not hit the network.
        await client.refresh_predictions([])
|
||||
|
||||
|
||||
class TestTelemetryClientConfig:
    """Tests for configuration handling."""

    def test_config_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Environment variables override defaults."""
        monkeypatch.setenv("MOSAIC_TELEMETRY_ENABLED", "false")
        monkeypatch.setenv("MOSAIC_TELEMETRY_SERVER_URL", "https://env-server.com")
        monkeypatch.setenv("MOSAIC_TELEMETRY_API_KEY", "b" * 64)
        monkeypatch.setenv("MOSAIC_TELEMETRY_INSTANCE_ID", TEST_INSTANCE_ID)

        config = TelemetryConfig()
        assert config.enabled is False
        assert config.server_url == "https://env-server.com"
        assert config.api_key == "b" * 64
        assert config.instance_id == TEST_INSTANCE_ID

    def test_config_validation_errors(self) -> None:
        """Invalid config produces validation errors."""
        config = TelemetryConfig(
            server_url="",
            api_key="short",
            instance_id="not-a-uuid",
        )
        errors = config.validate()
        # At least one error per invalid field above.
        assert len(errors) >= 3

    def test_config_validation_success(self, config: TelemetryConfig) -> None:
        """Valid config produces no validation errors."""
        errors = config.validate()
        assert errors == []

    def test_config_strips_trailing_slash(self) -> None:
        """server_url trailing slashes are stripped."""
        config = TelemetryConfig(
            server_url="https://example.com/",
            api_key=TEST_API_KEY,
            instance_id=TEST_INSTANCE_ID,
        )
        assert config.server_url == "https://example.com"

    def test_explicit_values_override_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Explicit constructor values take priority over env vars."""
        monkeypatch.setenv("MOSAIC_TELEMETRY_SERVER_URL", "https://env-server.com")
        config = TelemetryConfig(
            server_url="https://explicit-server.com",
            api_key=TEST_API_KEY,
            instance_id=TEST_INSTANCE_ID,
        )
        assert config.server_url == "https://explicit-server.com"
|
||||
137
tests/test_event_builder.py
Normal file
137
tests/test_event_builder.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Tests for EventBuilder."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID
|
||||
|
||||
from mosaicstack_telemetry.event_builder import EventBuilder
|
||||
from mosaicstack_telemetry.types.events import (
|
||||
Complexity,
|
||||
Harness,
|
||||
Outcome,
|
||||
Provider,
|
||||
QualityGate,
|
||||
RepoSizeCategory,
|
||||
TaskType,
|
||||
)
|
||||
|
||||
TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
|
||||
|
||||
|
||||
class TestEventBuilder:
    """Tests for the fluent event builder."""

    def test_build_complete_event(self) -> None:
        """Build an event with all fields set."""
        builder = EventBuilder(instance_id=TEST_INSTANCE_ID)
        builder = builder.task_type(TaskType.IMPLEMENTATION)
        builder = builder.model("claude-sonnet-4-20250514")
        builder = builder.provider(Provider.ANTHROPIC)
        builder = builder.harness_type(Harness.CLAUDE_CODE)
        builder = builder.complexity_level(Complexity.HIGH)
        builder = builder.outcome_value(Outcome.SUCCESS)
        builder = builder.duration_ms(45000)
        builder = builder.tokens(
            estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800
        )
        builder = builder.cost(estimated=50000, actual=48000)
        builder = builder.quality(
            passed=True,
            gates_run=[QualityGate.LINT, QualityGate.TEST],
            gates_failed=[],
        )
        builder = builder.context(compactions=1, rotations=0, utilization=0.4)
        builder = builder.retry_count(0)
        builder = builder.language("python")
        builder = builder.repo_size(RepoSizeCategory.MEDIUM)
        event = builder.build()

        # Identity and classification fields.
        assert event.instance_id == UUID(TEST_INSTANCE_ID)
        assert event.task_type == TaskType.IMPLEMENTATION
        assert event.model == "claude-sonnet-4-20250514"
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE
        assert event.complexity == Complexity.HIGH
        assert event.outcome == Outcome.SUCCESS
        assert event.task_duration_ms == 45000
        # Token and cost accounting.
        assert event.estimated_input_tokens == 5000
        assert event.estimated_output_tokens == 2000
        assert event.actual_input_tokens == 5200
        assert event.actual_output_tokens == 1800
        assert event.estimated_cost_usd_micros == 50000
        assert event.actual_cost_usd_micros == 48000
        # Quality gates.
        assert event.quality_gate_passed is True
        assert event.quality_gates_run == [QualityGate.LINT, QualityGate.TEST]
        assert event.quality_gates_failed == []
        # Context, retry, and repo metadata.
        assert event.context_compactions == 1
        assert event.context_rotations == 0
        assert event.context_utilization_final == 0.4
        assert event.retry_count == 0
        assert event.language == "python"
        assert event.repo_size_category == RepoSizeCategory.MEDIUM

    def test_auto_generated_defaults(self) -> None:
        """event_id and timestamp are auto-generated."""
        builder = EventBuilder(instance_id=TEST_INSTANCE_ID)
        event = (
            builder.task_type(TaskType.DEBUGGING)
            .model("gpt-4o")
            .provider(Provider.OPENAI)
            .build()
        )

        assert event.event_id is not None
        assert event.timestamp is not None
        # Auto-generated timestamps must be timezone-aware.
        assert event.timestamp.tzinfo is not None

    def test_custom_event_id(self) -> None:
        """Custom event_id can be set."""
        chosen = "abcdef12-1234-1234-1234-123456789abc"
        builder = EventBuilder(instance_id=TEST_INSTANCE_ID)
        event = builder.event_id(chosen).model("test-model").build()

        assert event.event_id == UUID(chosen)

    def test_custom_timestamp(self) -> None:
        """Custom timestamp can be set."""
        when = datetime(2026, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
        builder = EventBuilder(instance_id=TEST_INSTANCE_ID)
        event = builder.timestamp(when).model("test-model").build()

        assert event.timestamp == when

    def test_minimal_event_defaults(self) -> None:
        """Minimal event has sensible defaults."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("test-model").build()

        # Enum-valued fields fall back to UNKNOWN/MEDIUM/FAILURE defaults.
        assert event.task_type == TaskType.UNKNOWN
        assert event.complexity == Complexity.MEDIUM
        assert event.harness == Harness.UNKNOWN
        assert event.provider == Provider.UNKNOWN
        assert event.outcome == Outcome.FAILURE
        # Counters start at zero; optional metadata stays unset.
        assert event.task_duration_ms == 0
        assert event.retry_count == 0
        assert event.language is None
        assert event.repo_size_category is None

    def test_quality_defaults_to_empty_lists(self) -> None:
        """Quality gate lists default to empty."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build()

        assert event.quality_gates_run == []
        assert event.quality_gates_failed == []
        assert event.quality_gate_passed is False

    def test_schema_version(self) -> None:
        """Schema version defaults to 1.0."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build()
        assert event.schema_version == "1.0"
|
||||
147
tests/test_prediction_cache.py
Normal file
147
tests/test_prediction_cache.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Tests for PredictionCache."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||
from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType
|
||||
from mosaicstack_telemetry.types.predictions import (
|
||||
PredictionMetadata,
|
||||
PredictionQuery,
|
||||
PredictionResponse,
|
||||
)
|
||||
|
||||
|
||||
def _make_query(
    task_type: TaskType = TaskType.IMPLEMENTATION,
    model: str = "claude-sonnet-4-20250514",
) -> PredictionQuery:
    """Build a PredictionQuery with fixed provider/complexity; task_type and model are overridable."""
    query = PredictionQuery(
        task_type=task_type,
        model=model,
        provider=Provider.ANTHROPIC,
        complexity=Complexity.MEDIUM,
    )
    return query
|
||||
|
||||
|
||||
def _make_response(sample_size: int = 100) -> PredictionResponse:
    """Build a PredictionResponse with no prediction body and the given sample size."""
    meta = PredictionMetadata(
        sample_size=sample_size,
        fallback_level=0,
        confidence="high",
    )
    return PredictionResponse(prediction=None, metadata=meta)
|
||||
|
||||
|
||||
class TestPredictionCache:
    """Tests for the TTL-based prediction cache."""

    def test_cache_hit(self) -> None:
        """Cached predictions are returned on hit."""
        cache = PredictionCache(ttl_seconds=60.0)
        key = _make_query()
        cache.put(key, _make_response())

        hit = cache.get(key)

        assert hit is not None
        assert hit.metadata.sample_size == 100

    def test_cache_miss(self) -> None:
        """Missing keys return None."""
        cache = PredictionCache(ttl_seconds=60.0)
        assert cache.get(_make_query()) is None

    def test_cache_expiry(self) -> None:
        """Expired entries return None."""
        cache = PredictionCache(ttl_seconds=0.05)
        key = _make_query()
        cache.put(key, _make_response())

        # Wait past the 50 ms TTL so the entry lapses.
        time.sleep(0.1)

        assert cache.get(key) is None

    def test_different_queries_different_keys(self) -> None:
        """Different queries map to different cache entries."""
        cache = PredictionCache(ttl_seconds=60.0)
        impl_key = _make_query(task_type=TaskType.IMPLEMENTATION)
        debug_key = _make_query(task_type=TaskType.DEBUGGING)

        cache.put(impl_key, _make_response(sample_size=100))
        cache.put(debug_key, _make_response(sample_size=200))

        impl_hit = cache.get(impl_key)
        debug_hit = cache.get(debug_key)

        assert impl_hit is not None
        assert debug_hit is not None
        assert impl_hit.metadata.sample_size == 100
        assert debug_hit.metadata.sample_size == 200

    def test_cache_clear(self) -> None:
        """Clear removes all entries."""
        cache = PredictionCache(ttl_seconds=60.0)
        key = _make_query()
        cache.put(key, _make_response())
        assert cache.size == 1

        cache.clear()

        assert cache.size == 0
        assert cache.get(key) is None

    def test_cache_overwrite(self) -> None:
        """Putting a new value for the same key overwrites."""
        cache = PredictionCache(ttl_seconds=60.0)
        key = _make_query()

        # Second put replaces the first entry under the same key.
        for size in (100, 200):
            cache.put(key, _make_response(sample_size=size))

        latest = cache.get(key)
        assert latest is not None
        assert latest.metadata.sample_size == 200

    def test_thread_safety(self) -> None:
        """Cache handles concurrent access from multiple threads."""
        cache = PredictionCache(ttl_seconds=60.0)
        errors: list[Exception] = []
        rounds = 100

        def hammer(thread_id: int, write: bool) -> None:
            # Writers populate per-thread keys; readers probe the same key
            # space (hits are not guaranteed — only absence of exceptions
            # matters for this test).
            try:
                for i in range(rounds):
                    key = _make_query(model=f"model-{thread_id}-{i}")
                    if write:
                        cache.put(key, _make_response(sample_size=i))
                    else:
                        cache.get(key)
            except Exception as exc:
                errors.append(exc)

        workers = [
            threading.Thread(target=hammer, args=(tid, write))
            for tid in range(4)
            for write in (True, False)
        ]

        for w in workers:
            w.start()
        for w in workers:
            w.join(timeout=5)

        assert not errors, f"Thread errors: {errors}"
|
||||
133
tests/test_queue.py
Normal file
133
tests/test_queue.py
Normal file
@@ -0,0 +1,133 @@
|
||||
"""Tests for EventQueue."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
|
||||
from mosaicstack_telemetry.queue import EventQueue
|
||||
from tests.conftest import make_event
|
||||
|
||||
|
||||
class TestEventQueue:
    """Tests for the bounded thread-safe event queue.

    Covers FIFO ordering, drain limits, bounded eviction, the size/is_empty
    properties, put_back semantics, and concurrent put/drain safety.
    """

    def test_put_and_drain(self) -> None:
        """Events can be put in and drained out in FIFO order."""
        queue = EventQueue(max_size=10)
        e1 = make_event()
        e2 = make_event()
        queue.put(e1)
        queue.put(e2)

        drained = queue.drain(10)

        assert len(drained) == 2
        # Drain preserves insertion order.
        assert drained[0].event_id == e1.event_id
        assert drained[1].event_id == e2.event_id

    def test_drain_max_items(self) -> None:
        """Drain respects the max_items limit."""
        queue = EventQueue(max_size=10)
        for _ in range(5):
            queue.put(make_event())

        drained = queue.drain(3)
        assert len(drained) == 3
        # The two undrained events remain queued.
        assert queue.size == 2

    def test_drain_empty_queue(self) -> None:
        """Draining an empty queue returns empty list."""
        queue = EventQueue(max_size=10)
        drained = queue.drain(5)
        assert drained == []

    def test_bounded_fifo_eviction(self) -> None:
        """When queue is full, oldest events are evicted."""
        queue = EventQueue(max_size=3)
        events = [make_event() for _ in range(5)]
        for e in events:
            queue.put(e)

        assert queue.size == 3
        drained = queue.drain(3)
        # Should have the last 3 events (oldest 2 were evicted)
        assert drained[0].event_id == events[2].event_id
        assert drained[1].event_id == events[3].event_id
        assert drained[2].event_id == events[4].event_id

    def test_size_property(self) -> None:
        """Size property reflects current queue length."""
        queue = EventQueue(max_size=10)
        assert queue.size == 0
        queue.put(make_event())
        assert queue.size == 1
        queue.put(make_event())
        assert queue.size == 2
        queue.drain(1)
        assert queue.size == 1

    def test_is_empty_property(self) -> None:
        """is_empty property works correctly."""
        queue = EventQueue(max_size=10)
        assert queue.is_empty is True
        queue.put(make_event())
        assert queue.is_empty is False
        queue.drain(1)
        assert queue.is_empty is True

    def test_put_back(self) -> None:
        """put_back re-adds events to the front of the queue."""
        queue = EventQueue(max_size=10)
        e1 = make_event()
        e2 = make_event()
        queue.put(e1)

        queue.put_back([e2])
        drained = queue.drain(2)
        # e2 should be first (put_back adds to front)
        assert drained[0].event_id == e2.event_id
        assert drained[1].event_id == e1.event_id

    def test_put_back_respects_max_size(self) -> None:
        """put_back doesn't exceed max_size."""
        queue = EventQueue(max_size=3)
        for _ in range(3):
            queue.put(make_event())

        events_to_add = [make_event() for _ in range(5)]
        queue.put_back(events_to_add)
        assert queue.size == 3

    def test_thread_safety_concurrent_put_drain(self) -> None:
        """Queue handles concurrent put and drain operations."""
        import time  # local import: only this test needs a sleep primitive

        queue = EventQueue(max_size=1000)
        total_puts = 500
        errors: list[Exception] = []

        def put_events() -> None:
            # Producer: push `total_puts` events as fast as possible.
            try:
                for _ in range(total_puts):
                    queue.put(make_event())
            except Exception as e:
                errors.append(e)

        def drain_events() -> None:
            # Consumer: drain in small batches until all events are seen.
            try:
                drained_count = 0
                while drained_count < total_puts:
                    batch = queue.drain(10)
                    drained_count += len(batch)
                    if not batch:
                        # Back off briefly instead of busy-spinning.
                        # (Replaces `threading.Event().wait(0.001)`, which
                        # allocated a throwaway Event object per iteration
                        # just to sleep.)
                        time.sleep(0.001)
            except Exception as e:
                errors.append(e)

        put_thread = threading.Thread(target=put_events)
        drain_thread = threading.Thread(target=drain_events)

        put_thread.start()
        drain_thread.start()

        put_thread.join(timeout=5)
        drain_thread.join(timeout=5)

        assert not errors, f"Thread errors: {errors}"
|
||||
207
tests/test_submitter.py
Normal file
207
tests/test_submitter.py
Normal file
@@ -0,0 +1,207 @@
|
||||
"""Tests for batch submission logic."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
import respx
|
||||
|
||||
from mosaicstack_telemetry.config import TelemetryConfig
|
||||
from mosaicstack_telemetry.submitter import submit_batch_async, submit_batch_sync
|
||||
from tests.conftest import (
|
||||
TEST_API_KEY,
|
||||
TEST_INSTANCE_ID,
|
||||
TEST_SERVER_URL,
|
||||
make_batch_response_json,
|
||||
make_event,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture()
def fast_config() -> TelemetryConfig:
    """Config with minimal retries and timeouts for fast tests."""
    cfg = TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        max_retries=1,
        request_timeout_seconds=2.0,
    )
    return cfg
|
||||
|
||||
|
||||
class TestSubmitBatchSync:
    """Tests for synchronous batch submission."""

    @respx.mock
    def test_successful_submission(self, fast_config: TelemetryConfig) -> None:
        """Successful 202 response returns BatchEventResponse."""
        batch = [make_event() for _ in range(3)]
        payload = make_batch_response_json(batch)

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(202, json=payload)
        )

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 3
        assert outcome.rejected == 0

    @respx.mock
    def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None:
        """429 response respects Retry-After header and retries."""
        batch = [make_event()]
        payload = make_batch_response_json(batch)

        # First call is throttled; the retry succeeds.
        endpoint = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        endpoint.side_effect = [
            httpx.Response(429, headers={"Retry-After": "0.1"}),
            httpx.Response(202, json=payload),
        ]

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 1

    @respx.mock
    def test_403_returns_none(self, fast_config: TelemetryConfig) -> None:
        """403 response returns None immediately."""
        batch = [make_event()]

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(403, json={"error": "Forbidden"})
        )

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is None

    @respx.mock
    def test_network_error_retries(self, fast_config: TelemetryConfig) -> None:
        """Network errors trigger retry with backoff."""
        batch = [make_event()]
        payload = make_batch_response_json(batch)

        # Connection failure on the first attempt, success on the retry.
        endpoint = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        endpoint.side_effect = [
            httpx.ConnectError("Connection refused"),
            httpx.Response(202, json=payload),
        ]

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 1

    @respx.mock
    def test_all_retries_exhausted(self, fast_config: TelemetryConfig) -> None:
        """When all retries fail, returns None."""
        batch = [make_event()]

        # Every attempt fails with a connection error.
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            side_effect=httpx.ConnectError("Connection refused")
        )

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is None

    def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None:
        """Dry run mode logs but doesn't send."""
        fast_config.dry_run = True
        batch = [make_event() for _ in range(5)]

        # No respx mock registered: dry run must not perform HTTP at all.
        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 5
        assert outcome.rejected == 0

    @respx.mock
    def test_500_error_retries(self, fast_config: TelemetryConfig) -> None:
        """Server errors (500) trigger retries."""
        batch = [make_event()]
        payload = make_batch_response_json(batch)

        endpoint = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        endpoint.side_effect = [
            httpx.Response(500, text="Internal Server Error"),
            httpx.Response(202, json=payload),
        ]

        with httpx.Client() as http:
            outcome = submit_batch_sync(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 1
|
||||
|
||||
|
||||
class TestSubmitBatchAsync:
    """Tests for asynchronous batch submission.

    NOTE(review): these bare ``async def`` tests rely on pytest-asyncio auto
    mode (or markers applied via project config, not visible here) — confirm.
    """

    @respx.mock
    async def test_successful_submission(self, fast_config: TelemetryConfig) -> None:
        """Successful 202 response returns BatchEventResponse."""
        batch = [make_event() for _ in range(3)]
        payload = make_batch_response_json(batch)

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(202, json=payload)
        )

        async with httpx.AsyncClient() as http:
            outcome = await submit_batch_async(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 3

    @respx.mock
    async def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None:
        """429 response respects Retry-After and retries asynchronously."""
        batch = [make_event()]
        payload = make_batch_response_json(batch)

        # Throttled once, then accepted on the retry.
        endpoint = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        endpoint.side_effect = [
            httpx.Response(429, headers={"Retry-After": "0.1"}),
            httpx.Response(202, json=payload),
        ]

        async with httpx.AsyncClient() as http:
            outcome = await submit_batch_async(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 1

    @respx.mock
    async def test_403_returns_none(self, fast_config: TelemetryConfig) -> None:
        """403 returns None immediately."""
        batch = [make_event()]

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(403, json={"error": "Forbidden"})
        )

        async with httpx.AsyncClient() as http:
            outcome = await submit_batch_async(http, fast_config, batch)

        assert outcome is None

    async def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None:
        """Dry run mode returns mock response without HTTP."""
        fast_config.dry_run = True
        batch = [make_event() for _ in range(3)]

        # No respx mock registered: dry run must not reach the network.
        async with httpx.AsyncClient() as http:
            outcome = await submit_batch_async(http, fast_config, batch)

        assert outcome is not None
        assert outcome.accepted == 3
|
||||
Reference in New Issue
Block a user