Initial project structure
This commit is contained in:
42
.gitignore
vendored
Normal file
42
.gitignore
vendored
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
*.egg-info/
|
||||||
|
*.egg
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
.eggs/
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
|
||||||
|
# ruff
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
41
CLAUDE.md
Normal file
41
CLAUDE.md
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
# mosaicstack-telemetry (Python Client SDK)
|
||||||
|
|
||||||
|
Python client SDK for Mosaic Stack Telemetry. Reports AI coding task-completion telemetry and queries crowd-sourced predictions.
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies (including dev)
|
||||||
|
uv sync --all-extras
|
||||||
|
|
||||||
|
# Run tests (85%+ coverage required)
|
||||||
|
uv run pytest
|
||||||
|
|
||||||
|
# Lint
|
||||||
|
uv run ruff check src/ tests/
|
||||||
|
|
||||||
|
# Format check
|
||||||
|
uv run ruff format --check src/ tests/
|
||||||
|
|
||||||
|
# Type check
|
||||||
|
uv run mypy src/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
- `src/mosaicstack_telemetry/client.py` — Main TelemetryClient (public API)
|
||||||
|
- `src/mosaicstack_telemetry/config.py` — TelemetryConfig dataclass with env var support
|
||||||
|
- `src/mosaicstack_telemetry/queue.py` — Thread-safe bounded event queue
|
||||||
|
- `src/mosaicstack_telemetry/submitter.py` — Batch submission with retry/backoff
|
||||||
|
- `src/mosaicstack_telemetry/_sync.py` — Threading-based periodic submitter
|
||||||
|
- `src/mosaicstack_telemetry/_async.py` — Asyncio-based periodic submitter
|
||||||
|
- `src/mosaicstack_telemetry/event_builder.py` — Fluent event builder
|
||||||
|
- `src/mosaicstack_telemetry/prediction_cache.py` — TTL-based prediction cache
|
||||||
|
- `src/mosaicstack_telemetry/types/` — All Pydantic models and enums
|
||||||
|
|
||||||
|
## Key Rules
|
||||||
|
|
||||||
|
- `track()` must NEVER throw or block the caller
|
||||||
|
- All logging uses `logging.getLogger("mosaicstack_telemetry")`
|
||||||
|
- Runtime deps: httpx + pydantic only
|
||||||
|
- Python 3.10+ compatible (uses `str, Enum` mixin instead of StrEnum)
|
||||||
372
LICENSE
Normal file
372
LICENSE
Normal file
@@ -0,0 +1,372 @@
|
|||||||
|
Mozilla Public License Version 2.0
|
||||||
|
==================================
|
||||||
|
|
||||||
|
1. Definitions
|
||||||
|
--------------
|
||||||
|
|
||||||
|
1.1. "Contributor"
|
||||||
|
means each individual or legal entity that creates, contributes to
|
||||||
|
the creation of, or owns Covered Software.
|
||||||
|
|
||||||
|
1.2. "Contributor Version"
|
||||||
|
means the combination of the Contributions of others (if any) used
|
||||||
|
by a Contributor and that particular Contributor's Contribution.
|
||||||
|
|
||||||
|
1.3. "Contribution"
|
||||||
|
means Covered Software of a particular Contributor.
|
||||||
|
|
||||||
|
1.4. "Covered Software"
|
||||||
|
means Source Code Form to which the initial Contributor has attached
|
||||||
|
the notice in Exhibit A, the Executable Form of such Source Code
|
||||||
|
Form, and Modifications of such Source Code Form, in each case
|
||||||
|
including portions thereof.
|
||||||
|
|
||||||
|
1.5. "Incompatible With Secondary Licenses"
|
||||||
|
means
|
||||||
|
|
||||||
|
(a) that the initial Contributor has attached the notice described
|
||||||
|
in Exhibit B to the Covered Software; or
|
||||||
|
|
||||||
|
(b) that the Covered Software was made available under the terms of
|
||||||
|
version 1.1 or earlier of the License, but not also under the
|
||||||
|
terms of a Secondary License.
|
||||||
|
|
||||||
|
1.6. "Executable Form"
|
||||||
|
means any form of the work other than Source Code Form.
|
||||||
|
|
||||||
|
1.7. "Larger Work"
|
||||||
|
means a work that combines Covered Software with other material, in
|
||||||
|
a separate file or files, that is not Covered Software.
|
||||||
|
|
||||||
|
1.8. "License"
|
||||||
|
means this document.
|
||||||
|
|
||||||
|
1.9. "Licensable"
|
||||||
|
means having the right to grant, to the maximum extent possible,
|
||||||
|
whether at the time of the initial grant or subsequently, any and
|
||||||
|
all of the rights conveyed by this License.
|
||||||
|
|
||||||
|
1.10. "Modifications"
|
||||||
|
means any of the following:
|
||||||
|
|
||||||
|
(a) any file in Source Code Form that results from an addition to,
|
||||||
|
deletion from, or modification of the contents of Covered
|
||||||
|
Software; or
|
||||||
|
|
||||||
|
(b) any new file in Source Code Form that contains any Covered
|
||||||
|
Software.
|
||||||
|
|
||||||
|
1.11. "Patent Claims" of a Contributor
|
||||||
|
means any patent claim(s), including without limitation, method,
|
||||||
|
process, and apparatus claims, in any patent Licensable by such
|
||||||
|
Contributor that would be infringed, but for the grant of the
|
||||||
|
License, by the making, using, selling, offering for sale, having
|
||||||
|
made, import, or transfer of either its Contributions or its
|
||||||
|
Contributor Version.
|
||||||
|
|
||||||
|
1.12. "Secondary License"
|
||||||
|
means either the GNU General Public License, Version 2.0, the GNU
|
||||||
|
Lesser General Public License, Version 2.1, the GNU Affero General
|
||||||
|
Public License, Version 3.0, or any later versions of those
|
||||||
|
licenses.
|
||||||
|
|
||||||
|
1.13. "Source Code Form"
|
||||||
|
means the form of the work preferred for making modifications.
|
||||||
|
|
||||||
|
1.14. "You" (or "Your")
|
||||||
|
means an individual or a legal entity exercising rights under this
|
||||||
|
License. For legal entities, "You" includes any entity that
|
||||||
|
controls, is controlled by, or is under common control with You. For
|
||||||
|
purposes of this definition, "control" means (a) the power, direct
|
||||||
|
or indirect, to cause the direction or management of such entity,
|
||||||
|
whether by contract or otherwise, or (b) ownership of more than
|
||||||
|
fifty percent (50%) of the outstanding shares or beneficial
|
||||||
|
ownership of such entity.
|
||||||
|
|
||||||
|
2. License Grants and Conditions
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
2.1. Grants
|
||||||
|
|
||||||
|
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||||
|
non-exclusive license:
|
||||||
|
|
||||||
|
(a) under intellectual property rights (other than patent or trademark)
|
||||||
|
Licensable by such Contributor to use, reproduce, make available,
|
||||||
|
modify, display, perform, distribute, and otherwise exploit its
|
||||||
|
Contributions, either on an unmodified basis, with Modifications, or
|
||||||
|
as part of a Larger Work; and
|
||||||
|
|
||||||
|
(b) under Patent Claims of such Contributor to make, use, sell, offer
|
||||||
|
for sale, have made, import, and otherwise transfer either its
|
||||||
|
Contributions or its Contributor Version.
|
||||||
|
|
||||||
|
2.2. Effective Date
|
||||||
|
|
||||||
|
The licenses granted in Section 2.1 with respect to any Contribution
|
||||||
|
become effective for each Contribution on the date the Contributor first
|
||||||
|
distributes such Contribution.
|
||||||
|
|
||||||
|
2.3. Limitations on Grant Scope
|
||||||
|
|
||||||
|
The licenses granted in this Section 2 are the only rights granted under
|
||||||
|
this License. No additional rights or licenses will be implied from the
|
||||||
|
distribution or licensing of Covered Software under this License.
|
||||||
|
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||||
|
Contributor:
|
||||||
|
|
||||||
|
(a) for any code that a Contributor has removed from Covered Software;
|
||||||
|
or
|
||||||
|
|
||||||
|
(b) for infringements caused by: (i) Your and any other third party's
|
||||||
|
modifications of Covered Software, or (ii) the combination of its
|
||||||
|
Contributions with other software (except as part of its Contributor
|
||||||
|
Version); or
|
||||||
|
|
||||||
|
(c) under Patent Claims infringed by Covered Software in the absence of
|
||||||
|
its Contributions.
|
||||||
|
|
||||||
|
This License does not grant any rights in the trademarks, service marks,
|
||||||
|
or logos of any Contributor (except as may be necessary to comply with
|
||||||
|
the notice requirements in Section 3.4).
|
||||||
|
|
||||||
|
2.4. Subsequent Licenses
|
||||||
|
|
||||||
|
No Contributor makes additional grants as a result of Your choice to
|
||||||
|
distribute the Covered Software under a subsequent version of this
|
||||||
|
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||||
|
permitted under the terms of Section 3.3).
|
||||||
|
|
||||||
|
2.5. Representation
|
||||||
|
|
||||||
|
Each Contributor represents that the Contributor believes its
|
||||||
|
Contributions are its original creation(s) or it has sufficient rights
|
||||||
|
to grant the rights to its Contributions conveyed by this License.
|
||||||
|
|
||||||
|
2.6. Fair Use
|
||||||
|
|
||||||
|
This License is not intended to limit any rights You have under
|
||||||
|
applicable copyright doctrines of fair use, fair dealing, or other
|
||||||
|
equivalents.
|
||||||
|
|
||||||
|
2.7. Conditions
|
||||||
|
|
||||||
|
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
|
||||||
|
in Section 2.1.
|
||||||
|
|
||||||
|
3. Responsibilities
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
3.1. Distribution of Source Form
|
||||||
|
|
||||||
|
All distribution of Covered Software in Source Code Form, including any
|
||||||
|
Modifications that You create or to which You contribute, must be under
|
||||||
|
the terms of this License. You must inform recipients that the Source
|
||||||
|
Code Form of the Covered Software is governed by the terms of this
|
||||||
|
License, and how they can obtain a copy of this License. You may not
|
||||||
|
attempt to alter or restrict the recipients' rights in the Source Code
|
||||||
|
Form.
|
||||||
|
|
||||||
|
3.2. Distribution of Executable Form
|
||||||
|
|
||||||
|
If You distribute Covered Software in Executable Form then:
|
||||||
|
|
||||||
|
(a) such Covered Software must also be made available in Source Code
|
||||||
|
Form, as described in Section 3.1, and You must inform recipients of
|
||||||
|
the Executable Form how they can obtain a copy of such Source Code
|
||||||
|
Form by reasonable means in a timely manner, at a charge no more
|
||||||
|
than the cost of distribution to the recipient; and
|
||||||
|
|
||||||
|
(b) You may distribute such Executable Form under the terms of this
|
||||||
|
License, or sublicense it under different terms, provided that the
|
||||||
|
license for the Executable Form does not attempt to limit or alter
|
||||||
|
the recipients' rights in the Source Code Form under this License.
|
||||||
|
|
||||||
|
3.3. Distribution of a Larger Work
|
||||||
|
|
||||||
|
You may create and distribute a Larger Work under terms of Your choice,
|
||||||
|
provided that You also comply with the requirements of this License for
|
||||||
|
the Covered Software. If the Larger Work is a combination of Covered
|
||||||
|
Software with a work governed by one or more Secondary Licenses, and the
|
||||||
|
Covered Software is not Incompatible With Secondary Licenses, this
|
||||||
|
License permits You to additionally distribute such Covered Software
|
||||||
|
under the terms of such Secondary License(s), so that the recipient of
|
||||||
|
the Larger Work may, at their option, further distribute the Covered
|
||||||
|
Software under the terms of either this License or such Secondary
|
||||||
|
License(s).
|
||||||
|
|
||||||
|
3.4. Notices
|
||||||
|
|
||||||
|
You may not remove or alter the substance of any license notices
|
||||||
|
(including copyright notices, patent notices, disclaimers of warranty,
|
||||||
|
or limitations of liability) contained within the Source Code Form of
|
||||||
|
the Covered Software, except that You may alter any license notices to
|
||||||
|
the extent required to remedy known factual inaccuracies.
|
||||||
|
|
||||||
|
3.5. Application of Additional Terms
|
||||||
|
|
||||||
|
You may choose to offer, and to charge a fee for, warranty, support,
|
||||||
|
indemnity or liability obligations to one or more recipients of Covered
|
||||||
|
Software. However, You may do so only on Your own behalf, and not on
|
||||||
|
behalf of any Contributor. You must make it absolutely clear that any
|
||||||
|
such warranty, support, indemnity, or liability obligation is offered by
|
||||||
|
You alone, and You hereby agree to indemnify every Contributor for any
|
||||||
|
liability incurred by such Contributor as a result of warranty, support,
|
||||||
|
indemnity or liability terms You offer. You may include additional
|
||||||
|
disclaimers of warranty and limitations of liability specific to any
|
||||||
|
jurisdiction.
|
||||||
|
|
||||||
|
4. Inability to Comply Due to Statute or Regulation
|
||||||
|
---------------------------------------------------
|
||||||
|
|
||||||
|
If it is impossible for You to comply with any of the terms of this
|
||||||
|
License with respect to some or all of the Covered Software due to
|
||||||
|
statute, judicial order, or regulation then You must: (a) comply with
|
||||||
|
the terms of this License to the maximum extent possible; and (b)
|
||||||
|
describe the limitations and the code they affect. Such description must
|
||||||
|
be placed in a text file included with all distributions of the Covered
|
||||||
|
Software under the name "LEGAL". The contents of the LEGAL file are for
|
||||||
|
informational purposes only and do not alter the terms of this License.
|
||||||
|
|
||||||
|
5. Termination
|
||||||
|
--------------
|
||||||
|
|
||||||
|
5.1. The rights granted under this License will terminate automatically
|
||||||
|
if You fail to comply with any of its terms. However, if You become
|
||||||
|
compliant, then the rights granted under this License from a particular
|
||||||
|
Contributor are reinstated (a) provisionally, unless and until such
|
||||||
|
Contributor explicitly and finally terminates Your grants, and (b) on an
|
||||||
|
ongoing basis, if such Contributor fails to notify You of the
|
||||||
|
non-compliance by some reasonable means prior to 60 days after You have
|
||||||
|
come back into compliance. Moreover, Your grants from a particular
|
||||||
|
Contributor are reinstated on an ongoing basis if such Contributor
|
||||||
|
notifies You of the non-compliance by some reasonable means, this is the
|
||||||
|
first time You have received notice of non-compliance with this License
|
||||||
|
from such Contributor, and You become compliant prior to 30 days after
|
||||||
|
Your receipt of the notice.
|
||||||
|
|
||||||
|
5.2. If You initiate litigation against any entity by asserting a patent
|
||||||
|
infringement claim (excluding declaratory judgment actions,
|
||||||
|
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||||
|
directly or indirectly infringes any patent, then the rights granted to
|
||||||
|
You by any and all Contributors for the Covered Software under Section
|
||||||
|
2.1 of this License shall terminate.
|
||||||
|
|
||||||
|
5.3. In the event of termination under Sections 5.1 or 5.2 above, all
|
||||||
|
end user license agreements (excluding distributors and resellers) which
|
||||||
|
have been validly granted by You or Your distributors under this License
|
||||||
|
prior to termination shall survive termination.
|
||||||
|
|
||||||
|
************************************************************************
|
||||||
|
* *
|
||||||
|
* 6. Disclaimer of Warranty *
|
||||||
|
* ------------------------- *
|
||||||
|
* *
|
||||||
|
* Covered Software is provided under this License on an "as is" *
|
||||||
|
* basis, without warranty of any kind, either expressed, implied, or *
|
||||||
|
* statutory, including, without limitation, warranties that the *
|
||||||
|
* Covered Software is free of defects, merchantable, fit for a *
|
||||||
|
* particular purpose or non-infringing. The entire risk as to the *
|
||||||
|
* quality and performance of the Covered Software is with You. *
|
||||||
|
* Should any Covered Software prove defective in any respect, You *
|
||||||
|
* (not any Contributor) assume the cost of any necessary servicing, *
|
||||||
|
* repair, or correction. This disclaimer of warranty constitutes an *
|
||||||
|
* essential part of this License. No use of any Covered Software is *
|
||||||
|
* authorized under this License except under this disclaimer. *
|
||||||
|
* *
|
||||||
|
************************************************************************
|
||||||
|
|
||||||
|
************************************************************************
|
||||||
|
* *
|
||||||
|
* 7. Limitation of Liability *
|
||||||
|
* -------------------------- *
|
||||||
|
* *
|
||||||
|
* Under no circumstances and under no legal theory, whether tort *
|
||||||
|
* (including negligence), contract, or otherwise, shall any *
|
||||||
|
* Contributor, or anyone who distributes Covered Software as *
|
||||||
|
* permitted above, be liable to You for any direct, indirect, *
|
||||||
|
* special, incidental, or consequential damages of any character *
|
||||||
|
* including, without limitation, damages for lost profits, loss of *
|
||||||
|
* goodwill, work stoppage, computer failure or malfunction, or any *
|
||||||
|
* and all other commercial damages or losses, even if such party *
|
||||||
|
* shall have been informed of the possibility of such damages. This *
|
||||||
|
* limitation of liability shall not apply to liability for death or *
|
||||||
|
* personal injury resulting from such party's negligence to the *
|
||||||
|
* extent applicable law prohibits such limitation. Some *
|
||||||
|
* jurisdictions do not allow the exclusion or limitation of *
|
||||||
|
* incidental or consequential damages, so this exclusion and *
|
||||||
|
* limitation may not apply to You. *
|
||||||
|
* *
|
||||||
|
************************************************************************
|
||||||
|
|
||||||
|
8. Litigation
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Any litigation relating to this License may be brought only in the
|
||||||
|
courts of a jurisdiction where the defendant maintains its principal
|
||||||
|
place of business and such litigation shall be governed by laws of that
|
||||||
|
jurisdiction, without reference to its conflict-of-law provisions.
|
||||||
|
Nothing in this Section shall prevent a party's ability to bring
|
||||||
|
cross-claims or counter-claims.
|
||||||
|
|
||||||
|
9. Miscellaneous
|
||||||
|
----------------
|
||||||
|
|
||||||
|
This License represents the complete agreement concerning the subject
|
||||||
|
matter hereof. If any provision of this License is held to be
|
||||||
|
unenforceable, such provision shall be reformed only to the extent
|
||||||
|
necessary to make it enforceable. Any law or regulation which provides
|
||||||
|
that the language of a contract shall be construed against the drafter
|
||||||
|
shall not be used to construe this License against a Contributor.
|
||||||
|
|
||||||
|
10. Versions of the License
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
10.1. New Versions
|
||||||
|
|
||||||
|
Mozilla Foundation is the license steward. Except as provided in Section
|
||||||
|
10.3, no one other than the license steward has the right to modify or
|
||||||
|
publish new versions of this License. Each version will be given a
|
||||||
|
distinguishing version number.
|
||||||
|
|
||||||
|
10.2. Effect of New Versions
|
||||||
|
|
||||||
|
You may distribute the Covered Software under the terms of the version
|
||||||
|
of the License under which You originally received the Covered Software,
|
||||||
|
or under the terms of any subsequent version published by the license
|
||||||
|
steward.
|
||||||
|
|
||||||
|
10.3. Modified Versions
|
||||||
|
|
||||||
|
If you create software not governed by this License, and you want to
|
||||||
|
create a new license for such software, you may create and use a
|
||||||
|
modified version of this License if you rename the license and remove
|
||||||
|
any references to the name of the license steward (except to note that
|
||||||
|
such modified license differs from this License).
|
||||||
|
|
||||||
|
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||||
|
Licenses
|
||||||
|
|
||||||
|
If You choose to distribute Source Code Form that is Incompatible With
|
||||||
|
Secondary Licenses under the terms of this version of the License, the
|
||||||
|
notice described in Exhibit B of this License must be attached.
|
||||||
|
|
||||||
|
Exhibit A - Source Code Form License Notice
|
||||||
|
-------------------------------------------
|
||||||
|
|
||||||
|
This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
If it is not possible or desirable to put the notice in a particular
|
||||||
|
file, then You may include the notice in a location (such as a LICENSE
|
||||||
|
file in a relevant directory) where a recipient would be likely to look
|
||||||
|
for such a notice.
|
||||||
|
|
||||||
|
You may add additional accurate notices of copyright ownership.
|
||||||
|
|
||||||
|
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||||
|
---------------------------------------------------------
|
||||||
|
|
||||||
|
This Source Code Form is "Incompatible With Secondary Licenses", as
|
||||||
|
defined by the Mozilla Public License, v. 2.0.
|
||||||
181
README.md
Normal file
181
README.md
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
# mosaicstack-telemetry
|
||||||
|
|
||||||
|
Python client SDK for [Mosaic Stack Telemetry](https://github.com/mosaicstack/telemetry). Report AI coding task-completion telemetry and query crowd-sourced predictions for token usage, cost, and quality outcomes.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install mosaicstack-telemetry
|
||||||
|
# or
|
||||||
|
uv add mosaicstack-telemetry
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start (Sync)
|
||||||
|
|
||||||
|
Best for scripts, aider integrations, and non-async contexts:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from mosaicstack_telemetry import (
|
||||||
|
TelemetryClient,
|
||||||
|
TelemetryConfig,
|
||||||
|
EventBuilder,
|
||||||
|
TaskType,
|
||||||
|
Provider,
|
||||||
|
Harness,
|
||||||
|
Complexity,
|
||||||
|
Outcome,
|
||||||
|
QualityGate,
|
||||||
|
)
|
||||||
|
|
||||||
|
config = TelemetryConfig(
|
||||||
|
server_url="https://telemetry.mosaicstack.dev",
|
||||||
|
api_key="your-64-char-hex-api-key-here...",
|
||||||
|
instance_id="your-uuid-instance-id",
|
||||||
|
)
|
||||||
|
|
||||||
|
client = TelemetryClient(config)
|
||||||
|
client.start() # Starts background submission thread
|
||||||
|
|
||||||
|
# Build and track an event
|
||||||
|
event = (
|
||||||
|
EventBuilder(instance_id=config.instance_id)
|
||||||
|
.task_type(TaskType.IMPLEMENTATION)
|
||||||
|
.model("claude-sonnet-4-20250514")
|
||||||
|
.provider(Provider.ANTHROPIC)
|
||||||
|
.harness_type(Harness.AIDER)
|
||||||
|
.complexity_level(Complexity.MEDIUM)
|
||||||
|
.outcome_value(Outcome.SUCCESS)
|
||||||
|
.duration_ms(45000)
|
||||||
|
.tokens(estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800)
|
||||||
|
.cost(estimated=50000, actual=48000)
|
||||||
|
.quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST])
|
||||||
|
.context(compactions=0, rotations=0, utilization=0.4)
|
||||||
|
.language("python")
|
||||||
|
.build()
|
||||||
|
)
|
||||||
|
|
||||||
|
client.track(event) # Non-blocking, thread-safe
|
||||||
|
|
||||||
|
# When done
|
||||||
|
client.stop() # Flushes remaining events
|
||||||
|
```
|
||||||
|
|
||||||
|
## Async Usage
|
||||||
|
|
||||||
|
For asyncio-based applications:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
from mosaicstack_telemetry import TelemetryClient, TelemetryConfig
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
config = TelemetryConfig(
|
||||||
|
server_url="https://telemetry.mosaicstack.dev",
|
||||||
|
api_key="your-64-char-hex-api-key-here...",
|
||||||
|
instance_id="your-uuid-instance-id",
|
||||||
|
)
|
||||||
|
|
||||||
|
client = TelemetryClient(config)
|
||||||
|
await client.start_async() # Starts asyncio background task
|
||||||
|
|
||||||
|
# track() is always synchronous
|
||||||
|
client.track(event)
|
||||||
|
|
||||||
|
await client.stop_async() # Flushes remaining events
|
||||||
|
|
||||||
|
asyncio.run(main())
|
||||||
|
```
|
||||||
|
|
||||||
|
## Context Manager
|
||||||
|
|
||||||
|
Both sync and async context managers are supported:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Sync
|
||||||
|
with TelemetryClient(config) as client:
|
||||||
|
client.track(event)
|
||||||
|
|
||||||
|
# Async
|
||||||
|
async with TelemetryClient(config) as client:
|
||||||
|
client.track(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration via Environment Variables
|
||||||
|
|
||||||
|
All core settings can be set via environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export MOSAIC_TELEMETRY_ENABLED=true
|
||||||
|
export MOSAIC_TELEMETRY_SERVER_URL=https://telemetry.mosaicstack.dev
|
||||||
|
export MOSAIC_TELEMETRY_API_KEY=your-64-char-hex-api-key
|
||||||
|
export MOSAIC_TELEMETRY_INSTANCE_ID=your-uuid-instance-id
|
||||||
|
```
|
||||||
|
|
||||||
|
Then create a config with defaults:
|
||||||
|
|
||||||
|
```python
|
||||||
|
config = TelemetryConfig() # Picks up env vars automatically
|
||||||
|
```
|
||||||
|
|
||||||
|
Explicit constructor values take priority over environment variables.
|
||||||
|
|
||||||
|
## Querying Predictions
|
||||||
|
|
||||||
|
Fetch crowd-sourced predictions for token usage, cost, and quality:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from mosaicstack_telemetry import PredictionQuery, TaskType, Provider, Complexity
|
||||||
|
|
||||||
|
query = PredictionQuery(
|
||||||
|
task_type=TaskType.IMPLEMENTATION,
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
provider=Provider.ANTHROPIC,
|
||||||
|
complexity=Complexity.MEDIUM,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Async
|
||||||
|
await client.refresh_predictions([query])
|
||||||
|
|
||||||
|
# Sync
|
||||||
|
client.refresh_predictions_sync([query])
|
||||||
|
|
||||||
|
# Read from cache
|
||||||
|
prediction = client.get_prediction(query)
|
||||||
|
if prediction and prediction.prediction:
|
||||||
|
print(f"Expected input tokens (median): {prediction.prediction.input_tokens.median}")
|
||||||
|
print(f"Expected cost (median): ${prediction.prediction.cost_usd_micros['median'] / 1_000_000:.4f}")
|
||||||
|
print(f"Quality gate pass rate: {prediction.prediction.quality.gate_pass_rate:.0%}")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dry-Run Mode
|
||||||
|
|
||||||
|
Test your integration without sending data to the server:
|
||||||
|
|
||||||
|
```python
|
||||||
|
config = TelemetryConfig(
|
||||||
|
server_url="https://telemetry.mosaicstack.dev",
|
||||||
|
api_key="a" * 64,
|
||||||
|
instance_id="12345678-1234-1234-1234-123456789abc",
|
||||||
|
dry_run=True, # Logs batches but doesn't send
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Reference
|
||||||
|
|
||||||
|
| Parameter | Default | Description |
|
||||||
|
|-----------|---------|-------------|
|
||||||
|
| `server_url` | (required) | Telemetry server base URL |
|
||||||
|
| `api_key` | (required) | 64-character hex API key |
|
||||||
|
| `instance_id` | (required) | UUID identifying this instance |
|
||||||
|
| `enabled` | `True` | Enable/disable telemetry |
|
||||||
|
| `submit_interval_seconds` | `300.0` | Background flush interval |
|
||||||
|
| `max_queue_size` | `1000` | Max events in memory queue |
|
||||||
|
| `batch_size` | `100` | Events per batch (server max) |
|
||||||
|
| `request_timeout_seconds` | `10.0` | HTTP request timeout |
|
||||||
|
| `prediction_cache_ttl_seconds` | `21600.0` | Prediction cache TTL (6h) |
|
||||||
|
| `dry_run` | `False` | Log but don't send |
|
||||||
|
| `max_retries` | `3` | Retries on failure |
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MPL-2.0
|
||||||
49
pyproject.toml
Normal file
49
pyproject.toml
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
[project]
|
||||||
|
name = "mosaicstack-telemetry"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Python client SDK for Mosaic Stack Telemetry"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
license = "MPL-2.0"
|
||||||
|
dependencies = [
|
||||||
|
"httpx>=0.25.0",
|
||||||
|
"pydantic>=2.5.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"pytest>=8.0",
|
||||||
|
"pytest-cov>=5.0",
|
||||||
|
"pytest-asyncio>=0.24",
|
||||||
|
"ruff>=0.14.0",
|
||||||
|
"mypy>=1.15",
|
||||||
|
"respx>=0.21.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["src/mosaicstack_telemetry"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
addopts = "--cov=src/mosaicstack_telemetry --cov-report=term-missing --cov-fail-under=85"
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 100
|
||||||
|
target-version = "py310"
|
||||||
|
src = ["src", "tests"]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "N", "W", "B", "UP", "S", "A", "C4", "DTZ", "T20", "RUF"]
|
||||||
|
|
||||||
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
"tests/**" = ["S101"]
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.10"
|
||||||
|
strict = true
|
||||||
|
plugins = ["pydantic.mypy"]
|
||||||
|
mypy_path = "src"
|
||||||
96
scripts/validate_schema.py
Normal file
96
scripts/validate_schema.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Validate that the SDK types match the expected schema."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
RepoSizeCategory,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Validate schema by generating JSON schema for key models.

    Returns 0 when every enum matches its expected wire values, 1 otherwise.
    """
    errors: list[str] = []

    # Each entry: (display name, expected wire values, enum class under test).
    # Order matters: it fixes the order of reported errors.
    enum_expectations = [
        (
            "TaskType",
            {
                "planning", "implementation", "code_review", "testing",
                "debugging", "refactoring", "documentation", "configuration",
                "security_audit", "unknown",
            },
            TaskType,
        ),
        ("Complexity", {"low", "medium", "high", "critical"}, Complexity),
        (
            "Harness",
            {
                "claude_code", "opencode", "kilo_code", "aider",
                "api_direct", "ollama_local", "custom", "unknown",
            },
            Harness,
        ),
        (
            "Provider",
            {
                "anthropic", "openai", "openrouter", "ollama",
                "google", "mistral", "custom", "unknown",
            },
            Provider,
        ),
        ("QualityGate", {"build", "lint", "test", "coverage", "typecheck", "security"}, QualityGate),
        ("Outcome", {"success", "failure", "partial", "timeout"}, Outcome),
        ("RepoSizeCategory", {"tiny", "small", "medium", "large", "huge"}, RepoSizeCategory),
    ]
    for name, expected, enum_cls in enum_expectations:
        actual = {member.value for member in enum_cls}
        if actual != expected:
            # Symmetric difference shows exactly which values are extra/missing.
            errors.append(f"{name} mismatch: {actual ^ expected}")

    # Dump JSON schemas for the models that cross the wire.
    for index, (title, model) in enumerate(
        (
            ("TaskCompletionEvent", TaskCompletionEvent),
            ("PredictionQuery", PredictionQuery),
            ("PredictionResponse", PredictionResponse),
        )
    ):
        if index:
            print()
        print(f"{title} schema:")
        print(json.dumps(model.model_json_schema(), indent=2))

    if errors:
        print("\nVALIDATION ERRORS:")
        for error in errors:
            print(f"  - {error}")
        return 1

    print("\nAll schema validations passed.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
69
src/mosaicstack_telemetry/__init__.py
Normal file
69
src/mosaicstack_telemetry/__init__.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
"""Mosaic Stack Telemetry — Python client SDK.
|
||||||
|
|
||||||
|
A lightweight client for reporting AI coding task-completion telemetry
|
||||||
|
and querying crowd-sourced predictions from a Mosaic Stack Telemetry server.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.client import TelemetryClient
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.event_builder import EventBuilder
|
||||||
|
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||||
|
from mosaicstack_telemetry.queue import EventQueue
|
||||||
|
from mosaicstack_telemetry.types.common import (
|
||||||
|
BatchEventRequest,
|
||||||
|
BatchEventResponse,
|
||||||
|
BatchEventResult,
|
||||||
|
TelemetryError,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
RepoSizeCategory,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
CorrectionFactors,
|
||||||
|
PredictionData,
|
||||||
|
PredictionMetadata,
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
QualityPrediction,
|
||||||
|
TokenDistribution,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Keep in sync with the [project] version in pyproject.toml.
__version__ = "0.1.0"

# Public API surface: the names exported by `from mosaicstack_telemetry import *`
# and the set tools treat as supported for external use.
__all__ = [
    # Client
    "TelemetryClient",
    "TelemetryConfig",
    "EventBuilder",
    "EventQueue",
    "PredictionCache",
    # Types - Events
    "TaskCompletionEvent",
    "TaskType",
    "Complexity",
    "Harness",
    "Provider",
    "QualityGate",
    "Outcome",
    "RepoSizeCategory",
    # Types - Predictions
    "PredictionQuery",
    "PredictionResponse",
    "PredictionData",
    "PredictionMetadata",
    "TokenDistribution",
    "CorrectionFactors",
    "QualityPrediction",
    # Types - Common
    "BatchEventRequest",
    "BatchEventResponse",
    "BatchEventResult",
    "TelemetryError",
]
|
||||||
109
src/mosaicstack_telemetry/_async.py
Normal file
109
src/mosaicstack_telemetry/_async.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
"""Asynchronous submitter using asyncio.Task for periodic flushing."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.submitter import submit_batch_async
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.queue import EventQueue
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncSubmitter:
    """Periodic event submitter using asyncio.Task and httpx.AsyncClient.

    Owns a background task that wakes every ``submit_interval_seconds`` and
    drains the shared :class:`EventQueue` in batches via ``submit_batch_async``.
    Not thread-safe; intended to run entirely inside one event loop.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        # Lazily created in start(); None whenever the submitter is stopped.
        self._client: httpx.AsyncClient | None = None
        # Background task running _loop(); None when not started.
        self._task: asyncio.Task[None] | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    async def start(self) -> None:
        """Start the periodic flush loop. Idempotent while running."""
        if self._running:
            return
        self._client = httpx.AsyncClient()
        self._running = True
        self._task = asyncio.create_task(self._loop())
        logger.info(
            "Async submitter started (interval=%.1fs)",
            self._config.submit_interval_seconds,
        )

    async def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush."""
        if not self._running:
            return
        self._running = False
        if self._task is not None:
            # NOTE(review): cancelling here can interrupt a flush that has
            # already drained events but not yet submitted or put them back —
            # those events would be lost. Confirm drain/submit guard this.
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            self._task = None

        # Final flush
        await self.flush()

        if self._client is not None:
            await self._client.aclose()
            self._client = None
        logger.info("Async submitter stopped")

    async def flush(self) -> None:
        """Flush all queued events immediately.

        Drains the queue batch by batch; on a transport failure the drained
        batch is re-queued and flushing stops until the next interval.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break

            client = self._client
            if client is None:
                # Not started (or already stopped): use a short-lived client
                # so flush() still works stand-alone.
                client = httpx.AsyncClient()
                try:
                    result = await submit_batch_async(client, self._config, events)
                finally:
                    await client.aclose()
            else:
                result = await submit_batch_async(client, self._config, events)

            if result is None:
                # Transport-level failure: keep the events for a later retry.
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break

            if result.rejected > 0:
                # Server-rejected events are dropped deliberately; only log.
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    async def _loop(self) -> None:
        """Periodic flush loop: sleep an interval, then flush, until stopped."""
        while self._running:
            try:
                await asyncio.sleep(self._config.submit_interval_seconds)
                if self._running:
                    await self.flush()
            except asyncio.CancelledError:
                break
            except Exception:
                # Never let a transient error kill the background loop.
                logger.exception("Error during periodic async flush")
|
||||||
118
src/mosaicstack_telemetry/_sync.py
Normal file
118
src/mosaicstack_telemetry/_sync.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
"""Synchronous submitter using threading.Timer for periodic flushing."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.submitter import submit_batch_sync
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.queue import EventQueue
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
class SyncSubmitter:
    """Periodic event submitter using threading.Timer and httpx.Client.

    A one-shot daemon timer is re-armed after every tick, so flushes run on a
    short-lived timer thread rather than a dedicated worker thread.
    """

    def __init__(self, config: TelemetryConfig, queue: EventQueue) -> None:
        self._config = config
        self._queue = queue
        # Shared HTTP client; created in start(), closed in stop().
        self._client: httpx.Client | None = None
        # Currently-armed one-shot timer, re-armed by _schedule_next().
        self._timer: threading.Timer | None = None
        self._running = False
        # Guards _client/_timer/_running against the timer thread.
        self._lock = threading.Lock()

    @property
    def is_running(self) -> bool:
        """Whether the submitter is currently running."""
        return self._running

    def start(self) -> None:
        """Start the periodic flush loop. Idempotent while running."""
        with self._lock:
            if self._running:
                return
            self._client = httpx.Client()
            self._running = True
            self._schedule_next()
        logger.info("Sync submitter started (interval=%.1fs)", self._config.submit_interval_seconds)

    def stop(self) -> None:
        """Stop the periodic flush loop and perform a final flush."""
        with self._lock:
            if not self._running:
                return
            self._running = False
            if self._timer is not None:
                self._timer.cancel()
                self._timer = None

        # Final flush outside the lock to avoid deadlock
        self.flush()

        with self._lock:
            if self._client is not None:
                self._client.close()
                self._client = None
        logger.info("Sync submitter stopped")

    def flush(self) -> None:
        """Flush all queued events immediately.

        Drains the queue batch by batch; on a transport failure the drained
        batch is re-queued and flushing stops until the next tick.
        """
        while not self._queue.is_empty:
            events = self._queue.drain(self._config.batch_size)
            if not events:
                break

            # NOTE(review): the lock is held across the network submit, so a
            # concurrent stop() blocks until the in-flight request finishes —
            # confirm this is the intended trade-off vs. copying the client
            # reference out and submitting unlocked.
            with self._lock:
                client = self._client
                if client is None:
                    # Create a temporary client for the flush
                    client = httpx.Client()
                    try:
                        result = submit_batch_sync(client, self._config, events)
                    finally:
                        client.close()
                else:
                    result = submit_batch_sync(client, self._config, events)

            if result is None:
                # Submission failed, re-queue events
                logger.warning("Batch submission failed, re-queuing %d events", len(events))
                self._queue.put_back(events)
                break

            if result.rejected > 0:
                # Server-rejected events are dropped deliberately; only log.
                logger.warning(
                    "Batch partially rejected: %d accepted, %d rejected",
                    result.accepted,
                    result.rejected,
                )
            else:
                logger.debug("Batch submitted: %d events accepted", result.accepted)

    def _schedule_next(self) -> None:
        """Schedule the next flush iteration."""
        if not self._running:
            return
        self._timer = threading.Timer(self._config.submit_interval_seconds, self._tick)
        # Daemon thread so a forgotten stop() never blocks interpreter exit.
        self._timer.daemon = True
        self._timer.start()

    def _tick(self) -> None:
        """Timer callback: flush and reschedule."""
        if not self._running:
            return
        try:
            self.flush()
        except Exception:
            logger.exception("Error during periodic flush")
        finally:
            # Re-arm only if stop() hasn't run meanwhile.
            with self._lock:
                if self._running:
                    self._schedule_next()
|
||||||
196
src/mosaicstack_telemetry/client.py
Normal file
196
src/mosaicstack_telemetry/client.py
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
"""Main TelemetryClient — the public entry point for the SDK."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry._async import AsyncSubmitter
|
||||||
|
from mosaicstack_telemetry._sync import SyncSubmitter
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||||
|
from mosaicstack_telemetry.queue import EventQueue
|
||||||
|
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
class TelemetryClient:
    """Main client for Mosaic Stack Telemetry.

    Supports both sync and async usage patterns:

    **Sync (threading-based):**

        client = TelemetryClient(config)
        client.start()
        client.track(event)
        client.stop()

    **Async (asyncio-based):**

        client = TelemetryClient(config)
        await client.start_async()
        client.track(event)
        await client.stop_async()

    **Context managers:**

        with TelemetryClient(config) as client:
            client.track(event)

        async with TelemetryClient(config) as client:
            client.track(event)
    """

    def __init__(self, config: TelemetryConfig) -> None:
        """Create a client.

        Invalid configuration is logged (when telemetry is enabled) but never
        raises: telemetry must not break the host application.
        """
        errors = config.validate()
        if errors and config.enabled:
            logger.warning("Telemetry config validation errors: %s", "; ".join(errors))

        self._config = config
        self._queue = EventQueue(max_size=config.max_queue_size)
        self._prediction_cache = PredictionCache(ttl_seconds=config.prediction_cache_ttl_seconds)
        self._sync_submitter: SyncSubmitter | None = None
        self._async_submitter: AsyncSubmitter | None = None

    def start(self) -> None:
        """Start background submission using threading.Timer loop.

        Idempotent: a second call while already running is a no-op.
        (Previously each call created a fresh SyncSubmitter, orphaning the
        running one and leaving duplicate submission timers alive.)
        """
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping start")
            return
        if self._sync_submitter is not None and self._sync_submitter.is_running:
            return
        self._sync_submitter = SyncSubmitter(self._config, self._queue)
        self._sync_submitter.start()

    async def start_async(self) -> None:
        """Start with asyncio.Task for async contexts.

        Idempotent: a second call while already running is a no-op (same
        orphaned-submitter bug as start()).
        """
        if not self._config.enabled:
            logger.info("Telemetry disabled, skipping async start")
            return
        if self._async_submitter is not None and self._async_submitter.is_running:
            return
        self._async_submitter = AsyncSubmitter(self._config, self._queue)
        await self._async_submitter.start()

    def stop(self) -> None:
        """Stop background submission, flush remaining events synchronously."""
        if self._sync_submitter is not None:
            self._sync_submitter.stop()
            self._sync_submitter = None

    async def stop_async(self) -> None:
        """Async stop and flush."""
        if self._async_submitter is not None:
            await self._async_submitter.stop()
            self._async_submitter = None

    def track(self, event: TaskCompletionEvent) -> None:
        """Queue an event for submission. Always synchronous. Never blocks or throws.

        If telemetry is disabled, the event is silently dropped.
        """
        try:
            if not self._config.enabled:
                return
            self._queue.put(event)
            logger.debug("Event queued: %s", event.event_id)
        except Exception:
            # track() is called from host-application code paths; swallow
            # everything so telemetry can never crash the caller.
            logger.exception("Unexpected error in track()")

    def get_prediction(self, query: PredictionQuery) -> PredictionResponse | None:
        """Get a cached prediction. Returns None if not cached or expired."""
        return self._prediction_cache.get(query)

    def _prediction_endpoint(self) -> str:
        """Full URL of the batch-prediction endpoint."""
        return f"{self._config.server_url}/v1/predictions/batch"

    def _handle_prediction_response(
        self,
        queries: list[PredictionQuery],
        response: httpx.Response,
    ) -> None:
        """Shared result handling for the sync and async refresh paths.

        Parses a 200 response and caches one PredictionResponse per query;
        logs and returns on any non-200 status.
        """
        if response.status_code != 200:
            logger.warning(
                "Prediction refresh failed with status %d",
                response.status_code,
            )
            return
        results = response.json().get("results", [])
        if len(results) != len(queries):
            # zip() below would silently truncate; surface the mismatch.
            logger.warning(
                "Prediction result count mismatch: %d queries, %d results",
                len(queries),
                len(results),
            )
        for query, result_data in zip(queries, results):
            self._prediction_cache.put(query, PredictionResponse.model_validate(result_data))
        logger.debug("Refreshed %d predictions", len(results))

    def refresh_predictions_sync(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server synchronously."""
        if not queries:
            return
        body = {"queries": [q.model_dump(mode="json") for q in queries]}

        try:
            with httpx.Client() as client:
                response = client.post(
                    self._prediction_endpoint(),
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
            self._handle_prediction_response(queries, response)
        except Exception:
            # Best-effort: a failed refresh just leaves the cache stale.
            logger.exception("Error refreshing predictions")

    async def refresh_predictions(self, queries: list[PredictionQuery]) -> None:
        """Fetch fresh predictions from server asynchronously."""
        if not queries:
            return
        body = {"queries": [q.model_dump(mode="json") for q in queries]}

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    self._prediction_endpoint(),
                    json=body,
                    headers={"User-Agent": self._config.user_agent},
                    timeout=self._config.request_timeout_seconds,
                )
            self._handle_prediction_response(queries, response)
        except Exception:
            # Best-effort: a failed refresh just leaves the cache stale.
            logger.exception("Error refreshing predictions")

    @property
    def queue_size(self) -> int:
        """Number of events currently in the queue."""
        return self._queue.size

    @property
    def is_running(self) -> bool:
        """Whether background submission is active."""
        if self._sync_submitter is not None:
            return self._sync_submitter.is_running
        if self._async_submitter is not None:
            return self._async_submitter.is_running
        return False

    # Sync context manager
    def __enter__(self) -> TelemetryClient:
        self.start()
        return self

    def __exit__(self, *exc: Any) -> None:
        self.stop()

    # Async context manager
    async def __aenter__(self) -> TelemetryClient:
        await self.start_async()
        return self

    async def __aexit__(self, *exc: Any) -> None:
        await self.stop_async()
|
||||||
91
src/mosaicstack_telemetry/config.py
Normal file
91
src/mosaicstack_telemetry/config.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
"""Telemetry client configuration."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Accepted shapes for credentials: a 64-hex-digit API key and a canonical
# 8-4-4-4-12 UUID instance id.
_HEX_64_RE = re.compile(r"^[0-9a-fA-F]{64}$")
_UUID_RE = re.compile(
    r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
)


@dataclass
class TelemetryConfig:
    """Configuration for the telemetry client.

    Values can be provided directly or loaded from environment variables:
    - MOSAIC_TELEMETRY_ENABLED -> enabled
    - MOSAIC_TELEMETRY_SERVER_URL -> server_url
    - MOSAIC_TELEMETRY_API_KEY -> api_key
    - MOSAIC_TELEMETRY_INSTANCE_ID -> instance_id
    """

    # Connection / identity
    server_url: str = ""
    api_key: str = ""
    instance_id: str = ""
    # Behavior toggles and tuning knobs
    enabled: bool = True
    submit_interval_seconds: float = 300.0
    max_queue_size: int = 1000
    batch_size: int = 100
    request_timeout_seconds: float = 10.0
    prediction_cache_ttl_seconds: float = 21600.0
    dry_run: bool = False
    max_retries: int = 3
    user_agent: str = "mosaicstack-telemetry-python/0.1.0"

    def __post_init__(self) -> None:
        """Apply environment-variable overrides, then normalize the URL."""
        env = os.environ

        # The enabled flag from the environment always wins over the
        # constructor argument (acts as a kill switch).
        flag = env.get("MOSAIC_TELEMETRY_ENABLED")
        if flag is not None:
            self.enabled = flag.lower() in ("1", "true", "yes")

        # String settings: the env var only fills in values left empty.
        for attr, var in (
            ("server_url", "MOSAIC_TELEMETRY_SERVER_URL"),
            ("api_key", "MOSAIC_TELEMETRY_API_KEY"),
            ("instance_id", "MOSAIC_TELEMETRY_INSTANCE_ID"),
        ):
            if not getattr(self, attr):
                override = env.get(var)
                if override:
                    setattr(self, attr, override)

        # Normalize: no trailing slash, so endpoint joins are predictable.
        self.server_url = self.server_url.rstrip("/")

    def validate(self) -> list[str]:
        """Validate configuration and return list of errors (empty if valid)."""
        problems: list[str] = []

        if not self.server_url:
            problems.append("server_url is required")
        elif not self.server_url.startswith(("http://", "https://")):
            problems.append("server_url must start with http:// or https://")

        if not self.api_key:
            problems.append("api_key is required")
        elif _HEX_64_RE.match(self.api_key) is None:
            problems.append("api_key must be a 64-character hex string")

        if not self.instance_id:
            problems.append("instance_id is required")
        elif _UUID_RE.match(self.instance_id) is None:
            problems.append("instance_id must be a valid UUID string")

        # Numeric bounds, checked in a fixed order so error output is stable.
        for ok, message in (
            (self.submit_interval_seconds > 0, "submit_interval_seconds must be positive"),
            (self.max_queue_size > 0, "max_queue_size must be positive"),
            (1 <= self.batch_size <= 100, "batch_size must be between 1 and 100"),
            (self.request_timeout_seconds > 0, "request_timeout_seconds must be positive"),
        ):
            if not ok:
                problems.append(message)

        return problems
|
||||||
207
src/mosaicstack_telemetry/event_builder.py
Normal file
207
src/mosaicstack_telemetry/event_builder.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""Convenience builder for constructing TaskCompletionEvent instances."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
RepoSizeCategory,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EventBuilder:
|
||||||
|
"""Fluent builder for TaskCompletionEvent.
|
||||||
|
|
||||||
|
Provides a convenient way to construct events with sensible defaults
|
||||||
|
and a chainable API.
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
event = (
|
||||||
|
EventBuilder(instance_id="...")
|
||||||
|
.task_type(TaskType.IMPLEMENTATION)
|
||||||
|
.model("claude-sonnet-4-20250514")
|
||||||
|
.provider(Provider.ANTHROPIC)
|
||||||
|
.harness(Harness.CLAUDE_CODE)
|
||||||
|
.complexity(Complexity.MEDIUM)
|
||||||
|
.outcome(Outcome.SUCCESS)
|
||||||
|
.duration_ms(45000)
|
||||||
|
.tokens(estimated_in=1000, estimated_out=500, actual_in=1100, actual_out=480)
|
||||||
|
.cost(estimated=50000, actual=48000)
|
||||||
|
.quality(passed=True, gates_run=[QualityGate.LINT, QualityGate.TEST])
|
||||||
|
.context(compactions=0, rotations=0, utilization=0.3)
|
||||||
|
.build()
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, instance_id: str | UUID) -> None:
|
||||||
|
self._instance_id = UUID(str(instance_id))
|
||||||
|
self._event_id: UUID = uuid4()
|
||||||
|
self._timestamp: datetime = datetime.now(timezone.utc)
|
||||||
|
self._task_duration_ms: int = 0
|
||||||
|
self._task_type: TaskType = TaskType.UNKNOWN
|
||||||
|
self._complexity: Complexity = Complexity.MEDIUM
|
||||||
|
self._harness: Harness = Harness.UNKNOWN
|
||||||
|
self._model: str = "unknown"
|
||||||
|
self._provider: Provider = Provider.UNKNOWN
|
||||||
|
self._estimated_input_tokens: int = 0
|
||||||
|
self._estimated_output_tokens: int = 0
|
||||||
|
self._actual_input_tokens: int = 0
|
||||||
|
self._actual_output_tokens: int = 0
|
||||||
|
self._estimated_cost_usd_micros: int = 0
|
||||||
|
self._actual_cost_usd_micros: int = 0
|
||||||
|
self._quality_gate_passed: bool = False
|
||||||
|
self._quality_gates_run: list[QualityGate] = []
|
||||||
|
self._quality_gates_failed: list[QualityGate] = []
|
||||||
|
self._context_compactions: int = 0
|
||||||
|
self._context_rotations: int = 0
|
||||||
|
self._context_utilization_final: float = 0.0
|
||||||
|
self._outcome: Outcome = Outcome.FAILURE
|
||||||
|
self._retry_count: int = 0
|
||||||
|
self._language: str | None = None
|
||||||
|
self._repo_size_category: RepoSizeCategory | None = None
|
||||||
|
|
||||||
|
def event_id(self, value: str | UUID) -> EventBuilder:
|
||||||
|
"""Set a specific event ID (default: auto-generated UUID)."""
|
||||||
|
self._event_id = UUID(str(value))
|
||||||
|
return self
|
||||||
|
|
||||||
|
def timestamp(self, value: datetime) -> EventBuilder:
|
||||||
|
"""Set the event timestamp (default: now UTC)."""
|
||||||
|
self._timestamp = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def task_type(self, value: TaskType) -> EventBuilder:
|
||||||
|
"""Set the task type."""
|
||||||
|
self._task_type = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def complexity_level(self, value: Complexity) -> EventBuilder:
|
||||||
|
"""Set the complexity level."""
|
||||||
|
self._complexity = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def harness_type(self, value: Harness) -> EventBuilder:
|
||||||
|
"""Set the harness type."""
|
||||||
|
self._harness = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def model(self, value: str) -> EventBuilder:
|
||||||
|
"""Set the model name."""
|
||||||
|
self._model = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def provider(self, value: Provider) -> EventBuilder:
|
||||||
|
"""Set the provider."""
|
||||||
|
self._provider = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def duration_ms(self, value: int) -> EventBuilder:
|
||||||
|
"""Set the task duration in milliseconds."""
|
||||||
|
self._task_duration_ms = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def tokens(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
estimated_in: int = 0,
|
||||||
|
estimated_out: int = 0,
|
||||||
|
actual_in: int = 0,
|
||||||
|
actual_out: int = 0,
|
||||||
|
) -> EventBuilder:
|
||||||
|
"""Set token counts."""
|
||||||
|
self._estimated_input_tokens = estimated_in
|
||||||
|
self._estimated_output_tokens = estimated_out
|
||||||
|
self._actual_input_tokens = actual_in
|
||||||
|
self._actual_output_tokens = actual_out
|
||||||
|
return self
|
||||||
|
|
||||||
|
def cost(self, *, estimated: int = 0, actual: int = 0) -> EventBuilder:
|
||||||
|
"""Set cost in USD micros."""
|
||||||
|
self._estimated_cost_usd_micros = estimated
|
||||||
|
self._actual_cost_usd_micros = actual
|
||||||
|
return self
|
||||||
|
|
||||||
|
def quality(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
passed: bool,
|
||||||
|
gates_run: list[QualityGate] | None = None,
|
||||||
|
gates_failed: list[QualityGate] | None = None,
|
||||||
|
) -> EventBuilder:
|
||||||
|
"""Set quality gate results."""
|
||||||
|
self._quality_gate_passed = passed
|
||||||
|
self._quality_gates_run = gates_run or []
|
||||||
|
self._quality_gates_failed = gates_failed or []
|
||||||
|
return self
|
||||||
|
|
||||||
|
def context(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
compactions: int = 0,
|
||||||
|
rotations: int = 0,
|
||||||
|
utilization: float = 0.0,
|
||||||
|
) -> EventBuilder:
|
||||||
|
"""Set context window metrics."""
|
||||||
|
self._context_compactions = compactions
|
||||||
|
self._context_rotations = rotations
|
||||||
|
self._context_utilization_final = utilization
|
||||||
|
return self
|
||||||
|
|
||||||
|
def outcome_value(self, value: Outcome) -> EventBuilder:
|
||||||
|
"""Set the task outcome."""
|
||||||
|
self._outcome = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def retry_count(self, value: int) -> EventBuilder:
|
||||||
|
"""Set the retry count."""
|
||||||
|
self._retry_count = value
|
||||||
|
return self
|
||||||
|
|
||||||
|
def language(self, value: str | None) -> EventBuilder:
    """Record the programming language (or None); returns the builder for chaining."""
    self._language = value
    return self
|
||||||
|
|
||||||
|
def repo_size(self, value: RepoSizeCategory | None) -> EventBuilder:
    """Record the repository size category (or None); returns the builder for chaining."""
    self._repo_size_category = value
    return self
|
||||||
|
|
||||||
|
def build(self) -> TaskCompletionEvent:
    """Assemble a TaskCompletionEvent from every value accumulated on the builder.

    Validation of ranges and types is delegated to the pydantic model.
    """
    fields = {
        "instance_id": self._instance_id,
        "event_id": self._event_id,
        "timestamp": self._timestamp,
        "task_duration_ms": self._task_duration_ms,
        "task_type": self._task_type,
        "complexity": self._complexity,
        "harness": self._harness,
        "model": self._model,
        "provider": self._provider,
        "estimated_input_tokens": self._estimated_input_tokens,
        "estimated_output_tokens": self._estimated_output_tokens,
        "actual_input_tokens": self._actual_input_tokens,
        "actual_output_tokens": self._actual_output_tokens,
        "estimated_cost_usd_micros": self._estimated_cost_usd_micros,
        "actual_cost_usd_micros": self._actual_cost_usd_micros,
        "quality_gate_passed": self._quality_gate_passed,
        "quality_gates_run": self._quality_gates_run,
        "quality_gates_failed": self._quality_gates_failed,
        "context_compactions": self._context_compactions,
        "context_rotations": self._context_rotations,
        "context_utilization_final": self._context_utilization_final,
        "outcome": self._outcome,
        "retry_count": self._retry_count,
        "language": self._language,
        "repo_size_category": self._repo_size_category,
    }
    return TaskCompletionEvent(**fields)
|
||||||
56
src/mosaicstack_telemetry/prediction_cache.py
Normal file
56
src/mosaicstack_telemetry/prediction_cache.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
"""Thread-safe prediction cache with TTL."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.predictions import PredictionQuery, PredictionResponse
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_key(query: PredictionQuery) -> str:
|
||||||
|
"""Generate a deterministic cache key from a prediction query."""
|
||||||
|
return f"{query.task_type.value}:{query.model}:{query.provider.value}:{query.complexity.value}"
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionCache:
    """Thread-safe in-memory cache of prediction responses with a per-entry TTL.

    Entries expire ``ttl_seconds`` after insertion. Expired entries are removed
    lazily on lookup rather than by a background sweeper, so ``size`` may count
    entries that are already stale. Every operation holds a single lock.
    """

    def __init__(self, ttl_seconds: float = 21600.0) -> None:
        self._ttl = ttl_seconds
        self._store: dict[str, tuple[PredictionResponse, float]] = {}
        self._lock = threading.Lock()

    def get(self, query: PredictionQuery) -> PredictionResponse | None:
        """Return the cached response for *query*, or None if absent or expired."""
        key = _cache_key(query)
        with self._lock:
            cached = self._store.get(key)
            if cached is None:
                return None
            response, deadline = cached
            if time.monotonic() <= deadline:
                return response
            # Stale: evict lazily so dead entries do not linger forever.
            del self._store[key]
            return None

    def put(self, query: PredictionQuery, response: PredictionResponse) -> None:
        """Cache *response* for *query*, expiring after the configured TTL."""
        deadline = time.monotonic() + self._ttl
        with self._lock:
            self._store[_cache_key(query)] = (response, deadline)

    def clear(self) -> None:
        """Invalidate every cached prediction."""
        with self._lock:
            self._store.clear()

    @property
    def size(self) -> int:
        """Entry count, including entries that may already have expired."""
        with self._lock:
            return len(self._store)
|
||||||
70
src/mosaicstack_telemetry/queue.py
Normal file
70
src/mosaicstack_telemetry/queue.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Thread-safe bounded event queue."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
from collections import deque
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
class EventQueue:
    """Thread-safe bounded FIFO queue of telemetry events.

    When the queue is full, the oldest events are evicted (FIFO eviction) to
    admit new ones, so producers never block and never raise.
    """

    def __init__(self, max_size: int = 1000) -> None:
        self._max_size = max_size
        self._deque: deque[TaskCompletionEvent] = deque(maxlen=max_size)
        self._lock = threading.Lock()

    def put(self, event: TaskCompletionEvent) -> None:
        """Append *event* without blocking.

        A full queue silently drops its oldest event; this is logged so the
        data loss is at least observable.
        """
        with self._lock:
            if len(self._deque) >= self._max_size:
                # deque(maxlen=...) evicts the left end automatically on append.
                logger.warning(
                    "Event queue full (%d items), evicting oldest event",
                    self._max_size,
                )
            self._deque.append(event)

    def drain(self, max_items: int) -> list[TaskCompletionEvent]:
        """Pop and return up to *max_items* events from the front of the queue."""
        with self._lock:
            take = min(max_items, len(self._deque))
            return [self._deque.popleft() for _ in range(take)]

    def put_back(self, events: list[TaskCompletionEvent]) -> None:
        """Re-insert *events* at the front of the queue (retry path).

        Order is preserved so they drain first next time. Only as many events
        as currently fit are restored; any excess is dropped.
        """
        with self._lock:
            room = self._max_size - len(self._deque)
            for event in reversed(events[:room]):
                self._deque.appendleft(event)

    @property
    def size(self) -> int:
        """Current number of queued events."""
        with self._lock:
            return len(self._deque)

    @property
    def is_empty(self) -> bool:
        """True when no events are queued."""
        with self._lock:
            return not self._deque
|
||||||
205
src/mosaicstack_telemetry/submitter.py
Normal file
205
src/mosaicstack_telemetry/submitter.py
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
"""Batch submission logic with retry and backoff."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import random
|
||||||
|
import time
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.common import BatchEventRequest, BatchEventResponse
|
||||||
|
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
|
||||||
|
logger = logging.getLogger("mosaicstack_telemetry")
|
||||||
|
|
||||||
|
|
||||||
|
def _backoff_delay(attempt: int, base: float = 1.0, maximum: float = 60.0) -> float:
|
||||||
|
"""Calculate exponential backoff with jitter."""
|
||||||
|
delay = min(base * (2**attempt), maximum)
|
||||||
|
jitter = random.uniform(0, delay * 0.5) # noqa: S311
|
||||||
|
return delay + jitter
|
||||||
|
|
||||||
|
|
||||||
|
def submit_batch_sync(
    client: httpx.Client,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events synchronously with retry logic.

    Retries on 429 (honouring a numeric ``Retry-After``), timeouts, network
    errors, and unexpected statuses, using exponential backoff with jitter
    between attempts. Gives up immediately on 403 (non-retryable auth failure).
    In dry-run mode no request is sent and a synthetic all-accepted response
    is returned.

    Returns the BatchEventResponse on success, or None if all retries failed.
    """
    url = f"{config.server_url}/v1/events/batch"
    request_body = BatchEventRequest(events=events)

    for attempt in range(config.max_retries + 1):
        try:
            if config.dry_run:
                logger.info(
                    "[DRY RUN] Would submit batch of %d events to %s",
                    len(events),
                    url,
                )
                return BatchEventResponse(
                    accepted=len(events),
                    rejected=0,
                    results=[],
                )

            response = client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )

            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())

            if response.status_code == 429:
                retry_after = response.headers.get("Retry-After")
                try:
                    # Retry-After may be an HTTP-date rather than delay-seconds
                    # (RFC 9110); fall back to backoff instead of crashing on a
                    # non-numeric value.
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                time.sleep(delay)
                continue

            if response.status_code == 403:
                # Non-retryable: the credentials themselves are wrong.
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None

            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )

        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )

        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            time.sleep(delay)

    logger.error(
        "All %d attempts failed for batch of %d events",
        config.max_retries + 1,
        len(events),
    )
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def submit_batch_async(
    client: httpx.AsyncClient,
    config: TelemetryConfig,
    events: list[TaskCompletionEvent],
) -> BatchEventResponse | None:
    """Submit a batch of events asynchronously with retry logic.

    Async twin of :func:`submit_batch_sync`: retries on 429 (honouring a
    numeric ``Retry-After``), timeouts, network errors, and unexpected
    statuses with exponential backoff; gives up immediately on 403. In
    dry-run mode no request is sent.

    Returns the BatchEventResponse on success, or None if all retries failed.
    """
    import asyncio

    url = f"{config.server_url}/v1/events/batch"
    request_body = BatchEventRequest(events=events)

    for attempt in range(config.max_retries + 1):
        try:
            if config.dry_run:
                logger.info(
                    "[DRY RUN] Would submit batch of %d events to %s",
                    len(events),
                    url,
                )
                return BatchEventResponse(
                    accepted=len(events),
                    rejected=0,
                    results=[],
                )

            response = await client.post(
                url,
                json=request_body.model_dump(mode="json"),
                headers={
                    "Authorization": f"Bearer {config.api_key}",
                    "Content-Type": "application/json",
                    "User-Agent": config.user_agent,
                },
                timeout=config.request_timeout_seconds,
            )

            if response.status_code == 202:
                return BatchEventResponse.model_validate(response.json())

            if response.status_code == 429:
                retry_after = response.headers.get("Retry-After")
                try:
                    # Retry-After may be an HTTP-date rather than delay-seconds
                    # (RFC 9110); fall back to backoff instead of crashing on a
                    # non-numeric value.
                    delay = float(retry_after) if retry_after else _backoff_delay(attempt)
                except ValueError:
                    delay = _backoff_delay(attempt)
                logger.warning(
                    "Rate limited (429), retrying after %.1f seconds (attempt %d/%d)",
                    delay,
                    attempt + 1,
                    config.max_retries + 1,
                )
                await asyncio.sleep(delay)
                continue

            if response.status_code == 403:
                # Non-retryable: the credentials themselves are wrong.
                logger.error(
                    "Authentication failed (403): API key may not match instance_id"
                )
                return None

            logger.warning(
                "Unexpected status %d from server (attempt %d/%d): %s",
                response.status_code,
                attempt + 1,
                config.max_retries + 1,
                response.text[:200],
            )

        except httpx.TimeoutException:
            logger.warning(
                "Request timed out (attempt %d/%d)",
                attempt + 1,
                config.max_retries + 1,
            )
        except httpx.HTTPError as exc:
            logger.warning(
                "Network error (attempt %d/%d): %s",
                attempt + 1,
                config.max_retries + 1,
                exc,
            )

        if attempt < config.max_retries:
            delay = _backoff_delay(attempt)
            logger.debug("Backing off for %.1f seconds before retry", delay)
            await asyncio.sleep(delay)

    logger.error(
        "All %d attempts failed for batch of %d events",
        config.max_retries + 1,
        len(events),
    )
    return None
|
||||||
49
src/mosaicstack_telemetry/types/__init__.py
Normal file
49
src/mosaicstack_telemetry/types/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
"""Mosaic Stack Telemetry type definitions."""
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.common import (
|
||||||
|
BatchEventRequest,
|
||||||
|
BatchEventResponse,
|
||||||
|
BatchEventResult,
|
||||||
|
TelemetryError,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
RepoSizeCategory,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
CorrectionFactors,
|
||||||
|
PredictionData,
|
||||||
|
PredictionMetadata,
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
QualityPrediction,
|
||||||
|
TokenDistribution,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public API of the types subpackage; keep entries sorted alphabetically.
__all__ = [
    "BatchEventRequest",
    "BatchEventResponse",
    "BatchEventResult",
    "Complexity",
    "CorrectionFactors",
    "Harness",
    "Outcome",
    "PredictionData",
    "PredictionMetadata",
    "PredictionQuery",
    "PredictionResponse",
    "Provider",
    "QualityGate",
    "QualityPrediction",
    "RepoSizeCategory",
    "TaskCompletionEvent",
    "TaskType",
    "TelemetryError",
    "TokenDistribution",
]
|
||||||
35
src/mosaicstack_telemetry/types/common.py
Normal file
35
src/mosaicstack_telemetry/types/common.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
"""Common types shared across the SDK."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.events import TaskCompletionEvent
|
||||||
|
|
||||||
|
|
||||||
|
class TelemetryError(Exception):
    """Base exception for telemetry client errors.

    Catching this single type covers all SDK-raised errors.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class BatchEventRequest(BaseModel):
    """Request body for batch event submission."""

    # 1-100 events per batch, enforced client-side (presumably mirrors the
    # server's batch limit — confirm against the API contract).
    events: list[TaskCompletionEvent] = Field(min_length=1, max_length=100)
|
||||||
|
|
||||||
|
|
||||||
|
class BatchEventResult(BaseModel):
    """Result for a single event in a batch submission."""

    event_id: UUID
    status: str  # "accepted" or "rejected"
    error: str | None = None  # presumably populated only for rejected events
|
||||||
|
|
||||||
|
|
||||||
|
class BatchEventResponse(BaseModel):
    """Response from the batch event submission endpoint."""

    accepted: int  # number of events the server accepted
    rejected: int  # number of events the server rejected
    results: list[BatchEventResult]  # per-event outcomes (may be empty in dry-run)
|
||||||
122
src/mosaicstack_telemetry/types/events.py
Normal file
122
src/mosaicstack_telemetry/types/events.py
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
"""Task completion event types and enums."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from enum import Enum
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class TaskType(str, Enum):
    """Type of task being performed.

    Values are the wire-format strings serialized into telemetry payloads.
    """

    PLANNING = "planning"
    IMPLEMENTATION = "implementation"
    CODE_REVIEW = "code_review"
    TESTING = "testing"
    DEBUGGING = "debugging"
    REFACTORING = "refactoring"
    DOCUMENTATION = "documentation"
    CONFIGURATION = "configuration"
    SECURITY_AUDIT = "security_audit"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class Complexity(str, Enum):
    """Task complexity level (wire-format string values)."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
||||||
|
|
||||||
|
|
||||||
|
class Harness(str, Enum):
    """AI coding harness used (wire-format string values)."""

    CLAUDE_CODE = "claude_code"
    OPENCODE = "opencode"
    KILO_CODE = "kilo_code"
    AIDER = "aider"
    API_DIRECT = "api_direct"
    OLLAMA_LOCAL = "ollama_local"
    CUSTOM = "custom"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class Provider(str, Enum):
    """AI model provider (wire-format string values)."""

    ANTHROPIC = "anthropic"
    OPENAI = "openai"
    OPENROUTER = "openrouter"
    OLLAMA = "ollama"
    GOOGLE = "google"
    MISTRAL = "mistral"
    CUSTOM = "custom"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
class QualityGate(str, Enum):
    """Quality gate type (wire-format string values)."""

    BUILD = "build"
    LINT = "lint"
    TEST = "test"
    COVERAGE = "coverage"
    TYPECHECK = "typecheck"
    SECURITY = "security"
|
||||||
|
|
||||||
|
|
||||||
|
class Outcome(str, Enum):
    """Task outcome (wire-format string values)."""

    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"
    TIMEOUT = "timeout"
|
||||||
|
|
||||||
|
|
||||||
|
class RepoSizeCategory(str, Enum):
    """Repository size category (wire-format string values; thresholds defined server-side, presumably)."""

    TINY = "tiny"
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    HUGE = "huge"
|
||||||
|
|
||||||
|
|
||||||
|
class TaskCompletionEvent(BaseModel):
    """A single task completion telemetry event.

    Field bounds are validated client-side by pydantic, so out-of-range
    values fail locally rather than being rejected by the server
    (assuming the bounds mirror the ingestion endpoint — verify).
    """

    instance_id: UUID
    event_id: UUID = Field(default_factory=uuid4)  # unique per event
    schema_version: str = "1.0"
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),  # always UTC-aware
    )
    task_duration_ms: int = Field(ge=0, le=86_400_000)  # capped at 24h in ms
    task_type: TaskType
    complexity: Complexity
    harness: Harness
    model: str = Field(min_length=1, max_length=100)
    provider: Provider
    estimated_input_tokens: int = Field(ge=0, le=10_000_000)
    estimated_output_tokens: int = Field(ge=0, le=10_000_000)
    actual_input_tokens: int = Field(ge=0, le=10_000_000)
    actual_output_tokens: int = Field(ge=0, le=10_000_000)
    estimated_cost_usd_micros: int = Field(ge=0, le=100_000_000)  # micros: 1e8 = $100
    actual_cost_usd_micros: int = Field(ge=0, le=100_000_000)
    quality_gate_passed: bool
    quality_gates_run: list[QualityGate] = Field(default_factory=list)
    quality_gates_failed: list[QualityGate] = Field(default_factory=list)
    context_compactions: int = Field(ge=0, le=100)
    context_rotations: int = Field(ge=0, le=50)
    context_utilization_final: float = Field(ge=0.0, le=1.0)  # fraction of window used
    outcome: Outcome
    retry_count: int = Field(ge=0, le=20)
    language: str | None = Field(default=None, max_length=30)
    repo_size_category: RepoSizeCategory | None = None
|
||||||
72
src/mosaicstack_telemetry/types/predictions.py
Normal file
72
src/mosaicstack_telemetry/types/predictions.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
"""Prediction request and response types."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType
|
||||||
|
|
||||||
|
|
||||||
|
class TokenDistribution(BaseModel):
    """Token usage distribution percentiles.

    Values are token counts; ``median`` is the 50th percentile.
    """

    p10: int
    p25: int
    median: int
    p75: int
    p90: int
|
||||||
|
|
||||||
|
|
||||||
|
class CorrectionFactors(BaseModel):
    """Correction factors for estimated vs actual tokens.

    Presumably multiplicative (estimate * factor ~= actual) — confirm
    against the server's definition.
    """

    input: float
    output: float
|
||||||
|
|
||||||
|
|
||||||
|
class QualityPrediction(BaseModel):
    """Quality gate prediction data.

    Rates appear to be fractions in [0, 1] — confirm the server contract.
    """

    gate_pass_rate: float
    success_rate: float
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionData(BaseModel):
    """Full prediction data for a task type/model/provider/complexity combination."""

    input_tokens: TokenDistribution
    output_tokens: TokenDistribution
    # Percentile-name -> value maps, e.g. {"p10": ..., "median": ..., "p90": ...}
    cost_usd_micros: dict[str, int]
    duration_ms: dict[str, int]
    correction_factors: CorrectionFactors
    quality: QualityPrediction
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionMetadata(BaseModel):
    """Metadata about how a prediction was generated."""

    sample_size: int  # number of observations behind the prediction
    fallback_level: int  # 0 presumably means an exact dimension match — verify
    confidence: str  # "none", "low", "medium", "high"
    last_updated: datetime | None = None
    dimensions_matched: dict[str, str | None] | None = None
    fallback_note: str | None = None
    cache_hit: bool = False  # True when served from a local cache, presumably
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionResponse(BaseModel):
    """Response from the prediction endpoint."""

    # None presumably means the server had no usable data even after fallback.
    prediction: PredictionData | None = None
    metadata: PredictionMetadata
|
||||||
|
|
||||||
|
|
||||||
|
class PredictionQuery(BaseModel):
    """Query parameters for a prediction request.

    Together these four dimensions identify a prediction bucket.
    """

    task_type: TaskType
    model: str
    provider: Provider
    complexity: Complexity
|
||||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
183
tests/conftest.py
Normal file
183
tests/conftest.py
Normal file
@@ -0,0 +1,183 @@
|
|||||||
|
"""Shared test fixtures."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
CorrectionFactors,
|
||||||
|
PredictionData,
|
||||||
|
PredictionMetadata,
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
QualityPrediction,
|
||||||
|
TokenDistribution,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Shared fixture constants: a 64-character dummy API key (presumably the
# expected key length — verify against config validation), a fixed instance
# UUID, and a server URL that is never actually contacted in tests.
TEST_API_KEY = "a" * 64
TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
TEST_SERVER_URL = "https://telemetry.example.com"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def config() -> TelemetryConfig:
    """Create a valid test configuration.

    Small intervals, queue, and retry counts keep tests fast.
    """
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        submit_interval_seconds=1.0,
        max_queue_size=100,
        batch_size=10,
        request_timeout_seconds=5.0,
        max_retries=1,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def dry_run_config() -> TelemetryConfig:
    """Create a test configuration with dry_run enabled (no network calls)."""
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        submit_interval_seconds=1.0,
        max_queue_size=100,
        batch_size=10,
        request_timeout_seconds=5.0,
        dry_run=True,
        max_retries=1,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def disabled_config() -> TelemetryConfig:
    """Create a disabled test configuration (client operations become no-ops)."""
    return TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        enabled=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def sample_instance_id() -> UUID:
    """Return a fixed instance UUID for testing (matches TEST_INSTANCE_ID)."""
    return UUID(TEST_INSTANCE_ID)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def sample_event(sample_instance_id: UUID) -> TaskCompletionEvent:
    """Create a sample task completion event.

    All values are within the model's validation bounds; estimated and
    actual token/cost figures deliberately differ slightly.
    """
    return TaskCompletionEvent(
        instance_id=sample_instance_id,
        event_id=uuid4(),
        timestamp=datetime.now(timezone.utc),
        task_duration_ms=30000,
        task_type=TaskType.IMPLEMENTATION,
        complexity=Complexity.MEDIUM,
        harness=Harness.CLAUDE_CODE,
        model="claude-sonnet-4-20250514",
        provider=Provider.ANTHROPIC,
        estimated_input_tokens=5000,
        estimated_output_tokens=2000,
        actual_input_tokens=5200,
        actual_output_tokens=1800,
        estimated_cost_usd_micros=10000,
        actual_cost_usd_micros=9500,
        quality_gate_passed=True,
        quality_gates_run=[],
        quality_gates_failed=[],
        context_compactions=0,
        context_rotations=0,
        context_utilization_final=0.4,
        outcome=Outcome.SUCCESS,
        retry_count=0,
        language="python",
        repo_size_category=None,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def make_event(instance_id: UUID | None = None) -> TaskCompletionEvent:
    """Factory helper to create a sample event with optional overrides.

    Uses values distinct from the ``sample_event`` fixture so mixed batches
    are distinguishable in assertions; each call gets a fresh event_id.
    """
    return TaskCompletionEvent(
        instance_id=instance_id or UUID(TEST_INSTANCE_ID),
        event_id=uuid4(),
        timestamp=datetime.now(timezone.utc),
        task_duration_ms=15000,
        task_type=TaskType.DEBUGGING,
        complexity=Complexity.LOW,
        harness=Harness.AIDER,
        model="gpt-4o",
        provider=Provider.OPENAI,
        estimated_input_tokens=1000,
        estimated_output_tokens=500,
        actual_input_tokens=1100,
        actual_output_tokens=480,
        estimated_cost_usd_micros=3000,
        actual_cost_usd_micros=2800,
        quality_gate_passed=True,
        quality_gates_run=[],
        quality_gates_failed=[],
        context_compactions=0,
        context_rotations=0,
        context_utilization_final=0.2,
        outcome=Outcome.SUCCESS,
        retry_count=0,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def sample_prediction_query() -> PredictionQuery:
    """Create a sample prediction query (matches the sample_event dimensions)."""
    return PredictionQuery(
        task_type=TaskType.IMPLEMENTATION,
        model="claude-sonnet-4-20250514",
        provider=Provider.ANTHROPIC,
        complexity=Complexity.MEDIUM,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def sample_prediction_response() -> PredictionResponse:
    """Create a sample prediction response with full data and high confidence."""
    return PredictionResponse(
        prediction=PredictionData(
            input_tokens=TokenDistribution(p10=1000, p25=2000, median=3000, p75=4000, p90=5000),
            output_tokens=TokenDistribution(p10=500, p25=1000, median=1500, p75=2000, p90=2500),
            cost_usd_micros={"p10": 1000, "median": 3000, "p90": 5000},
            duration_ms={"p10": 10000, "median": 30000, "p90": 60000},
            correction_factors=CorrectionFactors(input=1.05, output=0.95),
            quality=QualityPrediction(gate_pass_rate=0.85, success_rate=0.9),
        ),
        metadata=PredictionMetadata(
            sample_size=150,
            fallback_level=0,
            confidence="high",
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def make_batch_response_json(events: list[TaskCompletionEvent]) -> dict:
    """Build the JSON body of an all-accepted batch response for *events*."""
    per_event = [
        {"event_id": str(event.event_id), "status": "accepted", "error": None}
        for event in events
    ]
    return {"accepted": len(events), "rejected": 0, "results": per_event}
|
||||||
321
tests/test_client.py
Normal file
321
tests/test_client.py
Normal file
@@ -0,0 +1,321 @@
|
|||||||
|
"""Tests for TelemetryClient."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
import respx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.client import TelemetryClient
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
TaskCompletionEvent,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
PredictionMetadata,
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
)
|
||||||
|
from tests.conftest import (
|
||||||
|
TEST_API_KEY,
|
||||||
|
TEST_INSTANCE_ID,
|
||||||
|
TEST_SERVER_URL,
|
||||||
|
make_batch_response_json,
|
||||||
|
make_event,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestTelemetryClientLifecycle:
    """Tests for client start/stop lifecycle (sync, async, and disabled)."""

    def test_start_stop_sync(self, config: TelemetryConfig) -> None:
        """Client can start and stop synchronously."""
        client = TelemetryClient(config)
        client.start()
        assert client.is_running is True
        client.stop()
        assert client.is_running is False

    async def test_start_stop_async(self, config: TelemetryConfig) -> None:
        """Client can start and stop asynchronously."""
        client = TelemetryClient(config)
        await client.start_async()
        assert client.is_running is True
        await client.stop_async()
        assert client.is_running is False

    def test_start_disabled(self, disabled_config: TelemetryConfig) -> None:
        """Starting a disabled client is a no-op."""
        client = TelemetryClient(disabled_config)
        client.start()
        assert client.is_running is False
        # stop() on a never-started client must also be safe
        client.stop()

    async def test_start_async_disabled(self, disabled_config: TelemetryConfig) -> None:
        """Starting a disabled async client is a no-op."""
        client = TelemetryClient(disabled_config)
        await client.start_async()
        assert client.is_running is False
        await client.stop_async()
|
||||||
|
|
||||||
|
|
||||||
|
class TestTelemetryClientTrack:
    """Behaviour of the fire-and-forget track() entry point."""

    def test_track_queues_event(self, config: TelemetryConfig) -> None:
        """A tracked event lands in the internal queue."""
        telemetry = TelemetryClient(config)
        telemetry.track(make_event())
        assert telemetry.queue_size == 1

    def test_track_multiple_events(self, config: TelemetryConfig) -> None:
        """Every tracked event is queued, not just the first."""
        telemetry = TelemetryClient(config)
        count = 5
        for _ in range(count):
            telemetry.track(make_event())
        assert telemetry.queue_size == count

    def test_track_disabled_drops_event(self, disabled_config: TelemetryConfig) -> None:
        """A disabled client silently discards tracked events."""
        telemetry = TelemetryClient(disabled_config)
        telemetry.track(make_event())
        assert telemetry.queue_size == 0

    def test_track_never_throws(self, config: TelemetryConfig) -> None:
        """track() must not raise; telemetry failures can't break callers."""
        telemetry = TelemetryClient(config)
        # If track() raised, the assertion below would never execute.
        telemetry.track(make_event())
        assert telemetry.queue_size == 1
||||||
|
class TestTelemetryClientContextManager:
    """Tests for context manager support."""

    @respx.mock
    def test_sync_context_manager(self, config: TelemetryConfig) -> None:
        """Sync context manager starts and stops correctly."""
        # Mock any potential flush calls
        # (stop() presumably flushes queued events on exit — the mocked
        # endpoint absorbs that request so the test stays offline; TODO confirm)
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(
                202,
                json={"accepted": 0, "rejected": 0, "results": []},
            )
        )

        with TelemetryClient(config) as client:
            assert client.is_running is True
            client.track(make_event())

        # Exiting the `with` block must stop the client.
        assert client.is_running is False

    @respx.mock
    async def test_async_context_manager(self, config: TelemetryConfig) -> None:
        """Async context manager starts and stops correctly."""
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(
                202,
                json={"accepted": 0, "rejected": 0, "results": []},
            )
        )

        async with TelemetryClient(config) as client:
            assert client.is_running is True
            client.track(make_event())

        # Exiting the `async with` block must stop the client.
        assert client.is_running is False
||||||
|
class TestTelemetryClientPredictions:
    """Tests for prediction caching and retrieval."""

    def test_get_prediction_miss(self, config: TelemetryConfig) -> None:
        """get_prediction returns None on cache miss."""
        client = TelemetryClient(config)
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )
        # Nothing has been fetched or cached yet, so the lookup must miss.
        assert client.get_prediction(query) is None

    def test_get_prediction_after_cache_populated(
        self, config: TelemetryConfig
    ) -> None:
        """get_prediction returns cached value."""
        client = TelemetryClient(config)
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )
        response = PredictionResponse(
            prediction=None,
            metadata=PredictionMetadata(
                sample_size=50,
                fallback_level=0,
                confidence="medium",
            ),
        )
        # Directly populate the cache
        # (reaches into the private cache to isolate get_prediction from the
        # network-fetch path)
        client._prediction_cache.put(query, response)

        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 50

    @respx.mock
    async def test_refresh_predictions_async(self, config: TelemetryConfig) -> None:
        """refresh_predictions fetches and caches predictions."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        # Server payload: one result per submitted query, in order.
        response_data = {
            "results": [
                {
                    "prediction": None,
                    "metadata": {
                        "sample_size": 75,
                        "fallback_level": 1,
                        "confidence": "medium",
                    },
                }
            ]
        }

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(200, json=response_data)
        )

        client = TelemetryClient(config)
        await client.refresh_predictions([query])

        # The fetched result must now be served from the cache.
        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 75

    @respx.mock
    def test_refresh_predictions_sync(self, config: TelemetryConfig) -> None:
        """refresh_predictions_sync fetches and caches predictions."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        response_data = {
            "results": [
                {
                    "prediction": None,
                    "metadata": {
                        "sample_size": 60,
                        "fallback_level": 0,
                        "confidence": "low",
                    },
                }
            ]
        }

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(200, json=response_data)
        )

        client = TelemetryClient(config)
        client.refresh_predictions_sync([query])

        result = client.get_prediction(query)
        assert result is not None
        assert result.metadata.sample_size == 60

    @respx.mock
    async def test_refresh_predictions_server_error(
        self, config: TelemetryConfig
    ) -> None:
        """refresh_predictions handles server errors gracefully."""
        query = PredictionQuery(
            task_type=TaskType.IMPLEMENTATION,
            model="test-model",
            provider=Provider.ANTHROPIC,
            complexity=Complexity.MEDIUM,
        )

        respx.post(f"{TEST_SERVER_URL}/v1/predictions/batch").mock(
            return_value=httpx.Response(500, text="Internal Server Error")
        )

        client = TelemetryClient(config)
        # Should not raise
        await client.refresh_predictions([query])
        # Cache should still be empty
        assert client.get_prediction(query) is None

    async def test_refresh_predictions_empty_list(
        self, config: TelemetryConfig
    ) -> None:
        """refresh_predictions with empty list is a no-op."""
        # NOTE(review): this test has no assertion — it only verifies the call
        # completes without raising (and without an HTTP mock, so any network
        # request would fail the test). Consider asserting queue/cache state.
        client = TelemetryClient(config)
        await client.refresh_predictions([])
|
||||||
|
class TestTelemetryClientConfig:
    """Configuration resolution: env vars, validation, normalization."""

    def test_config_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Environment variables override built-in defaults."""
        env_overrides = {
            "MOSAIC_TELEMETRY_ENABLED": "false",
            "MOSAIC_TELEMETRY_SERVER_URL": "https://env-server.com",
            "MOSAIC_TELEMETRY_API_KEY": "b" * 64,
            "MOSAIC_TELEMETRY_INSTANCE_ID": TEST_INSTANCE_ID,
        }
        for name, value in env_overrides.items():
            monkeypatch.setenv(name, value)

        cfg = TelemetryConfig()
        assert cfg.enabled is False
        assert cfg.server_url == "https://env-server.com"
        assert cfg.api_key == "b" * 64
        assert cfg.instance_id == TEST_INSTANCE_ID

    def test_config_validation_errors(self) -> None:
        """Each invalid field contributes at least one validation error."""
        bad_cfg = TelemetryConfig(
            server_url="",
            api_key="short",
            instance_id="not-a-uuid",
        )
        # Three invalid fields -> at least three errors.
        assert len(bad_cfg.validate()) >= 3

    def test_config_validation_success(self, config: TelemetryConfig) -> None:
        """A well-formed config validates with no errors."""
        assert config.validate() == []

    def test_config_strips_trailing_slash(self) -> None:
        """Trailing slashes on server_url are normalized away."""
        cfg = TelemetryConfig(
            server_url="https://example.com/",
            api_key=TEST_API_KEY,
            instance_id=TEST_INSTANCE_ID,
        )
        assert cfg.server_url == "https://example.com"

    def test_explicit_values_override_env(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Constructor arguments beat environment variables."""
        monkeypatch.setenv("MOSAIC_TELEMETRY_SERVER_URL", "https://env-server.com")
        cfg = TelemetryConfig(
            server_url="https://explicit-server.com",
            api_key=TEST_API_KEY,
            instance_id=TEST_INSTANCE_ID,
        )
        assert cfg.server_url == "https://explicit-server.com"
137
tests/test_event_builder.py
Normal file
137
tests/test_event_builder.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""Tests for EventBuilder."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.event_builder import EventBuilder
|
||||||
|
from mosaicstack_telemetry.types.events import (
|
||||||
|
Complexity,
|
||||||
|
Harness,
|
||||||
|
Outcome,
|
||||||
|
Provider,
|
||||||
|
QualityGate,
|
||||||
|
RepoSizeCategory,
|
||||||
|
TaskType,
|
||||||
|
)
|
||||||
|
|
||||||
|
TEST_INSTANCE_ID = "12345678-1234-1234-1234-123456789abc"
|
||||||
|
|
||||||
|
|
||||||
|
class TestEventBuilder:
    """Tests for the fluent event builder."""

    def test_build_complete_event(self) -> None:
        """Build an event with all fields set."""
        # Exercises every builder method in one chain, then checks that each
        # value round-trips onto the corresponding event attribute.
        event = (
            EventBuilder(instance_id=TEST_INSTANCE_ID)
            .task_type(TaskType.IMPLEMENTATION)
            .model("claude-sonnet-4-20250514")
            .provider(Provider.ANTHROPIC)
            .harness_type(Harness.CLAUDE_CODE)
            .complexity_level(Complexity.HIGH)
            .outcome_value(Outcome.SUCCESS)
            .duration_ms(45000)
            .tokens(estimated_in=5000, estimated_out=2000, actual_in=5200, actual_out=1800)
            .cost(estimated=50000, actual=48000)  # USD micros, per field names below
            .quality(
                passed=True,
                gates_run=[QualityGate.LINT, QualityGate.TEST],
                gates_failed=[],
            )
            .context(compactions=1, rotations=0, utilization=0.4)
            .retry_count(0)
            .language("python")
            .repo_size(RepoSizeCategory.MEDIUM)
            .build()
        )

        assert event.instance_id == UUID(TEST_INSTANCE_ID)
        assert event.task_type == TaskType.IMPLEMENTATION
        assert event.model == "claude-sonnet-4-20250514"
        assert event.provider == Provider.ANTHROPIC
        assert event.harness == Harness.CLAUDE_CODE
        assert event.complexity == Complexity.HIGH
        assert event.outcome == Outcome.SUCCESS
        assert event.task_duration_ms == 45000
        assert event.estimated_input_tokens == 5000
        assert event.estimated_output_tokens == 2000
        assert event.actual_input_tokens == 5200
        assert event.actual_output_tokens == 1800
        assert event.estimated_cost_usd_micros == 50000
        assert event.actual_cost_usd_micros == 48000
        assert event.quality_gate_passed is True
        assert event.quality_gates_run == [QualityGate.LINT, QualityGate.TEST]
        assert event.quality_gates_failed == []
        assert event.context_compactions == 1
        assert event.context_rotations == 0
        assert event.context_utilization_final == 0.4
        assert event.retry_count == 0
        assert event.language == "python"
        assert event.repo_size_category == RepoSizeCategory.MEDIUM

    def test_auto_generated_defaults(self) -> None:
        """event_id and timestamp are auto-generated."""
        event = (
            EventBuilder(instance_id=TEST_INSTANCE_ID)
            .task_type(TaskType.DEBUGGING)
            .model("gpt-4o")
            .provider(Provider.OPENAI)
            .build()
        )

        assert event.event_id is not None
        assert event.timestamp is not None
        # Generated timestamps must be timezone-aware.
        assert event.timestamp.tzinfo is not None

    def test_custom_event_id(self) -> None:
        """Custom event_id can be set."""
        custom_id = "abcdef12-1234-1234-1234-123456789abc"
        event = (
            EventBuilder(instance_id=TEST_INSTANCE_ID)
            .event_id(custom_id)
            .model("test-model")
            .build()
        )

        # The string form is parsed into a UUID on build.
        assert event.event_id == UUID(custom_id)

    def test_custom_timestamp(self) -> None:
        """Custom timestamp can be set."""
        ts = datetime(2026, 1, 15, 12, 0, 0, tzinfo=timezone.utc)
        event = (
            EventBuilder(instance_id=TEST_INSTANCE_ID)
            .timestamp(ts)
            .model("test-model")
            .build()
        )

        assert event.timestamp == ts

    def test_minimal_event_defaults(self) -> None:
        """Minimal event has sensible defaults."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("test-model").build()

        assert event.task_type == TaskType.UNKNOWN
        assert event.complexity == Complexity.MEDIUM
        assert event.harness == Harness.UNKNOWN
        assert event.provider == Provider.UNKNOWN
        # Outcome defaults pessimistically to FAILURE when never set.
        assert event.outcome == Outcome.FAILURE
        assert event.task_duration_ms == 0
        assert event.retry_count == 0
        assert event.language is None
        assert event.repo_size_category is None

    def test_quality_defaults_to_empty_lists(self) -> None:
        """Quality gate lists default to empty."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build()

        assert event.quality_gates_run == []
        assert event.quality_gates_failed == []
        assert event.quality_gate_passed is False

    def test_schema_version(self) -> None:
        """Schema version defaults to 1.0."""
        event = EventBuilder(instance_id=TEST_INSTANCE_ID).model("m").build()
        assert event.schema_version == "1.0"
147
tests/test_prediction_cache.py
Normal file
147
tests/test_prediction_cache.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
"""Tests for PredictionCache."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.prediction_cache import PredictionCache
|
||||||
|
from mosaicstack_telemetry.types.events import Complexity, Provider, TaskType
|
||||||
|
from mosaicstack_telemetry.types.predictions import (
|
||||||
|
PredictionMetadata,
|
||||||
|
PredictionQuery,
|
||||||
|
PredictionResponse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_query(
    task_type: TaskType = TaskType.IMPLEMENTATION,
    model: str = "claude-sonnet-4-20250514",
) -> PredictionQuery:
    """Return a PredictionQuery with fixed provider/complexity for cache tests."""
    return PredictionQuery(
        provider=Provider.ANTHROPIC,
        complexity=Complexity.MEDIUM,
        task_type=task_type,
        model=model,
    )
||||||
|
|
||||||
|
def _make_response(sample_size: int = 100) -> PredictionResponse:
    """Return a PredictionResponse whose metadata carries *sample_size*."""
    metadata = PredictionMetadata(
        sample_size=sample_size,
        fallback_level=0,
        confidence="high",
    )
    return PredictionResponse(prediction=None, metadata=metadata)
||||||
|
|
||||||
|
class TestPredictionCache:
    """Tests for the TTL-based prediction cache."""

    def test_cache_hit(self) -> None:
        """Cached predictions are returned on hit."""
        cache = PredictionCache(ttl_seconds=60.0)
        query = _make_query()
        response = _make_response()

        cache.put(query, response)
        result = cache.get(query)

        assert result is not None
        assert result.metadata.sample_size == 100

    def test_cache_miss(self) -> None:
        """Missing keys return None."""
        cache = PredictionCache(ttl_seconds=60.0)
        query = _make_query()

        result = cache.get(query)
        assert result is None

    def test_cache_expiry(self) -> None:
        """Expired entries return None."""
        # TTL (50ms) is deliberately shorter than the sleep (100ms) below,
        # so the entry is guaranteed stale by the time we read it back.
        cache = PredictionCache(ttl_seconds=0.05)
        query = _make_query()
        response = _make_response()

        cache.put(query, response)
        time.sleep(0.1)
        result = cache.get(query)

        assert result is None

    def test_different_queries_different_keys(self) -> None:
        """Different queries map to different cache entries."""
        cache = PredictionCache(ttl_seconds=60.0)
        query1 = _make_query(task_type=TaskType.IMPLEMENTATION)
        query2 = _make_query(task_type=TaskType.DEBUGGING)

        # Distinguishable payloads prove the entries don't collide.
        cache.put(query1, _make_response(sample_size=100))
        cache.put(query2, _make_response(sample_size=200))

        result1 = cache.get(query1)
        result2 = cache.get(query2)

        assert result1 is not None
        assert result2 is not None
        assert result1.metadata.sample_size == 100
        assert result2.metadata.sample_size == 200

    def test_cache_clear(self) -> None:
        """Clear removes all entries."""
        cache = PredictionCache(ttl_seconds=60.0)
        query = _make_query()
        cache.put(query, _make_response())

        assert cache.size == 1
        cache.clear()
        assert cache.size == 0
        assert cache.get(query) is None

    def test_cache_overwrite(self) -> None:
        """Putting a new value for the same key overwrites."""
        cache = PredictionCache(ttl_seconds=60.0)
        query = _make_query()

        cache.put(query, _make_response(sample_size=100))
        cache.put(query, _make_response(sample_size=200))

        # Last write wins.
        result = cache.get(query)
        assert result is not None
        assert result.metadata.sample_size == 200

    def test_thread_safety(self) -> None:
        """Cache handles concurrent access from multiple threads."""
        cache = PredictionCache(ttl_seconds=60.0)
        # Exceptions are collected rather than asserted inside workers,
        # because pytest only sees failures raised in the main thread.
        errors: list[Exception] = []
        iterations = 100

        def writer(thread_id: int) -> None:
            try:
                for i in range(iterations):
                    # Each (thread, iteration) pair gets a unique model name,
                    # so writers never contend on the same key.
                    query = _make_query(model=f"model-{thread_id}-{i}")
                    cache.put(query, _make_response(sample_size=i))
            except Exception as e:
                errors.append(e)

        def reader(thread_id: int) -> None:
            try:
                for i in range(iterations):
                    query = _make_query(model=f"model-{thread_id}-{i}")
                    cache.get(query)  # May or may not hit
            except Exception as e:
                errors.append(e)

        threads: list[threading.Thread] = []
        for tid in range(4):
            threads.append(threading.Thread(target=writer, args=(tid,)))
            threads.append(threading.Thread(target=reader, args=(tid,)))

        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5)

        assert not errors, f"Thread errors: {errors}"
||||||
133
tests/test_queue.py
Normal file
133
tests/test_queue.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
"""Tests for EventQueue."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.queue import EventQueue
|
||||||
|
from tests.conftest import make_event
|
||||||
|
|
||||||
|
|
||||||
|
class TestEventQueue:
    """Tests for the bounded thread-safe event queue."""

    def test_put_and_drain(self) -> None:
        """Events can be put in and drained out in FIFO order."""
        queue = EventQueue(max_size=10)
        e1 = make_event()
        e2 = make_event()
        queue.put(e1)
        queue.put(e2)

        drained = queue.drain(10)
        assert len(drained) == 2
        # FIFO: the first event put is the first drained.
        assert drained[0].event_id == e1.event_id
        assert drained[1].event_id == e2.event_id

    def test_drain_max_items(self) -> None:
        """Drain respects the max_items limit."""
        queue = EventQueue(max_size=10)
        for _ in range(5):
            queue.put(make_event())

        drained = queue.drain(3)
        assert len(drained) == 3
        # The two undrained events remain queued.
        assert queue.size == 2

    def test_drain_empty_queue(self) -> None:
        """Draining an empty queue returns empty list."""
        queue = EventQueue(max_size=10)
        drained = queue.drain(5)
        assert drained == []

    def test_bounded_fifo_eviction(self) -> None:
        """When queue is full, oldest events are evicted."""
        queue = EventQueue(max_size=3)
        events = [make_event() for _ in range(5)]
        for e in events:
            queue.put(e)

        assert queue.size == 3
        drained = queue.drain(3)
        # Should have the last 3 events (oldest 2 were evicted)
        assert drained[0].event_id == events[2].event_id
        assert drained[1].event_id == events[3].event_id
        assert drained[2].event_id == events[4].event_id

    def test_size_property(self) -> None:
        """Size property reflects current queue length."""
        queue = EventQueue(max_size=10)
        assert queue.size == 0
        queue.put(make_event())
        assert queue.size == 1
        queue.put(make_event())
        assert queue.size == 2
        queue.drain(1)
        assert queue.size == 1

    def test_is_empty_property(self) -> None:
        """is_empty property works correctly."""
        queue = EventQueue(max_size=10)
        assert queue.is_empty is True
        queue.put(make_event())
        assert queue.is_empty is False
        queue.drain(1)
        assert queue.is_empty is True

    def test_put_back(self) -> None:
        """put_back re-adds events to the front of the queue."""
        queue = EventQueue(max_size=10)
        e1 = make_event()
        e2 = make_event()
        queue.put(e1)

        queue.put_back([e2])
        drained = queue.drain(2)
        # e2 should be first (put_back adds to front)
        assert drained[0].event_id == e2.event_id
        assert drained[1].event_id == e1.event_id

    def test_put_back_respects_max_size(self) -> None:
        """put_back doesn't exceed max_size."""
        queue = EventQueue(max_size=3)
        for _ in range(3):
            queue.put(make_event())

        # Re-adding 5 events to an already-full queue of 3 must not grow it.
        events_to_add = [make_event() for _ in range(5)]
        queue.put_back(events_to_add)
        assert queue.size == 3

    def test_thread_safety_concurrent_put_drain(self) -> None:
        """Queue handles concurrent put and drain operations."""
        queue = EventQueue(max_size=1000)
        total_puts = 500
        # Worker exceptions are collected and asserted from the main thread.
        errors: list[Exception] = []

        def put_events() -> None:
            try:
                for _ in range(total_puts):
                    queue.put(make_event())
            except Exception as e:
                errors.append(e)

        def drain_events() -> None:
            try:
                drained_count = 0
                while drained_count < total_puts:
                    batch = queue.drain(10)
                    drained_count += len(batch)
                    if not batch:
                        # Brief pause instead of a busy spin while the
                        # producer catches up.
                        threading.Event().wait(0.001)
            except Exception as e:
                errors.append(e)

        put_thread = threading.Thread(target=put_events)
        drain_thread = threading.Thread(target=drain_events)

        put_thread.start()
        drain_thread.start()

        put_thread.join(timeout=5)
        drain_thread.join(timeout=5)

        assert not errors, f"Thread errors: {errors}"
||||||
207
tests/test_submitter.py
Normal file
207
tests/test_submitter.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""Tests for batch submission logic."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
import respx
|
||||||
|
|
||||||
|
from mosaicstack_telemetry.config import TelemetryConfig
|
||||||
|
from mosaicstack_telemetry.submitter import submit_batch_async, submit_batch_sync
|
||||||
|
from tests.conftest import (
|
||||||
|
TEST_API_KEY,
|
||||||
|
TEST_INSTANCE_ID,
|
||||||
|
TEST_SERVER_URL,
|
||||||
|
make_batch_response_json,
|
||||||
|
make_event,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def fast_config() -> TelemetryConfig:
    """Return a config tuned for fast tests: one retry, short timeout."""
    config = TelemetryConfig(
        server_url=TEST_SERVER_URL,
        api_key=TEST_API_KEY,
        instance_id=TEST_INSTANCE_ID,
        max_retries=1,
        request_timeout_seconds=2.0,
    )
    return config
|
||||||
|
|
||||||
|
class TestSubmitBatchSync:
    """Tests for synchronous batch submission."""

    @respx.mock
    def test_successful_submission(self, fast_config: TelemetryConfig) -> None:
        """Successful 202 response returns BatchEventResponse."""
        events = [make_event() for _ in range(3)]
        response_json = make_batch_response_json(events)

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(202, json=response_json)
        )

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        assert result is not None
        assert result.accepted == 3
        assert result.rejected == 0

    @respx.mock
    def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None:
        """429 response respects Retry-After header and retries."""
        events = [make_event()]
        response_json = make_batch_response_json(events)

        # side_effect yields one response per request, in order:
        # first attempt is throttled, the retry succeeds.
        route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        route.side_effect = [
            httpx.Response(429, headers={"Retry-After": "0.1"}),
            httpx.Response(202, json=response_json),
        ]

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        assert result is not None
        assert result.accepted == 1

    @respx.mock
    def test_403_returns_none(self, fast_config: TelemetryConfig) -> None:
        """403 response returns None immediately."""
        events = [make_event()]

        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            return_value=httpx.Response(403, json={"error": "Forbidden"})
        )

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        # Auth failures are terminal: no retry, no result.
        assert result is None

    @respx.mock
    def test_network_error_retries(self, fast_config: TelemetryConfig) -> None:
        """Network errors trigger retry with backoff."""
        events = [make_event()]
        response_json = make_batch_response_json(events)

        # First attempt raises a transport error; the retry succeeds.
        route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        route.side_effect = [
            httpx.ConnectError("Connection refused"),
            httpx.Response(202, json=response_json),
        ]

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        assert result is not None
        assert result.accepted == 1

    @respx.mock
    def test_all_retries_exhausted(self, fast_config: TelemetryConfig) -> None:
        """When all retries fail, returns None."""
        events = [make_event()]

        # Every attempt fails (fast_config allows max_retries=1).
        respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
            side_effect=httpx.ConnectError("Connection refused")
        )

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        assert result is None

    def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None:
        """Dry run mode logs but doesn't send."""
        # Note: no respx mock is active here — dry-run must short-circuit
        # before any HTTP request, otherwise this test would hit the network.
        fast_config.dry_run = True
        events = [make_event() for _ in range(5)]

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        # Dry run reports all events as accepted without sending them.
        assert result is not None
        assert result.accepted == 5
        assert result.rejected == 0

    @respx.mock
    def test_500_error_retries(self, fast_config: TelemetryConfig) -> None:
        """Server errors (500) trigger retries."""
        events = [make_event()]
        response_json = make_batch_response_json(events)

        route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
        route.side_effect = [
            httpx.Response(500, text="Internal Server Error"),
            httpx.Response(202, json=response_json),
        ]

        with httpx.Client() as client:
            result = submit_batch_sync(client, fast_config, events)

        assert result is not None
        assert result.accepted == 1
|
||||||
|
class TestSubmitBatchAsync:
    """Tests for asynchronous batch submission (``submit_batch_async``)."""
|
||||||
|
|
||||||
|
@respx.mock
|
||||||
|
async def test_successful_submission(self, fast_config: TelemetryConfig) -> None:
|
||||||
|
"""Successful 202 response returns BatchEventResponse."""
|
||||||
|
events = [make_event() for _ in range(3)]
|
||||||
|
response_json = make_batch_response_json(events)
|
||||||
|
|
||||||
|
respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
|
||||||
|
return_value=httpx.Response(202, json=response_json)
|
||||||
|
)
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
result = await submit_batch_async(client, fast_config, events)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.accepted == 3
|
||||||
|
|
||||||
|
@respx.mock
|
||||||
|
async def test_429_with_retry_after(self, fast_config: TelemetryConfig) -> None:
|
||||||
|
"""429 response respects Retry-After and retries asynchronously."""
|
||||||
|
events = [make_event()]
|
||||||
|
response_json = make_batch_response_json(events)
|
||||||
|
|
||||||
|
route = respx.post(f"{TEST_SERVER_URL}/v1/events/batch")
|
||||||
|
route.side_effect = [
|
||||||
|
httpx.Response(429, headers={"Retry-After": "0.1"}),
|
||||||
|
httpx.Response(202, json=response_json),
|
||||||
|
]
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
result = await submit_batch_async(client, fast_config, events)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.accepted == 1
|
||||||
|
|
||||||
|
@respx.mock
|
||||||
|
async def test_403_returns_none(self, fast_config: TelemetryConfig) -> None:
|
||||||
|
"""403 returns None immediately."""
|
||||||
|
events = [make_event()]
|
||||||
|
|
||||||
|
respx.post(f"{TEST_SERVER_URL}/v1/events/batch").mock(
|
||||||
|
return_value=httpx.Response(403, json={"error": "Forbidden"})
|
||||||
|
)
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
result = await submit_batch_async(client, fast_config, events)
|
||||||
|
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
async def test_dry_run_mode(self, fast_config: TelemetryConfig) -> None:
|
||||||
|
"""Dry run mode returns mock response without HTTP."""
|
||||||
|
fast_config.dry_run = True
|
||||||
|
events = [make_event() for _ in range(3)]
|
||||||
|
|
||||||
|
async with httpx.AsyncClient() as client:
|
||||||
|
result = await submit_batch_async(client, fast_config, events)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
assert result.accepted == 3
|
||||||
Reference in New Issue
Block a user