feat(agent-reflection): durable kernel — reflection.v1 capture + risk-floor + Phase-0 (#545)
This commit was merged in pull request #545.
This commit is contained in:
@@ -6,3 +6,4 @@ export * from './provider/index.js';
|
||||
export * from './routing/index.js';
|
||||
export * from './commands/index.js';
|
||||
export * from './federation/index.js';
|
||||
export * from './reflection/index.js';
|
||||
|
||||
146
packages/types/src/reflection/__tests__/reflection.spec.ts
Normal file
146
packages/types/src/reflection/__tests__/reflection.spec.ts
Normal file
@@ -0,0 +1,146 @@
|
||||
/**
|
||||
* Unit tests for the reflection.v1 schema + self-report boundary.
|
||||
*
|
||||
* The runtime source of truth is the zod schema set in `reflection.ts`. The
|
||||
* class-validator `ReflectionSelfReportDto` is the NestJS-side boundary type
|
||||
* (exercised under the gateway app's reflect-metadata runtime, mirroring how
|
||||
* `chat.dto.ts` is tested in apps/gateway); here we validate the self-report
|
||||
* input with its zod counterpart, which is what the Stop hook actually uses.
|
||||
*
|
||||
* Coverage:
|
||||
* - REVIEW_SURFACES canonical ordering (the enum both zod + JSON Schema mirror)
|
||||
* - ReflectionV1Schema accepts a fully-populated record
|
||||
* - ReflectionV1Schema accepts a degraded record (self-report fields null)
|
||||
* - ReflectionV1Schema rejects bad schema literal / out-of-range confidence / bad surface
|
||||
* - ReflectionSelfReportSchema accepts valid + empty, rejects bad input
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
REVIEW_SURFACES,
|
||||
ReflectionV1Schema,
|
||||
ReflectionSelfReportSchema,
|
||||
type ReflectionV1,
|
||||
} from '../index.js';
|
||||
|
||||
/** Risk-floor output used by the fixture: an `auth` change that needs review. */
const fixtureRisk = {
  needs_review: true,
  score: 1.0,
  surface: 'auth' as const,
  reason: 'auth surface (weight 1) in: src/auth.ts',
};

/** Provenance used by the fixture: first, non-degraded, solo-mode attempt. */
const fixtureProvenance = {
  source: 'stop-hook' as const,
  reflection_attempt: 1,
  degraded: false,
  reflection_mode: 'solo' as const,
};

/**
 * Every field of a `reflection.v1` record except the three self-reported ones
 * (`confidence`, `most_likely_wrong`, `known_not_in_diff`).
 *
 * Each test spreads this fixture and adds the self-reported fields (and any
 * deliberately invalid override) itself.
 */
const baseMechanical = {
  schema: 'reflection.v1' as const,
  task_ref: 'stack#544',
  agent: 'claude',
  session_id: 'sess-abc',
  timestamp: '2026-06-16T00:00:00.000Z',
  repo: 'stack',
  risk: fixtureRisk,
  files_changed: ['src/auth.ts'],
  provenance: fixtureProvenance,
};
|
||||
|
||||
describe('REVIEW_SURFACES', () => {
  it('keeps the canonical most→least-sensitive ordering', () => {
    // Pinned in full: a reorder, rename, or addition must show up as a failing test.
    const canonicalOrder = ['auth', 'data', 'infra', 'build', 'ui', 'test', 'docs', 'none'];

    expect(REVIEW_SURFACES).toEqual(canonicalOrder);
  });
});
|
||||
|
||||
describe('ReflectionV1Schema', () => {
  /** The three self-reported fields as they appear when the agent supplied nothing. */
  const absentSelfReport = {
    confidence: null,
    most_likely_wrong: null,
    known_not_in_diff: null,
  };

  /** Wraps a parse call in a thunk so it can be handed to `expect(...).toThrow()`. */
  const parsing = (candidate: unknown) => () => ReflectionV1Schema.parse(candidate);

  it('accepts a fully-populated record', () => {
    const record: ReflectionV1 = {
      ...baseMechanical,
      confidence: 0.7,
      most_likely_wrong: { surface: 'auth', description: 'token refresh untested' },
      known_not_in_diff: 'manual QA only on the happy path',
    };

    expect(parsing(record)).not.toThrow();
  });

  it('accepts a degraded record with null self-report fields', () => {
    const record: ReflectionV1 = {
      ...baseMechanical,
      ...absentSelfReport,
      provenance: { ...baseMechanical.provenance, degraded: true },
    };

    expect(parsing(record)).not.toThrow();
  });

  it('rejects a wrong schema literal', () => {
    const invalid = {
      ...baseMechanical,
      ...absentSelfReport,
      schema: 'reflection.v2',
    };

    expect(parsing(invalid)).toThrow();
  });

  it('rejects out-of-range confidence', () => {
    const invalid = {
      ...baseMechanical,
      ...absentSelfReport,
      confidence: 1.5,
    };

    expect(parsing(invalid)).toThrow();
  });

  it('rejects an unknown surface', () => {
    const invalid = {
      ...baseMechanical,
      ...absentSelfReport,
      risk: { ...baseMechanical.risk, surface: 'network' },
    };

    expect(parsing(invalid)).toThrow();
  });
});
|
||||
|
||||
describe('ReflectionSelfReportSchema', () => {
  /** True when the self-report schema accepts `candidate`. */
  const isAccepted = (candidate: unknown): boolean =>
    ReflectionSelfReportSchema.safeParse(candidate).success;

  it('accepts a valid self-report', () => {
    const selfReport = {
      confidence: 0.8,
      most_likely_wrong: {
        surface: 'data',
        description: 'migration not run against prod-sized data',
      },
      known_not_in_diff: 'rollback path untested',
    };

    expect(isAccepted(selfReport)).toBe(true);
  });

  it('accepts an empty self-report (all optional)', () => {
    expect(isAccepted({})).toBe(true);
  });

  it('rejects confidence above 1', () => {
    expect(isAccepted({ confidence: 2 })).toBe(false);
  });

  it('rejects an unknown most_likely_wrong.surface', () => {
    const selfReport = {
      most_likely_wrong: { surface: 'network', description: 'x' },
    };

    expect(isAccepted(selfReport)).toBe(false);
  });
});
|
||||
30
packages/types/src/reflection/index.ts
Normal file
30
packages/types/src/reflection/index.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Agent reflection (v1) — public barrel.
|
||||
*
|
||||
* reflection.ts — zod schemas (runtime source of truth) + inferred types
|
||||
* reflection.dto.ts — class-validator DTO for the agent self-report input
|
||||
*/
|
||||
|
||||
export {
|
||||
REVIEW_SURFACES,
|
||||
ReviewSurfaceSchema,
|
||||
MostLikelyWrongSchema,
|
||||
ReflectionRiskSchema,
|
||||
ReflectionModeSchema,
|
||||
ReflectionProvenanceSchema,
|
||||
ReflectionSelfReportSchema,
|
||||
ReflectionV1Schema,
|
||||
REFLECTION_SCHEMA_ID,
|
||||
} from './reflection.js';
|
||||
|
||||
export type {
|
||||
ReviewSurface,
|
||||
MostLikelyWrong,
|
||||
ReflectionRisk,
|
||||
ReflectionMode,
|
||||
ReflectionProvenance,
|
||||
ReflectionSelfReport,
|
||||
ReflectionV1,
|
||||
} from './reflection.js';
|
||||
|
||||
export { MostLikelyWrongDto, ReflectionSelfReportDto } from './reflection.dto.js';
|
||||
55
packages/types/src/reflection/reflection.dto.ts
Normal file
55
packages/types/src/reflection/reflection.dto.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Reflection self-report DTO — class-validator boundary.
|
||||
*
|
||||
* Validates the agent-supplied self-report input (the optional
|
||||
* `$REFLECTION_INPUT` file, default `<repo>/.mosaic/reflection-input.json`)
|
||||
* before it is merged into a `reflection.v1` record. This is the only
|
||||
* externally-authored input on the reflection path, so it gets a DTO per the
|
||||
* Mosaic module-boundary rule.
|
||||
*
|
||||
* Class-validator only (no class-transformer `@Type`) — matching `chat.dto.ts`
|
||||
* — so the module is safe to import without a `reflect-metadata` shim. Deep
|
||||
* nested validation of `most_likely_wrong` is owned by the zod
|
||||
* `ReflectionSelfReportSchema` in `reflection.ts`, which is what the Stop hook
|
||||
* actually enforces at runtime.
|
||||
*/
|
||||
|
||||
import {
|
||||
IsIn,
|
||||
IsNumber,
|
||||
IsObject,
|
||||
IsOptional,
|
||||
IsString,
|
||||
Max,
|
||||
Min,
|
||||
MaxLength,
|
||||
} from 'class-validator';
|
||||
|
||||
import { REVIEW_SURFACES } from './reflection.js';
|
||||
|
||||
/**
 * Shape of `most_likely_wrong`; validated structurally by zod at runtime.
 *
 * The decorators here describe the same constraints for class-validator
 * consumers: `surface` must be one of `REVIEW_SURFACES`, and `description` is
 * a string of at most 4,000 characters.
 */
export class MostLikelyWrongDto {
  // Spread into a fresh array instead of casting: `REVIEW_SURFACES` is a
  // readonly tuple, and a copy satisfies `IsIn` without an
  // `as unknown as string[]` double assertion that would hide type errors.
  @IsIn([...REVIEW_SURFACES])
  surface!: string;

  @IsString()
  @MaxLength(4_000)
  description!: string;
}
|
||||
|
||||
/**
 * Agent-supplied self-report, as accepted at the class-validator boundary.
 *
 * Every field is optional.
 *
 * NOTE(review): class-validator's `@IsOptional()` is documented to skip the
 * remaining validators for `null` as well as `undefined`, so a `null` value
 * would pass validation even though the declared types exclude it — confirm
 * whether the types should admit `null`.
 */
export class ReflectionSelfReportDto {
  /** Self-assessed confidence, constrained to the inclusive range 0–1. */
  @IsOptional()
  @IsNumber()
  @Min(0)
  @Max(1)
  confidence?: number;

  /**
   * The single most-likely way the work is wrong.
   *
   * Only checked to be an object here; its `surface` / `description` fields
   * are not validated by this class (there is no nested-validation decorator).
   */
  @IsOptional()
  @IsObject()
  most_likely_wrong?: MostLikelyWrongDto;

  /** Free-text note, capped at 8,000 characters. */
  @IsOptional()
  @IsString()
  @MaxLength(8_000)
  known_not_in_diff?: string;
}
|
||||
90
packages/types/src/reflection/reflection.ts
Normal file
90
packages/types/src/reflection/reflection.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
/**
|
||||
* Agent reflection (v1) — wire schema.
|
||||
*
|
||||
* Runtime source of truth for the `reflection.v1` sidecar emitted at end-of-run
|
||||
* by the Stop hook (design §10 step 1). The JSON Schema artifact at
|
||||
* `@mosaicstack/macp` `src/schemas/reflection.v1.schema.json` is the documented
|
||||
* contract; this zod schema is the executable one and MUST agree with it.
|
||||
*
|
||||
* Field provenance:
|
||||
* - MECHANICAL (risk, files_changed, ids, provenance): written by the hook.
|
||||
* - SELF-REPORTED (confidence, most_likely_wrong, known_not_in_diff): merged
|
||||
* from an optional agent-supplied input; null when absent.
|
||||
*
|
||||
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
|
||||
/**
 * Review surfaces, ordered most- to least-sensitive. Mirrors macp risk-floor.
 *
 * Declared `as const` so the literal tuple can drive both the zod enum and the
 * `ReviewSurface` union type derived from it.
 */
export const REVIEW_SURFACES = [
  'auth',
  'data',
  'infra',
  'build',
  'ui',
  'test',
  'docs',
  'none',
] as const;
|
||||
|
||||
/** Zod enum accepting exactly the members of `REVIEW_SURFACES`. */
export const ReviewSurfaceSchema = z.enum(REVIEW_SURFACES);

/** Union of the review-surface string literals, inferred from the schema. */
export type ReviewSurface = z.infer<typeof ReviewSurfaceSchema>;
|
||||
|
||||
/**
 * SELF-REPORTED: the single most-likely way the work is wrong.
 *
 * NOTE(review): `description` has no length cap here, whereas the
 * class-validator DTO caps it at 4,000 characters — confirm which limit the
 * documented contract intends.
 */
export const MostLikelyWrongSchema = z.object({
  surface: ReviewSurfaceSchema,
  description: z.string(),
});

/** Object type inferred from `MostLikelyWrongSchema`. */
export type MostLikelyWrong = z.infer<typeof MostLikelyWrongSchema>;
|
||||
|
||||
/**
 * MECHANICAL: output of the diff risk-floor (see `@mosaicstack/macp`).
 *
 * `score` is constrained to the inclusive range 0–1; `surface` must be a
 * known review surface.
 */
export const ReflectionRiskSchema = z.object({
  needs_review: z.boolean(),
  score: z.number().min(0).max(1),
  surface: ReviewSurfaceSchema,
  reason: z.string(),
});

/** Object type inferred from `ReflectionRiskSchema`. */
export type ReflectionRisk = z.infer<typeof ReflectionRiskSchema>;
|
||||
|
||||
/** Zod enum of the reflection modes a record's provenance may carry. */
export const ReflectionModeSchema = z.enum(['off', 'solo', 'orchestrated']);

/** Union of the reflection-mode string literals, inferred from the schema. */
export type ReflectionMode = z.infer<typeof ReflectionModeSchema>;
|
||||
|
||||
/**
 * Provenance block of a reflection record.
 *
 * `source` is fixed to `'stop-hook'`, and `reflection_attempt` is an integer
 * of at least 1.
 */
export const ReflectionProvenanceSchema = z.object({
  source: z.literal('stop-hook'),
  reflection_attempt: z.number().int().min(1),
  degraded: z.boolean(),
  reflection_mode: ReflectionModeSchema,
});

/** Object type inferred from `ReflectionProvenanceSchema`. */
export type ReflectionProvenance = z.infer<typeof ReflectionProvenanceSchema>;
|
||||
|
||||
/**
 * The self-reported half of a reflection. Supplied by the agent out-of-band
 * (e.g. `<repo>/.mosaic/reflection-input.json`) and merged by the hook.
 *
 * Each field may be omitted or explicitly `null`; missing fields become `null`
 * in the assembled record.
 */
export const ReflectionSelfReportSchema = z.object({
  // `.nullish()` is zod shorthand for `.nullable().optional()`.
  confidence: z.number().min(0).max(1).nullish(),
  most_likely_wrong: MostLikelyWrongSchema.nullish(),
  known_not_in_diff: z.string().nullish(),
});

/** Object type inferred from `ReflectionSelfReportSchema`. */
export type ReflectionSelfReport = z.infer<typeof ReflectionSelfReportSchema>;
|
||||
|
||||
/**
 * Identifier of the `reflection.v1` wire format.
 *
 * Declared before the schema so the `schema` field's literal and this exported
 * constant come from one definition and cannot drift apart.
 */
export const REFLECTION_SCHEMA_ID = 'reflection.v1' as const;

/**
 * The full assembled `reflection.v1` sidecar.
 *
 * Unlike the self-report input schema, the three self-reported fields
 * (`confidence`, `most_likely_wrong`, `known_not_in_diff`) are required keys
 * here and use `null` to mean "not supplied".
 */
export const ReflectionV1Schema = z.object({
  schema: z.literal(REFLECTION_SCHEMA_ID),
  task_ref: z.string(),
  agent: z.string(),
  session_id: z.string(),
  // NOTE(review): any string is accepted; a date-time format is not enforced
  // here — confirm this matches the documented JSON Schema contract.
  timestamp: z.string(),
  repo: z.string(),
  confidence: z.number().min(0).max(1).nullable(),
  most_likely_wrong: MostLikelyWrongSchema.nullable(),
  known_not_in_diff: z.string().nullable(),
  risk: ReflectionRiskSchema,
  files_changed: z.array(z.string()),
  provenance: ReflectionProvenanceSchema,
});

/** Object type inferred from `ReflectionV1Schema`. */
export type ReflectionV1 = z.infer<typeof ReflectionV1Schema>;
|
||||
Reference in New Issue
Block a user