feat(agent-reflection): durable kernel — reflection.v1 capture + risk-floor + Phase-0 (#545)

2026-06-16 21:35:40 +00:00
parent c461380a4a
commit b8807e60df
17 changed files with 1498 additions and 0 deletions
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -6,3 +6,4 @@ export * from './provider/index.js';
 export * from './routing/index.js';
 export * from './commands/index.js';
 export * from './federation/index.js';
+export * from './reflection/index.js';
--- a/packages/types/src/reflection/tests/reflection.spec.ts
+++ b/packages/types/src/reflection/tests/reflection.spec.ts
@@ -0,0 +1,146 @@
+/**
+ * Unit tests for the reflection.v1 schema + self-report boundary.
+ *
+ * The runtime source of truth is the zod schema set in `reflection.ts`. The
+ * class-validator `ReflectionSelfReportDto` is the NestJS-side boundary type
+ * (exercised under the gateway app's reflect-metadata runtime, mirroring how
+ * `chat.dto.ts` is tested in apps/gateway); here we validate the self-report
+ * input with its zod counterpart, which is what the Stop hook actually uses.
+ *
+ * Coverage:
+ *  - REVIEW_SURFACES canonical ordering (the enum both zod + JSON Schema mirror)
+ *  - ReflectionV1Schema accepts a fully-populated record
+ *  - ReflectionV1Schema accepts a degraded record (self-report fields null)
+ *  - ReflectionV1Schema rejects bad schema literal / out-of-range confidence / bad surface
+ *  - ReflectionSelfReportSchema accepts valid + empty, rejects bad input
+ */
+
+import { describe, expect, it } from 'vitest';
+
+import {
+  REVIEW_SURFACES,
+  ReflectionV1Schema,
+  ReflectionSelfReportSchema,
+  type ReflectionV1,
+} from '../index.js';
+
+/** Risk-floor output shared by the fixtures: an auth-surface hit at full score. */
+const baseRisk = {
+  needs_review: true,
+  score: 1.0,
+  surface: 'auth' as const,
+  reason: 'auth surface (weight 1) in: src/auth.ts',
+};
+
+/** Provenance of a first-attempt, non-degraded, solo-mode record. */
+const baseProvenance = {
+  source: 'stop-hook' as const,
+  reflection_attempt: 1,
+  degraded: false,
+  reflection_mode: 'solo' as const,
+};
+
+/**
+ * Mechanical half of a record. Tests spread this object and then add the
+ * three self-reported fields (`confidence`, `most_likely_wrong`,
+ * `known_not_in_diff`) they want to exercise.
+ */
+const baseMechanical = {
+  schema: 'reflection.v1' as const,
+  task_ref: 'stack#544',
+  agent: 'claude',
+  session_id: 'sess-abc',
+  timestamp: '2026-06-16T00:00:00.000Z',
+  repo: 'stack',
+  risk: baseRisk,
+  files_changed: ['src/auth.ts'],
+  provenance: baseProvenance,
+};
+
+describe('REVIEW_SURFACES', () => {
+  // Spelled out literally rather than derived from the export, so a reorder of
+  // the source tuple makes this assertion fail.
+  const canonicalOrder = ['auth', 'data', 'infra', 'build', 'ui', 'test', 'docs', 'none'];
+
+  it('keeps the canonical most→least-sensitive ordering', () => {
+    expect(REVIEW_SURFACES).toEqual(canonicalOrder);
+  });
+});
+
+describe('ReflectionV1Schema', () => {
+  /** The three self-report fields, all null, as in a record with no agent input. */
+  const absentSelfReport = {
+    confidence: null,
+    most_likely_wrong: null,
+    known_not_in_diff: null,
+  };
+
+  it('accepts a fully-populated record', () => {
+    const rec: ReflectionV1 = {
+      ...baseMechanical,
+      confidence: 0.7,
+      most_likely_wrong: { surface: 'auth', description: 'token refresh untested' },
+      known_not_in_diff: 'manual QA only on the happy path',
+    };
+    expect(() => ReflectionV1Schema.parse(rec)).not.toThrow();
+  });
+
+  it('accepts a degraded record with null self-report fields', () => {
+    const rec: ReflectionV1 = {
+      ...baseMechanical,
+      ...absentSelfReport,
+      provenance: { ...baseMechanical.provenance, degraded: true },
+    };
+    expect(() => ReflectionV1Schema.parse(rec)).not.toThrow();
+  });
+
+  it('rejects a wrong schema literal', () => {
+    const bad = {
+      ...baseMechanical,
+      ...absentSelfReport,
+      schema: 'reflection.v2',
+    };
+    expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+  });
+
+  it('rejects out-of-range confidence', () => {
+    // Exercise both ends of the inclusive [0, 1] range: one value above the
+    // upper bound and one below the lower bound.
+    for (const confidence of [1.5, -0.1]) {
+      const bad = {
+        ...baseMechanical,
+        ...absentSelfReport,
+        confidence,
+      };
+      expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+    }
+  });
+
+  it('rejects an unknown surface', () => {
+    const bad = {
+      ...baseMechanical,
+      ...absentSelfReport,
+      risk: { ...baseMechanical.risk, surface: 'network' },
+    };
+    expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+  });
+});
+
+describe('ReflectionSelfReportSchema', () => {
+  it('accepts a valid self-report', () => {
+    const ok = ReflectionSelfReportSchema.safeParse({
+      confidence: 0.8,
+      most_likely_wrong: {
+        surface: 'data',
+        description: 'migration not run against prod-sized data',
+      },
+      known_not_in_diff: 'rollback path untested',
+    });
+    expect(ok.success).toBe(true);
+  });
+
+  it('accepts an empty self-report (all optional)', () => {
+    expect(ReflectionSelfReportSchema.safeParse({}).success).toBe(true);
+  });
+
+  it('accepts explicit nulls for every field', () => {
+    // Each field is declared nullable as well as optional, so an explicit
+    // null must be accepted just like an omitted key.
+    const res = ReflectionSelfReportSchema.safeParse({
+      confidence: null,
+      most_likely_wrong: null,
+      known_not_in_diff: null,
+    });
+    expect(res.success).toBe(true);
+  });
+
+  it('rejects confidence above 1', () => {
+    expect(ReflectionSelfReportSchema.safeParse({ confidence: 2 }).success).toBe(false);
+  });
+
+  it('rejects confidence below 0', () => {
+    expect(ReflectionSelfReportSchema.safeParse({ confidence: -0.1 }).success).toBe(false);
+  });
+
+  it('rejects an unknown most_likely_wrong.surface', () => {
+    const res = ReflectionSelfReportSchema.safeParse({
+      most_likely_wrong: { surface: 'network', description: 'x' },
+    });
+    expect(res.success).toBe(false);
+  });
+});
--- a/packages/types/src/reflection/index.ts
+++ b/packages/types/src/reflection/index.ts
@@ -0,0 +1,30 @@
+/**
+ * Agent reflection (v1) — public barrel.
+ *
+ * reflection.ts      — zod schemas (runtime source of truth) + inferred types
+ * reflection.dto.ts  — class-validator DTO for the agent self-report input
+ */
+
+// Runtime values: the surface tuple, the zod schemas, and the schema id.
+export {
+  REVIEW_SURFACES,
+  ReviewSurfaceSchema,
+  MostLikelyWrongSchema,
+  ReflectionRiskSchema,
+  ReflectionModeSchema,
+  ReflectionProvenanceSchema,
+  ReflectionSelfReportSchema,
+  ReflectionV1Schema,
+  REFLECTION_SCHEMA_ID,
+} from './reflection.js';
+
+// Type-only re-exports; each is inferred from the matching zod schema.
+export type {
+  ReviewSurface,
+  MostLikelyWrong,
+  ReflectionRisk,
+  ReflectionMode,
+  ReflectionProvenance,
+  ReflectionSelfReport,
+  ReflectionV1,
+} from './reflection.js';
+
+// class-validator DTO classes for the agent self-report input.
+export { MostLikelyWrongDto, ReflectionSelfReportDto } from './reflection.dto.js';
--- a/packages/types/src/reflection/reflection.dto.ts
+++ b/packages/types/src/reflection/reflection.dto.ts
@@ -0,0 +1,55 @@
+/**
+ * Reflection self-report DTO — class-validator boundary.
+ *
+ * Validates the agent-supplied self-report input (the optional
+ * `$REFLECTION_INPUT` file, default `<repo>/.mosaic/reflection-input.json`)
+ * before it is merged into a `reflection.v1` record. This is the only
+ * externally-authored input on the reflection path, so it gets a DTO per the
+ * Mosaic module-boundary rule.
+ *
+ * Class-validator only (no class-transformer `@Type`) — matching `chat.dto.ts`
+ * — so the module is safe to import without a `reflect-metadata` shim. Deep
+ * nested validation of `most_likely_wrong` is owned by the zod
+ * `ReflectionSelfReportSchema` in `reflection.ts`, which is what the Stop hook
+ * actually enforces at runtime.
+ */
+
+import {
+  IsIn,
+  IsNumber,
+  IsObject,
+  IsOptional,
+  IsString,
+  Max,
+  Min,
+  MaxLength,
+} from 'class-validator';
+
+import { REVIEW_SURFACES } from './reflection.js';
+
+/**
+ * Shape of `most_likely_wrong`: a review surface plus a free-text description
+ * capped at 4 000 characters.
+ *
+ * NOTE(review): `ReflectionSelfReportDto.most_likely_wrong` carries only
+ * `@IsOptional()` and `@IsObject()`, so these decorators appear to run only
+ * when an instance of this class is validated directly — confirm against the
+ * caller. Structural validation at runtime is done by the zod schema.
+ */
+export class MostLikelyWrongDto {
+  // Copy the readonly tuple into a plain array instead of casting through
+  // `unknown`, so the compiler keeps checking what is handed to `IsIn`.
+  @IsIn([...REVIEW_SURFACES])
+  surface!: string;
+
+  @IsString()
+  @MaxLength(4_000)
+  description!: string;
+}
+
+/**
+ * Top-level shape of the agent-supplied self-report. Every property is marked
+ * `@IsOptional()`.
+ */
+export class ReflectionSelfReportDto {
+  // A number within the inclusive range [0, 1].
+  @IsOptional()
+  @IsNumber()
+  @Min(0)
+  @Max(1)
+  confidence?: number;
+
+  // Checked only for being an object at this level; no nested-validation
+  // decorator is applied here.
+  @IsOptional()
+  @IsObject()
+  most_likely_wrong?: MostLikelyWrongDto;
+
+  // Free text, capped at 8 000 characters.
+  @IsOptional()
+  @IsString()
+  @MaxLength(8_000)
+  known_not_in_diff?: string;
+}
--- a/packages/types/src/reflection/reflection.ts
+++ b/packages/types/src/reflection/reflection.ts
@@ -0,0 +1,90 @@
+/**
+ * Agent reflection (v1) — wire schema.
+ *
+ * Runtime source of truth for the `reflection.v1` sidecar emitted at end-of-run
+ * by the Stop hook (design §10 step 1). The JSON Schema artifact at
+ * `@mosaicstack/macp` `src/schemas/reflection.v1.schema.json` is the documented
+ * contract; this zod schema is the executable one and MUST agree with it.
+ *
+ * Field provenance:
+ *   - MECHANICAL  (risk, files_changed, ids, provenance): written by the hook.
+ *   - SELF-REPORTED (confidence, most_likely_wrong, known_not_in_diff): merged
+ *     from an optional agent-supplied input; null when absent.
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+
+/**
+ * Review surfaces, ordered most- to least-sensitive. Mirrors macp risk-floor.
+ * Declared `as const` so the literal members are preserved for `z.enum`.
+ */
+export const REVIEW_SURFACES = [
+  'auth',
+  'data',
+  'infra',
+  'build',
+  'ui',
+  'test',
+  'docs',
+  'none',
+] as const;
+
+/** Zod enum built from {@link REVIEW_SURFACES}. */
+export const ReviewSurfaceSchema = z.enum(REVIEW_SURFACES);
+/** Union of the surface literals, inferred from {@link ReviewSurfaceSchema}. */
+export type ReviewSurface = z.infer<typeof ReviewSurfaceSchema>;
+
+// Field validators for `most_likely_wrong`, kept separate from the schema call.
+const mostLikelyWrongShape = {
+  surface: ReviewSurfaceSchema,
+  description: z.string(),
+};
+
+/**
+ * SELF-REPORTED: the single most-likely way the work is wrong — the review
+ * surface it falls on plus a free-text description.
+ */
+export const MostLikelyWrongSchema = z.object(mostLikelyWrongShape);
+export type MostLikelyWrong = z.infer<typeof MostLikelyWrongSchema>;
+
+/**
+ * MECHANICAL: output of the diff risk-floor (see `@mosaicstack/macp`).
+ *
+ * `score` is a number in the inclusive range [0, 1]; `surface` must be one of
+ * the review surfaces; `reason` is free text.
+ */
+export const ReflectionRiskSchema = z.object({
+  needs_review: z.boolean(),
+  score: z.number().gte(0).lte(1),
+  surface: ReviewSurfaceSchema,
+  reason: z.string(),
+});
+export type ReflectionRisk = z.infer<typeof ReflectionRiskSchema>;
+
+// Mode literals accepted in a record's provenance block.
+const reflectionModes = ['off', 'solo', 'orchestrated'] as const;
+
+/** Zod enum over the reflection modes. */
+export const ReflectionModeSchema = z.enum(reflectionModes);
+export type ReflectionMode = z.infer<typeof ReflectionModeSchema>;
+
+/**
+ * Provenance block of a record: the fixed `'stop-hook'` source, an integer
+ * attempt counter starting at 1, a degraded flag, and the reflection mode.
+ */
+export const ReflectionProvenanceSchema = z.object({
+  source: z.literal('stop-hook'),
+  reflection_attempt: z.number().int().gte(1),
+  degraded: z.boolean(),
+  reflection_mode: ReflectionModeSchema,
+});
+export type ReflectionProvenance = z.infer<typeof ReflectionProvenanceSchema>;
+
+/**
+ * Self-reported half of a reflection, authored by the agent out-of-band
+ * (e.g. `<repo>/.mosaic/reflection-input.json`) and merged by the hook. Every
+ * field may be omitted or set to `null`; missing fields become `null` in the
+ * assembled record.
+ *
+ * NOTE(review): the class-validator DTO caps `description` at 4 000 and
+ * `known_not_in_diff` at 8 000 characters; no length cap is applied here —
+ * confirm whether the two are meant to agree.
+ */
+export const ReflectionSelfReportSchema = z.object({
+  confidence: z.number().gte(0).lte(1).nullish(),
+  most_likely_wrong: MostLikelyWrongSchema.nullish(),
+  known_not_in_diff: z.string().nullish(),
+});
+export type ReflectionSelfReport = z.infer<typeof ReflectionSelfReportSchema>;
+
+/** Value of the `schema` discriminator carried by every v1 record. */
+export const REFLECTION_SCHEMA_ID = 'reflection.v1' as const;
+
+/**
+ * The full assembled `reflection.v1` sidecar.
+ *
+ * The `schema` literal is taken from {@link REFLECTION_SCHEMA_ID} so the
+ * exported id and the value the schema accepts cannot drift apart. The three
+ * self-reported fields are required keys here but may be `null`.
+ *
+ * NOTE(review): `timestamp` is accepted as any string; no date format is
+ * enforced by this schema.
+ */
+export const ReflectionV1Schema = z.object({
+  schema: z.literal(REFLECTION_SCHEMA_ID),
+  task_ref: z.string(),
+  agent: z.string(),
+  session_id: z.string(),
+  timestamp: z.string(),
+  repo: z.string(),
+  confidence: z.number().min(0).max(1).nullable(),
+  most_likely_wrong: MostLikelyWrongSchema.nullable(),
+  known_not_in_diff: z.string().nullable(),
+  risk: ReflectionRiskSchema,
+  files_changed: z.array(z.string()),
+  provenance: ReflectionProvenanceSchema,
+});
+export type ReflectionV1 = z.infer<typeof ReflectionV1Schema>;