feat(P4-002): semantic search — pgvector embeddings + search API
Add EmbeddingService, which calls an OpenAI-compatible embeddings API (default model text-embedding-3-small; configurable via the EMBEDDING_MODEL and EMBEDDING_API_URL env vars). Wire embedding generation into insight creation and the semantic search endpoint. When OPENAI_API_KEY is configured, POST /api/memory/search generates a query embedding and performs a cosine-distance search via pgvector; without it, the endpoint returns empty results with an explanatory message. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
69
apps/gateway/src/memory/embedding.service.ts
Normal file
69
apps/gateway/src/memory/embedding.service.ts
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
|
import type { EmbeddingProvider } from '@mosaic/memory';
|
||||||
|
|
||||||
|
// Embedding model name used when the EMBEDDING_MODEL environment variable is not set.
const DEFAULT_MODEL = 'text-embedding-3-small';
|
||||||
|
// Length of each embedding vector. Exposed as EmbeddingService.dimensions, sent as the
// `dimensions` field of every request, and used to size the zero-vector fallback.
// NOTE(review): presumably must match the width of the pgvector column — confirm against the schema.
const DEFAULT_DIMENSIONS = 1536;
|
||||||
|
|
||||||
|
/**
 * Subset of the embeddings endpoint's JSON response that this file reads.
 */
interface EmbeddingResponse {
  /** One entry per input text; `index` is used to restore input order. */
  data: { embedding: number[]; index: number }[];
  /** Model name as reported in the response. */
  model: string;
  /** Token accounting as reported in the response. */
  usage: { prompt_tokens: number; total_tokens: number };
}
|
||||||
|
|
||||||
|
/**
 * Generates text embeddings via an OpenAI-compatible `/embeddings` endpoint.
 *
 * Configuration is read once from the environment when the service is constructed:
 * - `OPENAI_API_KEY` — sent as a bearer token; when unset or empty the service reports
 *   itself as unavailable.
 * - `EMBEDDING_API_URL` — base URL of the API (defaults to `https://api.openai.com/v1`).
 * - `EMBEDDING_MODEL` — model name (defaults to `DEFAULT_MODEL`).
 */
@Injectable()
export class EmbeddingService implements EmbeddingProvider {
  private readonly logger = new Logger(EmbeddingService.name);
  private readonly apiKey: string | undefined;
  private readonly baseUrl: string;
  private readonly model: string;

  /** Length of every vector produced by this service; also sent as `dimensions` in requests. */
  readonly dimensions = DEFAULT_DIMENSIONS;

  constructor() {
    this.apiKey = process.env['OPENAI_API_KEY'];
    // Drop trailing slashes so `${baseUrl}/embeddings` never contains a double slash.
    this.baseUrl = (process.env['EMBEDDING_API_URL'] ?? 'https://api.openai.com/v1').replace(
      /\/+$/,
      '',
    );
    this.model = process.env['EMBEDDING_MODEL'] ?? DEFAULT_MODEL;
  }

  /** `true` when an API key is configured, i.e. real embeddings can be requested. */
  get available(): boolean {
    return !!this.apiKey;
  }

  /**
   * Embeds a single text.
   *
   * @param text - The text to embed.
   * @returns The embedding vector for `text`.
   * @throws Error if the API call fails or yields no embedding.
   */
  async embed(text: string): Promise<number[]> {
    const [vector] = await this.embedBatch([text]);
    if (vector === undefined) {
      throw new Error('Embedding API returned no embedding');
    }
    return vector;
  }

  /**
   * Embeds several texts in one API request.
   *
   * @param texts - Texts to embed; an empty array short-circuits to `[]` without a request.
   * @returns One vector per input, in the same order as `texts`. When no API key is
   *   configured, zero vectors of length `dimensions` are returned instead.
   * @throws Error if the API responds with a non-2xx status, or if the response does not
   *   contain exactly one embedding per input.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    // Nothing to embed: avoid sending an empty `input` array to the API.
    if (texts.length === 0) {
      return [];
    }

    if (!this.apiKey) {
      this.logger.warn('No OPENAI_API_KEY configured — returning zero vectors');
      return texts.map(() => new Array<number>(this.dimensions).fill(0));
    }

    const response = await fetch(`${this.baseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: this.model,
        input: texts,
        dimensions: this.dimensions,
      }),
    });

    if (!response.ok) {
      // The response body is logged for diagnosis but kept out of the thrown message.
      const body = await response.text();
      this.logger.error(`Embedding API error: ${response.status} ${body}`);
      throw new Error(`Embedding API returned ${response.status}`);
    }

    // The payload is external input: treat every field as possibly missing until checked.
    const payload = (await response.json()) as Partial<EmbeddingResponse>;
    const items = payload.data;
    if (!Array.isArray(items) || items.length !== texts.length) {
      const received = Array.isArray(items) ? items.length : 0;
      this.logger.error(
        `Embedding API returned ${received} embeddings for ${texts.length} inputs`,
      );
      throw new Error(`Embedding API returned ${received} embeddings for ${texts.length} inputs`);
    }

    // Sort a copy by `index` so results line up with `texts` without mutating the payload.
    return [...items].sort((a, b) => a.index - b.index).map((item) => item.embedding);
  }
}
|
||||||
@@ -15,12 +15,16 @@ import {
|
|||||||
import type { Memory } from '@mosaic/memory';
|
import type { Memory } from '@mosaic/memory';
|
||||||
import { MEMORY } from './memory.tokens.js';
|
import { MEMORY } from './memory.tokens.js';
|
||||||
import { AuthGuard } from '../auth/auth.guard.js';
|
import { AuthGuard } from '../auth/auth.guard.js';
|
||||||
|
import { EmbeddingService } from './embedding.service.js';
|
||||||
import type { UpsertPreferenceDto, CreateInsightDto, SearchMemoryDto } from './memory.dto.js';
|
import type { UpsertPreferenceDto, CreateInsightDto, SearchMemoryDto } from './memory.dto.js';
|
||||||
|
|
||||||
@Controller('api/memory')
|
@Controller('api/memory')
|
||||||
@UseGuards(AuthGuard)
|
@UseGuards(AuthGuard)
|
||||||
export class MemoryController {
|
export class MemoryController {
|
||||||
constructor(@Inject(MEMORY) private readonly memory: Memory) {}
|
constructor(
|
||||||
|
@Inject(MEMORY) private readonly memory: Memory,
|
||||||
|
private readonly embeddings: EmbeddingService,
|
||||||
|
) {}
|
||||||
|
|
||||||
// ─── Preferences ────────────────────────────────────────────────────
|
// ─── Preferences ────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -76,12 +80,17 @@ export class MemoryController {
|
|||||||
|
|
||||||
@Post('insights')
|
@Post('insights')
|
||||||
async createInsight(@Query('userId') userId: string, @Body() dto: CreateInsightDto) {
|
async createInsight(@Query('userId') userId: string, @Body() dto: CreateInsightDto) {
|
||||||
|
const embedding = this.embeddings.available
|
||||||
|
? await this.embeddings.embed(dto.content)
|
||||||
|
: undefined;
|
||||||
|
|
||||||
return this.memory.insights.create({
|
return this.memory.insights.create({
|
||||||
userId,
|
userId,
|
||||||
content: dto.content,
|
content: dto.content,
|
||||||
source: dto.source,
|
source: dto.source,
|
||||||
category: dto.category,
|
category: dto.category,
|
||||||
metadata: dto.metadata,
|
metadata: dto.metadata,
|
||||||
|
embedding: embedding ?? null,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,13 +105,22 @@ export class MemoryController {
|
|||||||
|
|
||||||
@Post('search')
|
@Post('search')
|
||||||
async searchMemory(@Query('userId') userId: string, @Body() dto: SearchMemoryDto) {
|
async searchMemory(@Query('userId') userId: string, @Body() dto: SearchMemoryDto) {
|
||||||
// Search requires an embedding provider to be configured.
|
if (!this.embeddings.available) {
|
||||||
// For now, return empty results if no embedding is available.
|
return {
|
||||||
// P4-002 will implement the full embedding + search pipeline.
|
query: dto.query,
|
||||||
return {
|
results: [],
|
||||||
query: dto.query,
|
message: 'Semantic search requires OPENAI_API_KEY for embeddings',
|
||||||
results: [],
|
};
|
||||||
message: 'Semantic search requires embedding provider (P4-002)',
|
}
|
||||||
};
|
|
||||||
|
const queryEmbedding = await this.embeddings.embed(dto.query);
|
||||||
|
const results = await this.memory.insights.searchByEmbedding(
|
||||||
|
userId,
|
||||||
|
queryEmbedding,
|
||||||
|
dto.limit ?? 10,
|
||||||
|
dto.maxDistance ?? 0.8,
|
||||||
|
);
|
||||||
|
|
||||||
|
return { query: dto.query, results };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import type { Db } from '@mosaic/db';
|
|||||||
import { DB } from '../database/database.module.js';
|
import { DB } from '../database/database.module.js';
|
||||||
import { MEMORY } from './memory.tokens.js';
|
import { MEMORY } from './memory.tokens.js';
|
||||||
import { MemoryController } from './memory.controller.js';
|
import { MemoryController } from './memory.controller.js';
|
||||||
|
import { EmbeddingService } from './embedding.service.js';
|
||||||
|
|
||||||
@Global()
|
@Global()
|
||||||
@Module({
|
@Module({
|
||||||
@@ -13,8 +14,9 @@ import { MemoryController } from './memory.controller.js';
|
|||||||
useFactory: (db: Db): Memory => createMemory(db),
|
useFactory: (db: Db): Memory => createMemory(db),
|
||||||
inject: [DB],
|
inject: [DB],
|
||||||
},
|
},
|
||||||
|
EmbeddingService,
|
||||||
],
|
],
|
||||||
controllers: [MemoryController],
|
controllers: [MemoryController],
|
||||||
exports: [MEMORY],
|
exports: [MEMORY, EmbeddingService],
|
||||||
})
|
})
|
||||||
export class MemoryModule {}
|
export class MemoryModule {}
|
||||||
|
|||||||
@@ -37,8 +37,8 @@
|
|||||||
| P3-006 | done | Phase 3 | Settings — provider config, profile, integrations | #88 | #31 |
|
| P3-006 | done | Phase 3 | Settings — provider config, profile, integrations | #88 | #31 |
|
||||||
| P3-007 | done | Phase 3 | Admin panel — user management, RBAC | #89 | #32 |
|
| P3-007 | done | Phase 3 | Admin panel — user management, RBAC | #89 | #32 |
|
||||||
| P3-008 | done | Phase 3 | Verify Phase 3 — web dashboard functional E2E | — | #33 |
|
| P3-008 | done | Phase 3 | Verify Phase 3 — web dashboard functional E2E | — | #33 |
|
||||||
| P4-001 | not-started | Phase 4 | @mosaic/memory — preference + insight stores | — | #34 |
|
| P4-001 | in-progress | Phase 4 | @mosaic/memory — preference + insight stores | — | #34 |
|
||||||
| P4-002 | not-started | Phase 4 | Semantic search — pgvector embeddings + search API | — | #35 |
|
| P4-002 | in-progress | Phase 4 | Semantic search — pgvector embeddings + search API | — | #35 |
|
||||||
| P4-003 | not-started | Phase 4 | @mosaic/log — log ingest, parsing, tiered storage | — | #36 |
|
| P4-003 | not-started | Phase 4 | @mosaic/log — log ingest, parsing, tiered storage | — | #36 |
|
||||||
| P4-004 | not-started | Phase 4 | Summarization pipeline — Haiku-tier LLM + cron | — | #37 |
|
| P4-004 | not-started | Phase 4 | Summarization pipeline — Haiku-tier LLM + cron | — | #37 |
|
||||||
| P4-005 | not-started | Phase 4 | Memory integration — inject into agent sessions | — | #38 |
|
| P4-005 | not-started | Phase 4 | Memory integration — inject into agent sessions | — | #38 |
|
||||||
|
|||||||
Reference in New Issue
Block a user