497 lines
16 KiB
TypeScript
497 lines
16 KiB
TypeScript
import { Type } from '@sinclair/typebox';
|
|
import type { ToolDefinition } from '@mariozechner/pi-coding-agent';
|
|
|
|
/** Timeout, in milliseconds, passed to `fetchWithTimeout` for every provider request in this file. */
const DEFAULT_TIMEOUT_MS = 15_000;
|
|
/** Upper bound applied to the caller-supplied `limit` of the search tools. */
const MAX_RESULTS = 10;
|
|
/** Maximum number of response-body bytes that `readLimited` buffers before it stops reading. */
const MAX_RESPONSE_BYTES = 256 * 1024; // 256 KB
|
|
|
|
// ─── Provider helpers ────────────────────────────────────────────────────────
|
|
|
|
/** One normalized search hit, independent of the provider that produced it. */
interface SearchResult {
  /** Heading shown for the hit. */
  title: string;
  /** Link the hit points to. */
  url: string;
  /** Short excerpt or description printed under the title. */
  snippet: string;
}
|
|
|
|
/** Outcome of a single provider query: the hits plus an optional failure description. */
interface SearchResponse {
  /** Name of the provider that produced this response (for example `'brave'`). */
  provider: string;
  /** The query string that was searched for. */
  query: string;
  /** Normalized hits; the providers in this file return an empty array on failure. */
  results: SearchResult[];
  /** Failure description; left unset when the request succeeded. */
  error?: string;
}
|
|
|
|
/**
 * Performs a `fetch` that is aborted when no response arrives within `timeoutMs`.
 *
 * The timer only guards the wait for `fetch` to settle: it is cleared in the
 * `finally` block, so reading the response body afterwards is not covered by
 * the timeout.
 *
 * @param url - URL to request.
 * @param init - Standard `fetch` options. A caller-supplied `init.signal` is
 *   honored: aborting it aborts this request as well.
 * @param timeoutMs - Milliseconds to wait before aborting the request.
 * @returns The response, as soon as `fetch` resolves.
 * @throws Whatever `fetch` rejects with. On timeout the request is aborted with
 *   a `TimeoutError` `DOMException` as the abort reason.
 */
async function fetchWithTimeout(
  url: string,
  init: RequestInit,
  timeoutMs: number,
): Promise<Response> {
  const controller = new AbortController();

  // `{ ...init, signal }` below replaces any signal the caller passed, so the
  // caller's abort has to be forwarded to our controller explicitly.
  const callerSignal = init.signal ?? undefined;
  const forwardAbort = (): void => controller.abort(callerSignal?.reason);
  if (callerSignal?.aborted) {
    forwardAbort();
  } else {
    callerSignal?.addEventListener('abort', forwardAbort, { once: true });
  }

  const timer = setTimeout(() => {
    controller.abort(new DOMException(`Request timed out after ${timeoutMs}ms`, 'TimeoutError'));
  }, timeoutMs);

  try {
    return await fetch(url, { ...init, signal: controller.signal });
  } finally {
    clearTimeout(timer);
    callerSignal?.removeEventListener('abort', forwardAbort);
  }
}
|
|
|
|
/**
 * Reads a response body as UTF-8 text, buffering at most `maxBytes` bytes.
 *
 * Once the cap is exceeded the remaining stream is cancelled and the text
 * decoded from the first `maxBytes` bytes is returned. Because the cut is made
 * on a byte boundary, a multi-byte character split by it decodes to U+FFFD.
 *
 * @param response - Response whose body should be consumed.
 * @param maxBytes - Maximum number of bytes to keep; negative values are
 *   treated as 0. Defaults to `MAX_RESPONSE_BYTES`.
 * @returns The decoded (possibly truncated) body, or `''` when the response has no body.
 */
async function readLimited(
  response: Response,
  maxBytes: number = MAX_RESPONSE_BYTES,
): Promise<string> {
  const reader = response.body?.getReader();
  if (!reader) return '';

  const cap = Math.max(0, maxBytes);
  const chunks: Uint8Array[] = [];
  let buffered = 0;

  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;

    const remaining = cap - buffered;
    if (value.length > remaining) {
      // Keep only the part that still fits, then stop pulling data.
      if (remaining > 0) chunks.push(value.subarray(0, remaining));
      buffered = cap;
      try {
        // Awaited so that a failing cancel cannot surface later as an
        // unhandled promise rejection.
        await reader.cancel();
      } catch {
        // Best effort: everything we intend to return is already buffered.
      }
      break;
    }

    chunks.push(value);
    buffered += value.length;
  }

  const combined = new Uint8Array(buffered);
  let offset = 0;
  for (const chunk of chunks) {
    combined.set(chunk, offset);
    offset += chunk.length;
  }
  return new TextDecoder().decode(combined);
}
|
|
|
|
// ─── Brave Search ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Queries the Brave web search endpoint.
 *
 * Requires the `BRAVE_API_KEY` environment variable. Never throws: every
 * failure (missing key, non-2xx status, network or parse error) is reported
 * through the `error` field of the returned response.
 *
 * @param query - Search terms.
 * @param limit - Maximum number of hits to return; the `count` sent to the API is capped at 20.
 * @returns The normalized hits, or an empty result list plus `error` on failure.
 */
async function searchBrave(query: string, limit: number): Promise<SearchResponse> {
  const apiKey = process.env['BRAVE_API_KEY'];
  if (!apiKey) return { provider: 'brave', query, results: [], error: 'BRAVE_API_KEY not set' };

  try {
    const params = new URLSearchParams({
      q: query,
      count: String(Math.min(limit, 20)),
    });
    const res = await fetchWithTimeout(
      `https://api.search.brave.com/res/v1/web/search?${params}`,
      { headers: { 'X-Subscription-Token': apiKey, Accept: 'application/json' } },
      DEFAULT_TIMEOUT_MS,
    );
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      return { provider: 'brave', query, results: [], error: `HTTP ${res.status}: ${body}` };
    }

    // The payload is external JSON, so no field is assumed to be present.
    type RawHit = { title?: string; url?: string; description?: string };
    const data = (await res.json()) as { web?: { results?: RawHit[] } } | null;
    const rawHits = data?.web?.results;

    const results: SearchResult[] = (Array.isArray(rawHits) ? rawHits : [])
      // A hit without a link is unusable; skipping it also keeps the text
      // "undefined" out of the formatted output.
      .filter(
        (hit): hit is RawHit & { url: string } => typeof hit?.url === 'string' && hit.url !== '',
      )
      .slice(0, limit)
      .map((hit) => ({
        title: hit.title ?? hit.url,
        url: hit.url,
        snippet: hit.description ?? '',
      }));
    return { provider: 'brave', query, results };
  } catch (err) {
    return {
      provider: 'brave',
      query,
      results: [],
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
|
|
// ─── Tavily Search ───────────────────────────────────────────────────────────
|
|
|
|
/**
 * Queries the Tavily search endpoint with a JSON `POST`.
 *
 * Requires the `TAVILY_API_KEY` environment variable, which is sent in the
 * request body. Never throws: every failure (missing key, non-2xx status,
 * network or parse error) is reported through the `error` field.
 *
 * @param query - Search terms.
 * @param limit - Maximum number of hits to return; `max_results` sent to the API is capped at 10.
 * @returns The normalized hits, or an empty result list plus `error` on failure.
 */
async function searchTavily(query: string, limit: number): Promise<SearchResponse> {
  const apiKey = process.env['TAVILY_API_KEY'];
  if (!apiKey) return { provider: 'tavily', query, results: [], error: 'TAVILY_API_KEY not set' };

  try {
    const res = await fetchWithTimeout(
      'https://api.tavily.com/search',
      {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          api_key: apiKey,
          query,
          max_results: Math.min(limit, 10),
          include_answer: false,
        }),
      },
      DEFAULT_TIMEOUT_MS,
    );
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      return { provider: 'tavily', query, results: [], error: `HTTP ${res.status}: ${body}` };
    }

    // The payload is external JSON, so no field is assumed to be present.
    type RawHit = { title?: string; url?: string; content?: string };
    const data = (await res.json()) as { results?: RawHit[] } | null;
    const rawHits = data?.results;

    const results: SearchResult[] = (Array.isArray(rawHits) ? rawHits : [])
      // A hit without a link is unusable; skipping it also keeps the text
      // "undefined" out of the formatted output.
      .filter(
        (hit): hit is RawHit & { url: string } => typeof hit?.url === 'string' && hit.url !== '',
      )
      .slice(0, limit)
      .map((hit) => ({
        title: hit.title ?? hit.url,
        url: hit.url,
        snippet: hit.content ?? '',
      }));
    return { provider: 'tavily', query, results };
  } catch (err) {
    return {
      provider: 'tavily',
      query,
      results: [],
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
|
|
// ─── SearXNG (self-hosted) ───────────────────────────────────────────────────
|
|
|
|
/**
 * Queries a SearXNG instance through its JSON search endpoint.
 *
 * Requires the `SEARXNG_URL` environment variable (base URL of the instance;
 * a single trailing slash is stripped). Never throws: every failure (missing
 * URL, non-2xx status, network or parse error) is reported through `error`.
 *
 * @param query - Search terms.
 * @param limit - Maximum number of hits to keep from the first result page.
 * @returns The normalized hits, or an empty result list plus `error` on failure.
 */
async function searchSearxng(query: string, limit: number): Promise<SearchResponse> {
  const baseUrl = process.env['SEARXNG_URL'];
  if (!baseUrl) return { provider: 'searxng', query, results: [], error: 'SEARXNG_URL not set' };

  try {
    const params = new URLSearchParams({
      q: query,
      format: 'json',
      pageno: '1',
    });
    const res = await fetchWithTimeout(
      `${baseUrl.replace(/\/$/, '')}/search?${params}`,
      { headers: { Accept: 'application/json' } },
      DEFAULT_TIMEOUT_MS,
    );
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      return { provider: 'searxng', query, results: [], error: `HTTP ${res.status}: ${body}` };
    }

    // The payload is external JSON, so no field is assumed to be present.
    type RawHit = { title?: string; url?: string; content?: string };
    const data = (await res.json()) as { results?: RawHit[] } | null;
    const rawHits = data?.results;

    const results: SearchResult[] = (Array.isArray(rawHits) ? rawHits : [])
      // A hit without a link is unusable; skipping it also keeps the text
      // "undefined" out of the formatted output.
      .filter(
        (hit): hit is RawHit & { url: string } => typeof hit?.url === 'string' && hit.url !== '',
      )
      .slice(0, limit)
      .map((hit) => ({
        title: hit.title ?? hit.url,
        url: hit.url,
        snippet: hit.content ?? '',
      }));
    return { provider: 'searxng', query, results };
  } catch (err) {
    return {
      provider: 'searxng',
      query,
      results: [],
      error: err instanceof Error ? err.message : String(err),
    };
  }
}
|
|
|
|
// ─── DuckDuckGo (Instant Answer API) ─────────────────────────────────────────
|
|
|
|
/**
 * Queries the DuckDuckGo Instant Answer API (JSON, no API key).
 *
 * The abstract, when it has both text and a URL, becomes the first hit;
 * related topics and their nested sub-topics follow in response order until
 * `limit` hits are collected. Never throws: failures are reported through the
 * `error` field of the returned response.
 *
 * @param query - Search terms.
 * @param limit - Maximum number of hits to return.
 * @returns The normalized hits, or an empty result list plus `error` on failure.
 */
async function searchDuckDuckGo(query: string, limit: number): Promise<SearchResponse> {
  try {
    const qs = new URLSearchParams({
      q: query,
      format: 'json',
      no_html: '1',
      skip_disambig: '1',
    });
    const res = await fetchWithTimeout(
      `https://api.duckduckgo.com/?${qs}`,
      { headers: { Accept: 'application/json' } },
      DEFAULT_TIMEOUT_MS,
    );
    if (!res.ok) {
      return { provider: 'duckduckgo', query, results: [], error: `HTTP ${res.status}` };
    }

    type LinkedText = { Text?: string; FirstURL?: string };
    const payload = JSON.parse(await readLimited(res)) as {
      AbstractText?: string;
      AbstractURL?: string;
      AbstractSource?: string;
      RelatedTopics?: Array<LinkedText & { Result?: string; Topics?: LinkedText[] }>;
    };

    const hits: SearchResult[] = [];
    const isFull = (): boolean => hits.length >= limit;
    // Entries lacking either the text or the link are skipped.
    const addLinked = (entry: LinkedText): void => {
      if (!entry.Text || !entry.FirstURL) return;
      hits.push({ title: entry.Text.slice(0, 120), url: entry.FirstURL, snippet: entry.Text });
    };

    // Main abstract result
    if (payload.AbstractText && payload.AbstractURL) {
      hits.push({
        title: payload.AbstractSource ?? 'DuckDuckGo Abstract',
        url: payload.AbstractURL,
        snippet: payload.AbstractText,
      });
    }

    // Related topics, each followed by its sub-topics
    for (const topic of payload.RelatedTopics ?? []) {
      if (isFull()) break;
      addLinked(topic);
      for (const child of topic.Topics ?? []) {
        if (isFull()) break;
        addLinked(child);
      }
    }

    return { provider: 'duckduckgo', query, results: hits.slice(0, limit) };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { provider: 'duckduckgo', query, results: [], error: message };
  }
}
|
|
|
|
// ─── Provider resolution ─────────────────────────────────────────────────────
|
|
|
|
/** Name of a search backend; `'auto'` makes `executeSearch` try the available providers in order. */
type SearchProvider = 'brave' | 'tavily' | 'searxng' | 'duckduckgo' | 'auto';
|
|
|
|
/**
 * Lists the providers that can be used right now, in priority order.
 *
 * A keyed provider is included only when its environment variable holds a
 * non-empty value; DuckDuckGo needs no configuration and always comes last.
 *
 * @returns Provider names ordered brave, tavily, searxng, duckduckgo (unconfigured ones omitted).
 */
function getAvailableProviders(): SearchProvider[] {
  const envBacked: Array<[SearchProvider, string]> = [
    ['brave', 'BRAVE_API_KEY'],
    ['tavily', 'TAVILY_API_KEY'],
    ['searxng', 'SEARXNG_URL'],
  ];
  const configured = envBacked
    .filter(([, envVar]) => Boolean(process.env[envVar]))
    .map(([name]) => name);
  // DuckDuckGo is always available (no API key needed)
  return [...configured, 'duckduckgo'];
}
|
|
|
|
/**
 * Runs a search against one provider, or against the available providers in
 * turn when `provider` is `'auto'`.
 *
 * In `'auto'` mode the providers from `getAvailableProviders()` are tried in
 * order and the first response that has no error and at least one hit wins.
 * When none qualifies, the response of the last provider tried is returned, so
 * the caller still sees that provider's error or empty result. Each provider
 * is queried at most once.
 *
 * @param provider - Provider to use, or `'auto'`.
 * @param query - Search terms.
 * @param limit - Maximum number of hits to return.
 * @returns The response of the provider that answered.
 * @throws Error when `provider` is not one of the known provider names.
 */
async function executeSearch(
  provider: SearchProvider,
  query: string,
  limit: number,
): Promise<SearchResponse> {
  switch (provider) {
    case 'brave':
      return searchBrave(query, limit);
    case 'tavily':
      return searchTavily(query, limit);
    case 'searxng':
      return searchSearxng(query, limit);
    case 'duckduckgo':
      return searchDuckDuckGo(query, limit);
    case 'auto': {
      // Try providers in priority order: Brave > Tavily > SearXNG > DuckDuckGo
      let lastAttempt: SearchResponse | undefined;
      for (const candidate of getAvailableProviders()) {
        if (candidate === 'auto') continue; // never recurse into 'auto' itself
        lastAttempt = await executeSearch(candidate, query, limit);
        if (!lastAttempt.error && lastAttempt.results.length > 0) return lastAttempt;
      }
      // DuckDuckGo is the last entry of the list, so it has normally been
      // queried already: reuse that response rather than sending the same
      // request again. The direct call only covers an empty provider list.
      return lastAttempt ?? searchDuckDuckGo(query, limit);
    }
    default: {
      const unknownProvider: never = provider;
      throw new Error(`Unknown search provider: ${String(unknownProvider)}`);
    }
  }
}
|
|
|
|
/**
 * Renders a search response as plain text.
 *
 * The output starts with the provider and the quoted query, followed by an
 * `Error:` line when the response carries one, then either
 * `No results found.` or a numbered list of hits (title, URL, snippet, blank
 * separator line).
 *
 * @param response - Response to render.
 * @returns The newline-joined report.
 */
function formatSearchResults(response: SearchResponse): string {
  const hits = response.results;

  const header = [`Search provider: ${response.provider}`, `Query: "${response.query}"`];
  const errorPart = response.error ? [`Error: ${response.error}`] : [];
  const listing =
    hits.length === 0
      ? ['No results found.']
      : [
          `Results (${hits.length}):\n`,
          ...hits.flatMap((hit, index) => [
            `${index + 1}. ${hit.title}`,
            ` URL: ${hit.url}`,
            ` ${hit.snippet}`,
            '',
          ]),
        ];

  return [...header, ...errorPart, ...listing].join('\n');
}
|
|
|
|
// ─── Tool exports ────────────────────────────────────────────────────────────
|
|
|
|
/** Provider names accepted by the `web_search` tool. */
const SEARCH_PROVIDER_NAMES: readonly SearchProvider[] = [
  'auto',
  'brave',
  'tavily',
  'searxng',
  'duckduckgo',
];

/** Number of results requested when the caller supplies no usable `limit`. */
const DEFAULT_RESULT_LIMIT = 5;

/** Narrows an arbitrary string to a known provider name. */
function isSearchProvider(value: string): value is SearchProvider {
  return (SEARCH_PROVIDER_NAMES as readonly string[]).includes(value);
}

/** Turns a caller-supplied limit into an integer between 1 and `MAX_RESULTS`. */
function clampResultLimit(requested: unknown): number {
  // NaN and non-numbers fall back to the default; fractions are truncated so
  // that the value is safe to send as an API `count` and to pass to `slice`.
  const base =
    typeof requested === 'number' && !Number.isNaN(requested) ? requested : DEFAULT_RESULT_LIMIT;
  return Math.trunc(Math.min(Math.max(base, 1), MAX_RESULTS));
}

/** Wraps plain text in the result shape returned by every tool defined here. */
function textToolResult(text: string): {
  content: Array<{ type: 'text'; text: string }>;
  details: undefined;
} {
  return { content: [{ type: 'text', text }], details: undefined };
}

/** Formats a caught value as the `Search failed: …` tool message. */
function searchFailure(err: unknown): string {
  return `Search failed: ${err instanceof Error ? err.message : String(err)}`;
}

/**
 * Builds the web-search tool definitions.
 *
 * - `web_search`: searches with an explicit provider or `"auto"`.
 * - `web_search_news`: uses the Brave news endpoint when `BRAVE_API_KEY` is
 *   set and the request succeeds, otherwise runs a regular `"auto"` search for
 *   `"<query> news latest"`.
 * - `web_search_providers`: reports which providers are configured.
 *
 * Each tool reports problems as text content instead of throwing.
 *
 * @returns The three tool definitions, in the order listed above.
 */
export function createSearchTools(): ToolDefinition[] {
  const webSearch: ToolDefinition = {
    name: 'web_search',
    label: 'Web Search',
    description:
      'Search the web using configured search providers. ' +
      'Supports Brave, Tavily, SearXNG, and DuckDuckGo. ' +
      'Use "auto" provider to pick the best available. ' +
      'DuckDuckGo is always available as a fallback (no API key needed).',
    parameters: Type.Object({
      query: Type.String({ description: 'Search query' }),
      provider: Type.Optional(
        Type.String({
          description:
            'Search provider: "auto" (default), "brave", "tavily", "searxng", or "duckduckgo"',
        }),
      ),
      limit: Type.Optional(
        Type.Number({ description: `Max results to return (default 5, max ${MAX_RESULTS})` }),
      ),
    }),
    async execute(_toolCallId, params) {
      const { query, provider, limit } = params as {
        query: string;
        provider?: string;
        limit?: number;
      };

      // An empty query would only produce one failing request per provider.
      if (typeof query !== 'string' || query.trim() === '') {
        return textToolResult('Search query must not be empty.');
      }

      // Validate before narrowing; surrounding whitespace and letter case are
      // tolerated because the value comes from free-form tool arguments.
      const requestedProvider =
        provider == null ? 'auto' : String(provider).trim().toLowerCase();
      if (!isSearchProvider(requestedProvider)) {
        return textToolResult(
          `Invalid provider "${provider}". Valid: ${SEARCH_PROVIDER_NAMES.join(', ')}`,
        );
      }

      const effectiveLimit = clampResultLimit(limit);

      try {
        const response = await executeSearch(requestedProvider, query, effectiveLimit);
        return textToolResult(formatSearchResults(response));
      } catch (err) {
        return textToolResult(searchFailure(err));
      }
    },
  };

  const webSearchNews: ToolDefinition = {
    name: 'web_search_news',
    label: 'Web Search (News)',
    description:
      'Search for recent news articles. Uses Brave News API if available, falls back to standard search with news keywords.',
    parameters: Type.Object({
      query: Type.String({ description: 'News search query' }),
      limit: Type.Optional(
        Type.Number({ description: `Max results (default 5, max ${MAX_RESULTS})` }),
      ),
    }),
    async execute(_toolCallId, params) {
      const { query, limit } = params as { query: string; limit?: number };

      if (typeof query !== 'string' || query.trim() === '') {
        return textToolResult('Search query must not be empty.');
      }

      const effectiveLimit = clampResultLimit(limit);

      // Try Brave News API first (dedicated news endpoint)
      const braveKey = process.env['BRAVE_API_KEY'];
      if (braveKey) {
        try {
          const newsParams = new URLSearchParams({
            q: query,
            count: String(effectiveLimit),
          });
          const res = await fetchWithTimeout(
            `https://api.search.brave.com/res/v1/news/search?${newsParams}`,
            {
              headers: {
                'X-Subscription-Token': braveKey,
                Accept: 'application/json',
              },
            },
            DEFAULT_TIMEOUT_MS,
          );
          if (res.ok) {
            // External JSON: no field is assumed to be present.
            const data = (await res.json()) as {
              results?: Array<{
                title?: string;
                url?: string;
                description?: string;
                age?: string;
              }>;
            } | null;
            const results: SearchResult[] = (data?.results ?? [])
              .slice(0, effectiveLimit)
              .map((r) => ({
                title: (r.title ?? '') + (r.age ? ` (${r.age})` : ''),
                url: r.url ?? '',
                snippet: r.description ?? '',
              }));
            const response: SearchResponse = { provider: 'brave-news', query, results };
            return textToolResult(formatSearchResults(response));
          }
        } catch {
          // Deliberate best effort: any failure of the news endpoint falls
          // through to the generic search below.
        }
      }

      // Fallback: standard search with "news" appended
      const newsQuery = `${query} news latest`;
      try {
        const response = await executeSearch('auto', newsQuery, effectiveLimit);
        return textToolResult(formatSearchResults(response));
      } catch (err) {
        return textToolResult(searchFailure(err));
      }
    },
  };

  const searchProviders: ToolDefinition = {
    name: 'web_search_providers',
    label: 'List Search Providers',
    description: 'List the currently available and configured web search providers.',
    parameters: Type.Object({}),
    async execute() {
      const available = getAvailableProviders();
      const allProviders = [
        { name: 'brave', configured: !!process.env['BRAVE_API_KEY'], envVar: 'BRAVE_API_KEY' },
        { name: 'tavily', configured: !!process.env['TAVILY_API_KEY'], envVar: 'TAVILY_API_KEY' },
        { name: 'searxng', configured: !!process.env['SEARXNG_URL'], envVar: 'SEARXNG_URL' },
        { name: 'duckduckgo', configured: true, envVar: '(none — always available)' },
      ];

      const lines = ['Search providers:\n'];
      for (const p of allProviders) {
        const status = p.configured ? '✓ configured' : '✗ not configured';
        lines.push(` ${p.name}: ${status} (${p.envVar})`);
      }
      lines.push(`\nActive providers for "auto" mode: ${available.join(', ')}`);
      return textToolResult(lines.join('\n'));
    },
  };

  return [webSearch, webSearchNews, searchProviders];
}
|