Fix web_search: switch from Jina AI (which now requires an API key) to DuckDuckGo HTML scraping

Made-with: Cursor
This commit is contained in:
2026-03-02 19:49:09 -08:00
parent 6fc4d52232
commit d9496ce847

View File

@@ -1,8 +1,8 @@
import { registerTool } from './registry'; import { registerTool } from './registry';
/** /**
* Web search via Jina AI's free search endpoint (s.jina.ai). * Web search via DuckDuckGo HTML endpoint.
* No API key required. Returns clean, AI-readable markdown results. * No API key required. Scrapes result snippets and titles.
* Atlas uses this for competitor research, market context, pricing models, etc. * Atlas uses this for competitor research, market context, pricing models, etc.
*/ */
registerTool({ registerTool({
@@ -22,13 +22,13 @@ registerTool({
const query = String(args.query).trim(); const query = String(args.query).trim();
if (!query) return { error: 'No query provided' }; if (!query) return { error: 'No query provided' };
const url = `https://s.jina.ai/${encodeURIComponent(query)}`; const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
try { try {
const res = await fetch(url, { const res = await fetch(url, {
headers: { headers: {
'Accept': 'text/plain', 'User-Agent': 'Mozilla/5.0 (compatible; VIBN-Atlas/1.0)',
'X-Return-Format': 'markdown', 'Accept': 'text/html',
}, },
signal: AbortSignal.timeout(15_000), signal: AbortSignal.timeout(15_000),
}); });
@@ -37,15 +37,46 @@ registerTool({
return { error: `Search failed with status ${res.status}` }; return { error: `Search failed with status ${res.status}` };
} }
const text = await res.text(); const html = await res.text();
// Jina returns verbose results — truncate to avoid flooding the context window // Extract result titles and snippets from DuckDuckGo HTML
const truncated = text.length > 6000 ? text.slice(0, 6000) + '\n\n[...results truncated]' : text; const results: string[] = [];
// Match result titles
const titleMatches = html.matchAll(/class="result__a"[^>]*href="[^"]*"[^>]*>(.*?)<\/a>/gs);
const titles: string[] = [];
for (const m of titleMatches) {
const title = m[1].replace(/<[^>]+>/g, '').trim();
if (title) titles.push(title);
}
// Match result snippets
const snippetMatches = html.matchAll(/class="result__snippet"[^>]*>(.*?)<\/a>/gs);
const snippets: string[] = [];
for (const m of snippetMatches) {
const snippet = m[1].replace(/<[^>]+>/g, '').trim();
if (snippet) snippets.push(snippet);
}
// Combine up to 6 results
const count = Math.min(6, Math.max(titles.length, snippets.length));
for (let i = 0; i < count; i++) {
const title = titles[i] || '';
const snippet = snippets[i] || '';
if (title || snippet) {
results.push(`**${title}**\n${snippet}`);
}
}
if (results.length === 0) {
return { error: 'No results found' };
}
const text = results.join('\n\n');
const truncated = text.length > 5000 ? text.slice(0, 5000) + '\n\n[...results truncated]' : text;
return { query, results: truncated };
return {
query,
results: truncated,
};
} catch (err: unknown) { } catch (err: unknown) {
const message = err instanceof Error ? err.message : String(err); const message = err instanceof Error ? err.message : String(err);
return { error: `Search request failed: ${message}` }; return { error: `Search request failed: ${message}` };