Fix web_search: switch from Jina AI (now requires an API key) to DuckDuckGo HTML scraping
Made-with: Cursor
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
import { registerTool } from './registry';
|
||||
|
||||
/**
|
||||
* Web search via Jina AI's free search endpoint (s.jina.ai).
|
||||
* No API key required. Returns clean, AI-readable markdown results.
|
||||
* Web search via DuckDuckGo's HTML endpoint (html.duckduckgo.com/html).
|
||||
* No API key required. Scrapes result snippets and titles.
|
||||
* Atlas uses this for competitor research, market context, pricing models, etc.
|
||||
*/
|
||||
registerTool({
|
||||
@@ -22,13 +22,13 @@ registerTool({
|
||||
const query = String(args.query).trim();
|
||||
if (!query) return { error: 'No query provided' };
|
||||
|
||||
const url = `https://s.jina.ai/${encodeURIComponent(query)}`;
|
||||
const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
|
||||
|
||||
try {
|
||||
const res = await fetch(url, {
|
||||
headers: {
|
||||
'Accept': 'text/plain',
|
||||
'X-Return-Format': 'markdown',
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; VIBN-Atlas/1.0)',
|
||||
'Accept': 'text/html',
|
||||
},
|
||||
signal: AbortSignal.timeout(15_000),
|
||||
});
|
||||
@@ -37,15 +37,46 @@ registerTool({
|
||||
return { error: `Search failed with status ${res.status}` };
|
||||
}
|
||||
|
||||
const text = await res.text();
|
||||
const html = await res.text();
|
||||
|
||||
// Jina returns verbose results — truncate to avoid flooding the context window
|
||||
const truncated = text.length > 6000 ? text.slice(0, 6000) + '\n\n[...results truncated]' : text;
|
||||
// Extract result titles and snippets from DuckDuckGo HTML
|
||||
const results: string[] = [];
|
||||
|
||||
// Match result titles
|
||||
const titleMatches = html.matchAll(/class="result__a"[^>]*href="[^"]*"[^>]*>(.*?)<\/a>/gs);
|
||||
const titles: string[] = [];
|
||||
for (const m of titleMatches) {
|
||||
const title = m[1].replace(/<[^>]+>/g, '').trim();
|
||||
if (title) titles.push(title);
|
||||
}
|
||||
|
||||
// Match result snippets
|
||||
const snippetMatches = html.matchAll(/class="result__snippet"[^>]*>(.*?)<\/a>/gs);
|
||||
const snippets: string[] = [];
|
||||
for (const m of snippetMatches) {
|
||||
const snippet = m[1].replace(/<[^>]+>/g, '').trim();
|
||||
if (snippet) snippets.push(snippet);
|
||||
}
|
||||
|
||||
// Combine up to 6 results, pairing titles and snippets by their position in the page
|
||||
const count = Math.min(6, Math.max(titles.length, snippets.length));
|
||||
for (let i = 0; i < count; i++) {
|
||||
const title = titles[i] || '';
|
||||
const snippet = snippets[i] || '';
|
||||
if (title || snippet) {
|
||||
results.push(`**${title}**\n${snippet}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (results.length === 0) {
|
||||
return { error: 'No results found' };
|
||||
}
|
||||
|
||||
const text = results.join('\n\n');
|
||||
const truncated = text.length > 5000 ? text.slice(0, 5000) + '\n\n[...results truncated]' : text;
|
||||
|
||||
return { query, results: truncated };
|
||||
|
||||
return {
|
||||
query,
|
||||
results: truncated,
|
||||
};
|
||||
} catch (err: unknown) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
return { error: `Search request failed: ${message}` };
|
||||
|
||||
Reference in New Issue
Block a user