Fix web_search: switch from Jina AI (which now requires an API key) to DuckDuckGo HTML scraping
Made-with: Cursor
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
import { registerTool } from './registry';
|
import { registerTool } from './registry';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Web search via Jina AI's free search endpoint (s.jina.ai).
|
* Web search via DuckDuckGo HTML endpoint.
|
||||||
* No API key required. Returns clean, AI-readable markdown results.
|
* No API key required. Scrapes result snippets and titles.
|
||||||
* Atlas uses this for competitor research, market context, pricing models, etc.
|
* Atlas uses this for competitor research, market context, pricing models, etc.
|
||||||
*/
|
*/
|
||||||
registerTool({
|
registerTool({
|
||||||
@@ -22,13 +22,13 @@ registerTool({
|
|||||||
const query = String(args.query).trim();
|
const query = String(args.query).trim();
|
||||||
if (!query) return { error: 'No query provided' };
|
if (!query) return { error: 'No query provided' };
|
||||||
|
|
||||||
const url = `https://s.jina.ai/${encodeURIComponent(query)}`;
|
const url = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url, {
|
const res = await fetch(url, {
|
||||||
headers: {
|
headers: {
|
||||||
'Accept': 'text/plain',
|
'User-Agent': 'Mozilla/5.0 (compatible; VIBN-Atlas/1.0)',
|
||||||
'X-Return-Format': 'markdown',
|
'Accept': 'text/html',
|
||||||
},
|
},
|
||||||
signal: AbortSignal.timeout(15_000),
|
signal: AbortSignal.timeout(15_000),
|
||||||
});
|
});
|
||||||
@@ -37,15 +37,46 @@ registerTool({
|
|||||||
return { error: `Search failed with status ${res.status}` };
|
return { error: `Search failed with status ${res.status}` };
|
||||||
}
|
}
|
||||||
|
|
||||||
const text = await res.text();
|
const html = await res.text();
|
||||||
|
|
||||||
// Jina returns verbose results — truncate to avoid flooding the context window
|
// Extract result titles and snippets from DuckDuckGo HTML
|
||||||
const truncated = text.length > 6000 ? text.slice(0, 6000) + '\n\n[...results truncated]' : text;
|
const results: string[] = [];
|
||||||
|
|
||||||
|
// Match result titles
|
||||||
|
const titleMatches = html.matchAll(/class="result__a"[^>]*href="[^"]*"[^>]*>(.*?)<\/a>/gs);
|
||||||
|
const titles: string[] = [];
|
||||||
|
for (const m of titleMatches) {
|
||||||
|
const title = m[1].replace(/<[^>]+>/g, '').trim();
|
||||||
|
if (title) titles.push(title);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Match result snippets
|
||||||
|
const snippetMatches = html.matchAll(/class="result__snippet"[^>]*>(.*?)<\/a>/gs);
|
||||||
|
const snippets: string[] = [];
|
||||||
|
for (const m of snippetMatches) {
|
||||||
|
const snippet = m[1].replace(/<[^>]+>/g, '').trim();
|
||||||
|
if (snippet) snippets.push(snippet);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine up to 6 results
|
||||||
|
const count = Math.min(6, Math.max(titles.length, snippets.length));
|
||||||
|
for (let i = 0; i < count; i++) {
|
||||||
|
const title = titles[i] || '';
|
||||||
|
const snippet = snippets[i] || '';
|
||||||
|
if (title || snippet) {
|
||||||
|
results.push(`**${title}**\n${snippet}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (results.length === 0) {
|
||||||
|
return { error: 'No results found' };
|
||||||
|
}
|
||||||
|
|
||||||
|
const text = results.join('\n\n');
|
||||||
|
const truncated = text.length > 5000 ? text.slice(0, 5000) + '\n\n[...results truncated]' : text;
|
||||||
|
|
||||||
|
return { query, results: truncated };
|
||||||
|
|
||||||
return {
|
|
||||||
query,
|
|
||||||
results: truncated,
|
|
||||||
};
|
|
||||||
} catch (err: unknown) {
|
} catch (err: unknown) {
|
||||||
const message = err instanceof Error ? err.message : String(err);
|
const message = err instanceof Error ? err.message : String(err);
|
||||||
return { error: `Search request failed: ${message}` };
|
return { error: `Search request failed: ${message}` };
|
||||||
|
|||||||
Reference in New Issue
Block a user