feat(search): Implement OpenSearch support
feat(search): Add searchUrl to message
feat(parsers): Enhance parsers to handle some thinking models better.
This commit is contained in:
parent
f65b168388
commit
ddfe8c607d
12 changed files with 115 additions and 17 deletions
|
|
@ -10,6 +10,7 @@ const suggestionGeneratorPrompt = `
|
|||
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
|
||||
You need to make sure the suggestions are relevant to the conversation and are helpful to the user. Keep a note that the user might use these suggestions to ask a chat model for more information.
|
||||
Make sure the suggestions are medium in length and are informative and relevant to the conversation.
|
||||
If you are a thinking or reasoning AI, you should avoid using \`<suggestions>\` and \`</suggestions>\` tags in your thinking. Those tags should only be used in the final output.
|
||||
|
||||
Provide these suggestions separated by newlines between the XML tags <suggestions> and </suggestions>. For example:
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ class LineOutputParser extends BaseOutputParser<string> {
|
|||
async parse(text: string): Promise<string> {
|
||||
text = text.trim() || '';
|
||||
|
||||
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
|
||||
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
|
||||
text = this.removeThinkingBlocks(text);
|
||||
|
||||
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
|
||||
const startKeyIndex = text.indexOf(`<${this.key}>`);
|
||||
const endKeyIndex = text.indexOf(`</${this.key}>`);
|
||||
|
|
@ -40,6 +44,17 @@ class LineOutputParser extends BaseOutputParser<string> {
|
|||
return line;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all content within <think>...</think> blocks
|
||||
* @param text The input text containing thinking blocks
|
||||
* @returns The text with all thinking blocks removed
|
||||
*/
|
||||
private removeThinkingBlocks(text: string): string {
|
||||
// Use a global regex to identify and remove every <think>...</think> block
|
||||
// [\s\S] (not the 's' flag) lets the match span newlines; *? is non-greedy so each block is removed separately. A <think> with no closing </think> is left untouched.
|
||||
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
}
|
||||
|
||||
// Format instructions are not provided by this parser; calling this always throws.
getFormatInstructions(): string {
|
||||
throw new Error('Not implemented.');
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
|
|||
async parse(text: string): Promise<string[]> {
|
||||
text = text.trim() || '';
|
||||
|
||||
// First, remove all <think>...</think> blocks to avoid parsing tags inside thinking content
|
||||
// This might be a little aggressive. Prompt massaging might be all we need, but this is a guarantee and should rarely mess anything up.
|
||||
text = this.removeThinkingBlocks(text);
|
||||
|
||||
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
|
||||
const startKeyIndex = text.indexOf(`<${this.key}>`);
|
||||
const endKeyIndex = text.indexOf(`</${this.key}>`);
|
||||
|
|
@ -42,6 +46,17 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
|
|||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes all content within <think>...</think> blocks
|
||||
* @param text The input text containing thinking blocks
|
||||
* @returns The text with all thinking blocks removed
|
||||
*/
|
||||
private removeThinkingBlocks(text: string): string {
|
||||
// Use a global regex to identify and remove every <think>...</think> block
|
||||
// Using [\s\S] pattern to match all characters including newlines; *? is non-greedy so each block is removed separately. A <think> with no closing </think> is left untouched.
|
||||
return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
}
|
||||
|
||||
// Format instructions are not provided by this parser; calling this always throws.
getFormatInstructions(): string {
|
||||
throw new Error('Not implemented.');
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ You are an AI question rephraser. You will be given a conversation and a follow-
|
|||
If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
|
||||
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
|
||||
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
|
||||
If you are a thinking or reasoning AI, you should avoid using \`<question>\` and \`</question>\` tags in your thinking. Those tags should only be used in the final output. You should also avoid using \`<links>\` and \`</links>\` tags in your thinking. Those tags should only be used in the final output.
|
||||
|
||||
There are several examples attached for your reference inside the below \`examples\` XML block
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||
private config: Config;
|
||||
private strParser = new StringOutputParser();
|
||||
private searchQuery?: string;
|
||||
private searxngUrl?: string;
|
||||
|
||||
constructor(config: Config) {
|
||||
this.config = config;
|
||||
|
|
@ -81,6 +82,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||
let question = this.config.summarizer
|
||||
? await questionOutputParser.parse(input)
|
||||
: input;
|
||||
console.log('question', question);
|
||||
|
||||
if (question === 'not_needed') {
|
||||
return { query: '', docs: [] };
|
||||
|
|
@ -206,12 +208,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||
} else {
|
||||
question = question.replace(/<think>.*?<\/think>/g, '');
|
||||
|
||||
const res = await searchSearxng(question, {
|
||||
const searxngResult = await searchSearxng(question, {
|
||||
language: 'en',
|
||||
engines: this.config.activeEngines,
|
||||
});
|
||||
|
||||
const documents = res.results.map(
|
||||
// Store the SearXNG URL for later use in emitting to the client
|
||||
this.searxngUrl = searxngResult.searchUrl;
|
||||
|
||||
const documents = searxngResult.results.map(
|
||||
(result) =>
|
||||
new Document({
|
||||
pageContent:
|
||||
|
|
@ -265,7 +270,7 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||
|
||||
query = searchRetrieverResult.query;
|
||||
docs = searchRetrieverResult.docs;
|
||||
|
||||
|
||||
// Store the search query in the context for emitting to the client
|
||||
if (searchRetrieverResult.searchQuery) {
|
||||
this.searchQuery = searchRetrieverResult.searchQuery;
|
||||
|
|
@ -447,16 +452,15 @@ class MetaSearchAgent implements MetaSearchAgentType {
|
|||
event.event === 'on_chain_end' &&
|
||||
event.name === 'FinalSourceRetriever'
|
||||
) {
|
||||
// Add searchQuery to the sources data if it exists
|
||||
const sourcesData = event.data.output;
|
||||
// @ts-ignore - we added searchQuery property
|
||||
if (this.searchQuery) {
|
||||
emitter.emit(
|
||||
'data',
|
||||
JSON.stringify({
|
||||
type: 'sources',
|
||||
JSON.stringify({
|
||||
type: 'sources',
|
||||
data: sourcesData,
|
||||
searchQuery: this.searchQuery
|
||||
searchQuery: this.searchQuery,
|
||||
searchUrl: this.searxngUrl,
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -19,6 +19,12 @@ interface SearxngSearchResult {
|
|||
iframe_src?: string;
|
||||
}
|
||||
|
||||
// Result bundle for a SearXNG search; appears to describe what searchSearxng returns (results, suggestions, searchUrl).
interface SearxngResponse {
|
||||
// Search hits taken from the `results` field of the SearXNG response data.
results: SearxngSearchResult[];
|
||||
// Suggestions taken from the `suggestions` field of the SearXNG response data.
suggestions: string[];
|
||||
// Stringified URL for viewing the same search in the SearXNG web interface.
searchUrl: string;
|
||||
}
|
||||
|
||||
export const searchSearxng = async (
|
||||
query: string,
|
||||
opts?: SearxngSearchOptions,
|
||||
|
|
@ -44,5 +50,16 @@ export const searchSearxng = async (
|
|||
const results: SearxngSearchResult[] = res.data.results;
|
||||
const suggestions: string[] = res.data.suggestions;
|
||||
|
||||
return { results, suggestions };
|
||||
// Create a URL for viewing the search results in the SearXNG web interface
|
||||
const searchUrl = new URL(searxngURL);
|
||||
searchUrl.pathname = '/search';
|
||||
searchUrl.searchParams.append('q', query);
|
||||
if (opts?.engines?.length) {
|
||||
searchUrl.searchParams.append('engines', opts.engines.join(','));
|
||||
}
|
||||
if (opts?.language) {
|
||||
searchUrl.searchParams.append('language', opts.language);
|
||||
}
|
||||
|
||||
return { results, suggestions, searchUrl: searchUrl.toString() };
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue