This commit is contained in:
Jin Yucong 2024-07-05 14:36:50 +08:00
parent 5b1aaee605
commit 3b737a078a
63 changed files with 1132 additions and 1853 deletions

View file

@ -1,24 +1,16 @@
import { BaseMessage } from '@langchain/core/messages';
import {
PromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { Document } from '@langchain/core/documents';
import { searchSearxng } from '../lib/searxng';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { PromptTemplate, ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { Document } from "@langchain/core/documents";
import { searchSearxng } from "../lib/searxng";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import formatChatHistoryAsString from "../utils/formatHistory";
import eventEmitter from "events";
import computeSimilarity from "../utils/computeSimilarity";
import logger from "../utils/logger";
const basicAcademicSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -65,34 +57,16 @@ const basicAcademicSearchResponsePrompt = `
const strParser = new StringOutputParser();
/**
 * Relays selected events from the answering chain's event stream to `emitter`.
 *
 * - `on_chain_end` from `FinalSourceRetriever` -> `data` with `{ type: "sources", data: output }`
 * - `on_chain_stream` from `FinalResponseGenerator` -> `data` with `{ type: "response", data: chunk }`
 * - `on_chain_end` from `FinalResponseGenerator` -> `end`
 *
 * All payloads are JSON strings. Every other event is ignored.
 *
 * Callers start this function without awaiting it, so a failure while the
 * stream is being consumed would otherwise only reject an unobserved promise.
 * Such failures are caught here, logged, and reported to listeners as an
 * `error` event carrying the same generic payload the callers use.
 *
 * @param stream  Async stream of chain events to consume.
 * @param emitter Emitter that receives the `data`, `end` and `error` events.
 */
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
  try {
    for await (const event of stream) {
      if (event.event === "on_chain_end" && event.name === "FinalSourceRetriever") {
        emitter.emit("data", JSON.stringify({ type: "sources", data: event.data.output }));
      }
      if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
        emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
      }
      if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
        emitter.emit("end");
      }
    }
  } catch (err) {
    // Log before emitting: emit("error") throws when no listener is attached.
    logger.error(`Error in academic search: ${err}`);
    emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
  }
};
@ -108,22 +82,17 @@ const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => {
llm,
strParser,
RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
if (input === "not_needed") {
return { query: "", docs: [] };
}
const res = await searchSearxng(input, {
language: 'en',
engines: [
'arxiv',
'google scholar',
'internetarchivescholar',
'pubmed',
],
language: "en",
engines: ["arxiv", "google scholar", "internetarchivescholar", "pubmed"],
});
const documents = res.results.map(
(result) =>
result =>
new Document({
pageContent: result.content,
metadata: {
@ -139,36 +108,22 @@ const createBasicAcademicSearchRetrieverChain = (llm: BaseChatModel) => {
]);
};
const createBasicAcademicSearchAnsweringChain = (
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicAcademicSearchRetrieverChain =
createBasicAcademicSearchRetrieverChain(llm);
const createBasicAcademicSearchAnsweringChain = (llm: BaseChatModel, embeddings: Embeddings) => {
const basicAcademicSearchRetrieverChain = createBasicAcademicSearchRetrieverChain(llm);
const processDocs = async (docs: Document[]) => {
return docs
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
.join('\n');
return docs.map((_, index) => `${index + 1}. ${docs[index].pageContent}`).join("\n");
};
const rerankDocs = async ({
query,
docs,
}: {
query: string;
docs: Document[];
}) => {
const rerankDocs = async ({ query, docs }: { query: string; docs: Document[] }) => {
if (docs.length === 0) {
return docs;
}
const docsWithContent = docs.filter(
(doc) => doc.pageContent && doc.pageContent.length > 0,
);
const docsWithContent = docs.filter(doc => doc.pageContent && doc.pageContent.length > 0);
const [docEmbeddings, queryEmbedding] = await Promise.all([
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
embeddings.embedDocuments(docsWithContent.map(doc => doc.pageContent)),
embeddings.embedQuery(query),
]);
@ -184,7 +139,7 @@ const createBasicAcademicSearchAnsweringChain = (
const sortedDocs = similarity
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 15)
.map((sim) => docsWithContent[sim.index]);
.map(sim => docsWithContent[sim.index]);
return sortedDocs;
};
@ -194,41 +149,35 @@ const createBasicAcademicSearchAnsweringChain = (
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
context: RunnableSequence.from([
(input) => ({
input => ({
query: input.query,
chat_history: formatChatHistoryAsString(input.chat_history),
}),
basicAcademicSearchRetrieverChain
.pipe(rerankDocs)
.withConfig({
runName: 'FinalSourceRetriever',
runName: "FinalSourceRetriever",
})
.pipe(processDocs),
]),
}),
ChatPromptTemplate.fromMessages([
['system', basicAcademicSearchResponsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", basicAcademicSearchResponsePrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
const basicAcademicSearch = (
query: string,
history: BaseMessage[],
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicAcademicSearch = (query: string, history: BaseMessage[], llm: BaseChatModel, embeddings: Embeddings) => {
const emitter = new eventEmitter();
try {
const basicAcademicSearchAnsweringChain =
createBasicAcademicSearchAnsweringChain(llm, embeddings);
const basicAcademicSearchAnsweringChain = createBasicAcademicSearchAnsweringChain(llm, embeddings);
const stream = basicAcademicSearchAnsweringChain.streamEvents(
{
@ -236,28 +185,20 @@ const basicAcademicSearch = (
query: query,
},
{
version: 'v1',
version: "v1",
},
);
handleStream(stream, emitter);
} catch (err) {
emitter.emit(
'error',
JSON.stringify({ data: 'An error has occurred please try again later' }),
);
emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
logger.error(`Error in academic search: ${err}`);
}
return emitter;
};
/**
 * Entry point for academic search.
 *
 * Delegates to `basicAcademicSearch` with the arguments unchanged and hands
 * back the event emitter it returns.
 *
 * @param message    The user's query text.
 * @param history    Prior conversation messages.
 * @param llm        Chat model passed through to the answering chain.
 * @param embeddings Embeddings passed through to the answering chain.
 * @returns The emitter created by `basicAcademicSearch`.
 */
const handleAcademicSearch = (
  message: string,
  history: BaseMessage[],
  llm: BaseChatModel,
  embeddings: Embeddings,
) => basicAcademicSearch(message, history, llm, embeddings);

View file

@ -1,14 +1,10 @@
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { PromptTemplate } from "@langchain/core/prompts";
import formatChatHistoryAsString from "../utils/formatHistory";
import { BaseMessage } from "@langchain/core/messages";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { searchSearxng } from "../lib/searxng";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
const imageSearchChainPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
@ -53,12 +49,12 @@ const createImageSearchChain = (llm: BaseChatModel) => {
strParser,
RunnableLambda.from(async (input: string) => {
const res = await searchSearxng(input, {
engines: ['bing images', 'google images'],
engines: ["bing images", "google images"],
});
const images = [];
res.results.forEach((result) => {
res.results.forEach(result => {
if (result.img_src && result.url && result.title) {
images.push({
img_src: result.img_src,
@ -73,10 +69,7 @@ const createImageSearchChain = (llm: BaseChatModel) => {
]);
};
/**
 * Builds an image search chain for `llm` and invokes it with `input`.
 *
 * @param input Chain input forwarded unchanged to `invoke`.
 * @param llm   Chat model handed to `createImageSearchChain`.
 * @returns Whatever the chain's `invoke` call returns.
 */
const handleImageSearch = (input: ImageSearchChainInput, llm: BaseChatModel) =>
  createImageSearchChain(llm).invoke(input);

View file

@ -1,24 +1,16 @@
import { BaseMessage } from '@langchain/core/messages';
import {
PromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { Document } from '@langchain/core/documents';
import { searchSearxng } from '../lib/searxng';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { PromptTemplate, ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { Document } from "@langchain/core/documents";
import { searchSearxng } from "../lib/searxng";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import formatChatHistoryAsString from "../utils/formatHistory";
import eventEmitter from "events";
import computeSimilarity from "../utils/computeSimilarity";
import logger from "../utils/logger";
const basicRedditSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -65,34 +57,16 @@ const basicRedditSearchResponsePrompt = `
const strParser = new StringOutputParser();
/**
 * Relays selected events from the answering chain's event stream to `emitter`.
 *
 * - `on_chain_end` from `FinalSourceRetriever` -> `data` with `{ type: "sources", data: output }`
 * - `on_chain_stream` from `FinalResponseGenerator` -> `data` with `{ type: "response", data: chunk }`
 * - `on_chain_end` from `FinalResponseGenerator` -> `end`
 *
 * All payloads are JSON strings. Every other event is ignored.
 *
 * Callers start this function without awaiting it, so a failure while the
 * stream is being consumed would otherwise only reject an unobserved promise.
 * Such failures are caught here, logged, and reported to listeners as an
 * `error` event carrying the same generic payload the callers use.
 *
 * @param stream  Async stream of chain events to consume.
 * @param emitter Emitter that receives the `data`, `end` and `error` events.
 */
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
  try {
    for await (const event of stream) {
      if (event.event === "on_chain_end" && event.name === "FinalSourceRetriever") {
        emitter.emit("data", JSON.stringify({ type: "sources", data: event.data.output }));
      }
      if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
        emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
      }
      if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
        emitter.emit("end");
      }
    }
  } catch (err) {
    // Log before emitting: emit("error") throws when no listener is attached.
    logger.error(`Error in RedditSearch: ${err}`);
    emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
  }
};
@ -108,17 +82,17 @@ const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
llm,
strParser,
RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
if (input === "not_needed") {
return { query: "", docs: [] };
}
const res = await searchSearxng(input, {
language: 'en',
engines: ['reddit'],
language: "en",
engines: ["reddit"],
});
const documents = res.results.map(
(result) =>
result =>
new Document({
pageContent: result.content ? result.content : result.title,
metadata: {
@ -134,36 +108,22 @@ const createBasicRedditSearchRetrieverChain = (llm: BaseChatModel) => {
]);
};
const createBasicRedditSearchAnsweringChain = (
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicRedditSearchRetrieverChain =
createBasicRedditSearchRetrieverChain(llm);
const createBasicRedditSearchAnsweringChain = (llm: BaseChatModel, embeddings: Embeddings) => {
const basicRedditSearchRetrieverChain = createBasicRedditSearchRetrieverChain(llm);
const processDocs = async (docs: Document[]) => {
return docs
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
.join('\n');
return docs.map((_, index) => `${index + 1}. ${docs[index].pageContent}`).join("\n");
};
const rerankDocs = async ({
query,
docs,
}: {
query: string;
docs: Document[];
}) => {
const rerankDocs = async ({ query, docs }: { query: string; docs: Document[] }) => {
if (docs.length === 0) {
return docs;
}
const docsWithContent = docs.filter(
(doc) => doc.pageContent && doc.pageContent.length > 0,
);
const docsWithContent = docs.filter(doc => doc.pageContent && doc.pageContent.length > 0);
const [docEmbeddings, queryEmbedding] = await Promise.all([
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
embeddings.embedDocuments(docsWithContent.map(doc => doc.pageContent)),
embeddings.embedQuery(query),
]);
@ -179,8 +139,8 @@ const createBasicRedditSearchAnsweringChain = (
const sortedDocs = similarity
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 15)
.filter((sim) => sim.similarity > 0.3)
.map((sim) => docsWithContent[sim.index]);
.filter(sim => sim.similarity > 0.3)
.map(sim => docsWithContent[sim.index]);
return sortedDocs;
};
@ -190,69 +150,55 @@ const createBasicRedditSearchAnsweringChain = (
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
context: RunnableSequence.from([
(input) => ({
input => ({
query: input.query,
chat_history: formatChatHistoryAsString(input.chat_history),
}),
basicRedditSearchRetrieverChain
.pipe(rerankDocs)
.withConfig({
runName: 'FinalSourceRetriever',
runName: "FinalSourceRetriever",
})
.pipe(processDocs),
]),
}),
ChatPromptTemplate.fromMessages([
['system', basicRedditSearchResponsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", basicRedditSearchResponsePrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
/**
 * Starts a Reddit search for `query` and returns an emitter that reports
 * progress.
 *
 * The answering chain is built from `llm` and `embeddings`, its event stream
 * (`streamEvents`, version "v1") is handed to `handleStream`, and the emitter
 * is returned immediately while the stream is consumed in the background.
 *
 * Failure handling:
 * - `handleStream` is async, so errors raised while the stream is consumed
 *   arrive as a promise rejection rather than through the surrounding
 *   try/catch; the rejection is handled explicitly.
 * - A synchronous failure happens before the caller has received the emitter,
 *   so no `error` listener can exist yet and an immediate `emit("error")`
 *   would itself throw. The notification is deferred with `setImmediate` to
 *   give the caller a chance to subscribe.
 * - In both cases the error is logged first, and `error` is only emitted when
 *   a listener is registered.
 *
 * @param query      The user's query text.
 * @param history    Prior conversation messages, passed as `chat_history`.
 * @param llm        Chat model used to build the answering chain.
 * @param embeddings Embeddings used to build the answering chain.
 * @returns The emitter on which `handleStream` and this function report events.
 */
const basicRedditSearch = (query: string, history: BaseMessage[], llm: BaseChatModel, embeddings: Embeddings) => {
  const emitter = new eventEmitter();

  // Logs the failure, then notifies listeners if any are attached.
  const reportFailure = (err: unknown) => {
    logger.error(`Error in RedditSearch: ${err}`);
    if (emitter.listenerCount("error") > 0) {
      emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
    }
  };

  try {
    const basicRedditSearchAnsweringChain = createBasicRedditSearchAnsweringChain(llm, embeddings);

    const stream = basicRedditSearchAnsweringChain.streamEvents(
      {
        chat_history: history,
        query: query,
      },
      {
        version: "v1",
      },
    );

    // Not awaited on purpose; attach a handler so a rejection is never left unobserved.
    handleStream(stream, emitter).catch(reportFailure);
  } catch (err) {
    // Defer: the caller can only subscribe after this function has returned.
    setImmediate(() => reportFailure(err));
  }

  return emitter;
};
/**
 * Entry point for Reddit search.
 *
 * Delegates to `basicRedditSearch` with the arguments unchanged and hands
 * back the event emitter it returns.
 *
 * @param message    The user's query text.
 * @param history    Prior conversation messages.
 * @param llm        Chat model passed through to the answering chain.
 * @param embeddings Embeddings passed through to the answering chain.
 * @returns The emitter created by `basicRedditSearch`.
 */
const handleRedditSearch = (
  message: string,
  history: BaseMessage[],
  llm: BaseChatModel,
  embeddings: Embeddings,
) => basicRedditSearch(message, history, llm, embeddings);

View file

@ -1,10 +1,10 @@
import { RunnableSequence, RunnableMap } from '@langchain/core/runnables';
import ListLineOutputParser from '../lib/outputParsers/listLineOutputParser';
import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ChatOpenAI } from '@langchain/openai';
import { RunnableSequence, RunnableMap } from "@langchain/core/runnables";
import ListLineOutputParser from "../lib/outputParsers/listLineOutputParser";
import { PromptTemplate } from "@langchain/core/prompts";
import formatChatHistoryAsString from "../utils/formatHistory";
import { BaseMessage } from "@langchain/core/messages";
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import { ChatOpenAI } from "@langchain/openai";
const suggestionGeneratorPrompt = `
You are an AI suggestion generator for an AI powered search engine. You will be given a conversation below. You need to generate 4-5 suggestions based on the conversation. The suggestion should be relevant to the conversation that can be used by the user to ask the chat model for more information.
@ -28,14 +28,13 @@ type SuggestionGeneratorInput = {
};
// Module-level ListLineOutputParser instance, configured with the "suggestions" key.
const outputParser = new ListLineOutputParser({
key: 'suggestions',
key: "suggestions",
});
const createSuggestionGeneratorChain = (llm: BaseChatModel) => {
return RunnableSequence.from([
RunnableMap.from({
chat_history: (input: SuggestionGeneratorInput) =>
formatChatHistoryAsString(input.chat_history),
chat_history: (input: SuggestionGeneratorInput) => formatChatHistoryAsString(input.chat_history),
}),
PromptTemplate.fromTemplate(suggestionGeneratorPrompt),
llm,
@ -43,10 +42,7 @@ const createSuggestionGeneratorChain = (llm: BaseChatModel) => {
]);
};
const generateSuggestions = (
input: SuggestionGeneratorInput,
llm: BaseChatModel,
) => {
const generateSuggestions = (input: SuggestionGeneratorInput, llm: BaseChatModel) => {
(llm as ChatOpenAI).temperature = 0;
const suggestionGeneratorChain = createSuggestionGeneratorChain(llm);
return suggestionGeneratorChain.invoke(input);

View file

@ -1,14 +1,10 @@
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { PromptTemplate } from '@langchain/core/prompts';
import formatChatHistoryAsString from '../utils/formatHistory';
import { BaseMessage } from '@langchain/core/messages';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { searchSearxng } from '../lib/searxng';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { PromptTemplate } from "@langchain/core/prompts";
import formatChatHistoryAsString from "../utils/formatHistory";
import { BaseMessage } from "@langchain/core/messages";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { searchSearxng } from "../lib/searxng";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
const VideoSearchChainPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search Youtube for videos.
@ -53,18 +49,13 @@ const createVideoSearchChain = (llm: BaseChatModel) => {
strParser,
RunnableLambda.from(async (input: string) => {
const res = await searchSearxng(input, {
engines: ['youtube'],
engines: ["youtube"],
});
const videos = [];
res.results.forEach((result) => {
if (
result.thumbnail &&
result.url &&
result.title &&
result.iframe_src
) {
res.results.forEach(result => {
if (result.thumbnail && result.url && result.title && result.iframe_src) {
videos.push({
img_src: result.thumbnail,
url: result.url,
@ -79,10 +70,7 @@ const createVideoSearchChain = (llm: BaseChatModel) => {
]);
};
/**
 * Builds a video search chain for `llm` and invokes it with `input`.
 *
 * @param input Chain input forwarded unchanged to `invoke`.
 * @param llm   Chat model handed to `createVideoSearchChain`.
 * @returns Whatever the chain's `invoke` call returns.
 */
const handleVideoSearch = (input: VideoSearchChainInput, llm: BaseChatModel) =>
  createVideoSearchChain(llm).invoke(input);

View file

@ -1,24 +1,16 @@
import { BaseMessage } from '@langchain/core/messages';
import {
PromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { Document } from '@langchain/core/documents';
import { searchSearxng } from '../lib/searxng';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { PromptTemplate, ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { Document } from "@langchain/core/documents";
import { searchSearxng } from "../lib/searxng";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import formatChatHistoryAsString from "../utils/formatHistory";
import eventEmitter from "events";
import computeSimilarity from "../utils/computeSimilarity";
import logger from "../utils/logger";
const basicSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -65,34 +57,16 @@ const basicWebSearchResponsePrompt = `
const strParser = new StringOutputParser();
/**
 * Relays selected events from the answering chain's event stream to `emitter`.
 *
 * - `on_chain_end` from `FinalSourceRetriever` -> `data` with `{ type: "sources", data: output }`
 * - `on_chain_stream` from `FinalResponseGenerator` -> `data` with `{ type: "response", data: chunk }`
 * - `on_chain_end` from `FinalResponseGenerator` -> `end`
 *
 * All payloads are JSON strings. Every other event is ignored.
 *
 * Callers start this function without awaiting it, so a failure while the
 * stream is being consumed would otherwise only reject an unobserved promise.
 * Such failures are caught here, logged, and reported to listeners as an
 * `error` event carrying the same generic payload the callers use.
 *
 * @param stream  Async stream of chain events to consume.
 * @param emitter Emitter that receives the `data`, `end` and `error` events.
 */
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
  try {
    for await (const event of stream) {
      if (event.event === "on_chain_end" && event.name === "FinalSourceRetriever") {
        emitter.emit("data", JSON.stringify({ type: "sources", data: event.data.output }));
      }
      if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
        emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
      }
      if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
        emitter.emit("end");
      }
    }
  } catch (err) {
    // Log before emitting: emit("error") throws when no listener is attached.
    logger.error(`Error in websearch: ${err}`);
    emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
  }
};
@ -108,16 +82,16 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
llm,
strParser,
RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
if (input === "not_needed") {
return { query: "", docs: [] };
}
const res = await searchSearxng(input, {
language: 'en',
language: "en",
});
const documents = res.results.map(
(result) =>
result =>
new Document({
pageContent: result.content,
metadata: {
@ -133,35 +107,22 @@ const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => {
]);
};
const createBasicWebSearchAnsweringChain = (
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const createBasicWebSearchAnsweringChain = (llm: BaseChatModel, embeddings: Embeddings) => {
const basicWebSearchRetrieverChain = createBasicWebSearchRetrieverChain(llm);
const processDocs = async (docs: Document[]) => {
return docs
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
.join('\n');
return docs.map((_, index) => `${index + 1}. ${docs[index].pageContent}`).join("\n");
};
const rerankDocs = async ({
query,
docs,
}: {
query: string;
docs: Document[];
}) => {
const rerankDocs = async ({ query, docs }: { query: string; docs: Document[] }) => {
if (docs.length === 0) {
return docs;
}
const docsWithContent = docs.filter(
(doc) => doc.pageContent && doc.pageContent.length > 0,
);
const docsWithContent = docs.filter(doc => doc.pageContent && doc.pageContent.length > 0);
const [docEmbeddings, queryEmbedding] = await Promise.all([
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
embeddings.embedDocuments(docsWithContent.map(doc => doc.pageContent)),
embeddings.embedQuery(query),
]);
@ -176,9 +137,9 @@ const createBasicWebSearchAnsweringChain = (
const sortedDocs = similarity
.sort((a, b) => b.similarity - a.similarity)
.filter((sim) => sim.similarity > 0.5)
.filter(sim => sim.similarity > 0.5)
.slice(0, 15)
.map((sim) => docsWithContent[sim.index]);
.map(sim => docsWithContent[sim.index]);
return sortedDocs;
};
@ -188,43 +149,35 @@ const createBasicWebSearchAnsweringChain = (
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
context: RunnableSequence.from([
(input) => ({
input => ({
query: input.query,
chat_history: formatChatHistoryAsString(input.chat_history),
}),
basicWebSearchRetrieverChain
.pipe(rerankDocs)
.withConfig({
runName: 'FinalSourceRetriever',
runName: "FinalSourceRetriever",
})
.pipe(processDocs),
]),
}),
ChatPromptTemplate.fromMessages([
['system', basicWebSearchResponsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", basicWebSearchResponsePrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
const basicWebSearch = (
query: string,
history: BaseMessage[],
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicWebSearch = (query: string, history: BaseMessage[], llm: BaseChatModel, embeddings: Embeddings) => {
const emitter = new eventEmitter();
try {
const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain(
llm,
embeddings,
);
const basicWebSearchAnsweringChain = createBasicWebSearchAnsweringChain(llm, embeddings);
const stream = basicWebSearchAnsweringChain.streamEvents(
{
@ -232,28 +185,20 @@ const basicWebSearch = (
query: query,
},
{
version: 'v1',
version: "v1",
},
);
handleStream(stream, emitter);
} catch (err) {
emitter.emit(
'error',
JSON.stringify({ data: 'An error has occurred please try again later' }),
);
emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
logger.error(`Error in websearch: ${err}`);
}
return emitter;
};
/**
 * Entry point for web search.
 *
 * Delegates to `basicWebSearch` with the arguments unchanged and hands back
 * the event emitter it returns.
 *
 * @param message    The user's query text.
 * @param history    Prior conversation messages.
 * @param llm        Chat model passed through to the answering chain.
 * @param embeddings Embeddings passed through to the answering chain.
 * @returns The emitter created by `basicWebSearch`.
 */
const handleWebSearch = (
  message: string,
  history: BaseMessage[],
  llm: BaseChatModel,
  embeddings: Embeddings,
) => basicWebSearch(message, history, llm, embeddings);

View file

@ -1,23 +1,15 @@
import { BaseMessage } from '@langchain/core/messages';
import {
PromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { Document } from '@langchain/core/documents';
import { searchSearxng } from '../lib/searxng';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { PromptTemplate, ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { Document } from "@langchain/core/documents";
import { searchSearxng } from "../lib/searxng";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import formatChatHistoryAsString from "../utils/formatHistory";
import eventEmitter from "events";
import logger from "../utils/logger";
const basicWolframAlphaSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -64,34 +56,16 @@ const basicWolframAlphaSearchResponsePrompt = `
const strParser = new StringOutputParser();
/**
 * Relays selected events from the answering chain's event stream to `emitter`.
 *
 * - `on_chain_end` from `FinalSourceRetriever` -> `data` with `{ type: "sources", data: output }`
 * - `on_chain_stream` from `FinalResponseGenerator` -> `data` with `{ type: "response", data: chunk }`
 * - `on_chain_end` from `FinalResponseGenerator` -> `end`
 *
 * All payloads are JSON strings. Every other event is ignored.
 *
 * A failure while the stream is being consumed is caught here, logged, and
 * reported to listeners as an `error` event with a generic JSON payload, so
 * that a caller which does not await this function still gets the failure
 * delivered instead of an unobserved promise rejection.
 *
 * @param stream  Async stream of chain events to consume.
 * @param emitter Emitter that receives the `data`, `end` and `error` events.
 */
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
  try {
    for await (const event of stream) {
      if (event.event === "on_chain_end" && event.name === "FinalSourceRetriever") {
        emitter.emit("data", JSON.stringify({ type: "sources", data: event.data.output }));
      }
      if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
        emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
      }
      if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
        emitter.emit("end");
      }
    }
  } catch (err) {
    // Log before emitting: emit("error") throws when no listener is attached.
    logger.error(`Error in WolframAlpha search stream: ${err}`);
    emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
  }
};
@ -107,17 +81,17 @@ const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => {
llm,
strParser,
RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
if (input === "not_needed") {
return { query: "", docs: [] };
}
const res = await searchSearxng(input, {
language: 'en',
engines: ['wolframalpha'],
language: "en",
engines: ["wolframalpha"],
});
const documents = res.results.map(
(result) =>
result =>
new Document({
pageContent: result.content,
metadata: {
@ -134,13 +108,10 @@ const createBasicWolframAlphaSearchRetrieverChain = (llm: BaseChatModel) => {
};
const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
const basicWolframAlphaSearchRetrieverChain =
createBasicWolframAlphaSearchRetrieverChain(llm);
const basicWolframAlphaSearchRetrieverChain = createBasicWolframAlphaSearchRetrieverChain(llm);
const processDocs = (docs: Document[]) => {
return docs
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
.join('\n');
return docs.map((_, index) => `${index + 1}. ${docs[index].pageContent}`).join("\n");
};
return RunnableSequence.from([
@ -148,7 +119,7 @@ const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
context: RunnableSequence.from([
(input) => ({
input => ({
query: input.query,
chat_history: formatChatHistoryAsString(input.chat_history),
}),
@ -157,49 +128,41 @@ const createBasicWolframAlphaSearchAnsweringChain = (llm: BaseChatModel) => {
return docs;
})
.withConfig({
runName: 'FinalSourceRetriever',
runName: "FinalSourceRetriever",
})
.pipe(processDocs),
]),
}),
ChatPromptTemplate.fromMessages([
['system', basicWolframAlphaSearchResponsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", basicWolframAlphaSearchResponsePrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
const basicWolframAlphaSearch = (
query: string,
history: BaseMessage[],
llm: BaseChatModel,
) => {
const basicWolframAlphaSearch = (query: string, history: BaseMessage[], llm: BaseChatModel) => {
const emitter = new eventEmitter();
try {
const basicWolframAlphaSearchAnsweringChain =
createBasicWolframAlphaSearchAnsweringChain(llm);
const basicWolframAlphaSearchAnsweringChain = createBasicWolframAlphaSearchAnsweringChain(llm);
const stream = basicWolframAlphaSearchAnsweringChain.streamEvents(
{
chat_history: history,
query: query,
},
{
version: 'v1',
version: "v1",
},
);
handleStream(stream, emitter);
} catch (err) {
emitter.emit(
'error',
JSON.stringify({ data: 'An error has occurred please try again later' }),
);
emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
logger.error(`Error in WolframAlphaSearch: ${err}`);
}

View file

@ -1,15 +1,12 @@
import { BaseMessage } from '@langchain/core/messages';
import {
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import { RunnableSequence } from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import eventEmitter from 'events';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import eventEmitter from "events";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import logger from "../utils/logger";
const writingAssistantPrompt = `
You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query.
@ -18,25 +15,13 @@ Since you are a writing assistant, you would not perform web searches. If you th
const strParser = new StringOutputParser();
const handleStream = async (
stream: AsyncGenerator<StreamEvent, unknown, unknown>,
emitter: eventEmitter,
) => {
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
for await (const event of stream) {
if (
event.event === 'on_chain_stream' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit(
'data',
JSON.stringify({ type: 'response', data: event.data.chunk }),
);
if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
}
if (
event.event === 'on_chain_end' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit('end');
if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
emitter.emit("end");
}
}
};
@ -44,14 +29,14 @@ const handleStream = async (
const createWritingAssistantChain = (llm: BaseChatModel) => {
return RunnableSequence.from([
ChatPromptTemplate.fromMessages([
['system', writingAssistantPrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", writingAssistantPrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
@ -72,16 +57,13 @@ const handleWritingAssistant = (
query: query,
},
{
version: 'v1',
version: "v1",
},
);
handleStream(stream, emitter);
} catch (err) {
emitter.emit(
'error',
JSON.stringify({ data: 'An error has occurred please try again later' }),
);
emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
logger.error(`Error in writing assistant: ${err}`);
}

View file

@ -1,24 +1,16 @@
import { BaseMessage } from '@langchain/core/messages';
import {
PromptTemplate,
ChatPromptTemplate,
MessagesPlaceholder,
} from '@langchain/core/prompts';
import {
RunnableSequence,
RunnableMap,
RunnableLambda,
} from '@langchain/core/runnables';
import { StringOutputParser } from '@langchain/core/output_parsers';
import { Document } from '@langchain/core/documents';
import { searchSearxng } from '../lib/searxng';
import type { StreamEvent } from '@langchain/core/tracers/log_stream';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import formatChatHistoryAsString from '../utils/formatHistory';
import eventEmitter from 'events';
import computeSimilarity from '../utils/computeSimilarity';
import logger from '../utils/logger';
import { BaseMessage } from "@langchain/core/messages";
import { PromptTemplate, ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { RunnableSequence, RunnableMap, RunnableLambda } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { Document } from "@langchain/core/documents";
import { searchSearxng } from "../lib/searxng";
import type { StreamEvent } from "@langchain/core/tracers/log_stream";
import type { BaseChatModel } from "@langchain/core/language_models/chat_models";
import type { Embeddings } from "@langchain/core/embeddings";
import formatChatHistoryAsString from "../utils/formatHistory";
import eventEmitter from "events";
import computeSimilarity from "../utils/computeSimilarity";
import logger from "../utils/logger";
const basicYoutubeSearchRetrieverPrompt = `
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
@ -65,34 +57,16 @@ const basicYoutubeSearchResponsePrompt = `
const strParser = new StringOutputParser();
const handleStream = async (
stream: AsyncGenerator<StreamEvent, unknown, unknown>,
emitter: eventEmitter,
) => {
const handleStream = async (stream: AsyncGenerator<StreamEvent, unknown, unknown>, emitter: eventEmitter) => {
for await (const event of stream) {
if (
event.event === 'on_chain_end' &&
event.name === 'FinalSourceRetriever'
) {
emitter.emit(
'data',
JSON.stringify({ type: 'sources', data: event.data.output }),
);
if (event.event === "on_chain_end" && event.name === "FinalSourceRetriever") {
emitter.emit("data", JSON.stringify({ type: "sources", data: event.data.output }));
}
if (
event.event === 'on_chain_stream' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit(
'data',
JSON.stringify({ type: 'response', data: event.data.chunk }),
);
if (event.event === "on_chain_stream" && event.name === "FinalResponseGenerator") {
emitter.emit("data", JSON.stringify({ type: "response", data: event.data.chunk }));
}
if (
event.event === 'on_chain_end' &&
event.name === 'FinalResponseGenerator'
) {
emitter.emit('end');
if (event.event === "on_chain_end" && event.name === "FinalResponseGenerator") {
emitter.emit("end");
}
}
};
@ -108,17 +82,17 @@ const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => {
llm,
strParser,
RunnableLambda.from(async (input: string) => {
if (input === 'not_needed') {
return { query: '', docs: [] };
if (input === "not_needed") {
return { query: "", docs: [] };
}
const res = await searchSearxng(input, {
language: 'en',
engines: ['youtube'],
language: "en",
engines: ["youtube"],
});
const documents = res.results.map(
(result) =>
result =>
new Document({
pageContent: result.content ? result.content : result.title,
metadata: {
@ -134,36 +108,22 @@ const createBasicYoutubeSearchRetrieverChain = (llm: BaseChatModel) => {
]);
};
const createBasicYoutubeSearchAnsweringChain = (
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicYoutubeSearchRetrieverChain =
createBasicYoutubeSearchRetrieverChain(llm);
const createBasicYoutubeSearchAnsweringChain = (llm: BaseChatModel, embeddings: Embeddings) => {
const basicYoutubeSearchRetrieverChain = createBasicYoutubeSearchRetrieverChain(llm);
const processDocs = async (docs: Document[]) => {
return docs
.map((_, index) => `${index + 1}. ${docs[index].pageContent}`)
.join('\n');
return docs.map((_, index) => `${index + 1}. ${docs[index].pageContent}`).join("\n");
};
const rerankDocs = async ({
query,
docs,
}: {
query: string;
docs: Document[];
}) => {
const rerankDocs = async ({ query, docs }: { query: string; docs: Document[] }) => {
if (docs.length === 0) {
return docs;
}
const docsWithContent = docs.filter(
(doc) => doc.pageContent && doc.pageContent.length > 0,
);
const docsWithContent = docs.filter(doc => doc.pageContent && doc.pageContent.length > 0);
const [docEmbeddings, queryEmbedding] = await Promise.all([
embeddings.embedDocuments(docsWithContent.map((doc) => doc.pageContent)),
embeddings.embedDocuments(docsWithContent.map(doc => doc.pageContent)),
embeddings.embedQuery(query),
]);
@ -179,8 +139,8 @@ const createBasicYoutubeSearchAnsweringChain = (
const sortedDocs = similarity
.sort((a, b) => b.similarity - a.similarity)
.slice(0, 15)
.filter((sim) => sim.similarity > 0.3)
.map((sim) => docsWithContent[sim.index]);
.filter(sim => sim.similarity > 0.3)
.map(sim => docsWithContent[sim.index]);
return sortedDocs;
};
@ -190,41 +150,35 @@ const createBasicYoutubeSearchAnsweringChain = (
query: (input: BasicChainInput) => input.query,
chat_history: (input: BasicChainInput) => input.chat_history,
context: RunnableSequence.from([
(input) => ({
input => ({
query: input.query,
chat_history: formatChatHistoryAsString(input.chat_history),
}),
basicYoutubeSearchRetrieverChain
.pipe(rerankDocs)
.withConfig({
runName: 'FinalSourceRetriever',
runName: "FinalSourceRetriever",
})
.pipe(processDocs),
]),
}),
ChatPromptTemplate.fromMessages([
['system', basicYoutubeSearchResponsePrompt],
new MessagesPlaceholder('chat_history'),
['user', '{query}'],
["system", basicYoutubeSearchResponsePrompt],
new MessagesPlaceholder("chat_history"),
["user", "{query}"],
]),
llm,
strParser,
]).withConfig({
runName: 'FinalResponseGenerator',
runName: "FinalResponseGenerator",
});
};
const basicYoutubeSearch = (
query: string,
history: BaseMessage[],
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const basicYoutubeSearch = (query: string, history: BaseMessage[], llm: BaseChatModel, embeddings: Embeddings) => {
const emitter = new eventEmitter();
try {
const basicYoutubeSearchAnsweringChain =
createBasicYoutubeSearchAnsweringChain(llm, embeddings);
const basicYoutubeSearchAnsweringChain = createBasicYoutubeSearchAnsweringChain(llm, embeddings);
const stream = basicYoutubeSearchAnsweringChain.streamEvents(
{
@ -232,28 +186,20 @@ const basicYoutubeSearch = (
query: query,
},
{
version: 'v1',
version: "v1",
},
);
handleStream(stream, emitter);
} catch (err) {
emitter.emit(
'error',
JSON.stringify({ data: 'An error has occurred please try again later' }),
);
emitter.emit("error", JSON.stringify({ data: "An error has occurred please try again later" }));
logger.error(`Error in youtube search: ${err}`);
}
return emitter;
};
const handleYoutubeSearch = (
message: string,
history: BaseMessage[],
llm: BaseChatModel,
embeddings: Embeddings,
) => {
const handleYoutubeSearch = (message: string, history: BaseMessage[], llm: BaseChatModel, embeddings: Embeddings) => {
const emitter = basicYoutubeSearch(message, history, llm, embeddings);
return emitter;
};