This commit is contained in:
Jin Yucong 2024-07-05 14:36:50 +08:00
parent 5b1aaee605
commit 3b737a078a
63 changed files with 1132 additions and 1853 deletions

View file

@ -1,8 +1,7 @@
import { Embeddings, type EmbeddingsParams } from '@langchain/core/embeddings';
import { chunkArray } from '@langchain/core/utils/chunk_array';
import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
import { chunkArray } from "@langchain/core/utils/chunk_array";
export interface HuggingFaceTransformersEmbeddingsParams
extends EmbeddingsParams {
export interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams {
modelName: string;
model: string;
@ -14,13 +13,10 @@ export interface HuggingFaceTransformersEmbeddingsParams
stripNewLines?: boolean;
}
export class HuggingFaceTransformersEmbeddings
extends Embeddings
implements HuggingFaceTransformersEmbeddingsParams
{
modelName = 'Xenova/all-MiniLM-L6-v2';
export class HuggingFaceTransformersEmbeddings extends Embeddings implements HuggingFaceTransformersEmbeddingsParams {
modelName = "Xenova/all-MiniLM-L6-v2";
model = 'Xenova/all-MiniLM-L6-v2';
model = "Xenova/all-MiniLM-L6-v2";
batchSize = 512;
@ -41,12 +37,9 @@ export class HuggingFaceTransformersEmbeddings
}
async embedDocuments(texts: string[]): Promise<number[][]> {
const batches = chunkArray(
this.stripNewLines ? texts.map((t) => t.replace(/\n/g, ' ')) : texts,
this.batchSize,
);
const batches = chunkArray(this.stripNewLines ? texts.map(t => t.replace(/\n/g, " ")) : texts, this.batchSize);
const batchRequests = batches.map((batch) => this.runEmbedding(batch));
const batchRequests = batches.map(batch => this.runEmbedding(batch));
const batchResponses = await Promise.all(batchRequests);
const embeddings: number[][] = [];
@ -61,22 +54,17 @@ export class HuggingFaceTransformersEmbeddings
}
async embedQuery(text: string): Promise<number[]> {
const data = await this.runEmbedding([
this.stripNewLines ? text.replace(/\n/g, ' ') : text,
]);
const data = await this.runEmbedding([this.stripNewLines ? text.replace(/\n/g, " ") : text]);
return data[0];
}
private async runEmbedding(texts: string[]) {
const { pipeline } = await import('@xenova/transformers');
const { pipeline } = await import("@xenova/transformers");
const pipe = await (this.pipelinePromise ??= pipeline(
'feature-extraction',
this.model,
));
const pipe = await (this.pipelinePromise ??= pipeline("feature-extraction", this.model));
return this.caller.call(async () => {
const output = await pipe(texts, { pooling: 'mean', normalize: true });
const output = await pipe(texts, { pooling: "mean", normalize: true });
return output.tolist();
});
}

View file

@ -1,11 +1,11 @@
import { BaseOutputParser } from '@langchain/core/output_parsers';
import { BaseOutputParser } from "@langchain/core/output_parsers";
interface LineListOutputParserArgs {
key?: string;
}
class LineListOutputParser extends BaseOutputParser<string[]> {
private key = 'questions';
private key = "questions";
constructor(args?: LineListOutputParserArgs) {
super();
@ -13,30 +13,29 @@ class LineListOutputParser extends BaseOutputParser<string[]> {
}
static lc_name() {
return 'LineListOutputParser';
return "LineListOutputParser";
}
lc_namespace = ['langchain', 'output_parsers', 'line_list_output_parser'];
lc_namespace = ["langchain", "output_parsers", "line_list_output_parser"];
async parse(text: string): Promise<string[]> {
const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/;
const startKeyIndex = text.indexOf(`<${this.key}>`);
const endKeyIndex = text.indexOf(`</${this.key}>`);
const questionsStartIndex =
startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length;
const questionsStartIndex = startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length;
const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex;
const lines = text
.slice(questionsStartIndex, questionsEndIndex)
.trim()
.split('\n')
.filter((line) => line.trim() !== '')
.map((line) => line.replace(regex, ''));
.split("\n")
.filter(line => line.trim() !== "")
.map(line => line.replace(regex, ""));
return lines;
}
getFormatInstructions(): string {
throw new Error('Not implemented.');
throw new Error("Not implemented.");
}
}

View file

@ -1,13 +1,9 @@
import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai';
import { ChatOllama } from '@langchain/community/chat_models/ollama';
import { OllamaEmbeddings } from '@langchain/community/embeddings/ollama';
import { HuggingFaceTransformersEmbeddings } from './huggingfaceTransformer';
import {
getGroqApiKey,
getOllamaApiEndpoint,
getOpenaiApiKey,
} from '../config';
import logger from '../utils/logger';
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";
import { ChatOllama } from "@langchain/community/chat_models/ollama";
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama";
import { HuggingFaceTransformersEmbeddings } from "./huggingfaceTransformer";
import { getGroqApiKey, getOllamaApiEndpoint, getOpenaiApiKey } from "../config";
import logger from "../utils/logger";
export const getAvailableChatModelProviders = async () => {
const openAIApiKey = getOpenaiApiKey();
@ -18,25 +14,25 @@ export const getAvailableChatModelProviders = async () => {
if (openAIApiKey) {
try {
models['openai'] = {
'GPT-3.5 turbo': new ChatOpenAI({
models["openai"] = {
"GPT-3.5 turbo": new ChatOpenAI({
openAIApiKey,
modelName: 'gpt-3.5-turbo',
modelName: "gpt-3.5-turbo",
temperature: 0.7,
}),
'GPT-4': new ChatOpenAI({
"GPT-4": new ChatOpenAI({
openAIApiKey,
modelName: 'gpt-4',
modelName: "gpt-4",
temperature: 0.7,
}),
'GPT-4 turbo': new ChatOpenAI({
"GPT-4 turbo": new ChatOpenAI({
openAIApiKey,
modelName: 'gpt-4-turbo',
modelName: "gpt-4-turbo",
temperature: 0.7,
}),
'GPT-4 omni': new ChatOpenAI({
"GPT-4 omni": new ChatOpenAI({
openAIApiKey,
modelName: 'gpt-4o',
modelName: "gpt-4o",
temperature: 0.7,
}),
};
@ -47,45 +43,45 @@ export const getAvailableChatModelProviders = async () => {
if (groqApiKey) {
try {
models['groq'] = {
'LLaMA3 8b': new ChatOpenAI(
models["groq"] = {
"LLaMA3 8b": new ChatOpenAI(
{
openAIApiKey: groqApiKey,
modelName: 'llama3-8b-8192',
modelName: "llama3-8b-8192",
temperature: 0.7,
},
{
baseURL: 'https://api.groq.com/openai/v1',
baseURL: "https://api.groq.com/openai/v1",
},
),
'LLaMA3 70b': new ChatOpenAI(
"LLaMA3 70b": new ChatOpenAI(
{
openAIApiKey: groqApiKey,
modelName: 'llama3-70b-8192',
modelName: "llama3-70b-8192",
temperature: 0.7,
},
{
baseURL: 'https://api.groq.com/openai/v1',
baseURL: "https://api.groq.com/openai/v1",
},
),
'Mixtral 8x7b': new ChatOpenAI(
"Mixtral 8x7b": new ChatOpenAI(
{
openAIApiKey: groqApiKey,
modelName: 'mixtral-8x7b-32768',
modelName: "mixtral-8x7b-32768",
temperature: 0.7,
},
{
baseURL: 'https://api.groq.com/openai/v1',
baseURL: "https://api.groq.com/openai/v1",
},
),
'Gemma 7b': new ChatOpenAI(
"Gemma 7b": new ChatOpenAI(
{
openAIApiKey: groqApiKey,
modelName: 'gemma-7b-it',
modelName: "gemma-7b-it",
temperature: 0.7,
},
{
baseURL: 'https://api.groq.com/openai/v1',
baseURL: "https://api.groq.com/openai/v1",
},
),
};
@ -98,14 +94,14 @@ export const getAvailableChatModelProviders = async () => {
try {
const response = await fetch(`${ollamaEndpoint}/api/tags`, {
headers: {
'Content-Type': 'application/json',
"Content-Type": "application/json",
},
});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const { models: ollamaModels } = (await response.json()) as any;
models['ollama'] = ollamaModels.reduce((acc, model) => {
models["ollama"] = ollamaModels.reduce((acc, model) => {
acc[model.model] = new ChatOllama({
baseUrl: ollamaEndpoint,
model: model.model,
@ -118,7 +114,7 @@ export const getAvailableChatModelProviders = async () => {
}
}
models['custom_openai'] = {};
models["custom_openai"] = {};
return models;
};
@ -131,14 +127,14 @@ export const getAvailableEmbeddingModelProviders = async () => {
if (openAIApiKey) {
try {
models['openai'] = {
'Text embedding 3 small': new OpenAIEmbeddings({
models["openai"] = {
"Text embedding 3 small": new OpenAIEmbeddings({
openAIApiKey,
modelName: 'text-embedding-3-small',
modelName: "text-embedding-3-small",
}),
'Text embedding 3 large': new OpenAIEmbeddings({
"Text embedding 3 large": new OpenAIEmbeddings({
openAIApiKey,
modelName: 'text-embedding-3-large',
modelName: "text-embedding-3-large",
}),
};
} catch (err) {
@ -150,14 +146,14 @@ export const getAvailableEmbeddingModelProviders = async () => {
try {
const response = await fetch(`${ollamaEndpoint}/api/tags`, {
headers: {
'Content-Type': 'application/json',
"Content-Type": "application/json",
},
});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const { models: ollamaModels } = (await response.json()) as any;
models['ollama'] = ollamaModels.reduce((acc, model) => {
models["ollama"] = ollamaModels.reduce((acc, model) => {
acc[model.model] = new OllamaEmbeddings({
baseUrl: ollamaEndpoint,
model: model.model,
@ -170,15 +166,15 @@ export const getAvailableEmbeddingModelProviders = async () => {
}
try {
models['local'] = {
'BGE Small': new HuggingFaceTransformersEmbeddings({
modelName: 'Xenova/bge-small-en-v1.5',
models["local"] = {
"BGE Small": new HuggingFaceTransformersEmbeddings({
modelName: "Xenova/bge-small-en-v1.5",
}),
'GTE Small': new HuggingFaceTransformersEmbeddings({
modelName: 'Xenova/gte-small',
"GTE Small": new HuggingFaceTransformersEmbeddings({
modelName: "Xenova/gte-small",
}),
'Bert Multilingual': new HuggingFaceTransformersEmbeddings({
modelName: 'Xenova/bert-base-multilingual-uncased',
"Bert Multilingual": new HuggingFaceTransformersEmbeddings({
modelName: "Xenova/bert-base-multilingual-uncased",
}),
};
} catch (err) {

View file

@ -1,5 +1,5 @@
import axios from 'axios';
import { getSearxngApiEndpoint } from '../config';
import axios from "axios";
import { getSearxngApiEndpoint } from "../config";
interface SearxngSearchOptions {
categories?: string[];
@ -19,19 +19,16 @@ interface SearxngSearchResult {
iframe_src?: string;
}
export const searchSearxng = async (
query: string,
opts?: SearxngSearchOptions,
) => {
export const searchSearxng = async (query: string, opts?: SearxngSearchOptions) => {
const searxngURL = getSearxngApiEndpoint();
const url = new URL(`${searxngURL}/search?format=json`);
url.searchParams.append('q', query);
url.searchParams.append("q", query);
if (opts) {
Object.keys(opts).forEach((key) => {
Object.keys(opts).forEach(key => {
if (Array.isArray(opts[key])) {
url.searchParams.append(key, opts[key].join(','));
url.searchParams.append(key, opts[key].join(","));
return;
}
url.searchParams.append(key, opts[key]);