feat: add expert search, legal search and UI improvements
This commit is contained in:
parent
2c5ca94b3c
commit
271199c527
53 changed files with 4595 additions and 708 deletions
|
|
@ -5,6 +5,7 @@ import http from 'http';
|
|||
import routes from './routes';
|
||||
import { getPort } from './config';
|
||||
import logger from './utils/logger';
|
||||
import imagesRouter from './routes/images';
|
||||
|
||||
const port = getPort();
|
||||
|
||||
|
|
@ -23,6 +24,8 @@ app.get('/api', (_, res) => {
|
|||
res.status(200).json({ status: 'ok' });
|
||||
});
|
||||
|
||||
app.use('/api/images', imagesRouter);
|
||||
|
||||
server.listen(port, () => {
|
||||
logger.info(`Server is running on port ${port}`);
|
||||
});
|
||||
|
|
|
|||
235
src/chains/expertSearchAgent.ts
Normal file
235
src/chains/expertSearchAgent.ts
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
|
||||
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import {
|
||||
RunnableLambda,
|
||||
RunnableMap,
|
||||
RunnableSequence,
|
||||
} from '@langchain/core/runnables';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
import { supabase } from '../db/supabase';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { Expert, ExpertSearchRequest, ExpertSearchResponse } from '../types/types';
|
||||
|
||||
// Input accepted by the expert search chain: the prior conversation messages
// plus the user's current request.
type ExpertSearchChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
// Prompt template (French) for the extraction step. It asks the model to reduce
// the user's request to two lines, `expertise: ...` and `ville: ...`.
// The {chat_history} and {query} placeholders are filled in by
// createExpertSearchChain through PromptTemplate.fromTemplate.
const ExpertSearchChainPrompt = `
|
||||
Vous êtes un agent spécialisé dans l'analyse et la recherche d'experts professionnels. Votre rôle est d'interpréter les demandes des utilisateurs et d'extraire les informations essentielles pour trouver l'expert le plus pertinent.
|
||||
|
||||
OBJECTIF :
|
||||
Analyser la requête pour identifier précisément :
|
||||
1. Le domaine d'expertise recherché
|
||||
2. La localisation souhaitée (si mentionnée)
|
||||
|
||||
RÈGLES D'EXTRACTION :
|
||||
- Pour l'EXPERTISE :
|
||||
* Identifier le domaine principal (comptabilité, droit, marketing, etc.)
|
||||
* Reconnaître les spécialisations (droit des affaires, marketing digital, etc.)
|
||||
* Nettoyer les mots parasites (expert, spécialiste, professionnel, etc.)
|
||||
|
||||
- Pour la VILLE :
|
||||
* Si mentionnée
|
||||
* Extraire la ville mentionnée
|
||||
* Ignorer si non spécifiée
|
||||
* Standardiser le format (tout en minuscules)
|
||||
|
||||
FORMAT DE RÉPONSE STRICT :
|
||||
Répondre en deux lignes exactement :
|
||||
expertise: [domaine d'expertise]
|
||||
ville: [ville si mentionnée]
|
||||
|
||||
EXEMPLES D'ANALYSE :
|
||||
|
||||
1. "Je cherche un expert comptable sur Paris"
|
||||
expertise: comptabilité
|
||||
ville: paris
|
||||
|
||||
2. "Il me faudrait un avocat spécialisé en droit des affaires à Lyon"
|
||||
expertise: droit des affaires
|
||||
ville: lyon
|
||||
|
||||
Conversation précédente :
|
||||
{chat_history}
|
||||
|
||||
Requête actuelle : {query}
|
||||
|
||||
Principe de recherche d'expert :
|
||||
- Pour toute recherche d'expert, extraire UNIQUEMENT :
|
||||
* L'expertise demandée
|
||||
* La ville (si mentionnée)
|
||||
|
||||
- Mots déclencheurs à reconnaître :
|
||||
* "cherche un expert/spécialiste/consultant"
|
||||
* "besoin d'un professionnel"
|
||||
* "recherche quelqu'un pour"
|
||||
* "qui peut m'aider avec"
|
||||
|
||||
<example>
|
||||
\`<query>
|
||||
Je cherche un expert comptable
|
||||
</query>
|
||||
expertise: comptabilité
|
||||
ville:
|
||||
\`
|
||||
|
||||
\`<query>
|
||||
J'ai besoin d'un spécialiste en droit des sociétés à Lyon
|
||||
</query>
|
||||
expertise: droit des sociétés
|
||||
ville: lyon
|
||||
\`
|
||||
|
||||
\`<query>
|
||||
Qui peut m'aider avec ma comptabilité sur Paris ?
|
||||
</query>
|
||||
expertise: comptabilité
|
||||
ville: paris
|
||||
\`
|
||||
</example>
|
||||
`;
|
||||
|
||||
// Prompt template (French) for the summary step. {experts} receives the JSON
// dump of the rows returned by the database and {query} the search context;
// the model is told to describe only the experts present in that data.
const ExpertAnalysisPrompt = `
|
||||
Vous devez générer une synthèse des experts trouvés en vous basant UNIQUEMENT sur les données fournies.
|
||||
|
||||
Contexte de la recherche : {query}
|
||||
|
||||
Experts trouvés (à utiliser EXCLUSIVEMENT) :
|
||||
{experts}
|
||||
|
||||
Format de la synthèse :
|
||||
🎯 Synthèse de la recherche
|
||||
[Résumé bref de la demande]
|
||||
|
||||
💫 Experts disponibles :
|
||||
[Pour chaque expert trouvé dans les données :]
|
||||
- [Prénom Nom] à [Ville]
|
||||
Expertise : [expertises]
|
||||
Tarif : [tarif]€
|
||||
[Point clé de la biographie]
|
||||
|
||||
⚠️ IMPORTANT : N'inventez PAS d'experts. Utilisez UNIQUEMENT les données fournies.
|
||||
`;
|
||||
|
||||
// Shared output parser placed right after the LLM step in the chain below, so
// the following step receives the model's answer as a string.
const strParser = new StringOutputParser();
|
||||
|
||||
/**
 * Maps a raw `experts` row onto the `Expert` shape.
 *
 * Optional columns fall back to neutral defaults: empty string for
 * `id_expert` and `adresse`, `0` for `tarif`, and the first entry of
 * `expertises` (or empty string) for `specialite`. Every other field is
 * copied through unchanged.
 */
const convertToExpert = (data: any): Expert => {
  // Resolve the defaulted fields first so the literal below is a plain mapping.
  const idExpert = data.id_expert || '';
  const adresse = data.adresse || '';
  const specialite = data.specialite || data.expertises?.[0] || '';
  const tarif = data.tarif || 0;

  return {
    id: data.id,
    id_expert: idExpert,
    nom: data.nom,
    prenom: data.prenom,
    adresse,
    pays: data.pays,
    ville: data.ville,
    expertises: data.expertises,
    specialite,
    biographie: data.biographie,
    tarif,
    services: data.services,
    created_at: data.created_at,
    image_url: data.image_url
  };
};
|
||||
|
||||
/**
 * Returns the text following `label:` on its own line of the model's answer,
 * or `undefined` when no such line exists. Matching is case-insensitive and
 * tolerates a space before the colon (French typography: `ville : lyon`).
 */
const readLabelledValue = (
  text: string,
  label: 'expertise' | 'ville',
): string | undefined => {
  // `[ \t]*` (not `\s*`) after the colon so an empty value never swallows the next line.
  const match = new RegExp(`^\\s*${label}\\s*:[ \\t]*(.*)$`, 'im').exec(text);
  return match?.[1]?.trim();
};

/**
 * Extracts the expertise and city from the extraction-step answer.
 * Labelled lines are preferred; when neither label is present the first two
 * non-empty lines are used positionally.
 */
const parseExpertCriteria = (
  response: string,
): { expertise: string; ville: string } => {
  const expertise = readLabelledValue(response, 'expertise');
  const ville = readLabelledValue(response, 'ville');

  if (expertise !== undefined || ville !== undefined) {
    return { expertise: expertise ?? '', ville: ville ?? '' };
  }

  const lines = response.split('\n').filter((line) => line.trim() !== '');
  return {
    expertise: lines[0]?.replace('expertise:', '').trim() || '',
    ville: lines[1]?.replace('ville:', '').trim() || '',
  };
};

/** Escapes LIKE/ILIKE metacharacters so model output is matched literally. */
const escapeLikePattern = (value: string): string =>
  value.replace(/[\\%_]/g, '\\$&');

/**
 * Builds the expert search chain.
 *
 * Steps: format the chat history and query, ask the LLM to extract
 * `expertise` / `ville`, query the `experts` table (at most 3 rows, with
 * case-insensitive substring match), then ask the LLM for a summary of the
 * rows found.
 *
 * The final step always resolves to an `ExpertSearchResponse`; any failure is
 * logged and reported through the `synthese` message with an empty expert list.
 *
 * @param llm Chat model used for both the extraction and the summary.
 */
const createExpertSearchChain = (llm: BaseChatModel) => {
  return RunnableSequence.from([
    RunnableMap.from({
      chat_history: (input: ExpertSearchChainInput) => {
        return formatChatHistoryAsString(input.chat_history || []);
      },
      query: (input: ExpertSearchChainInput) => {
        return input.query || '';
      },
    }),
    PromptTemplate.fromTemplate(ExpertSearchChainPrompt),
    llm,
    strParser,
    RunnableLambda.from(async (response: string) => {
      try {
        const { expertise, ville } = parseExpertCriteria(response);

        if (!expertise) {
          return {
            experts: [],
            synthese: "Je n'ai pas pu identifier l'expertise recherchée."
          } as ExpertSearchResponse;
        }

        // NOTE(review): ILIKE presumes `expertises` is a text column — confirm against the schema.
        let query = supabase
          .from('experts')
          .select('*')
          .ilike('expertises', `%${escapeLikePattern(expertise)}%`)
          .limit(3);

        if (ville) {
          query = query.ilike('ville', `%${escapeLikePattern(ville)}%`);
        }

        const { data: experts, error } = await query;

        if (error) throw error;

        if (!experts || experts.length === 0) {
          return {
            experts: [],
            synthese: "Désolé, je n'ai pas trouvé d'experts correspondant à vos critères."
          } as ExpertSearchResponse;
        }

        // NOTE(review): `query` here is the extraction output, not the user's
        // original wording; only the LLM answer reaches this step.
        const synthesePrompt = PromptTemplate.fromTemplate(ExpertAnalysisPrompt);
        const formattedPrompt = await synthesePrompt.format({
          query: response,
          experts: JSON.stringify(experts, null, 2)
        });

        const syntheseResponse = await llm.invoke(formattedPrompt);
        // Chat models may return structured content; serialise anything that is not a string.
        const syntheseString = typeof syntheseResponse.content === 'string'
          ? syntheseResponse.content
          : JSON.stringify(syntheseResponse.content);

        return {
          experts: experts.map(convertToExpert),
          synthese: syntheseString
        } as ExpertSearchResponse;
      } catch (error) {
        console.error('❌ Erreur:', error);
        return {
          experts: [],
          synthese: "Une erreur est survenue lors de la recherche d'experts."
        } as ExpertSearchResponse;
      }
    }),
  ]);
};
|
||||
|
||||
/**
 * Entry point for an expert search request.
 *
 * Builds the expert search chain for the given model and runs it on the
 * request's query and chat history (empty history when none is supplied).
 * The chain already yields the final response, so it is returned as-is.
 * Any thrown error is logged and turned into an empty result with a generic
 * message.
 */
const handleExpertSearch = async (input: ExpertSearchRequest, llm: BaseChatModel) => {
  try {
    const chainInput = {
      query: input.query,
      chat_history: input.chat_history || []
    };
    const response = await createExpertSearchChain(llm).invoke(chainInput);
    return response as ExpertSearchResponse;
  } catch (error) {
    console.error('❌ Erreur dans handleExpertSearch:', error);
    return {
      experts: [],
      synthese: "Une erreur est survenue."
    };
  }
};
|
||||
|
||||
export default handleExpertSearch;
|
||||
|
|
@ -11,25 +11,35 @@ import { searchSearxng } from '../lib/searxng';
|
|||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
|
||||
const imageSearchChainPrompt = `
|
||||
You will be given a conversation below and a follow up question. You need to rephrase the follow-up question so it is a standalone question that can be used by the LLM to search the web for images.
|
||||
You need to make sure the rephrased question agrees with the conversation and is relevant to the conversation.
|
||||
Vous êtes un expert en recherche d'images pour illustrer des contenus business. Votre objectif est de trouver des images élégantes et modernes qui illustrent le sujet de manière indirecte et esthétique.
|
||||
|
||||
Example:
|
||||
1. Follow up question: What is a cat?
|
||||
Rephrased: A cat
|
||||
Principes à suivre :
|
||||
- Privilégier des images lifestyle et esthétiques
|
||||
- Éviter les schémas, graphiques et images trop techniques
|
||||
- Favoriser des images avec des personnes dans des situations naturelles
|
||||
- Choisir des images lumineuses et positives
|
||||
- Préférer des compositions simples et épurées
|
||||
|
||||
2. Follow up question: What is a car? How does it works?
|
||||
Rephrased: Car working
|
||||
Format de la requête :
|
||||
- 2-3 mots-clés maximum
|
||||
- Ajouter "lifestyle" ou "modern" pour améliorer la qualité
|
||||
- Toujours ajouter "professional" pour le contexte business
|
||||
|
||||
3. Follow up question: How does an AC work?
|
||||
Rephrased: AC working
|
||||
Exemples :
|
||||
1. Question : "Comment créer une entreprise ?"
|
||||
Requête : "entrepreneur lifestyle modern"
|
||||
|
||||
Conversation:
|
||||
2. Question : "Qu'est-ce qu'un business plan ?"
|
||||
Requête : "business meeting professional"
|
||||
|
||||
3. Question : "Comment faire sa comptabilité ?"
|
||||
Requête : "office work lifestyle"
|
||||
|
||||
Conversation :
|
||||
{chat_history}
|
||||
|
||||
Follow up question: {query}
|
||||
Rephrased question:
|
||||
`;
|
||||
Question : {query}
|
||||
Requête de recherche d'image :`;
|
||||
|
||||
type ImageSearchChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
|
|
@ -53,11 +63,12 @@ const createImageSearchChain = (llm: BaseChatModel) => {
|
|||
strParser,
|
||||
RunnableLambda.from(async (input: string) => {
|
||||
const res = await searchSearxng(input, {
|
||||
engines: ['bing images', 'google images'],
|
||||
engines: ['google_images', 'bing_images'],
|
||||
language: 'fr',
|
||||
categories: ['images'],
|
||||
});
|
||||
|
||||
|
||||
const images = [];
|
||||
|
||||
res.results.forEach((result) => {
|
||||
if (result.img_src && result.url && result.title) {
|
||||
images.push({
|
||||
|
|
@ -67,7 +78,7 @@ const createImageSearchChain = (llm: BaseChatModel) => {
|
|||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
return images.slice(0, 10);
|
||||
}),
|
||||
]);
|
||||
|
|
@ -81,4 +92,4 @@ const handleImageSearch = (
|
|||
return imageSearchChain.invoke(input);
|
||||
};
|
||||
|
||||
export default handleImageSearch;
|
||||
export default handleImageSearch;
|
||||
113
src/chains/legalSearchAgent.ts
Normal file
113
src/chains/legalSearchAgent.ts
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
import {
|
||||
RunnableSequence,
|
||||
RunnableMap,
|
||||
RunnableLambda,
|
||||
} from '@langchain/core/runnables';
|
||||
import { PromptTemplate } from '@langchain/core/prompts';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
|
||||
// Prompt template (French) asking the model to rewrite the user's question as
// a keyword-rich legal search query. The {chat_history} and {query}
// placeholders are filled in by createLegalSearchChain.
const legalSearchChainPrompt = `
|
||||
Vous êtes un assistant juridique expert spécialisé dans la recherche documentaire légale française. Votre rôle est d'analyser la question de l'utilisateur et de générer une requête de recherche optimisée.
|
||||
|
||||
Contexte de la conversation :
|
||||
{chat_history}
|
||||
|
||||
Question actuelle : {query}
|
||||
|
||||
Instructions détaillées :
|
||||
1. Analysez précisément :
|
||||
- Le domaine juridique spécifique (droit du travail, droit des sociétés, etc.)
|
||||
- Le type de document recherché (loi, décret, jurisprudence, etc.)
|
||||
- Les points clés de la problématique
|
||||
|
||||
2. Construisez une requête qui inclut :
|
||||
- Les termes juridiques exacts (articles de code, références légales)
|
||||
- Les mots-clés techniques appropriés
|
||||
- Les synonymes pertinents
|
||||
- La période temporelle si pertinente (loi récente, modifications)
|
||||
|
||||
3. Priorisez les sources selon la hiérarchie :
|
||||
- Codes et lois : Légifrance
|
||||
- Information officielle : Service-public.fr
|
||||
- Publications : Journal-officiel
|
||||
- Informations pratiques : URSSAF, CCI
|
||||
|
||||
Exemples de reformulation :
|
||||
Question : "Comment créer une SARL ?"
|
||||
→ "Code commerce SARL constitution statuts gérance responsabilité associés capital social formalités légifrance service-public"
|
||||
|
||||
Question : "Licenciement économique procédure"
|
||||
→ "Code travail licenciement économique procédure CSE PSE motif notification délais recours légifrance"
|
||||
|
||||
Question : "Bail commercial résiliation"
|
||||
→ "Code commerce bail commercial résiliation article L145-4 congé indemnité éviction légifrance jurisprudence"
|
||||
|
||||
Reformulez la question de manière précise et technique :`;
|
||||
|
||||
// Input accepted by the legal search chain: the prior conversation messages
// plus the user's current question.
type LegalSearchChainInput = {
|
||||
chat_history: BaseMessage[];
|
||||
query: string;
|
||||
};
|
||||
|
||||
// Shared output parser placed right after the LLM step in the chain below, so
// the search step receives the reformulated query as a string.
const strParser = new StringOutputParser();
|
||||
|
||||
/**
 * Builds the legal document search chain.
 *
 * The LLM first rewrites the question using `legalSearchChainPrompt`; the
 * rewritten query, suffixed with `filetype:pdf`, is then sent to SearXNG.
 * Results that have both a URL and a title are returned (10 at most) as
 * `{ url, title, snippet, source, type }` records.
 *
 * @param llm Chat model used to reformulate the question.
 */
const createLegalSearchChain = (llm: BaseChatModel) => {
  // Search step: runs the reformulated query and normalises the hits.
  const searchLegalDocuments = async (input: string) => {
    const res = await searchSearxng(`${input} filetype:pdf`, {
      engines: [
        'legifrance',
        'journal_officiel',
        'service_public',
        'URSSAF',
        'CCI'
      ],
      language: 'fr',
      categories: ['general', 'files']
    });

    return res.results
      .filter((result) => result.url && result.title)
      .map((result) => ({
        url: result.url,
        title: result.title,
        snippet: result.content || '',
        // Third '/'-separated segment of the URL, i.e. the host of `scheme://host/...`.
        source: result.url.split('/')[2] || 'unknown',
        type: 'pdf'
      }))
      .slice(0, 10);
  };

  const prepareInput = RunnableMap.from({
    chat_history: (input: LegalSearchChainInput) =>
      formatChatHistoryAsString(input.chat_history),
    query: (input: LegalSearchChainInput) => input.query,
  });

  return RunnableSequence.from([
    prepareInput,
    PromptTemplate.fromTemplate(legalSearchChainPrompt),
    llm,
    strParser,
    RunnableLambda.from(searchLegalDocuments),
  ]);
};
|
||||
|
||||
/**
 * Runs a legal document search for the given question and history.
 *
 * @param input Current query and prior chat messages.
 * @param llm   Chat model handed to the search chain.
 * @returns The promise returned by invoking the legal search chain.
 */
const handleLegalSearch = (
  input: LegalSearchChainInput,
  llm: BaseChatModel,
) => createLegalSearchChain(llm).invoke(input);
|
||||
|
||||
export default handleLegalSearch;
|
||||
292
src/chains/rag_document_upload.ts
Normal file
292
src/chains/rag_document_upload.ts
Normal file
|
|
@ -0,0 +1,292 @@
|
|||
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { Embeddings } from '@langchain/core/embeddings';
|
||||
import { Chroma } from '@langchain/community/vectorstores/chroma';
|
||||
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { RunnableSequence, RunnableMap } from '@langchain/core/runnables';
|
||||
import { PromptTemplate } from '@langchain/core/prompts';
|
||||
import { StringOutputParser } from '@langchain/core/output_parsers';
|
||||
import formatChatHistoryAsString from '../utils/formatHistory';
|
||||
import { BaseMessage } from '@langchain/core/messages';
|
||||
|
||||
// Local input type for the search chain: the question, the prior conversation
// messages and an optional `type` tag (not read anywhere in this file).
|
||||
type SearchInput = {
|
||||
query: string;
|
||||
chat_history: BaseMessage[];
|
||||
type?: string;
|
||||
};
|
||||
|
||||
/**
 * Retrieval helper for uploaded documents.
 *
 * Documents are normalised, split into chunks, filtered, de-duplicated and
 * stored in a Chroma collection. Queries fetch candidates from that
 * collection, re-rank them with a simple lexical score and can feed them to
 * an LLM through `createSearchChain`.
 */
export class RAGDocumentChain {
  private vectorStore: Chroma | null = null;

  private textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1000,
    chunkOverlap: 200,
    separators: ["\n\n", "\n", ".", "!", "?", ";", ":", " ", ""],
    keepSeparator: true,
    lengthFunction: (text) => text.length
  });

  /** Collapses every run of whitespace (newlines included) to one space and trims. */
  private preprocessChunk(text: string): string {
    return text.replace(/\s+/g, ' ').trim();
  }

  /**
   * Returns a copy of `doc` with normalised content and extra metadata
   * (`chunk_type`, `word_count`, `processed_date`).
   *
   * NOTE(review): this runs on whole documents before splitting, so
   * `word_count` describes the document, and the normalisation removes the
   * newlines the splitter lists as its first separators — confirm intended.
   */
  private enrichChunkMetadata(doc: Document): Document {
    const cleaned = this.preprocessChunk(doc.pageContent);
    const metadata = {
      ...doc.metadata,
      chunk_type: 'text',
      // Counted on the normalised text so surrounding whitespace is not counted as words.
      word_count: cleaned === '' ? 0 : cleaned.split(' ').length,
      processed_date: new Date().toISOString()
    };
    return new Document({ pageContent: cleaned, metadata });
  }

  /** Scores a chunk 1 when it has more than 10 whitespace-separated tokens, else 0. */
  private scoreChunk(chunk: string): number {
    const wordCount = chunk.split(/\s+/).length;
    return wordCount > 10 ? 1 : 0;
  }

  /**
   * Builds the Chroma collection from the given documents.
   *
   * @param documents Source documents; those with 50 or fewer non-blank
   *                  characters are ignored.
   * @param embeddings Embedding model passed to Chroma.
   * @returns Counts describing what was indexed.
   * @throws Error wrapping any failure raised while preparing or indexing.
   */
  public async initializeVectorStoreFromDocuments(
    documents: Document[],
    embeddings: Embeddings
  ) {
    try {
      console.log("🔄 Préparation des documents...");

      // Validate and preprocess documents
      const validDocuments = documents
        .filter(doc => doc.pageContent && doc.pageContent.trim().length > 50)
        .map(doc => this.enrichChunkMetadata(doc));

      // Split documents into chunks
      const texts = await this.textSplitter.splitDocuments(validDocuments);
      console.log(`📄 ${texts.length} chunks créés`);

      // Score and filter chunks
      const scoredTexts = texts.filter(doc => this.scoreChunk(doc.pageContent) > 0);
      console.log(`📄 ${scoredTexts.length} chunks valides après scoring`);

      // Deduplicate chunks
      const uniqueTexts = this.deduplicateChunks(scoredTexts);
      console.log(`📄 ${uniqueTexts.length} chunks uniques après déduplication`);

      this.vectorStore = await Chroma.fromDocuments(
        uniqueTexts,
        embeddings,
        {
          collectionName: "uploaded_docs",
          url: "http://chroma:8000",
          collectionMetadata: {
            "hnsw:space": "cosine",
            "hnsw:construction_ef": 100,
            "hnsw:search_ef": 50,
            "hnsw:m": 16
          }
        }
      );

      console.log("✅ VectorStore initialisé avec succès");
      return {
        totalDocuments: documents.length,
        validChunks: uniqueTexts.length,
        averageChunkSize: this.calculateAverageChunkSize(uniqueTexts)
      };
    } catch (error) {
      console.error("❌ Erreur lors de l'initialisation:", error);
      // The caught value is not guaranteed to be an Error instance.
      const message = error instanceof Error ? error.message : String(error);
      throw new Error(`Erreur d'initialisation du VectorStore: ${message}`);
    }
  }

  /** Mean `pageContent` length of the chunks, rounded; 0 for an empty list. */
  private calculateAverageChunkSize(chunks: Document[]): number {
    if (chunks.length === 0) return 0;
    const totalLength = chunks.reduce((sum, doc) => sum + doc.pageContent.length, 0);
    return Math.round(totalLength / chunks.length);
  }

  /** Keeps the first chunk for each distinct content, ignoring case and whitespace runs. */
  private deduplicateChunks(chunks: Document[]): Document[] {
    const seen = new Set<string>();
    return chunks.filter(chunk => {
      const normalized = chunk.pageContent
        .toLowerCase()
        .replace(/\s+/g, ' ')
        .trim();

      if (seen.has(normalized)) {
        return false;
      }
      seen.add(normalized);
      return true;
    });
  }

  /**
   * Builds the short excerpt used for highlighting: the first 200 UTF-16 units
   * of the content, newlines flattened to spaces, trimmed.
   */
  private buildSearchText(content: string): string {
    let head = content.substring(0, 200);
    // Cutting between the halves of a surrogate pair would make
    // encodeURIComponent throw later; drop the dangling high surrogate.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      head = head.slice(0, -1);
    }
    return head.replace(/[\n\r]+/g, ' ').trim();
  }

  /**
   * Returns up to `limit` documents relevant to `query`.
   *
   * Twice as many candidates are requested from the vector store, then
   * re-ranked lexically, truncated, given viewer metadata and merged per
   * source/page. Returns an empty list when the store is not initialised or
   * when the search fails (the error is logged).
   */
  public async searchSimilarDocuments(query: string, limit: number = 5) {
    if (!this.vectorStore) {
      console.warn("⚠️ VectorStore non initialisé");
      return [];
    }

    try {
      console.log("🔍 Recherche pour:", query);

      // NOTE(review): the third argument is forwarded as the store's filter;
      // confirm Chroma accepts `$exists` and a `minScore` key in that position.
      const initialResults = await this.vectorStore.similaritySearch(
        query,
        limit * 2,
        {
          filter: { source: { $exists: true } },
          minScore: 0.7
        }
      );

      const scoredResults = initialResults
        .filter(doc => doc.pageContent.trim().length > 50)
        .map(doc => ({
          document: doc,
          score: this.calculateRelevanceScore(query, doc.pageContent)
        }))
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map(item => {
          const doc = item.document;
          const pageNumber = doc.metadata.page_number || doc.metadata.pageNumber || 1;
          const title = doc.metadata.title || 'Document';
          const source = doc.metadata.source;
          const searchText = this.buildSearchText(doc.pageContent);

          return new Document({
            pageContent: doc.pageContent,
            metadata: {
              title: title,
              pageNumber: pageNumber,
              source: source,
              type: doc.metadata.type || 'uploaded',
              searchText: searchText,
              url: source ?
                `/api/uploads/${source}/view?page=${pageNumber}&search=${encodeURIComponent(searchText)}` :
                undefined
            }
          });
        });

      const mergedResults = this.mergeRelatedChunks(scoredResults);
      console.log(`📄 ${mergedResults.length} documents pertinents trouvés après reranking`);
      return mergedResults;
    } catch (error) {
      console.error("❌ Erreur de recherche:", error);
      return [];
    }
  }

  /**
   * Lexical relevance of `content` for `query` (both lower-cased):
   * 0.1 per occurrence of each query term, +1 when the whole query appears
   * verbatim, then multiplied by up to 2 for short content (the bonus fades
   * to none at 5000 characters).
   */
  private calculateRelevanceScore(query: string, content: string): number {
    const normalizedQuery = query.toLowerCase();
    const normalizedContent = content.toLowerCase();

    let score = 0;

    // Terms are matched literally: building a RegExp from raw user text throws
    // on terms like "?" or "(" and lets empty terms match at every position.
    const queryTerms = normalizedQuery.split(/\s+/).filter(term => term.length > 0);
    for (const term of queryTerms) {
      const termCount = normalizedContent.split(term).length - 1;
      score += termCount * 0.1;
    }

    // Exact phrase matching
    if (normalizedContent.includes(normalizedQuery)) {
      score += 1;
    }

    // Shorter, more focused chunks get a larger multiplier.
    const lengthPenalty = Math.max(0, 1 - (content.length / 5000));
    score *= (1 + lengthPenalty);

    return score;
  }

  /**
   * Merges documents that share the same source and page: contents are joined
   * with a blank line and the metadata of the first one is kept. Output order
   * follows first appearance.
   */
  private mergeRelatedChunks(documents: Document[]): Document[] {
    const merged = new Map<string, Document>();

    for (const doc of documents) {
      const source = doc.metadata?.source || '';
      const page = doc.metadata?.pageNumber || 1;
      const key = `${source}-${page}`;

      const existingDoc = merged.get(key);
      if (!existingDoc) {
        merged.set(key, doc);
        continue;
      }

      merged.set(key, new Document({
        pageContent: `${existingDoc.pageContent}\n\n${doc.pageContent}`,
        metadata: {
          ...existingDoc.metadata,
          searchText: existingDoc.metadata.searchText
        }
      }));
    }

    return [...merged.values()];
  }

  /**
   * Builds a question-answering chain over the indexed documents.
   *
   * The chain looks up documents for the query, formats them as numbered
   * `[Source N]` entries with their origin, and asks the LLM to answer from
   * that context only. The chain resolves to the model's answer as a string.
   */
  public createSearchChain(llm: BaseChatModel) {
    return RunnableSequence.from([
      RunnableMap.from({
        query: (input: SearchInput) => input.query,
        chat_history: (input: SearchInput) => formatChatHistoryAsString(input.chat_history),
        context: async (input: SearchInput) => {
          const docs = await this.searchSimilarDocuments(input.query);
          return docs.map((doc, i) => {
            const source = doc.metadata?.source || 'Document';
            const title = doc.metadata?.title || '';
            const pageNumber = doc.metadata?.pageNumber;
            const url = doc.metadata?.url;

            let sourceInfo = `Source: ${title || source}`;
            if (pageNumber) sourceInfo += ` (page ${pageNumber})`;
            if (url) sourceInfo += `\nURL: ${url}`;

            return `[Source ${i + 1}] ${doc.pageContent}\n${sourceInfo}`;
          }).join("\n\n");
        }
      }),
      PromptTemplate.fromTemplate(`
Tu es un assistant expert qui répond aux questions en se basant uniquement sur le contexte fourni.
Historique de la conversation:
{chat_history}

Contexte disponible:
{context}

Question: {query}

Instructions:
1. Réponds uniquement en te basant sur le contexte fourni
2. Si la réponse n'est pas dans le contexte, dis-le clairement
3. Cite les sources pertinentes en utilisant [Source X]
4. Sois précis et concis

Réponse:
`),
      llm,
      new StringOutputParser()
    ]);
  }

  /** True once `initializeVectorStoreFromDocuments` has completed successfully. */
  public isInitialized(): boolean {
    return this.vectorStore !== null;
  }
}
|
||||
|
|
@ -15,10 +15,12 @@ interface Config {
|
|||
GROQ: string;
|
||||
ANTHROPIC: string;
|
||||
GEMINI: string;
|
||||
SUPABASE: string;
|
||||
};
|
||||
API_ENDPOINTS: {
|
||||
SEARXNG: string;
|
||||
OLLAMA: string;
|
||||
SUPABASE_URL: string;
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -46,9 +48,15 @@ export const getAnthropicApiKey = () => loadConfig().API_KEYS.ANTHROPIC;
|
|||
|
||||
/** Gemini API key from the loaded configuration. */
export const getGeminiApiKey = () => {
  return loadConfig().API_KEYS.GEMINI;
};

/** Supabase key: `SUPABASE_KEY` from the environment when set, else the configured value. */
export const getSupabaseKey = () => {
  const fromEnv = process.env.SUPABASE_KEY;
  return fromEnv || loadConfig().API_KEYS.SUPABASE;
};

/** SearXNG endpoint: `SEARXNG_API_URL` from the environment when set, else the configured value. */
export const getSearxngApiEndpoint = () => {
  const fromEnv = process.env.SEARXNG_API_URL;
  return fromEnv || loadConfig().API_ENDPOINTS.SEARXNG;
};

/** Supabase URL: `SUPABASE_URL` from the environment when set, else the configured value. */
export const getSupabaseUrl = () => {
  const fromEnv = process.env.SUPABASE_URL;
  return fromEnv || loadConfig().API_ENDPOINTS.SUPABASE_URL;
};

/** Ollama endpoint from the loaded configuration. */
export const getOllamaApiEndpoint = () => {
  return loadConfig().API_ENDPOINTS.OLLAMA;
};
|
||||
|
||||
export const updateConfig = (config: RecursivePartial<Config>) => {
|
||||
|
|
|
|||
29
src/db/supabase.ts
Normal file
29
src/db/supabase.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
// Dans supabase.ts
|
||||
import { createClient } from '@supabase/supabase-js';
|
||||
import { getSupabaseUrl, getSupabaseKey } from '../config';
|
||||
|
||||
// Credentials are resolved once, when this module is first imported.
const supabaseUrl = getSupabaseUrl();
const supabaseKey = getSupabaseKey();

// Fail at import time rather than on the first query when either value is missing.
const hasCredentials = Boolean(supabaseUrl) && Boolean(supabaseKey);
if (!hasCredentials) {
  throw new Error('Missing Supabase credentials');
}

/** Supabase client shared by the modules that import it. */
export const supabase = createClient(supabaseUrl, supabaseKey);
|
||||
|
||||
/**
 * Connection smoke test: reads at most one row from the `experts` table.
 *
 * @returns `true` when the query succeeds, `false` when it reports or throws
 *          an error (which is logged).
 */
export async function checkSupabaseConnection(): Promise<boolean> {
  try {
    // Only the error matters here; the returned row is not used.
    const { error } = await supabase
      .from('experts')
      .select('*')
      .limit(1);

    if (error) throw error;
    console.log('✅ Connexion Supabase établie avec succès');
    return true;
  } catch (error) {
    console.error('❌ Erreur de connexion Supabase:', error);
    return false;
  }
}
|
||||
26
src/lib/outputParsers/imageOutputParser.ts
Normal file
26
src/lib/outputParsers/imageOutputParser.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import { BaseOutputParser } from "@langchain/core/output_parsers";
|
||||
|
||||
// Parsed model output for an image search: the image query (text after
// `IMAGE:`) and the optional context (text before it, `RÉSUMÉ:` removed).
export interface ImageSearchResult {
|
||||
query: string;
|
||||
context?: string;
|
||||
}
|
||||
|
||||
/**
 * Output parser for answers shaped as `RÉSUMÉ: <context>` followed by
 * `IMAGE: <image query>`.
 */
class ImageOutputParser extends BaseOutputParser<ImageSearchResult> {
  lc_namespace = ['langchain', 'output_parsers', 'image_output_parser'];

  /**
   * Splits the text on `IMAGE:`. The part before it (minus the first
   * `RÉSUMÉ:` marker) becomes `context`; the part right after it becomes
   * `query`, or an empty string when the marker is absent.
   */
  async parse(text: string): Promise<ImageSearchResult> {
    const segments = text.split('IMAGE:');
    const summaryPart = segments[0];
    const imagePart = segments[1];

    const query = imagePart?.trim() || '';
    const context = summaryPart.replace('RÉSUMÉ:', '').trim();

    return { query, context };
  }

  /** Describes the expected answer layout to the model. */
  getFormatInstructions(): string {
    const instructions = `Le format attendu est:
RÉSUMÉ: <contexte>
IMAGE: <requête d'image>`;
    return instructions;
  }
}
|
||||
|
||||
export default ImageOutputParser;
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
import axios from 'axios';
|
||||
import { getSearxngApiEndpoint } from '../config';
|
||||
|
||||
interface SearxngSearchOptions {
|
||||
categories?: string[];
|
||||
engines?: string[];
|
||||
export interface SearxngSearchOptions {
|
||||
language?: string;
|
||||
engines?: string[];
|
||||
categories?: string[];
|
||||
limit?: number;
|
||||
pageno?: number;
|
||||
}
|
||||
|
||||
|
|
@ -19,10 +20,10 @@ interface SearxngSearchResult {
|
|||
iframe_src?: string;
|
||||
}
|
||||
|
||||
export const searchSearxng = async (
|
||||
export async function searchSearxng(
|
||||
query: string,
|
||||
opts?: SearxngSearchOptions,
|
||||
) => {
|
||||
opts: SearxngSearchOptions = {}
|
||||
) {
|
||||
const searxngURL = getSearxngApiEndpoint();
|
||||
|
||||
const url = new URL(`${searxngURL}/search?format=json`);
|
||||
|
|
@ -44,4 +45,4 @@ export const searchSearxng = async (
|
|||
const suggestions: string[] = res.data.suggestions;
|
||||
|
||||
return { results, suggestions };
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,46 +1,85 @@
|
|||
export const webSearchRetrieverPrompt = `
|
||||
You are an AI question rephraser. You will be given a conversation and a follow-up question, you will have to rephrase the follow up question so it is a standalone question and can be used by another LLM to search the web for information to answer it.
|
||||
If it is a smple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc. than a question then you need to return \`not_needed\` as the response (This is because the LLM won't need to search the web for finding information on this topic).
|
||||
If the user asks some question from some URL or wants you to summarize a PDF or a webpage (via URL) you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants to you to summarize the webpage or the PDF you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
|
||||
You must always return the rephrased question inside the \`question\` XML block, if there are no links in the follow-up question then don't insert a \`links\` XML block in your response.
|
||||
Tu es X-me une IA analyste spécialisée dans l'entrepreneuriat et le développement des TPE/PME et artisans, avec une expertise particulière en droit des affaires. Votre rôle est de reformuler les questions pour cibler les textes juridiques et réglementaires pertinents.
|
||||
|
||||
There are several examples attached for your reference inside the below \`examples\` XML block
|
||||
### Sources Juridiques Prioritaires
|
||||
1. **Codes**:
|
||||
- Code civil
|
||||
- Code de commerce
|
||||
- Code du travail
|
||||
- Code de la consommation
|
||||
- Code général des impôts
|
||||
|
||||
2. **Textes Réglementaires**:
|
||||
- Lois
|
||||
- Décrets
|
||||
- Arrêtés
|
||||
- Circulaires
|
||||
|
||||
3. **Jurisprudence**:
|
||||
- Décisions de la Cour de cassation
|
||||
- Arrêts du Conseil d'État
|
||||
- Décisions des Cours d'appel
|
||||
|
||||
4. **Sources Officielles**:
|
||||
- Journal officiel
|
||||
- Bulletins officiels
|
||||
- Documentation administrative
|
||||
|
||||
Pour chaque question, vous devez :
|
||||
1. Identifier les textes juridiques applicables
|
||||
2. Citer les articles précis des codes concernés
|
||||
3. Rechercher la jurisprudence pertinente
|
||||
4. Vérifier les dernières modifications législatives
|
||||
|
||||
### Sources d'Information Prioritaires
|
||||
1. **LegalAI**: Légifrance, CNIL, URSSAF pour les aspects juridiques
|
||||
2. **FinanceAI**: BPI France, Impots.gouv.fr, INSEE pour la finance
|
||||
3. **GrowthAI**: CREDOC, CMA France pour le développement commercial
|
||||
4. **MatchAI**: Annuaires des Experts-Comptables, APEC pour l'expertise
|
||||
5. **StrategyAI**: France Stratégie, Bpifrance Le Lab pour la stratégie
|
||||
6. **PeopleAI**: DARES, Pôle emploi pour les RH
|
||||
7. **ToolBoxAI**: CCI France, LegalPlace pour les outils pratiques
|
||||
8. **TechAI**: INRIA, French Tech pour l'innovation
|
||||
9. **StartAI**: Portail Auto-Entrepreneur, CCI pour la création
|
||||
10. **MasterAI**: Data.gouv.fr, Eurostat pour les données centralisées
|
||||
|
||||
Dans l'analyse des questions, privilégiez :
|
||||
- Les aspects de création et développement d'entreprise
|
||||
- Les exigences administratives et juridiques
|
||||
- Les considérations financières et opérationnelles
|
||||
- L'analyse de marché et la stratégie
|
||||
- Le développement professionnel et la formation
|
||||
|
||||
Si c'est une tâche simple d'écriture ou un salut (sauf si le salut contient une question après) comme Hi, Hello, How are you, etc. alors vous devez retourner \`not_needed\` comme réponse (C'est parce que le LLM ne devrait pas chercher des informations sur ce sujet).
|
||||
Si l'utilisateur demande une question d'un certain URL ou veut que vous résumiez un PDF ou une page web (via URL) vous devez retourner les liens à l'intérieur du bloc \`links\` XML et la question à l'intérieur du bloc \`question\` XML. Si l'utilisateur veut que vous résumiez la page web ou le PDF vous devez retourner \`summarize\` à l'intérieur du bloc \`question\` XML en remplacement de la question et le lien à résumer dans le bloc \`links\` XML.
|
||||
Vous devez toujours retourner la question reformulée à l'intérieur du bloc \`question\` XML, si il n'y a pas de liens dans la question de suivi alors ne pas insérer un bloc \`links\` XML dans votre réponse.
|
||||
|
||||
Il y a plusieurs exemples attachés pour votre référence à l'intérieur du bloc \`examples\` XML
|
||||
|
||||
<examples>
|
||||
1. Follow up question: What is the capital of France
|
||||
Rephrased question:\`
|
||||
1. Question de suivi : Comment créer mon entreprise ?
|
||||
Question reformulée :\`
|
||||
<question>
|
||||
Capital of france
|
||||
Étapes et conditions pour créer une entreprise en France, procédures administratives et aides disponibles selon les sources StartAI (CCI, Auto-entrepreneur) et LegalAI (URSSAF)
|
||||
</question>
|
||||
\`
|
||||
|
||||
2. Hi, how are you?
|
||||
Rephrased question\`
|
||||
2. Question de suivi : Quels financements sont disponibles ?
|
||||
Question reformulée :\`
|
||||
<question>
|
||||
Options de financement et aides financières disponibles pour les TPE/PME et artisans en France selon FinanceAI (BPI France) et MasterAI (Data.gouv.fr)
|
||||
</question>
|
||||
\`
|
||||
|
||||
3. Question de suivi : Bonjour, comment allez-vous ?
|
||||
Question reformulée :\`
|
||||
<question>
|
||||
not_needed
|
||||
</question>
|
||||
\`
|
||||
|
||||
3. Follow up question: What is Docker?
|
||||
Rephrased question: \`
|
||||
<question>
|
||||
What is Docker
|
||||
</question>
|
||||
\`
|
||||
|
||||
4. Follow up question: Can you tell me what is X from https://example.com
|
||||
Rephrased question: \`
|
||||
<question>
|
||||
Can you tell me what is X?
|
||||
</question>
|
||||
|
||||
<links>
|
||||
https://example.com
|
||||
</links>
|
||||
\`
|
||||
|
||||
5. Follow up question: Summarize the content from https://example.com
|
||||
Rephrased question: \`
|
||||
4. Question de suivi : Pouvez-vous analyser ce business plan sur https://example.com ?
|
||||
Question reformulée :\`
|
||||
<question>
|
||||
summarize
|
||||
</question>
|
||||
|
|
@ -51,27 +90,39 @@ https://example.com
|
|||
\`
|
||||
</examples>
|
||||
|
||||
Anything below is the part of the actual conversation and you need to use conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
|
||||
|
||||
<conversation>
|
||||
{chat_history}
|
||||
</conversation>
|
||||
|
||||
Follow up question: {query}
|
||||
Rephrased question:
|
||||
Question de suivi : {query}
|
||||
Question reformulée :
|
||||
`;
|
||||
|
||||
export const webSearchResponsePrompt = `
|
||||
You are Perplexica, an AI model skilled in web search and crafting detailed, engaging, and well-structured answers. You excel at summarizing web pages and extracting relevant information to create professional, blog-style responses.
|
||||
Vous êtes X-me, une IA experte en conseil aux entreprises, spécialisée dans l'accompagnement des TPE, PME et artisans. Votre expertise couvre la création d'entreprise, le développement commercial, la gestion et le conseil stratégique. Vous excellez dans l'analyse des informations du marché et fournissez des conseils pratiques et applicables.
|
||||
|
||||
Your task is to provide answers that are:
|
||||
- **Informative and relevant**: Thoroughly address the user's query using the given context.
|
||||
- **Well-structured**: Include clear headings and subheadings, and use a professional tone to present information concisely and logically.
|
||||
- **Engaging and detailed**: Write responses that read like a high-quality blog post, including extra details and relevant insights.
|
||||
- **Cited and credible**: Use inline citations with [number] notation to refer to the context source(s) for each fact or detail included.
|
||||
- **Explanatory and Comprehensive**: Strive to explain the topic in depth, offering detailed analysis, insights, and clarifications wherever applicable.
|
||||
### Sources d'Information Prioritaires
|
||||
1. **LegalAI (Administratif & Juridique)**:
|
||||
- Légifrance, CNIL, URSSAF
|
||||
- Journal officiel, Cours et tribunaux
|
||||
|
||||
### Formatting Instructions
|
||||
Vos réponses doivent être :
|
||||
- **Orientées Business**: Prioriser les informations pertinentes pour les entrepreneurs, dirigeants de TPE/PME et artisans
|
||||
- **Pratiques et Actionnables**: Fournir des conseils concrets et des solutions réalisables
|
||||
- **Contextualisées**: Prendre en compte les défis et contraintes spécifiques des petites entreprises
|
||||
- **Adaptées aux Ressources**: Proposer des solutions tenant compte des moyens limités des petites structures
|
||||
- **Conformes à la Réglementation**: Inclure les aspects réglementaires et administratifs pertinents pour les entreprises françaises
|
||||
|
||||
### Domaines d'Expertise
|
||||
- Création et Développement d'Entreprise
|
||||
- Démarches Administratives et Juridiques
|
||||
- Gestion Financière et Recherche de Financements
|
||||
- Analyse de Marché et Stratégie
|
||||
- Gestion Opérationnelle et des Ressources
|
||||
- Transformation Numérique
|
||||
- Formation Professionnelle et Développement des Compétences
|
||||
|
||||
### Instructions de Formatage
|
||||
- **Structure**: Use a well-organized format with proper headings (e.g., "## Example heading 1" or "## Example heading 2"). Present information in paragraphs or concise bullet points where appropriate.
|
||||
- **Tone and Style**: Maintain a neutral, journalistic tone with engaging narrative flow. Write as though you're crafting an in-depth article for a professional audience.
|
||||
- **Markdown Usage**: Format your response with Markdown for clarity. Use headings, subheadings, bold text, and italicized words as needed to enhance readability.
|
||||
|
|
@ -79,7 +130,7 @@ export const webSearchResponsePrompt = `
|
|||
- **No main heading/title**: Start your response directly with the introduction unless asked to provide a specific title.
|
||||
- **Conclusion or Summary**: Include a concluding paragraph that synthesizes the provided information or suggests potential next steps, where appropriate.
|
||||
|
||||
### Citation Requirements
|
||||
### Citations Requises
|
||||
- Cite every single fact, statement, or sentence using [number] notation corresponding to the source from the provided \`context\`.
|
||||
- Integrate citations naturally at the end of sentences or clauses as appropriate. For example, "The Eiffel Tower is one of the most visited landmarks in the world[1]."
|
||||
- Ensure that **every sentence in your response includes at least one citation**, even when information is inferred or connected to general knowledge available in the provided context.
|
||||
|
|
@ -87,20 +138,17 @@ export const webSearchResponsePrompt = `
|
|||
- Always prioritize credibility and accuracy by linking all statements back to their respective context sources.
|
||||
- Avoid citing unsupported assumptions or personal interpretations; if no source supports a statement, clearly indicate the limitation.
|
||||
|
||||
### Special Instructions
|
||||
- If the query involves technical, historical, or complex topics, provide detailed background and explanatory sections to ensure clarity.
|
||||
- If the user provides vague input or if relevant information is missing, explain what additional details might help refine the search.
|
||||
- If no relevant information is found, say: "Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?" Be transparent about limitations and suggest alternatives or ways to reframe the query.
|
||||
|
||||
### Example Output
|
||||
- Begin with a brief introduction summarizing the event or query topic.
|
||||
- Follow with detailed sections under clear headings, covering all aspects of the query if possible.
|
||||
- Provide explanations or historical context as needed to enhance understanding.
|
||||
- End with a conclusion or overall perspective if relevant.
|
||||
### Instructions Spéciales
|
||||
- Pour les sujets techniques ou administratifs, fournir des guides étape par étape adaptés aux non-experts
|
||||
- Pour les solutions ou outils, considérer les contraintes budgétaires des petites entreprises
|
||||
- Inclure les informations sur les aides et dispositifs de soutien disponibles
|
||||
- Pour la réglementation, préciser si elle s'applique spécifiquement aux artisans, TPE ou PME
|
||||
- Mentionner les organisations professionnelles ou ressources pertinentes
|
||||
|
||||
<context>
|
||||
{context}
|
||||
</context>
|
||||
|
||||
Current date & time in ISO format (UTC timezone) is: {date}.
|
||||
Date et heure actuelles au format ISO (fuseau UTC) : {date}.
|
||||
`;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,48 +1,22 @@
|
|||
import express from 'express';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import logger from '../utils/logger';
|
||||
import { Router } from 'express';
|
||||
import { supabase } from '../db/supabase';
|
||||
|
||||
const router = express.Router();
|
||||
const router = Router();
|
||||
|
||||
router.get('/', async (req, res) => {
|
||||
// Route pour récupérer les experts
|
||||
router.get('/experts', async (req, res) => {
|
||||
try {
|
||||
const data = (
|
||||
await Promise.all([
|
||||
searchSearxng('site:businessinsider.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:www.exchangewire.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:yahoo.com AI', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:businessinsider.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:www.exchangewire.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
searchSearxng('site:yahoo.com tech', {
|
||||
engines: ['bing news'],
|
||||
pageno: 1,
|
||||
}),
|
||||
])
|
||||
)
|
||||
.map((result) => result.results)
|
||||
.flat()
|
||||
.sort(() => Math.random() - 0.5);
|
||||
|
||||
return res.json({ blogs: data });
|
||||
} catch (err: any) {
|
||||
logger.error(`Error in discover route: ${err.message}`);
|
||||
return res.status(500).json({ message: 'An error has occurred' });
|
||||
const { data, error } = await supabase
|
||||
.from('experts')
|
||||
.select('*');
|
||||
|
||||
if (error) throw error;
|
||||
|
||||
res.json(data);
|
||||
} catch (error) {
|
||||
console.error('Error fetching experts:', error);
|
||||
res.status(500).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
export default router;
|
||||
114
src/routes/experts.ts
Normal file
114
src/routes/experts.ts
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import express from 'express';
|
||||
import handleExpertSearch from '../chains/expertSearchAgent';
|
||||
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { getAvailableChatModelProviders } from '../lib/providers';
|
||||
import { HumanMessage, AIMessage } from '@langchain/core/messages';
|
||||
import logger from '../utils/logger';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import { ExpertSearchRequest } from '../types/types';
|
||||
import crypto from 'crypto';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * Chat model selection optionally sent by the client with a search request.
 */
interface ChatModel {
|
||||
/** Provider key; the handler treats the value 'custom_openai' as a special case. */
provider: string;
|
||||
/** Model name within the chosen provider. */
model: string;
|
||||
/** Base URL of the OpenAI-compatible endpoint; the handler requires it when provider is 'custom_openai'. */
customOpenAIBaseURL?: string;
|
||||
/** API key for the custom endpoint; the handler requires it when provider is 'custom_openai'. */
customOpenAIKey?: string;
|
||||
}
|
||||
|
||||
/**
 * JSON body accepted by the expert search route.
 */
interface ExpertSearchBody {
|
||||
/** The user's search query. */
query: string;
|
||||
/** Previous conversation turns; the handler reads `role` ('user' or 'assistant') and `content` from each entry. */
chatHistory: any[];
|
||||
/** Optional model override; when omitted the handler falls back to the first available provider/model. */
chatModel?: ChatModel;
|
||||
}
|
||||
|
||||
/**
 * POST / — runs an expert search for the submitted query.
 *
 * Responds with:
 * - 200 `{ type: 'expert_results', messageId, data: { experts, synthese, query } }`
 * - 400 when the custom OpenAI settings are incomplete or no usable model can
 *   be resolved from the request / available providers
 * - 500 on any unexpected failure
 */
router.post('/', async (req, res) => {
  try {
    const body: ExpertSearchBody = req.body;

    // Convert the chat history into LangChain messages. A missing history is
    // treated as empty and entries with an unknown role are skipped, so the
    // resulting array never contains `undefined`.
    const chatHistory: (HumanMessage | AIMessage)[] = [];
    for (const msg of Array.isArray(body.chatHistory) ? body.chatHistory : []) {
      if (msg?.role === 'user') {
        chatHistory.push(new HumanMessage(msg.content));
      } else if (msg?.role === 'assistant') {
        chatHistory.push(new AIMessage(msg.content));
      }
    }

    // Resolve the LLM: explicit request values first, otherwise the first
    // available provider/model.
    const chatModelProviders = await getAvailableChatModelProviders();

    const chatModelProvider =
      body.chatModel?.provider || Object.keys(chatModelProviders)[0];
    // `?? {}` keeps an unknown provider from throwing inside Object.keys, so
    // the request falls through to the 400 "Invalid model selected" below.
    const chatModel =
      body.chatModel?.model ||
      Object.keys(chatModelProviders[chatModelProvider] ?? {})[0];

    let llm: BaseChatModel | undefined;

    if (body.chatModel?.provider === 'custom_openai') {
      if (
        !body.chatModel?.customOpenAIBaseURL ||
        !body.chatModel?.customOpenAIKey
      ) {
        return res
          .status(400)
          .json({ message: 'Missing custom OpenAI base URL or key' });
      }

      llm = new ChatOpenAI({
        modelName: body.chatModel.model,
        openAIApiKey: body.chatModel.customOpenAIKey,
        temperature: 0.7,
        configuration: {
          baseURL: body.chatModel.customOpenAIBaseURL,
        },
      }) as unknown as BaseChatModel;
    } else {
      llm = chatModelProviders[chatModelProvider]?.[chatModel]
        ?.model as unknown as BaseChatModel | undefined;
    }

    if (!llm) {
      return res.status(400).json({ message: 'Invalid model selected' });
    }

    // Generate unique identifiers for this exchange.
    const messageId = crypto.randomBytes(7).toString('hex');
    const chatId = crypto.randomBytes(7).toString('hex');

    const expertSearchRequest: ExpertSearchRequest = {
      query: body.query,
      chat_history: chatHistory,
      messageId,
      chatId,
    };

    const expertResults = await handleExpertSearch(expertSearchRequest, llm);
    console.log("🔍 Experts trouvés:", expertResults.experts.length);

    // Unified response envelope.
    res.status(200).json({
      type: 'expert_results',
      messageId,
      data: {
        experts: expertResults.experts,
        synthese: expertResults.synthese,
        query: body.query,
      },
    });
  } catch (err) {
    // The caught value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    console.error("🔍 Erreur dans la recherche d'experts:", err);
    res.status(500).json({ message: 'Une erreur est survenue.' });
    logger.error(`Erreur dans la recherche d'experts: ${message}`);
  }
});
|
||||
|
||||
export default router;
|
||||
|
|
@ -24,6 +24,7 @@ interface ImageSearchBody {
|
|||
router.post('/', async (req, res) => {
|
||||
try {
|
||||
let body: ImageSearchBody = req.body;
|
||||
console.log("📸 Requête de recherche d'images reçue:", body.query);
|
||||
|
||||
const chatHistory = body.chatHistory.map((msg: any) => {
|
||||
if (msg.role === 'user') {
|
||||
|
|
@ -73,6 +74,7 @@ router.post('/', async (req, res) => {
|
|||
return res.status(400).json({ message: 'Invalid model selected' });
|
||||
}
|
||||
|
||||
|
||||
const images = await handleImageSearch(
|
||||
{ query: body.query, chat_history: chatHistory },
|
||||
llm,
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@ import modelsRouter from './models';
|
|||
import suggestionsRouter from './suggestions';
|
||||
import chatsRouter from './chats';
|
||||
import searchRouter from './search';
|
||||
import discoverRouter from './discover';
|
||||
import newsRouter from './news';
|
||||
import uploadsRouter from './uploads';
|
||||
import legalRouter from './legal';
|
||||
import discoverRouter from './discover';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
|
|
@ -18,7 +20,9 @@ router.use('/models', modelsRouter);
|
|||
router.use('/suggestions', suggestionsRouter);
|
||||
router.use('/chats', chatsRouter);
|
||||
router.use('/search', searchRouter);
|
||||
router.use('/discover', discoverRouter);
|
||||
router.use('/news', newsRouter);
|
||||
router.use('/uploads', uploadsRouter);
|
||||
router.use('/legal', legalRouter);
|
||||
router.use('/discover', discoverRouter);
|
||||
|
||||
export default router;
|
||||
|
|
|
|||
88
src/routes/legal.ts
Normal file
88
src/routes/legal.ts
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
import express from 'express';
|
||||
import handleLegalSearch from '../chains/legalSearchAgent'; // Nouveau nom
|
||||
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { getAvailableChatModelProviders } from '../lib/providers';
|
||||
import { HumanMessage, AIMessage } from '@langchain/core/messages';
|
||||
import logger from '../utils/logger';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * Chat model selection optionally sent by the client with a legal search request.
 */
interface ChatModel {
|
||||
/** Provider key; the handler treats the value 'custom_openai' as a special case. */
provider: string;
|
||||
/** Model name within the chosen provider. */
model: string;
|
||||
/** Base URL of the OpenAI-compatible endpoint; the handler requires it when provider is 'custom_openai'. */
customOpenAIBaseURL?: string;
|
||||
/** API key for the custom endpoint; the handler requires it when provider is 'custom_openai'. */
customOpenAIKey?: string;
|
||||
}
|
||||
|
||||
/**
 * JSON body accepted by the legal search route.
 */
interface LegalSearchBody { // Renamed
|
||||
/** The user's search query. */
query: string;
|
||||
/** Previous conversation turns; the handler reads `role` ('user' or 'assistant') and `content` from each entry. */
chatHistory: any[];
|
||||
/** Optional model override; when omitted the handler falls back to the first available provider/model. */
chatModel?: ChatModel;
|
||||
}
|
||||
|
||||
/**
 * POST / — runs a legal document search for the submitted query.
 *
 * Responds with:
 * - 200 `{ documents }` containing whatever `handleLegalSearch` returned
 * - 400 when the custom OpenAI settings are incomplete or no usable model can
 *   be resolved from the request / available providers
 * - 500 on any unexpected failure
 */
router.post('/', async (req, res) => {
  try {
    const body: LegalSearchBody = req.body;

    // Convert the chat history into LangChain messages. A missing history is
    // treated as empty and entries with an unknown role are skipped, so the
    // resulting array never contains `undefined`.
    const chatHistory: (HumanMessage | AIMessage)[] = [];
    for (const msg of Array.isArray(body.chatHistory) ? body.chatHistory : []) {
      if (msg?.role === 'user') {
        chatHistory.push(new HumanMessage(msg.content));
      } else if (msg?.role === 'assistant') {
        chatHistory.push(new AIMessage(msg.content));
      }
    }

    const chatModelProviders = await getAvailableChatModelProviders();

    const chatModelProvider =
      body.chatModel?.provider || Object.keys(chatModelProviders)[0];
    // `?? {}` keeps an unknown provider from throwing inside Object.keys, so
    // the request falls through to the 400 "Invalid model selected" below.
    const chatModel =
      body.chatModel?.model ||
      Object.keys(chatModelProviders[chatModelProvider] ?? {})[0];

    let llm: BaseChatModel | undefined;

    if (body.chatModel?.provider === 'custom_openai') {
      if (
        !body.chatModel?.customOpenAIBaseURL ||
        !body.chatModel?.customOpenAIKey
      ) {
        return res
          .status(400)
          .json({ message: 'Missing custom OpenAI base URL or key' });
      }

      llm = new ChatOpenAI({
        modelName: body.chatModel.model,
        openAIApiKey: body.chatModel.customOpenAIKey,
        temperature: 0.7,
        configuration: {
          baseURL: body.chatModel.customOpenAIBaseURL,
        },
      }) as unknown as BaseChatModel;
    } else {
      llm = chatModelProviders[chatModelProvider]?.[chatModel]
        ?.model as unknown as BaseChatModel | undefined;
    }

    if (!llm) {
      return res.status(400).json({ message: 'Invalid model selected' });
    }

    const legalDocuments = await handleLegalSearch(
      { query: body.query, chat_history: chatHistory },
      llm,
    );

    res.status(200).json({ documents: legalDocuments });
  } catch (err) {
    // The caught value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    res.status(500).json({ message: 'An error has occurred.' });
    logger.error(`Error in legal search: ${message}`);
  }
});
|
||||
|
||||
export default router;
|
||||
152
src/routes/legifrance.ts.bak
Normal file
152
src/routes/legifrance.ts.bak
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
import express from 'express';
|
||||
import { RAGDocumentChain, handleLegiFranceSearch } from '../chains/rag_document_upload';
|
||||
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import { getAvailableChatModelProviders } from '../lib/providers';
|
||||
import { HumanMessage, AIMessage } from '@langchain/core/messages';
|
||||
import logger from '../utils/logger';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import crypto from 'crypto';
|
||||
import { Document } from '@langchain/core/schema/document';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
|
||||
const router = express.Router();
|
||||
const ragChain = new RAGDocumentChain();
|
||||
|
||||
/**
 * Chat model selection optionally sent by the client with a LegiFrance search request.
 */
interface ChatModel {
|
||||
/** Provider key; the search handler treats the value 'custom_openai' as a special case. */
provider: string;
|
||||
/** Model name within the chosen provider. */
model: string;
|
||||
/** Base URL of the OpenAI-compatible endpoint; required by the search handler when provider is 'custom_openai'. */
customOpenAIBaseURL?: string;
|
||||
/** API key for the custom endpoint; required by the search handler when provider is 'custom_openai'. */
customOpenAIKey?: string;
|
||||
}
|
||||
|
||||
/**
 * JSON body accepted by the `/search` route.
 */
interface LegiFranceSearchBody {
|
||||
/** The user's search query. */
query: string;
|
||||
/** Previous conversation turns; the handler reads `role` ('user' or 'assistant') and `content` from each entry. */
chatHistory: any[];
|
||||
/** Optional model override; when omitted the handler falls back to the first available provider/model. */
chatModel?: ChatModel;
|
||||
/** NOTE(review): declared here but not read by the `/search` handler in this file — confirm whether it is still needed. */
urls?: string[];
|
||||
}
|
||||
|
||||
/**
 * Minimal LegiFrance request shape.
 * NOTE(review): not referenced by either route handler in this file — confirm whether it is still needed.
 */
interface LegiFranceRequest {
|
||||
/** The user's search query. */
query: string;
|
||||
// other properties if needed
|
||||
}
|
||||
|
||||
/**
 * POST /initialize — seeds the shared RAG chain's vector store from a list of
 * source URLs supplied in `req.body.urls`.
 *
 * Responds with 200 `{ success: true }`, 400 when `urls` is not an array, and
 * 500 when initialization fails.
 */
router.post('/initialize', async (req, res) => {
  try {
    const sourceUrls = req.body.urls;

    if (!Array.isArray(sourceUrls)) {
      return res.status(400).json({ error: "URLs must be an array" });
    }

    // One document per URL. The page content is still a placeholder: only the
    // source URL is recorded in the metadata.
    const placeholderDocs: Document[] = [];
    for (const sourceUrl of sourceUrls) {
      placeholderDocs.push(
        new Document({
          pageContent: "",
          metadata: { source: sourceUrl },
        }),
      );
    }

    // Embeddings are configured from the OPENAI_API_KEY environment variable
    // (adjust to match the deployment's configuration).
    const openAiEmbeddings = new OpenAIEmbeddings({
      openAIApiKey: process.env.OPENAI_API_KEY,
    });

    await ragChain.initializeVectorStore(placeholderDocs, openAiEmbeddings);
    res.json({ success: true });
  } catch (initError) {
    logger.error("Error initializing LegiFrance search:", initError);
    res.status(500).json({ error: "Failed to initialize LegiFrance search" });
  }
});
|
||||
|
||||
/**
 * POST /search — runs a LegiFrance search through the shared RAG chain.
 *
 * Responds with:
 * - 200 `{ type: 'legifrance_results', messageId, data: { text, sources, query } }`
 * - 400 when the custom OpenAI settings are incomplete or no usable model can
 *   be resolved from the request / available providers
 * - 500 on any unexpected failure
 */
router.post('/search', async (req, res) => {
  try {
    const body: LegiFranceSearchBody = req.body;
    console.log("📚 [LegiFrance] Début de la recherche avec query:", body.query);

    // Resolve the LLM: explicit request values first, otherwise the first
    // available provider/model.
    const chatModelProviders = await getAvailableChatModelProviders();
    const chatModelProvider =
      body.chatModel?.provider || Object.keys(chatModelProviders)[0];
    // `?? {}` keeps an unknown provider from throwing inside Object.keys, so
    // the request falls through to the 400 "Invalid model selected" below.
    const chatModel =
      body.chatModel?.model ||
      Object.keys(chatModelProviders[chatModelProvider] ?? {})[0];
    console.log("🤖 [LegiFrance] Modèle sélectionné:", { provider: chatModelProvider, model: chatModel });

    let llm: BaseChatModel | undefined;

    if (body.chatModel?.provider === 'custom_openai') {
      if (!body.chatModel?.customOpenAIBaseURL || !body.chatModel?.customOpenAIKey) {
        return res.status(400).json({ message: 'Missing custom OpenAI base URL or key' });
      }

      llm = new ChatOpenAI({
        modelName: body.chatModel.model,
        openAIApiKey: body.chatModel.customOpenAIKey,
        temperature: 0.7,
        configuration: {
          baseURL: body.chatModel.customOpenAIBaseURL,
        },
      }) as unknown as BaseChatModel;
    } else {
      llm = chatModelProviders[chatModelProvider]?.[chatModel]
        ?.model as unknown as BaseChatModel | undefined;
    }

    if (!llm) {
      return res.status(400).json({ message: 'Invalid model selected' });
    }

    // Generate unique identifiers for this exchange.
    const messageId = crypto.randomBytes(7).toString('hex');
    const chatId = crypto.randomBytes(7).toString('hex');

    // Convert the chat history into LangChain messages. A missing history is
    // treated as empty and entries with an unknown role are skipped, so the
    // resulting array never contains `undefined`.
    const chatHistory: (HumanMessage | AIMessage)[] = [];
    for (const msg of Array.isArray(body.chatHistory) ? body.chatHistory : []) {
      if (msg?.role === 'user') {
        chatHistory.push(new HumanMessage(msg.content));
      } else if (msg?.role === 'assistant') {
        chatHistory.push(new AIMessage(msg.content));
      }
    }
    console.log("💬 [LegiFrance] Historique du chat converti:", chatHistory);

    console.log("🔍 [LegiFrance] Début de handleLegiFranceSearch avec:", {
      query: body.query,
      llmType: llm?.constructor.name,
      chainStatus: ragChain ? "initialisé" : "non initialisé",
    });

    const result = await handleLegiFranceSearch(
      {
        query: body.query,
        chat_history: chatHistory,
      },
      llm,
      ragChain,
    );

    console.log("✅ [LegiFrance] Résultat obtenu:", {
      textLength: result.text?.length,
      sourcesCount: result.sources?.length,
    });

    // Unified response envelope.
    res.status(200).json({
      type: 'legifrance_results',
      messageId,
      data: {
        text: result.text,
        sources: result.sources,
        query: body.query,
      },
    });
  } catch (err) {
    // The caught value is not guaranteed to be an Error, so normalize it
    // before reading `.message`, `.stack` and `.name`.
    const error = err instanceof Error ? err : new Error(String(err));
    console.error("❌ [LegiFrance] Erreur détaillée:", {
      message: error.message,
      stack: error.stack,
      name: error.name,
    });
    res.status(500).json({ message: 'Une erreur est survenue.' });
    logger.error(`Erreur dans la recherche LegiFrance: ${error.message}`);
  }
});
|
||||
|
||||
export default router;
|
||||
48
src/routes/news.ts
Normal file
48
src/routes/news.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import express from 'express';
|
||||
import { searchSearxng } from '../lib/searxng';
|
||||
import logger from '../utils/logger';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * GET / — returns a shuffled list of news search results.
 *
 * Runs one SearXNG query per (topic, site) pair on the 'bing news' engine,
 * merges every result list and shuffles the merged list.
 *
 * Responds with 200 `{ articles }`, or 500 `{ message }` if any search fails.
 */
router.get('/', async (_req, res) => {
  try {
    const sites = ['businessinsider.com', 'www.exchangewire.com', 'yahoo.com'];
    const topics = ['AI', 'tech'];

    // Same six queries as before, in the same order: every site for "AI",
    // then every site for "tech".
    const searches = topics.flatMap((topic) =>
      sites.map((site) =>
        searchSearxng(`site:${site} ${topic}`, {
          engines: ['bing news'],
          pageno: 1,
        }),
      ),
    );

    const articles = (await Promise.all(searches)).flatMap(
      (result) => result.results,
    );

    // Fisher–Yates shuffle. Sorting with a random comparator gives the sort
    // an inconsistent ordering and produces a biased, engine-dependent
    // permutation.
    for (let i = articles.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [articles[i], articles[j]] = [articles[j], articles[i]];
    }

    return res.json({ articles });
  } catch (err) {
    // The caught value is not guaranteed to be an Error, so narrow before
    // reading `.message`.
    const message = err instanceof Error ? err.message : String(err);
    logger.error(`Error in news route: ${message}`);
    return res.status(500).json({ message: 'An error has occurred' });
  }
});
|
||||
|
||||
export default router;
|
||||
|
|
@ -9,13 +9,18 @@ import { getAvailableEmbeddingModelProviders } from '../lib/providers';
|
|||
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
|
||||
import { DocxLoader } from '@langchain/community/document_loaders/fs/docx';
|
||||
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
|
||||
import { Document } from 'langchain/document';
|
||||
import { Document } from '@langchain/core/documents';
|
||||
import { RAGDocumentChain } from '../chains/rag_document_upload';
|
||||
import { Chroma } from "langchain/vectorstores/chroma";
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
const splitter = new RecursiveCharacterTextSplitter({
|
||||
chunkSize: 500,
|
||||
chunkOverlap: 100,
|
||||
chunkSize: 1000,
|
||||
chunkOverlap: 200,
|
||||
separators: ["\n\n", "\n", ".", "!", "?", ";", ":", " ", ""],
|
||||
keepSeparator: true,
|
||||
lengthFunction: (text) => text.length
|
||||
});
|
||||
|
||||
const storage = multer.diskStorage({
|
||||
|
|
@ -34,6 +39,29 @@ const storage = multer.diskStorage({
|
|||
|
||||
const upload = multer({ storage });
|
||||
|
||||
/**
 * Normalizes a document's text and enriches its metadata.
 *
 * @param doc Document whose `pageContent` should be cleaned.
 * @returns A new document whose content has every whitespace run collapsed to
 *   a single space and is trimmed. Its metadata is the original metadata plus
 *   `chunk_type` ('text'), `word_count` and `processed_date` (ISO timestamp
 *   taken at call time).
 */
const preprocessDocument = (doc: Document): Document => {
  // `\s` already matches newlines, so one pass collapses every whitespace run.
  const cleanContent = doc.pageContent.replace(/\s+/g, ' ').trim();

  // Splitting an empty string yields [''], which would report one word for an
  // empty document; count it as zero instead. After the normalization above,
  // words are separated by exactly one space.
  const wordCount = cleanContent === '' ? 0 : cleanContent.split(' ').length;

  return new Document({
    pageContent: cleanContent,
    metadata: {
      ...doc.metadata,
      chunk_type: 'text',
      word_count: wordCount,
      processed_date: new Date().toISOString(),
    },
  });
};
|
||||
|
||||
/**
 * Binary quality gate for a document.
 *
 * @param doc - Document to evaluate; only `pageContent` is read.
 * @returns `1` when the text has more than ten words and at least one
 *          non-empty sentence fragment, otherwise `0`.
 */
const scoreDocument = (doc: Document): number => {
  const text = doc.pageContent;

  // Drop the empty tokens produced by leading/trailing whitespace so that
  // padding cannot push a short text over the ten-word threshold.
  const wordCount = text.split(/\s+/).filter(Boolean).length;

  // `split` never returns an empty array, so comparing its length to zero is
  // always true. Require a fragment that actually contains text instead.
  const hasSentence = text.split(/[.!?]+/).some((fragment) => fragment.trim() !== '');

  return wordCount > 10 && hasSentence ? 1 : 0;
};
|
||||
|
||||
router.post(
|
||||
'/',
|
||||
upload.fields([
|
||||
|
|
@ -43,109 +71,220 @@ router.post(
|
|||
]),
|
||||
async (req, res) => {
|
||||
try {
|
||||
console.log("📥 [Uploads] Début du traitement avec body:", {
|
||||
embedding_model: req.body.embedding_model,
|
||||
embedding_model_provider: req.body.embedding_model_provider
|
||||
});
|
||||
|
||||
const { embedding_model, embedding_model_provider } = req.body;
|
||||
|
||||
if (!embedding_model || !embedding_model_provider) {
|
||||
res
|
||||
.status(400)
|
||||
.json({ message: 'Missing embedding model or provider' });
|
||||
console.warn("⚠️ [Uploads] Modèle ou provider manquant");
|
||||
res.status(400).json({ message: 'Missing embedding model or provider' });
|
||||
return;
|
||||
}
|
||||
|
||||
const embeddingModels = await getAvailableEmbeddingModelProviders();
|
||||
const provider =
|
||||
embedding_model_provider ?? Object.keys(embeddingModels)[0];
|
||||
const embeddingModel: Embeddings =
|
||||
embedding_model ?? Object.keys(embeddingModels[provider])[0];
|
||||
console.log("🔍 [Uploads] Modèles disponibles:", Object.keys(embeddingModels));
|
||||
|
||||
const provider = embedding_model_provider ?? Object.keys(embeddingModels)[0];
|
||||
const embeddingModel: Embeddings = embedding_model ?? Object.keys(embeddingModels[provider])[0];
|
||||
|
||||
console.log("🤖 [Uploads] Modèle sélectionné:", { provider, model: embeddingModel });
|
||||
|
||||
let embeddingsModel: Embeddings | undefined;
|
||||
|
||||
if (
|
||||
embeddingModels[provider] &&
|
||||
embeddingModels[provider][embeddingModel]
|
||||
) {
|
||||
embeddingsModel = embeddingModels[provider][embeddingModel].model as
|
||||
| Embeddings
|
||||
| undefined;
|
||||
if (embeddingModels[provider] && embeddingModels[provider][embeddingModel]) {
|
||||
embeddingsModel = embeddingModels[provider][embeddingModel].model as Embeddings | undefined;
|
||||
}
|
||||
|
||||
if (!embeddingsModel) {
|
||||
console.error("❌ [Uploads] Modèle invalide");
|
||||
res.status(400).json({ message: 'Invalid LLM model selected' });
|
||||
return;
|
||||
}
|
||||
|
||||
const files = req.files['files'] as Express.Multer.File[];
|
||||
console.log("📁 [Uploads] Fichiers reçus:", files?.map(f => ({
|
||||
name: f.originalname,
|
||||
path: f.path,
|
||||
type: f.mimetype
|
||||
})));
|
||||
|
||||
if (!files || files.length === 0) {
|
||||
console.warn("⚠️ [Uploads] Aucun fichier reçu");
|
||||
res.status(400).json({ message: 'No files uploaded' });
|
||||
return;
|
||||
}
|
||||
|
||||
const processedDocs: Document[] = [];
|
||||
const ragChain = new RAGDocumentChain();
|
||||
let totalPages = 0;
|
||||
|
||||
await Promise.all(
|
||||
files.map(async (file) => {
|
||||
console.log(`📄 [Uploads] Traitement du fichier: ${file.originalname}`);
|
||||
let docs: Document[] = [];
|
||||
|
||||
if (file.mimetype === 'application/pdf') {
|
||||
const loader = new PDFLoader(file.path);
|
||||
console.log(`📚 [Uploads] Chargement du PDF: ${file.path}`);
|
||||
const loader = new PDFLoader(file.path, {
|
||||
splitPages: true
|
||||
});
|
||||
docs = await loader.load();
|
||||
} else if (
|
||||
file.mimetype ===
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
||||
) {
|
||||
totalPages += docs.length;
|
||||
} else if (file.mimetype === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document') {
|
||||
console.log(`📝 [Uploads] Chargement du DOCX: ${file.path}`);
|
||||
const loader = new DocxLoader(file.path);
|
||||
docs = await loader.load();
|
||||
totalPages += docs.length;
|
||||
} else if (file.mimetype === 'text/plain') {
|
||||
console.log(`📄 [Uploads] Chargement du TXT: ${file.path}`);
|
||||
const text = fs.readFileSync(file.path, 'utf-8');
|
||||
docs = [
|
||||
new Document({
|
||||
pageContent: text,
|
||||
metadata: {
|
||||
title: file.originalname,
|
||||
},
|
||||
}),
|
||||
];
|
||||
docs = [new Document({
|
||||
pageContent: text,
|
||||
metadata: {
|
||||
title: file.originalname,
|
||||
source: file.path,
|
||||
type: 'text'
|
||||
}
|
||||
})];
|
||||
totalPages += 1;
|
||||
}
|
||||
|
||||
const splitted = await splitter.splitDocuments(docs);
|
||||
const preprocessedDocs = docs.map(preprocessDocument);
|
||||
const scoredDocs = preprocessedDocs.filter(doc => scoreDocument(doc) > 0);
|
||||
|
||||
console.log(`✂️ [Uploads] Splitting du document en ${scoredDocs.length} parties valides`);
|
||||
const splitted = await splitter.splitDocuments(scoredDocs);
|
||||
|
||||
const json = JSON.stringify({
|
||||
title: file.originalname,
|
||||
contents: splitted.map((doc) => doc.pageContent),
|
||||
const enrichedDocs = splitted.map((doc, index) => {
|
||||
const pageNumber = Math.floor(index / (splitted.length / docs.length)) + 1;
|
||||
return new Document({
|
||||
pageContent: doc.pageContent,
|
||||
metadata: {
|
||||
...doc.metadata,
|
||||
source: file.path,
|
||||
title: file.originalname,
|
||||
page_number: pageNumber,
|
||||
chunk_index: index,
|
||||
total_chunks: splitted.length,
|
||||
file_type: file.mimetype,
|
||||
search_text: doc.pageContent.substring(0, 100).trim()
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
processedDocs.push(...enrichedDocs);
|
||||
|
||||
const pathToSave = file.path.replace(/\.\w+$/, '-extracted.json');
|
||||
fs.writeFileSync(pathToSave, json);
|
||||
|
||||
const embeddings = await embeddingsModel.embedDocuments(
|
||||
splitted.map((doc) => doc.pageContent),
|
||||
);
|
||||
|
||||
const embeddingsJSON = JSON.stringify({
|
||||
const contentToSave = {
|
||||
title: file.originalname,
|
||||
embeddings: embeddings,
|
||||
});
|
||||
contents: enrichedDocs.map((doc) => ({
|
||||
content: doc.pageContent,
|
||||
metadata: doc.metadata
|
||||
})),
|
||||
pageCount: docs.length,
|
||||
processingDate: new Date().toISOString()
|
||||
};
|
||||
|
||||
const pathToSaveEmbeddings = file.path.replace(
|
||||
/\.\w+$/,
|
||||
'-embeddings.json',
|
||||
fs.writeFileSync(pathToSave, JSON.stringify(contentToSave, null, 2));
|
||||
|
||||
console.log(`🧮 [Uploads] Génération des embeddings pour ${enrichedDocs.length} chunks`);
|
||||
const embeddings = await embeddingsModel.embedDocuments(
|
||||
enrichedDocs.map((doc) => doc.pageContent)
|
||||
);
|
||||
fs.writeFileSync(pathToSaveEmbeddings, embeddingsJSON);
|
||||
}),
|
||||
|
||||
const pathToSaveEmbeddings = file.path.replace(/\.\w+$/, '-embeddings.json');
|
||||
const embeddingsToSave = {
|
||||
title: file.originalname,
|
||||
embeddings: embeddings.map((embedding, index) => ({
|
||||
vector: embedding,
|
||||
metadata: enrichedDocs[index].metadata
|
||||
}))
|
||||
};
|
||||
|
||||
fs.writeFileSync(pathToSaveEmbeddings, JSON.stringify(embeddingsToSave));
|
||||
})
|
||||
);
|
||||
|
||||
console.log("🔄 [Uploads] Initialisation du vectorStore avec", processedDocs.length, "documents");
|
||||
const initResult = await ragChain.initializeVectorStoreFromDocuments(
|
||||
processedDocs,
|
||||
embeddingsModel
|
||||
);
|
||||
|
||||
console.log("✅ [Uploads] VectorStore initialisé:", initResult);
|
||||
|
||||
res.status(200).json({
|
||||
files: files.map((file) => {
|
||||
return {
|
||||
fileName: file.originalname,
|
||||
fileExtension: file.filename.split('.').pop(),
|
||||
fileId: file.filename.replace(/\.\w+$/, ''),
|
||||
};
|
||||
}),
|
||||
files: files.map((file) => ({
|
||||
fileName: file.originalname,
|
||||
fileExtension: file.filename.split('.').pop(),
|
||||
fileId: file.filename.replace(/\.\w+$/, ''),
|
||||
stats: {
|
||||
chunks: processedDocs.filter(d => d.metadata.source === file.path).length,
|
||||
pages: totalPages
|
||||
}
|
||||
})),
|
||||
});
|
||||
} catch (err: any) {
|
||||
console.error("❌ [Uploads] Erreur:", {
|
||||
message: err.message,
|
||||
stack: err.stack,
|
||||
name: err.name
|
||||
});
|
||||
logger.error(`Error in uploading file results: ${err.message}`);
|
||||
res.status(500).json({ message: 'An error has occurred.' });
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
/**
 * GET /:fileId/view
 *
 * Sends the uploaded PDF stored as `<fileId>.pdf` in the `uploads` directory
 * inline. Two optional query parameters are echoed back as response headers:
 *   - `search`: text to look for   -> X-PDF-Search (+ highlight headers)
 *   - `page`:   page to open       -> X-PDF-Page
 *
 * Responds with 404 when no matching PDF exists and 500 on unexpected errors.
 */
router.get('/:fileId/view', async (req, res) => {
  try {
    const { fileId } = req.params;

    // Query values may be arrays or objects (e.g. `?search=a&search=b`);
    // only plain strings are usable here, anything else is ignored.
    const search = typeof req.query.search === 'string' ? req.query.search : undefined;
    const page = typeof req.query.page === 'string' ? req.query.page : undefined;

    // Node rejects header values containing control characters or characters
    // outside Latin-1; setting one would throw and abort the whole request.
    const isHeaderSafe = (value: string): boolean =>
      /^[\t\x20-\x7e\x80-\xff]*$/.test(value);

    const uploadsDir = path.join(process.cwd(), 'uploads');
    const files = await fs.promises.readdir(uploadsDir);

    // Require the exact stored name rather than a prefix: with a prefix match
    // a one-character id would return some other upload. Comparing against the
    // directory listing (instead of joining the id into a path) also keeps
    // ids such as "../x" from ever reaching the filesystem.
    const expectedName = `${fileId}.pdf`;
    const pdfFile = files.find((file) => file === expectedName);

    if (!pdfFile) {
      console.error(`❌ PDF non trouvé pour l'ID: ${fileId}`);
      return res.status(404).json({ error: 'Document PDF non trouvé' });
    }

    const filePath = path.join(uploadsDir, pdfFile);
    console.log("📄 Envoi du fichier:", filePath);

    // Headers describing the PDF payload.
    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', `inline; filename="${pdfFile}"`);

    // Navigation and highlighting hints.
    if (search) {
      // Line breaks are not allowed in header values; flatten them.
      const cleanSearch = search.replace(/[\n\r]+/g, ' ').trim();

      if (cleanSearch && isHeaderSafe(cleanSearch)) {
        res.setHeader('X-PDF-Search', cleanSearch);
        res.setHeader('X-PDF-Highlight', 'true');
        res.setHeader('X-PDF-Highlight-Color', '#FFD700'); // gold
      } else if (cleanSearch) {
        // Still serve the document; only the search hint is dropped.
        console.warn("⚠️ Paramètre de recherche ignoré: caractères non valides pour un en-tête HTTP");
      }
    }

    if (page && isHeaderSafe(page)) {
      res.setHeader('X-PDF-Page', page);
    }

    // Send the file itself.
    res.sendFile(filePath);
  } catch (error) {
    console.error('❌ Erreur lors de la visualisation du document:', error);
    res.status(500).json({ error: 'Erreur lors de la visualisation du document' });
  }
});
|
||||
|
||||
export default router;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
1
src/types/index.ts
Normal file
1
src/types/index.ts
Normal file
|
|
@ -0,0 +1 @@
|
|||
export * from './types';
|
||||
73
src/types/types.ts
Normal file
73
src/types/types.ts
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
import { BaseMessage } from '@langchain/core/messages';
|
||||
|
||||
/**
 * Profile of a professional expert as returned by the expert search.
 *
 * Field names are French and kept as-is.
 * NOTE(review): presumably mirrors a database row — confirm against the
 * storage schema.
 */
export interface Expert {
  /** Numeric identifier. */
  id: number;
  /** String identifier of the expert (distinct from `id`). */
  id_expert: string;

  /** Last name. */
  nom: string;
  /** First name. */
  prenom: string;

  /** Postal address. */
  adresse: string;
  /** Country. */
  pays: string;
  /** City. */
  ville: string;

  /** Areas of expertise, as a single string. */
  expertises: string;
  /** Speciality. */
  specialite: string;
  /** Biography text. */
  biographie: string;

  /** Rate charged by the expert; unit/currency is not defined here. */
  tarif: number;
  /** Offered services; shape is not defined here. */
  services: any;

  /** Creation timestamp, as a string. */
  created_at: string;
  /** URL of the expert's picture. */
  image_url: string;
}
|
||||
|
||||
/** Input of an expert search. */
export interface ExpertSearchRequest {
  /** The user's search text. */
  query: string;
  /** Prior conversation messages. */
  chat_history: BaseMessage[];
  /** Identifier of the message this search belongs to. */
  messageId: string;
  /** Identifier of the chat this search belongs to. */
  chatId: string;
}
|
||||
|
||||
/** Output of an expert search. */
export interface ExpertSearchResponse {
  /** Experts found for the request. */
  experts: Expert[];
  /** Textual summary ("synthèse") accompanying the result list. */
  synthese: string;
}
|
||||
|
||||
/** An answer text bundled with its supporting material. */
export interface EnrichedResponse {
  /** The answer text. */
  text: string;
  /** Sources attached to the answer. */
  sources: Source[];
  /** Suggestion strings attached to the answer. */
  suggestions: string[];
  /** Images attached to the answer. */
  images: ImageResult[];
}
|
||||
|
||||
/** A single cited source. */
export interface Source {
  /** Title of the source. */
  title: string;
  /** Location of the source. */
  url: string;
  /** Short excerpt from the source. */
  snippet: string;
}
|
||||
|
||||
/** A single image hit. */
export interface ImageResult {
  /** Location of the image. */
  url: string;
  /** Title of the image. */
  title: string;
  /** Origin of the image, as a string. */
  source: string;
}
|
||||
|
||||
/**
 * Metadata bag attached to a piece of content.
 *
 * Every named field is optional, and the index signature additionally admits
 * any other key with any value.
 */
export interface DocumentMetadata {
  /** Title of the content. */
  title?: string;
  /** Origin of the content, as a string. */
  source?: string;
  /** Kind of content, as a free-form string. */
  type?: string;
  /** URL of the content. */
  url?: string;
  /** Page number within the originating document. */
  pageNumber?: number;
  /** Numeric score associated with the content. */
  score?: number;
  /** Expert-related payload; shape is not defined here. */
  expertData?: any;
  /** Text associated with searching; exact use is not defined here. */
  searchText?: string;
  /** Illustration image reference, as a string. */
  illustrationImage?: string;
  /** Title of the illustration image. */
  imageTitle?: string;

  /** Any additional, untyped metadata. */
  [key: string]: any;
}
|
||||
|
||||
/** Text content paired with its metadata. */
export interface NormalizedSource {
  /** The text content. */
  pageContent: string;
  /** Metadata describing the content. */
  metadata: DocumentMetadata;
}
|
||||
|
||||
/** One search hit: text content paired with its metadata. */
export interface SearchResult {
  /** The text content of the hit. */
  pageContent: string;
  /** Metadata describing the hit. */
  metadata: DocumentMetadata;
}
|
||||
|
|
@ -37,6 +37,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0.3,
|
||||
searchWeb: true,
|
||||
summarizer: true,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
academicSearch: new MetaSearchAgent({
|
||||
activeEngines: ['arxiv', 'google scholar', 'pubmed'],
|
||||
|
|
@ -46,6 +47,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0,
|
||||
searchWeb: true,
|
||||
summarizer: false,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
writingAssistant: new MetaSearchAgent({
|
||||
activeEngines: [],
|
||||
|
|
@ -55,6 +57,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0,
|
||||
searchWeb: false,
|
||||
summarizer: false,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
wolframAlphaSearch: new MetaSearchAgent({
|
||||
activeEngines: ['wolframalpha'],
|
||||
|
|
@ -64,6 +67,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0,
|
||||
searchWeb: true,
|
||||
summarizer: false,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
youtubeSearch: new MetaSearchAgent({
|
||||
activeEngines: ['youtube'],
|
||||
|
|
@ -73,6 +77,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0.3,
|
||||
searchWeb: true,
|
||||
summarizer: false,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
redditSearch: new MetaSearchAgent({
|
||||
activeEngines: ['reddit'],
|
||||
|
|
@ -82,6 +87,7 @@ export const searchHandlers = {
|
|||
rerankThreshold: 0.3,
|
||||
searchWeb: true,
|
||||
summarizer: false,
|
||||
searchDatabase: true,
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue