feat(app): Introduce quality mode. Improve functionality of balanced mode using readability to get page content and pull relevant excerpts

feat(UI): Show progress during inferrence
feat(security): Don't show API keys in the UI any more
feat(models): Support Claude 4 Anthropic models
This commit is contained in:
Willie Zutz 2025-05-23 18:03:35 -06:00
parent 288120dc1d
commit c47a630372
17 changed files with 2142 additions and 818 deletions

View file

@ -3,6 +3,9 @@ import { htmlToText } from 'html-to-text';
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
import { Document } from '@langchain/core/documents';
import pdfParse from 'pdf-parse';
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
import fetch from 'node-fetch';
export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
const splitter = new RecursiveCharacterTextSplitter();
@ -97,3 +100,55 @@ export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => {
return docs;
};
export const getWebContent = async (
url: string,
getHtml: boolean = false,
): Promise<Document | null> => {
try {
const response = await fetch(url, { timeout: 5000 });
const html = await response.text();
// Create a DOM from the fetched HTML
const dom = new JSDOM(html, { url });
// Get title before we modify the DOM
const originalTitle = dom.window.document.title;
// Use Readability to parse the article content
const reader = new Readability(dom.window.document, { charThreshold: 25 });
const article = reader.parse();
if (!article) {
console.warn(`Failed to parse article content for URL: ${url}`);
return null;
}
// Normalize the text content by removing extra spaces and newlines. Iterate through the lines one by one and throw out the ones that are empty or contain only whitespace.
const normalizedText =
article?.textContent
?.split('\n')
.map((line) => line.trim())
.filter((line) => line.length > 0)
.join('\n') || '';
// Create a Document with the parsed content
return new Document({
pageContent: normalizedText || '',
metadata: {
html: getHtml ? article.content : undefined,
title: article.title || originalTitle,
url: url,
excerpt: article.excerpt || undefined,
byline: article.byline || undefined,
siteName: article.siteName || undefined,
readingTime: article.length
? Math.ceil(article.length / 1000)
: undefined,
},
});
} catch (error) {
console.error(`Error fetching/parsing URL ${url}:`); //, error);
return null;
}
};