This commit is contained in:
Eli Grinfeld, MBA 2025-02-07 15:38:49 +08:00 committed by GitHub
commit c616072732
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
103 changed files with 31225 additions and 1370 deletions

View file

@ -1,38 +1,16 @@
// HTTP + WebSocket server bootstrap for the API.
import { startWebSocketServer } from './websocket';
import express from 'express';
import cors from 'cors';
import http from 'http';
import routes from './routes';
import { getPort } from './config';
import logger from './utils/logger';
// Port is resolved by the config module (env/toml backed).
const port = getPort();
import searchRoutes from './routes/search';
import businessRoutes from './routes/business';
const app = express();
// Shared http.Server so the WebSocket server can attach to the same port.
const server = http.createServer(app);
// Allow any origin; tighten before exposing this service publicly.
const corsOptions = {
  origin: '*',
};
app.use(cors(corsOptions));
// Middleware
// NOTE(review): cors() is registered twice — once with corsOptions above and
// once with defaults here. This looks like a merge artifact; one
// registration suffices. Confirm which configuration is intended.
app.use(cors());
app.use(express.json());
// Aggregate router plus a simple health-check endpoint at GET /api.
app.use('/api', routes);
app.get('/api', (_, res) => {
  res.status(200).json({ status: 'ok' });
});
// Routes
// NOTE(review): search/business routers are mounted directly here — confirm
// they are not also mounted inside ./routes, which would double-register them.
app.use('/api/search', searchRoutes);
app.use('/api/business', businessRoutes);
server.listen(port, () => {
  logger.info(`Server is running on port ${port}`);
});
// WebSockets share the HTTP server's port.
startWebSocketServer(server);
// Last-resort process-level error logging; the process keeps running.
process.on('uncaughtException', (err, origin) => {
  logger.error(`Uncaught Exception at ${origin}: ${err}`);
});
process.on('unhandledRejection', (reason, promise) => {
  logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
});
export default app;

View file

@ -77,3 +77,16 @@ export const updateConfig = (config: RecursivePartial<Config>) => {
toml.stringify(config),
);
};
// Runtime configuration for the local Ollama LLM endpoint.
// NOTE(review): this file also defines a toml-backed updateConfig above —
// confirm this env-based `config` export does not clash with it.
export const config = {
  ollama: {
    url: process.env.OLLAMA_URL || 'http://localhost:11434', // Ollama server endpoint
    model: process.env.OLLAMA_MODEL || 'mistral',            // default model name
    options: {
      temperature: 0.1, // low temperature → near-deterministic completions
      top_p: 0.9,       // nucleus-sampling cutoff
      timeout: 30000 // 30 seconds timeout
    }
  },
  // ... other config
};

40
src/config/env.ts Normal file
View file

@ -0,0 +1,40 @@
import dotenv from 'dotenv';

// Load environment variables from .env before anything reads process.env.
dotenv.config();

// Centralised, typed snapshot of the environment. Entries with `||` fallbacks
// are optional in the environment; required ones are validated below.
const env = {
  // Supabase Configuration
  SUPABASE_URL: process.env.SUPABASE_URL || '',
  SUPABASE_KEY: process.env.SUPABASE_KEY || '',
  // Server Configuration
  PORT: parseInt(process.env.PORT || '3001', 10),
  NODE_ENV: process.env.NODE_ENV || 'development',
  // Search Configuration
  MAX_RESULTS_PER_QUERY: parseInt(process.env.MAX_RESULTS_PER_QUERY || '50', 10),
  CACHE_DURATION_HOURS: parseInt(process.env.CACHE_DURATION_HOURS || '24', 10),
  CACHE_DURATION_DAYS: parseInt(process.env.CACHE_DURATION_DAYS || '7', 10),
  // SearxNG Configuration
  SEARXNG_URL: process.env.SEARXNG_URL || 'http://localhost:4000',
  // Ollama Configuration
  OLLAMA_URL: process.env.OLLAMA_URL || 'http://localhost:11434',
  OLLAMA_MODEL: process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b',
  // Hugging Face Configuration
  HUGGING_FACE_API_KEY: process.env.HUGGING_FACE_API_KEY || ''
};

// Fail fast when a required variable is absent. The check reads the raw
// process.env: the previous version read from `env`, where SEARXNG_URL always
// carries a localhost fallback, so that entry could never fail (dead check).
// NOTE(review): this makes a missing SEARXNG_URL fatal even though `env` has
// a default for it — confirm that is the intended contract.
const requiredEnvVars = ['SUPABASE_URL', 'SUPABASE_KEY', 'SEARXNG_URL'];
for (const envVar of requiredEnvVars) {
  if (!process.env[envVar]) {
    throw new Error(`Missing required environment variable: ${envVar}`);
  }
}

export { env };

77
src/config/index.ts Normal file
View file

@ -0,0 +1,77 @@
import dotenv from 'dotenv';
import path from 'path';
// Load .env file
dotenv.config({ path: path.resolve(__dirname, '../../.env') });
// Shape of the application's runtime configuration (see `config` below).
export interface Config {
  supabase: {
    url: string;     // Supabase project URL
    anonKey: string; // public (anon) API key
  };
  server: {
    port: number;
    nodeEnv: string; // e.g. 'development' | 'production' (free-form string)
  };
  search: {
    maxResultsPerQuery: number;
    cacheDurationHours: number;
    searxngUrl?: string; // optional SearxNG endpoint; undefined when unset
  };
  rateLimit: {
    windowMs: number;    // sliding-window length in milliseconds
    maxRequests: number; // allowed requests per window
  };
  security: {
    corsOrigin: string;
    jwtSecret: string;
  };
  proxy?: { // optional outbound proxy endpoints
    http?: string;
    https?: string;
  };
  logging: {
    level: string; // e.g. 'info', 'debug'
  };
}
// Concrete configuration loaded from the environment with dev-friendly defaults.
const config: Config = {
  supabase: {
    url: process.env.SUPABASE_URL || '',
    anonKey: process.env.SUPABASE_ANON_KEY || '',
  },
  server: {
    port: parseInt(process.env.PORT || '3000', 10),
    nodeEnv: process.env.NODE_ENV || 'development',
  },
  search: {
    maxResultsPerQuery: parseInt(process.env.MAX_RESULTS_PER_QUERY || '20', 10),
    cacheDurationHours: parseInt(process.env.CACHE_DURATION_HOURS || '24', 10),
    searxngUrl: process.env.SEARXNG_URL // optional; stays undefined when unset
  },
  rateLimit: {
    windowMs: parseInt(process.env.RATE_LIMIT_WINDOW_MS || '900000', 10), // 15 minutes
    maxRequests: parseInt(process.env.RATE_LIMIT_MAX_REQUESTS || '100', 10),
  },
  security: {
    corsOrigin: process.env.CORS_ORIGIN || 'http://localhost:3000',
    // NOTE(review): insecure placeholder default — a real JWT_SECRET must be
    // set outside local development.
    jwtSecret: process.env.JWT_SECRET || 'your_jwt_secret_key',
  },
  // `proxy` (optional on Config) is intentionally not populated here.
  logging: {
    level: process.env.LOG_LEVEL || 'info',
  },
};
// Validate required configuration
// Fails fast at import time when Supabase credentials are missing.
const validateConfig = () => {
  if (!config.supabase.url) {
    throw new Error('SUPABASE_URL is required');
  }
  if (!config.supabase.anonKey) {
    throw new Error('SUPABASE_ANON_KEY is required');
  }
};
validateConfig();
export { config };

24
src/index.ts Normal file
View file

@ -0,0 +1,24 @@
import './config/env'; // Load environment variables first
import { startServer } from './server';
import { isPortAvailable } from './utils/portCheck';
import { testConnection } from './lib/supabase';
// Port to bind, parsed to a number so the availability check receives a
// numeric port (process.env values are strings; previously PORT could be
// `string | number`). Fallback mirrors src/config/env.ts.
const PORT = parseInt(process.env.PORT || '3001', 10);

/**
 * Boot sequence: verify the port is free and Supabase is reachable, then
 * start the HTTP server. Exits the process on any precondition failure.
 */
const init = async () => {
  if (!await isPortAvailable(PORT)) {
    console.error(`Port ${PORT} is in use. Please try a different port or free up the current one.`);
    process.exit(1);
  }
  // Test Supabase connection
  const isConnected = await testConnection();
  if (!isConnected) {
    console.error('Failed to connect to Supabase. Please check your configuration.');
    process.exit(1);
  }
  startServer();
};

init().catch(console.error);

116
src/lib/categories.ts Normal file
View file

@ -0,0 +1,116 @@
/** Top-level directory category shown in the UI. */
export interface Category {
  id: string;   // stable slug used in routes/queries
  name: string; // human-readable label
  icon: string; // emoji rendered next to the name
  subcategories: SubCategory[];
}

/** Narrower service type nested under a Category. */
export interface SubCategory {
  id: string;   // stable slug
  name: string; // human-readable label
}
// Static business-directory taxonomy: eight top-level categories, each with
// the subcategory slugs used for search filtering. IDs are stable slugs that
// must not change once referenced by routes or stored data.
export const categories: Category[] = [
  {
    id: 'real-estate-pros',
    name: 'Real Estate Professionals',
    icon: '🏢',
    subcategories: [
      { id: 'wholesalers', name: 'Real Estate Wholesalers' },
      { id: 'agents', name: 'Real Estate Agents' },
      { id: 'attorneys', name: 'Real Estate Attorneys' },
      { id: 'scouts', name: 'Property Scouts' },
      { id: 'brokers', name: 'Real Estate Brokers' },
      { id: 'consultants', name: 'Real Estate Consultants' }
    ]
  },
  {
    id: 'legal-title',
    name: 'Legal & Title Services',
    icon: '⚖️',
    subcategories: [
      { id: 'title-companies', name: 'Title Companies' },
      { id: 'closing-attorneys', name: 'Closing Attorneys' },
      { id: 'zoning-consultants', name: 'Zoning Consultants' },
      { id: 'probate-specialists', name: 'Probate Specialists' },
      { id: 'eviction-specialists', name: 'Eviction Specialists' }
    ]
  },
  {
    id: 'financial',
    name: 'Financial Services',
    icon: '💰',
    subcategories: [
      { id: 'hard-money', name: 'Hard Money Lenders' },
      { id: 'private-equity', name: 'Private Equity Investors' },
      { id: 'mortgage-brokers', name: 'Mortgage Brokers' },
      { id: 'tax-advisors', name: 'Tax Advisors' },
      { id: 'appraisers', name: 'Appraisers' }
    ]
  },
  {
    id: 'contractors',
    name: 'Specialist Contractors',
    icon: '🔨',
    subcategories: [
      { id: 'general', name: 'General Contractors' },
      { id: 'plumbers', name: 'Plumbers' },
      { id: 'electricians', name: 'Electricians' },
      { id: 'hvac', name: 'HVAC Technicians' },
      { id: 'roofers', name: 'Roofers' },
      { id: 'foundation', name: 'Foundation Specialists' },
      { id: 'asbestos', name: 'Asbestos Removal' },
      { id: 'mold', name: 'Mold Remediation' }
    ]
  },
  {
    id: 'property-services',
    name: 'Property Services',
    icon: '🏠',
    subcategories: [
      { id: 'surveyors', name: 'Surveyors' },
      { id: 'inspectors', name: 'Inspectors' },
      { id: 'property-managers', name: 'Property Managers' },
      { id: 'environmental', name: 'Environmental Consultants' },
      { id: 'junk-removal', name: 'Junk Removal Services' },
      { id: 'cleaning', name: 'Property Cleaning' }
    ]
  },
  {
    id: 'marketing',
    name: 'Marketing & Lead Gen',
    icon: '📢',
    subcategories: [
      { id: 'direct-mail', name: 'Direct Mail Services' },
      { id: 'social-media', name: 'Social Media Marketing' },
      { id: 'seo', name: 'SEO Specialists' },
      { id: 'ppc', name: 'PPC Advertising' },
      { id: 'lead-gen', name: 'Lead Generation' },
      { id: 'skip-tracing', name: 'Skip Tracing Services' }
    ]
  },
  {
    id: 'data-tech',
    name: 'Data & Technology',
    icon: '💻',
    subcategories: [
      { id: 'data-providers', name: 'Property Data Providers' },
      { id: 'crm', name: 'CRM Systems' },
      { id: 'valuation', name: 'Valuation Tools' },
      { id: 'virtual-tours', name: 'Virtual Tour Services' },
      { id: 'automation', name: 'Automation Tools' }
    ]
  },
  {
    id: 'specialty',
    name: 'Specialty Services',
    icon: '🎯',
    subcategories: [
      { id: 'auction', name: 'Auction Companies' },
      { id: 'relocation', name: 'Relocation Services' },
      { id: 'staging', name: 'Home Staging' },
      { id: 'photography', name: 'Real Estate Photography' },
      { id: 'virtual-assistant', name: 'Virtual Assistants' }
    ]
  }
];

51
src/lib/db/optOutDb.ts Normal file
View file

@ -0,0 +1,51 @@
// NOTE: better-sqlite3's constructor is its *default* export. The previous
// named import ({ Database }) resolved to a type only and crashed at runtime
// ("Database is not a constructor").
import Database from 'better-sqlite3';
import path from 'path';

/** One opt-out record: a domain whose owner asked to be excluded from scraping. */
interface OptOutEntry {
  domain: string;  // primary key — one entry per domain
  email: string;   // requester's contact address
  reason?: string; // optional free-text reason
  timestamp: Date; // when the request was made
}

/**
 * SQLite-backed persistence for scraping opt-out requests.
 * The database file lives at <repo>/data/optout.db.
 */
export class OptOutDatabase {
  private db: Database.Database;

  constructor() {
    this.db = new Database(path.join(__dirname, '../../../data/optout.db'));
    this.initializeDatabase();
  }

  // Create the table and index on first use; idempotent via IF NOT EXISTS.
  private initializeDatabase() {
    this.db.exec(`
CREATE TABLE IF NOT EXISTS opt_outs (
domain TEXT PRIMARY KEY,
email TEXT NOT NULL,
reason TEXT,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_domain ON opt_outs(domain);
`);
  }

  /** Insert or overwrite the opt-out entry for a domain. */
  async addOptOut(entry: OptOutEntry): Promise<void> {
    const stmt = this.db.prepare(
      'INSERT OR REPLACE INTO opt_outs (domain, email, reason, timestamp) VALUES (?, ?, ?, ?)'
    );
    // better-sqlite3 rejects `undefined` bind values, so map a missing
    // reason to NULL explicitly.
    stmt.run(entry.domain, entry.email, entry.reason ?? null, entry.timestamp.toISOString());
  }

  /** True when the domain has an opt-out on file. */
  isOptedOut(domain: string): boolean {
    const stmt = this.db.prepare('SELECT 1 FROM opt_outs WHERE domain = ?');
    return stmt.get(domain) !== undefined;
  }

  /** Delete the opt-out entry for a domain (no-op when absent). */
  removeOptOut(domain: string): void {
    const stmt = this.db.prepare('DELETE FROM opt_outs WHERE domain = ?');
    stmt.run(domain);
  }

  /** All opt-out entries, with SQLite's string timestamps revived to Dates. */
  getOptOutList(): OptOutEntry[] {
    const rows = this.db
      .prepare('SELECT * FROM opt_outs')
      .all() as Array<{ domain: string; email: string; reason: string | null; timestamp: string }>;
    return rows.map((row) => ({
      domain: row.domain,
      email: row.email,
      reason: row.reason ?? undefined,
      timestamp: new Date(row.timestamp),
    }));
  }
}

74
src/lib/db/supabase.ts Normal file
View file

@ -0,0 +1,74 @@
import { createClient } from '@supabase/supabase-js';
import { BusinessData } from '../searxng';
import { env } from '../../config/env';
// Create the Supabase client with validated environment variables.
// NOTE: src/config/env.ts exports flat keys (SUPABASE_URL / SUPABASE_KEY);
// the previous `env.supabase.url` access crashed at import time because no
// nested `supabase` object exists on `env`.
export const supabase = createClient(
  env.SUPABASE_URL,
  env.SUPABASE_KEY,
  {
    auth: {
      persistSession: false // Since this is a server environment
    }
  }
);
// Define the cache record type
/** Row shape of the `search_cache` table. */
export interface CacheRecord {
  id: string;              // row identifier
  query: string;           // original human query, e.g. "plumbers in denver"
  results: BusinessData[]; // cached result payload (JSON column)
  location: string;        // lower-cased location key
  category: string;        // lower-cased category key
  created_at: string;      // ISO timestamps, set by the database
  updated_at: string;
  expires_at: string;      // rows past this instant are ignored by lookups
}
// Export database helper functions
/**
 * Look up a non-expired cache row for a category/location pair.
 * Returns the newest matching record, or null on a miss or query failure.
 * Keys are lower-cased to match how saveCacheEntry writes them.
 */
export async function getCacheEntry(
  category: string,
  location: string
): Promise<CacheRecord | null> {
  const { data, error } = await supabase
    .from('search_cache')
    .select('*')
    .eq('category', category.toLowerCase())
    .eq('location', location.toLowerCase())
    .gt('expires_at', new Date().toISOString()) // skip expired rows
    .order('created_at', { ascending: false })  // newest first
    .limit(1)
    .single();
  if (error) {
    // NOTE(review): .single() also errors when zero rows match, so an
    // ordinary cache miss is logged as an error here — consider
    // .maybeSingle() to distinguish a miss from a real failure.
    console.error('Cache lookup failed:', error);
    return null;
  }
  return data;
}
/**
 * Insert a fresh cache row for a category/location search.
 * Keys are lower-cased so getCacheEntry lookups match; the row expires
 * `expiresInDays` calendar days from now. Throws when the insert fails.
 */
export async function saveCacheEntry(
  category: string,
  location: string,
  results: BusinessData[],
  expiresInDays: number = 7
): Promise<void> {
  // Calendar-aware expiry: setDate handles month/DST rollover correctly.
  const expiry = new Date();
  expiry.setDate(expiry.getDate() + expiresInDays);

  const record = {
    query: `${category} in ${location}`,
    category: category.toLowerCase(),
    location: location.toLowerCase(),
    results,
    expires_at: expiry.toISOString()
  };

  const { error } = await supabase.from('search_cache').insert(record);
  if (error) {
    console.error('Failed to save cache entry:', error);
    throw error;
  }
}

195
src/lib/emailScraper.ts Normal file
View file

@ -0,0 +1,195 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import { Cache } from './utils/cache';
import { RateLimiter } from './utils/rateLimiter';
import robotsParser from 'robots-parser';
/** Contact details harvested from one page, plus provenance metadata. */
interface ScrapingResult {
  emails: string[];
  phones: string[];      // formatted as (XXX) XXX-XXXX when 10 digits
  addresses: string[];
  socialLinks: string[]; // NOTE(review): currently never populated by the scraper
  source: string;        // URL the data came from
  timestamp: Date;       // when the scrape happened
  attribution: string;   // human-readable provenance / restriction note
}
/**
 * Polite contact-information scraper for public business pages.
 *
 * Honors robots.txt and noindex directives, rate-limits itself, caches
 * per-URL results, and prefers structured (Schema.org JSON-LD) data over
 * free-form markup. Never throws from scrapeEmails.
 */
export class EmailScraper {
  private cache: Cache<ScrapingResult>;
  private rateLimiter: RateLimiter;
  // Parsed robots.txt handlers keyed by host, so each host is fetched once.
  private robotsCache = new Map<string, any>();
  private options: {
    timeout: number;
    cacheTTL: number;
    rateLimit: { windowMs: number; maxRequests: number };
    userAgent: string;
  };

  constructor(options: Partial<{
    timeout: number;
    cacheTTL: number;
    rateLimit: { windowMs: number; maxRequests: number };
    userAgent: string;
  }> = {}) {
    // Merge caller overrides onto the defaults. The previous signature
    // replaced the whole defaults object, so a partial override such as
    // { timeout: 1000 } silently dropped rateLimit/userAgent and crashed
    // the constructor below.
    this.options = {
      timeout: 5000,
      cacheTTL: 60,
      rateLimit: { windowMs: 60000, maxRequests: 10 }, // More conservative rate limiting
      userAgent: 'BizSearch/1.0 (+https://your-domain.com/about) - Business Directory Service',
      ...options,
    };
    this.cache = new Cache<ScrapingResult>(this.options.cacheTTL);
    this.rateLimiter = new RateLimiter(this.options.rateLimit.windowMs, this.options.rateLimit.maxRequests);
  }

  // Uniform empty payload with a provenance note (robots/noindex/error).
  private emptyResult(url: string, attribution: string): ScrapingResult {
    return {
      emails: [],
      phones: [],
      addresses: [],
      socialLinks: [],
      source: url,
      timestamp: new Date(),
      attribution
    };
  }

  /**
   * True when robots.txt permits fetching `url` with our user agent.
   * An unreachable or missing robots.txt is treated as permission granted.
   */
  private async checkRobotsPermission(url: string): Promise<boolean> {
    try {
      const { protocol, host } = new URL(url);
      const robotsUrl = `${protocol}//${host}/robots.txt`;
      let parser = this.robotsCache.get(host);
      if (!parser) {
        const response = await axios.get(robotsUrl);
        parser = robotsParser(robotsUrl, response.data);
        this.robotsCache.set(host, parser);
      }
      return parser.isAllowed(url, this.options.userAgent);
    } catch (error) {
      console.warn(`Could not check robots.txt for ${url}:`, error);
      return true; // Assume allowed if robots.txt is unavailable
    }
  }

  /**
   * Scrape contact details from a single page.
   * Respects robots.txt and noindex; results are cached per URL. Free-form
   * contact markup is only read on pages that look like contact pages;
   * JSON-LD structured data is always considered.
   */
  async scrapeEmails(url: string): Promise<ScrapingResult> {
    // Check cache first
    const cached = this.cache.get(url);
    if (cached) return cached;

    // Check robots.txt
    const allowed = await this.checkRobotsPermission(url);
    if (!allowed) {
      console.log(`Respecting robots.txt disallow for ${url}`);
      return this.emptyResult(url, 'Restricted by robots.txt');
    }

    // Wait for rate limiting slot
    await this.rateLimiter.waitForSlot();
    try {
      const response = await axios.get(url, {
        timeout: this.options.timeout,
        headers: {
          'User-Agent': this.options.userAgent,
          'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        }
      });

      // Check for noindex meta tag
      const $ = cheerio.load(response.data);
      if ($('meta[name="robots"][content*="noindex"]').length > 0) {
        return this.emptyResult(url, 'Respecting noindex directive');
      }

      // Only extract contact information from public contact pages or structured data
      const isContactPage = /contact|about/i.test(url) ||
        $('h1, h2').text().toLowerCase().includes('contact');

      // Sets dedupe values while collecting; converted to arrays below.
      const result = {
        emails: new Set<string>(),
        phones: new Set<string>(),
        addresses: new Set<string>(),
        socialLinks: new Set<string>(), // NOTE(review): never populated yet
        source: url,
        timestamp: new Date(),
        attribution: `Data from public business listing at ${new URL(url).hostname}`
      };

      // Extract from structured data (Schema.org)
      $('script[type="application/ld+json"]').each((_, element) => {
        try {
          const data = JSON.parse($(element).html() || '{}');
          if (data['@type'] === 'LocalBusiness' || data['@type'] === 'Organization') {
            if (data.email) result.emails.add(data.email.toLowerCase());
            if (data.telephone) result.phones.add(this.formatPhoneNumber(data.telephone));
            if (data.address) {
              const fullAddress = this.formatAddress(data.address);
              if (fullAddress) result.addresses.add(fullAddress);
            }
          }
        } catch (e) {
          console.error('Error parsing JSON-LD:', e);
        }
      });

      // Only scrape additional info if it's a contact page
      if (isContactPage) {
        // Extract clearly marked contact information
        $('[itemprop="email"], .contact-email, .email').each((_, element) => {
          const email = $(element).text().trim();
          if (this.isValidEmail(email)) {
            result.emails.add(email.toLowerCase());
          }
        });
        $('[itemprop="telephone"], .phone, .contact-phone').each((_, element) => {
          const phone = $(element).text().trim();
          const formatted = this.formatPhoneNumber(phone);
          if (formatted) result.phones.add(formatted);
        });
      }

      const finalResult = {
        ...result,
        emails: Array.from(result.emails),
        phones: Array.from(result.phones),
        addresses: Array.from(result.addresses),
        socialLinks: Array.from(result.socialLinks)
      };
      this.cache.set(url, finalResult);
      return finalResult;
    } catch (error) {
      console.error(`Failed to scrape ${url}:`, error);
      return this.emptyResult(url, 'Error accessing page');
    }
  }

  // Conservative email shape check (local@domain.tld).
  private isValidEmail(email: string): boolean {
    return /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email);
  }

  // Format a 10-digit US number as (XXX) XXX-XXXX; otherwise return as-is.
  private formatPhoneNumber(phone: string): string {
    const digits = phone.replace(/\D/g, '');
    if (digits.length === 10) {
      return `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`;
    }
    return phone;
  }

  // Normalise a Schema.org PostalAddress (or plain string) to one line.
  private formatAddress(address: any): string | null {
    if (typeof address === 'string') return address;
    if (typeof address === 'object') {
      const parts = [
        address.streetAddress,
        address.addressLocality,
        address.addressRegion,
        address.postalCode
      ].filter(Boolean);
      if (parts.length > 0) return parts.join(', ');
    }
    return null;
  }
}

View file

@ -0,0 +1,19 @@
import { Business, SearchParams } from '../../../types/business';
import { WebScraperProvider } from './webScraper';
/**
 * Facade over the web-scraping provider: callers get a stable search/details
 * API regardless of where business data actually comes from.
 */
export class BusinessProvider {
  // All lookups are currently delegated to the web scraper.
  private scraper = new WebScraperProvider();

  /** Run a business search via the underlying web scraper. */
  async search(params: SearchParams): Promise<Business[]> {
    return this.scraper.search(params);
  }

  /**
   * Detailed lookup by id using stored data or additional scraping.
   * Not implemented yet — always resolves to null.
   */
  async getDetails(businessId: string): Promise<Business | null> {
    return null;
  }
}

View file

@ -0,0 +1,111 @@
import { Business, SearchParams } from '../../../types/business';
import { searchWeb } from '../search'; // This is Perplexica's existing search function
import { parseHTML } from '../utils/parser';
/**
 * Searches the web (via the shared searchWeb helper) for businesses and
 * extracts structured records from known directory sites.
 */
export class WebScraperProvider {
  /**
   * Run several query variants, scrape each hit, and return deduplicated
   * Business records. Individual page failures are logged and skipped.
   */
  async search(params: SearchParams): Promise<Business[]> {
    const searchQueries = this.generateQueries(params);
    const businesses: Business[] = [];
    for (const query of searchQueries) {
      // Use Perplexica's existing search functionality
      const results = await searchWeb(query, {
        maxResults: 20,
        type: 'general' // or 'news' depending on what we want
      });
      for (const result of results) {
        try {
          const html = await fetch(result.url).then(res => res.text());
          const businessData = await this.extractBusinessData(html, result.url);
          if (businessData) {
            businesses.push(businessData);
          }
        } catch (error) {
          console.error(`Failed to extract data from ${result.url}:`, error);
        }
      }
    }
    return this.deduplicateBusinesses(businesses);
  }

  // Query variants to improve recall for a category/location pair.
  private generateQueries(params: SearchParams): string[] {
    const { location, category } = params;
    return [
      `${category} in ${location}`,
      `${category} business ${location}`,
      `best ${category} near ${location}`,
      `${category} services ${location} reviews`
    ];
  }

  // Dispatch to a source-specific extractor; unknown sources yield null.
  private async extractBusinessData(html: string, sourceUrl: string): Promise<Business | null> {
    const $ = parseHTML(html);
    // Different extraction logic based on source
    if (sourceUrl.includes('yelp.com')) {
      return this.extractYelpData($);
    } else if (sourceUrl.includes('yellowpages.com')) {
      return this.extractYellowPagesData($);
    }
    // ... other source-specific extractors
    return null;
  }

  // Yelp-specific scraping; returns null when the page doesn't match.
  private extractYelpData($: any): Business | null {
    try {
      return {
        // NOTE(review): relies on the global crypto.randomUUID (Node >= 19
        // or a webcrypto polyfill) — confirm the runtime provides it.
        id: crypto.randomUUID(),
        name: $('.business-name').text().trim(),
        phone: $('.phone-number').text().trim(),
        address: $('.address').text().trim(),
        city: $('.city').text().trim(),
        state: $('.state').text().trim(),
        zip: $('.zip').text().trim(),
        category: $('.category-str-list').text().split(',').map((s: string) => s.trim()),
        rating: parseFloat($('.rating').text()),
        reviewCount: parseInt($('.review-count').text(), 10),
        services: $('.services-list').text().split(',').map((s: string) => s.trim()),
        hours: this.extractHours($),
        website: $('.website-link').attr('href'),
        verified: false,
        lastUpdated: new Date()
      };
    } catch (error) {
      return null;
    }
  }

  // Referenced by extractBusinessData but previously undefined (compile
  // error). Stubbed to "no data" until a real extractor is written.
  private extractYellowPagesData($: any): Business | null {
    return null;
  }

  // Referenced by extractYelpData but previously undefined (compile error).
  // Stubbed: opening-hours markup varies per source; no extraction yet.
  private extractHours($: any): any {
    return undefined;
  }

  // Collapse duplicates keyed by phone+address, merging their data.
  private deduplicateBusinesses(businesses: Business[]): Business[] {
    // Group by phone number and address to identify duplicates
    const uniqueBusinesses = new Map<string, Business>();
    for (const business of businesses) {
      const key = `${business.phone}-${business.address}`.toLowerCase();
      if (!uniqueBusinesses.has(key)) {
        uniqueBusinesses.set(key, business);
      } else {
        // Merge data if we have additional information
        const existing = uniqueBusinesses.get(key)!;
        uniqueBusinesses.set(key, this.mergeBusinessData(existing, business));
      }
    }
    return Array.from(uniqueBusinesses.values());
  }

  // Union services, average ratings, sum review counts; otherwise keep the
  // first-seen (existing) value when present.
  private mergeBusinessData(existing: Business, newData: Business): Business {
    return {
      ...existing,
      services: [...new Set([...existing.services, ...newData.services])],
      rating: (existing.rating + newData.rating) / 2,
      reviewCount: existing.reviewCount + newData.reviewCount,
      // Keep the most complete data for other fields
      website: existing.website || newData.website,
      email: existing.email || newData.email,
      hours: existing.hours || newData.hours
    };
  }
}

54
src/lib/search.ts Normal file
View file

@ -0,0 +1,54 @@
import axios from 'axios';
import { config } from '../config';
/** Options accepted by searchWeb. */
interface SearchOptions {
  maxResults?: number;       // cap on returned results (default 20)
  type?: 'general' | 'news'; // SearxNG category to query
  engines?: string[];        // engines to use (default google/bing/duckduckgo)
}

/** Normalised SearxNG hit. */
interface SearchResult {
  url: string;
  title: string;
  content: string; // snippet/summary text ('' when the engine provides none)
  score?: number;  // engine relevance score when available
}
/**
 * Query SearxNG and normalise the hits.
 *
 * @param query   Free-text search query.
 * @param options maxResults caps the returned list; type selects the SearxNG
 *                category; engines overrides the default engine set.
 * @returns Normalised results (possibly empty). Rethrows transport errors.
 */
export async function searchWeb(
  query: string,
  options: SearchOptions = {}
): Promise<SearchResult[]> {
  const {
    maxResults = 20,
    type = 'general',
    engines = ['google', 'bing', 'duckduckgo']
  } = options;
  try {
    const response = await axios.get(`${config.search.searxngUrl || process.env.SEARXNG_URL}/search`, {
      params: {
        q: query,
        format: 'json',
        categories: type,
        engines: engines.join(','),
        // NOTE(review): SearxNG may ignore this parameter — the local
        // slice below is what actually enforces the cap.
        limit: maxResults
      }
    });
    if (!response.data || !response.data.results) {
      console.error('Invalid response from SearxNG:', response.data);
      return [];
    }
    // Enforce maxResults locally: relying on the upstream `limit` parameter
    // alone could return oversized result sets to callers.
    return response.data.results
      .slice(0, maxResults)
      .map((result: any) => ({
        url: result.url,
        title: result.title,
        content: result.content || result.snippet || '',
        score: result.score
      }));
  } catch (error) {
    console.error('Search failed:', error);
    throw error;
  }
}

View file

@ -1,47 +1,313 @@
import axios from 'axios';
import { getSearxngApiEndpoint } from '../config';
import * as cheerio from 'cheerio';
import { createWorker } from 'tesseract.js';
import { env } from '../config/env';
import { OllamaService } from './services/ollamaService';
import { BusinessData } from './types';
import { db } from './services/databaseService';
import { generateBusinessId } from './utils';
import { extractContactFromHtml, extractCleanAddress } from './utils/scraper';
import { GeocodingService } from './services/geocodingService';
import { cleanAddress, formatPhoneNumber, cleanEmail, cleanDescription } from './utils/dataCleanup';
import { CleanupService } from './services/cleanupService';
interface SearxngSearchOptions {
categories?: string[];
engines?: string[];
language?: string;
pageno?: number;
// Define interfaces used only in this file
interface SearchResult {
url: string;
title: string;
content: string;
phone?: string;
email?: string;
address?: string;
website?: string;
rating?: number;
coordinates?: {
lat: number;
lng: number;
};
}
interface SearxngSearchResult {
title: string;
url: string;
img_src?: string;
thumbnail_src?: string;
thumbnail?: string;
content?: string;
author?: string;
iframe_src?: string;
interface ContactInfo {
phone?: string;
email?: string;
address?: string;
description?: string;
openingHours?: string[];
}
export const searchSearxng = async (
query: string,
opts?: SearxngSearchOptions,
) => {
const searxngURL = getSearxngApiEndpoint();
// Export the main search function
export async function searchBusinesses(
query: string,
options: { onProgress?: (status: string, progress: number) => void } = {}
): Promise<BusinessData[]> {
try {
console.log('Processing search query:', query);
const [searchTerm, location] = query.split(' in ').map(s => s.trim());
if (!searchTerm || !location) {
throw new Error('Invalid search query format. Use: "search term in location"');
}
const url = new URL(`${searxngURL}/search?format=json`);
url.searchParams.append('q', query);
options.onProgress?.('Checking cache', 0);
if (opts) {
Object.keys(opts).forEach((key) => {
if (Array.isArray(opts[key])) {
url.searchParams.append(key, opts[key].join(','));
return;
}
url.searchParams.append(key, opts[key]);
});
}
// Check cache first
const cacheKey = `search:${searchTerm}:${location}`;
let results = await db.getFromCache(cacheKey);
if (!results) {
// Check database for existing businesses
console.log('Searching database for:', searchTerm, 'in', location);
const existingBusinesses = await db.searchBusinesses(searchTerm, location);
// Start search immediately
console.log('Starting web search');
const searchPromise = performSearch(searchTerm, location, options);
if (existingBusinesses.length > 0) {
console.log(`Found ${existingBusinesses.length} existing businesses`);
options.onProgress?.('Retrieved from database', 50);
}
const res = await axios.get(url.toString());
// Wait for new results
const newResults = await searchPromise;
console.log(`Got ${newResults.length} new results from search`);
// Merge results, removing duplicates by ID
const allResults = [...existingBusinesses];
for (const result of newResults) {
if (!allResults.some(b => b.id === result.id)) {
allResults.push(result);
}
}
console.log(`Total unique results: ${allResults.length}`);
// Cache combined results
await db.saveToCache(cacheKey, allResults, env.cache.durationHours * 60 * 60 * 1000);
console.log(`Returning ${allResults.length} total results (${existingBusinesses.length} existing + ${newResults.length} new)`);
results = allResults;
}
const results: SearxngSearchResult[] = res.data.results;
const suggestions: string[] = res.data.suggestions;
// Clean all results using LLM
options.onProgress?.('Cleaning data', 75);
const cleanedResults = await CleanupService.cleanBusinessRecords(results);
return { results, suggestions };
};
options.onProgress?.('Search complete', 100);
return cleanedResults;
} catch (error) {
console.error('Search error:', error);
return [];
}
}
/**
 * Run the web search across several query variants, dedupe hits by URL,
 * filter to business-looking pages, clean/enrich them, and persist results.
 */
async function performSearch(
  searchTerm: string,
  location: string,
  options: any
): Promise<BusinessData[]> {
  // Query variants improve recall; results are deduped by URL below.
  const queries = [
    searchTerm + ' ' + location,
    searchTerm + ' business near ' + location,
    searchTerm + ' services ' + location,
    'local ' + searchTerm + ' ' + location
  ];
  options.onProgress?.('Searching multiple sources', 25);
  let allResults: SearchResult[] = [];
  const seenUrls = new Set<string>();
  for (const q of queries) {
    try {
      const response = await axios.get(`${env.searxng.currentUrl}/search`, {
        params: {
          q,
          format: 'json',
          engines: 'google,google_maps',
          language: 'en-US',
          time_range: '',
          safesearch: 1
        }
      });
      if (response.data?.results) {
        // Deduplicate results across query variants by URL.
        const newResults = response.data.results.filter((result: SearchResult) => {
          if (seenUrls.has(result.url)) {
            return false;
          }
          seenUrls.add(result.url);
          return true;
        });
        console.log(`Found ${newResults.length} unique results from ${response.data.results[0]?.engine}`);
        allResults = allResults.concat(newResults);
      }
    } catch (error) {
      // A failed variant is non-fatal; the remaining queries still run.
      console.error(`Search failed for query "${q}":`, error);
    }
  }
  options.onProgress?.('Processing results', 50);
  const filteredResults = allResults.filter(isValidBusinessResult);
  const processedResults = await processResults(filteredResults, location);
  // Save results to database (best-effort; a failed save doesn't drop results)
  for (const result of processedResults) {
    await db.saveBusiness(result).catch(console.error);
  }
  options.onProgress?.('Search complete', 100);
  return processedResults;
}
// Add other necessary functions (isValidBusinessResult, processResults, etc.)
// Heuristic filter: keep only results that look like an individual business's
// own page, dropping directories, listicles, maps, and search pages.
function isValidBusinessResult(result: SearchResult): boolean {
  const url = result.url.toLowerCase();

  // Directory sites, aggregators and non-business URL fragments to ignore.
  const skipPatterns = [
    'tripadvisor.com',
    'yelp.com',
    'opentable.com',
    'restaurants-for-sale',
    'guide.michelin.com',
    'denver.org',
    '/blog/',
    '/maps/',
    'search?',
    'features/',
    '/lists/',
    'reddit.com',
    'eater.com'
  ];
  for (const pattern of skipPatterns) {
    if (url.includes(pattern)) {
      console.log(`Skipping listing page: ${result.url}`);
      return false;
    }
  }

  // A real business page needs a non-trivial title.
  if (!result.title || result.title.length < 2) {
    return false;
  }

  // Titles that read like editorial round-ups rather than a business name.
  const articlePatterns = [
    'Best',
    'Top',
    'Guide',
    'Where to',
    'Welcome to',
    'Updated',
    'Near',
    'Restaurants in'
  ];
  for (const pattern of articlePatterns) {
    if (result.title.includes(pattern)) {
      console.log(`Skipping article: ${result.title}`);
      return false;
    }
  }

  // Require at least one signal that this is a business's own site.
  const businessPatterns = [
    'menu',
    'reservation',
    'location',
    'contact',
    'about-us',
    'home'
  ];
  const content = result.content.toLowerCase();
  const looksLikeBusiness = businessPatterns.some(
    (pattern) => url.includes(pattern) || content.includes(pattern)
  );
  if (!looksLikeBusiness) {
    console.log(`Skipping non-business page: ${result.url}`);
    return false;
  }

  return true;
}
/**
 * Turn raw search hits into cleaned BusinessData records: scrape contact
 * details, LLM-clean each record, geocode its address, and keep only records
 * with a name plus at least one of phone/address.
 */
async function processResults(results: SearchResult[], location: string): Promise<BusinessData[]> {
  const processedResults: BusinessData[] = [];
  // Get coordinates for the location; fall back to Denver, CO on failure.
  const locationGeo = await GeocodingService.geocode(location);
  const defaultCoords = locationGeo || { lat: 39.7392, lng: -104.9903 };
  for (const result of results) {
    try {
      // Extract contact info from webpage
      const contactInfo = await extractContactFromHtml(result.url);
      // Create initial business record; search-result fields win over scraped ones.
      const business: BusinessData = {
        id: generateBusinessId(result),
        name: cleanBusinessName(result.title),
        phone: result.phone || contactInfo.phone || '',
        email: result.email || contactInfo.email || '',
        address: result.address || contactInfo.address || '',
        rating: result.rating || 0,
        website: result.website || result.url || '',
        logo: '',
        source: 'web',
        description: result.content || contactInfo.description || '',
        location: defaultCoords,
        openingHours: contactInfo.openingHours
      };
      // Clean up the record using LLM
      const cleanedBusiness = await CleanupService.cleanBusinessRecord(business);
      // Get coordinates for the cleaned address (more precise than the city default)
      if (cleanedBusiness.address) {
        const addressGeo = await GeocodingService.geocode(cleanedBusiness.address);
        if (addressGeo) {
          cleanedBusiness.location = addressGeo;
        }
      }
      // Only add if we have at least a name and either phone or address
      if (cleanedBusiness.name && (cleanedBusiness.phone || cleanedBusiness.address)) {
        processedResults.push(cleanedBusiness);
      }
    } catch (error) {
      // One bad result is skipped; the rest of the batch continues.
      console.error(`Error processing result ${result.title}:`, error);
    }
  }
  return processedResults;
}
// Helper functions
// Normalise a scraped page title into a plain business name.
function cleanBusinessName(name: string): string {
  // Remove common prefixes and suffixes:
  // 1. leading articles ("The", "A", "An"),
  // 2. taglines after a dash/en-dash/em-dash/colon separator — the original
  //    regex `(-||—|:)` contained an empty alternative, which matched at the
  //    first whitespace and truncated every multi-word name,
  // 3. parenthesised qualifiers like "(Downtown)".
  const cleanName = name
    .replace(/^(The|A|An)\s+/i, '')
    .replace(/\s+(-|–|—|:).*$/, '')
    .replace(/\s*\([^)]*\)/g, '')
    .trim();
  return cleanName;
}
/**
 * Resolve an address to latitude/longitude.
 * Geocoding is not wired up yet: every address currently resolves to the
 * default Denver, CO coordinates regardless of input.
 */
async function getLocationCoordinates(address: string): Promise<{lat: number, lng: number}> {
  const DENVER_COORDS = { lat: 39.7392, lng: -104.9903 };
  return DENVER_COORDS;
}
/**
 * Fire-and-forget refresh: re-runs the search (no progress reporting) and
 * persists results. Failures are logged, never thrown, so callers can
 * invoke this without awaiting or handling errors.
 */
async function searchAndUpdateInBackground(searchTerm: string, location: string) {
  await performSearch(searchTerm, location, {})
    .then((updated) => console.log(`Updated ${updated.length} businesses in background`))
    .catch((error) => console.error('Background search error:', error));
}
// ... rest of the file remains the same

View file

@ -0,0 +1,111 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import { Cache } from '../utils/cache';
import { RateLimiter } from '../utils/rateLimiter';
/** Aggregated crawl output for one business website. */
interface CrawlResult {
  mainContent: string; // full text of the landing page's <body>
  contactInfo: string; // raw HTML of the contact page ('' when none found)
  aboutInfo: string;   // raw HTML of the about page ('' when none found)
  structuredData: any; // JSON-LD blocks found on the landing page
}

/**
 * Crawls a business site's landing/contact/about pages with caching and
 * rate limiting; failures degrade to empty results rather than throwing.
 */
export class BusinessCrawler {
  private cache: Cache<CrawlResult>;
  private rateLimiter: RateLimiter;

  constructor() {
    this.cache = new Cache<CrawlResult>(60); // 1 hour cache
    // NOTE(review): default-constructed RateLimiter — the effective limits
    // are whatever the class defaults to; confirm they suit crawling.
    this.rateLimiter = new RateLimiter();
  }

  /** Fetch and aggregate a site's key pages; results are cached per URL. */
  async crawlBusinessSite(url: string): Promise<CrawlResult> {
    // Check cache first
    const cached = this.cache.get(url);
    if (cached) return cached;
    await this.rateLimiter.waitForSlot();
    try {
      const mainPage = await this.fetchPage(url);
      const $ = cheerio.load(mainPage);
      // Get all important URLs
      const contactUrl = this.findContactPage($, url);
      const aboutUrl = this.findAboutPage($, url);
      // Crawl additional pages in parallel; missing pages become ''.
      const [contactPage, aboutPage] = await Promise.all([
        contactUrl ? this.fetchPage(contactUrl) : '',
        aboutUrl ? this.fetchPage(aboutUrl) : ''
      ]);
      // Extract structured data
      const structuredData = this.extractStructuredData($);
      const result = {
        mainContent: $('body').text(),
        contactInfo: contactPage,
        aboutInfo: aboutPage,
        structuredData
      };
      this.cache.set(url, result);
      return result;
    } catch (error) {
      // Degrade to an empty result so one bad site doesn't break a batch.
      console.error(`Failed to crawl ${url}:`, error);
      return {
        mainContent: '',
        contactInfo: '',
        aboutInfo: '',
        structuredData: {}
      };
    }
  }

  // Fetch one page as text with a 10s timeout; returns '' on any failure.
  private async fetchPage(url: string): Promise<string> {
    try {
      const response = await axios.get(url, {
        timeout: 10000,
        headers: {
          'User-Agent': 'Mozilla/5.0 (compatible; BizSearch/1.0; +http://localhost:3000/about)',
        }
      });
      return response.data;
    } catch (error) {
      console.error(`Failed to fetch ${url}:`, error);
      return '';
    }
  }

  // Find a likely contact-page link on the landing page, resolved absolute.
  private findContactPage($: cheerio.CheerioAPI, baseUrl: string): string | null {
    const contactLinks = $('a[href*="contact"], a:contains("Contact")');
    if (contactLinks.length > 0) {
      const href = contactLinks.first().attr('href');
      return href ? new URL(href, baseUrl).toString() : null;
    }
    return null;
  }

  // Find a likely about-page link on the landing page, resolved absolute.
  private findAboutPage($: cheerio.CheerioAPI, baseUrl: string): string | null {
    const aboutLinks = $('a[href*="about"], a:contains("About")');
    if (aboutLinks.length > 0) {
      const href = aboutLinks.first().attr('href');
      return href ? new URL(href, baseUrl).toString() : null;
    }
    return null;
  }

  // Collect all JSON-LD <script> payloads; malformed blocks are skipped.
  private extractStructuredData($: cheerio.CheerioAPI): any {
    const structuredData: any[] = [];
    $('script[type="application/ld+json"]').each((_, element) => {
      try {
        const data = JSON.parse($(element).html() || '{}');
        structuredData.push(data);
      } catch (error) {
        console.error('Failed to parse structured data:', error);
      }
    });
    return structuredData;
  }
}

View file

@ -0,0 +1,71 @@
import { supabase } from '../supabase';
import { BusinessData } from '../searxng';
/**
 * Thin wrapper around the `search_cache` Supabase table.
 *
 * Rows are keyed by lower-cased (category, location) pairs and carry an
 * `expires_at` timestamp; lookups ignore expired rows. Every failure is
 * logged and swallowed so caching problems never break a search.
 */
export class CacheService {
  /** Returns the newest unexpired cached result set, or null on miss/error. */
  static async getCachedResults(category: string, location: string): Promise<BusinessData[] | null> {
    try {
      const { data, error } = await supabase
        .from('search_cache')
        .select('results')
        .eq('category', category.toLowerCase())
        .eq('location', location.toLowerCase())
        .gt('expires_at', new Date().toISOString())
        .order('created_at', { ascending: false })
        .limit(1)
        .single();
      if (error) throw error;
      if (!data) return null;
      return data.results;
    } catch (error) {
      console.error('Cache lookup failed:', error);
      return null;
    }
  }

  /** Inserts a fresh cache row that expires `expiresInDays` from now. */
  static async cacheResults(
    category: string,
    location: string,
    results: BusinessData[],
    expiresInDays: number = 7
  ): Promise<void> {
    try {
      const expiry = new Date();
      expiry.setDate(expiry.getDate() + expiresInDays);
      const row = {
        query: `${category} in ${location}`,
        category: category.toLowerCase(),
        location: location.toLowerCase(),
        results,
        expires_at: expiry.toISOString()
      };
      const { error } = await supabase.from('search_cache').insert(row);
      if (error) throw error;
    } catch (error) {
      console.error('Failed to cache results:', error);
    }
  }

  /** Overwrites the results of an existing (category, location) cache row. */
  static async updateCache(
    category: string,
    location: string,
    newResults: BusinessData[]
  ): Promise<void> {
    try {
      const patch = {
        results: newResults,
        updated_at: new Date().toISOString()
      };
      const { error } = await supabase
        .from('search_cache')
        .update(patch)
        .eq('category', category.toLowerCase())
        .eq('location', location.toLowerCase());
      if (error) throw error;
    } catch (error) {
      console.error('Failed to update cache:', error);
    }
  }
}

View file

@ -0,0 +1,235 @@
import { DeepSeekService } from './deepseekService';
import { Business } from '../types';
import { db } from './databaseService';
// Constants for validation and scoring
const BATCH_SIZE = 3; // Process businesses in small batches to avoid overwhelming LLM
const LLM_TIMEOUT = 30000; // 30 second timeout for LLM requests
const MIN_CONFIDENCE_SCORE = 0.7; // Minimum score required to cache results
// Basic local@domain.tld email shape (not a full RFC 5322 validator).
const VALID_EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
// Exactly the "(303) 555-0123" display format that validateAndClean emits.
const VALID_PHONE_REGEX = /^\(\d{3}\) \d{3}-\d{4}$/;
// Street address ending in "city, ST 12345"; the street-type keyword list
// keeps false positives down on arbitrary scraped text.
const VALID_ADDRESS_REGEX = /^\d+.*(?:street|st|avenue|ave|road|rd|boulevard|blvd|lane|ln|drive|dr|court|ct|circle|cir|way|parkway|pkwy|place|pl),?\s+[a-z ]+,\s*[a-z]{2}\s+\d{5}$/i;
/**
 * Post-processing pipeline that cleans scraped business records with an LLM
 * plus deterministic validation rules, caching only records whose confidence
 * score reaches MIN_CONFIDENCE_SCORE.
 */
export class CleanupService {
  /**
   * Attempts to clean business data using LLM with timeout protection.
   * Falls back to original data if LLM fails or times out.
   * NOTE(review): DeepSeekService.chat is invoked statically here, but the
   * DeepSeekService class in services/deepseekService declares chat as an
   * instance method — confirm this import resolves to a static API.
   */
  private static async cleanWithLLM(prompt: string, originalBusiness: Business): Promise<string> {
    try {
      // Race the LLM call against a hard timeout so a hung model cannot
      // stall the whole batch.
      const timeoutPromise = new Promise((_, reject) => {
        setTimeout(() => reject(new Error('LLM timeout')), LLM_TIMEOUT);
      });
      const llmPromise = DeepSeekService.chat([{
        role: 'user',
        content: prompt
      }]);
      const response = await Promise.race([llmPromise, timeoutPromise]);
      return (response as string).trim();
    } catch (error) {
      console.error('LLM cleanup error:', error);
      // On timeout, return the original values
      return `
Address: ${originalBusiness.address}
Phone: ${originalBusiness.phone}
Email: ${originalBusiness.email}
Description: ${originalBusiness.description}
`;
    }
  }

  /**
   * Calculates a confidence score (0-1) for the cleaned business data.
   * Score is based on:
   * - Valid email format (0.25)
   * - Valid phone format (0.25)
   * - Valid address format (0.25)
   * - Description quality (0.25)
   */
  private static calculateConfidenceScore(business: Business): number {
    let score = 0;
    // Valid email adds 0.25
    if (business.email && VALID_EMAIL_REGEX.test(business.email)) {
      score += 0.25;
    }
    // Valid phone adds 0.25
    if (business.phone && VALID_PHONE_REGEX.test(business.phone)) {
      score += 0.25;
    }
    // Valid address adds 0.25
    if (business.address && VALID_ADDRESS_REGEX.test(business.address)) {
      score += 0.25;
    }
    // Description quality checks (0.25 max)
    if (business.description) {
      // Length check (0.1) — rewards concise but non-trivial descriptions
      if (business.description.length > 30 && business.description.length < 200) {
        score += 0.1;
      }
      // Relevance check (0.1) — description should mention the inferred type
      const businessType = this.getBusinessType(business.name);
      if (business.description.toLowerCase().includes(businessType)) {
        score += 0.1;
      }
      // No HTML/markdown (0.05)
      if (!/[<>[\]()]/.test(business.description)) {
        score += 0.05;
      }
    }
    return score;
  }

  /**
   * Determines the type of business based on name keywords.
   * Used for validating and generating descriptions.
   * Falls back to the generic 'business' when no keyword matches.
   */
  private static getBusinessType(name: string): string {
    const types = [
      'restaurant', 'plumber', 'electrician', 'cafe', 'bar',
      'salon', 'shop', 'store', 'service'
    ];
    const nameLower = name.toLowerCase();
    return types.find(type => nameLower.includes(type)) || 'business';
  }

  /**
   * Parses LLM response into structured business data.
   * Expects format: "field: value" for each line.
   * Lines without a recognized field label are silently ignored.
   */
  private static parseResponse(response: string): Partial<Business> {
    const cleaned: Partial<Business> = {};
    const lines = response.split('\n');
    for (const line of lines) {
      // Re-join on ':' so values containing colons (e.g. URLs) survive.
      const [field, ...values] = line.split(':');
      const value = values.join(':').trim();
      switch (field.toLowerCase().trim()) {
        case 'address':
          cleaned.address = value;
          break;
        case 'phone':
          cleaned.phone = value;
          break;
        case 'email':
          cleaned.email = value;
          break;
        case 'description':
          cleaned.description = value;
          break;
      }
    }
    return cleaned;
  }

  /**
   * Applies validation rules and cleaning to each field.
   * - Standardizes formats
   * - Removes invalid data
   * - Ensures consistent formatting
   * Field order matters: each field is cleaned before being re-validated.
   */
  private static validateAndClean(business: Business): Business {
    const cleaned = { ...business };
    // Email validation and cleaning — strip markdown mailto wrappers and
    // leading digit ranges, then blank out anything still invalid.
    if (cleaned.email) {
      cleaned.email = cleaned.email
        .toLowerCase()
        .replace(/\[|\]|\(mailto:.*?\)/g, '')
        .replace(/^\d+-\d+/, '')
        .trim();
      if (!VALID_EMAIL_REGEX.test(cleaned.email) ||
          ['none', 'n/a', 'union office', ''].includes(cleaned.email.toLowerCase())) {
        cleaned.email = '';
      }
    }
    // Phone validation and cleaning — keep only exact 10-digit numbers,
    // reformatted as "(XXX) XXX-XXXX"; anything else is dropped.
    if (cleaned.phone) {
      const digits = cleaned.phone.replace(/\D/g, '');
      if (digits.length === 10) {
        cleaned.phone = `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`;
      } else {
        cleaned.phone = '';
      }
    }
    // Address validation and cleaning — strip LLM preamble text before the
    // street number, collapse whitespace.
    if (cleaned.address) {
      cleaned.address = cleaned.address
        .replace(/^.*?(?=\d|[A-Z])/s, '')
        .replace(/^(Sure!.*?:|The business.*?:|.*?address.*?:)(?:\s*\\n)*\s*/si, '')
        .replace(/\s+/g, ' ')
        .trim();
      // Standardize state abbreviations
      cleaned.address = cleaned.address.replace(/\b(Colorado|Colo|Col)\b/gi, 'CO');
    }
    // Description validation and cleaning — drop prices and calls to action;
    // replace off-topic descriptions with a generic type-based one.
    if (cleaned.description) {
      cleaned.description = cleaned.description
        .replace(/\$\d+(\.\d{2})?/g, '') // Remove prices
        .replace(/\b(call|email|website|click|visit)\b.*$/i, '') // Remove calls to action
        .replace(/\s+/g, ' ')
        .trim();
      const businessType = this.getBusinessType(cleaned.name);
      if (businessType !== 'business' &&
          !cleaned.description.toLowerCase().includes(businessType)) {
        cleaned.description = `${businessType.charAt(0).toUpperCase() + businessType.slice(1)} services in the Denver area.`;
      }
    }
    return cleaned;
  }

  /**
   * Cleans one business record: cache lookup, LLM cleanup, deterministic
   * validation, then caching for 24h only when confidence is high enough.
   * NOTE(review): DeepSeekService.cleanBusinessData is not defined on the
   * DeepSeekService class visible in services/deepseekService — verify the
   * imported module provides it.
   */
  static async cleanBusinessRecord(business: Business): Promise<Business> {
    // Check cache first
    const cacheKey = `clean:${business.id}`;
    const cached = await db.getFromCache(cacheKey);
    if (cached) {
      console.log('Using cached clean data for:', business.name);
      return cached;
    }
    // Clean using DeepSeek
    const cleaned = await DeepSeekService.cleanBusinessData(business);
    const validated = this.validateAndClean({ ...business, ...cleaned });
    // Only cache if confidence score is high enough
    const confidence = this.calculateConfidenceScore(validated);
    if (confidence >= MIN_CONFIDENCE_SCORE) {
      await db.saveToCache(cacheKey, validated, 24 * 60 * 60 * 1000);
    }
    return validated;
  }

  /**
   * Cleans many records in sequential batches of BATCH_SIZE; records inside
   * a batch are processed concurrently.
   */
  static async cleanBusinessRecords(businesses: Business[]): Promise<Business[]> {
    const cleanedBusinesses: Business[] = [];
    // Process in batches
    for (let i = 0; i < businesses.length; i += BATCH_SIZE) {
      const batch = businesses.slice(i, i + BATCH_SIZE);
      const cleanedBatch = await Promise.all(
        batch.map(business => this.cleanBusinessRecord(business))
      );
      cleanedBusinesses.push(...cleanedBatch);
    }
    return cleanedBusinesses;
  }
}

View file

@ -0,0 +1,107 @@
import { OllamaService } from './ollamaService';
/** Business record after LLM extraction and local validation. */
interface ValidatedBusinessData {
  name: string;         // 'Unknown' when not extractable
  phone: string;        // "(XXX) XXX-XXXX" or 'N/A'
  email: string;        // validated address or 'N/A'
  address: string;      // formatted street address or 'N/A'
  description: string;  // may be '' when nothing usable was found
  hours?: string;       // business hours when present in the source text
  isValid: boolean;     // true when a name plus at least one contact method exists
}
/**
 * Uses a local Ollama model to turn raw scraped text into a structured,
 * validated business record. Model output is defensively re-validated and
 * normalized before being returned.
 */
export class DataValidationService {
  private ollama: OllamaService;

  constructor() {
    this.ollama = new OllamaService();
  }

  /**
   * Prompts the model to extract a JSON business record from `rawText`.
   * Returns an "invalid" placeholder record when the model call or the JSON
   * parsing fails, so callers never have to handle exceptions.
   */
  async validateAndCleanData(rawText: string): Promise<ValidatedBusinessData> {
    try {
      const prompt = `
You are a business data validation expert. Extract and validate business information from the following text.
Return ONLY a JSON object with the following format, nothing else:
{
  "name": "verified business name",
  "phone": "formatted phone number or N/A",
  "email": "verified email address or N/A",
  "address": "verified physical address or N/A",
  "description": "short business description",
  "hours": "business hours if available",
  "isValid": boolean
}
Rules:
1. Phone numbers should be in (XXX) XXX-XXXX format
2. Addresses should be properly formatted with street, city, state, zip
3. Remove any irrelevant text from descriptions
4. Set isValid to true only if name and at least one contact method is found
5. Clean up any obvious formatting issues
6. Validate email addresses for proper format
Text to analyze:
${rawText}
`;
      const response = await this.ollama.generateResponse(prompt);
      try {
        // The model may wrap the JSON in prose; grab the first {...} span.
        const jsonMatch = response.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
          throw new Error('No JSON found in response');
        }
        const parsed = JSON.parse(jsonMatch[0]);
        return this.validateResult(parsed);
      } catch (parseError) {
        console.error('Failed to parse Ollama response:', parseError);
        throw parseError;
      }
    } catch (error) {
      console.error('Data validation failed:', error);
      return {
        name: 'Unknown',
        phone: 'N/A',
        email: 'N/A',
        address: 'N/A',
        description: '',
        hours: '',
        isValid: false
      };
    }
  }

  /** Coerces arbitrary model output into the ValidatedBusinessData shape. */
  private validateResult(result: any): ValidatedBusinessData {
    return {
      name: this.cleanField(result.name) || 'Unknown',
      phone: this.formatPhone(result.phone) || 'N/A',
      email: this.cleanField(result.email) || 'N/A',
      address: this.cleanField(result.address) || 'N/A',
      description: this.cleanField(result.description) || '',
      hours: this.cleanField(result.hours),
      isValid: Boolean(result.isValid)
    };
  }

  /** Trims and collapses internal whitespace; non-strings become ''. */
  private cleanField(value: any): string {
    if (typeof value !== 'string' || !value) return '';
    return value.trim().replace(/\s+/g, ' ');
  }

  /** Normalizes 10-digit US numbers to "(XXX) XXX-XXXX"; others pass through. */
  private formatPhone(phone: string): string {
    if (!phone || phone === 'N/A') return 'N/A';
    const digits = phone.replace(/\D/g, '');
    return digits.length === 10
      ? `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`
      : phone;
  }
}

View file

@ -0,0 +1,80 @@
import { createClient } from '@supabase/supabase-js';
import { Business } from '../types';
import env from '../../config/env';
/** Business fields known before a database id is assigned. */
interface PartialBusiness {
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  rating?: number;    // 1-5 stars; saveBusiness defaults missing ratings
  source?: string;    // origin of the record, e.g. 'deepseek'
  location?: {        // WGS84 coordinates
    lat: number;
    lng: number;
  };
}
/**
 * Supabase-backed persistence for the `businesses` table: upsert, free-text
 * lookup, and fetch-by-id.
 */
export class DatabaseService {
  private supabase;

  constructor() {
    this.supabase = createClient(env.SUPABASE_URL, env.SUPABASE_KEY);
  }

  /**
   * Upserts one business and returns the stored row.
   * Missing rating defaults to 4.5; missing location to "(0,0)"
   * (stored as a Postgres point string, "(lng,lat)").
   * @throws Error when the upsert fails.
   */
  async saveBusiness(business: PartialBusiness): Promise<Business> {
    const { data, error } = await this.supabase
      .from('businesses')
      .upsert({
        name: business.name,
        address: business.address,
        phone: business.phone,
        description: business.description,
        website: business.website,
        source: business.source || 'deepseek',
        rating: business.rating || 4.5,
        location: business.location ? `(${business.location.lng},${business.location.lat})` : '(0,0)'
      })
      .select()
      .single();
    if (error) {
      console.error('Error saving business:', error);
      throw new Error('Failed to save business');
    }
    return data;
  }

  /**
   * Case-insensitive substring search on name/description, filtered by
   * address substring, best-rated first.
   * NOTE(review): `query` is interpolated directly into the PostgREST `.or()`
   * filter string — characters like ',' or ')' in user input can alter the
   * filter (injection-style). Consider sanitizing/escaping before release.
   * @throws Error when the query fails.
   */
  async findBusinessesByQuery(query: string, location: string): Promise<Business[]> {
    const { data, error } = await this.supabase
      .from('businesses')
      .select('*')
      .or(`name.ilike.%${query}%,description.ilike.%${query}%`)
      .ilike('address', `%${location}%`)
      .order('rating', { ascending: false });
    if (error) {
      console.error('Error finding businesses:', error);
      throw new Error('Failed to find businesses');
    }
    return data || [];
  }

  /** Fetches one business by primary key; returns null on error or miss. */
  async getBusinessById(id: string): Promise<Business | null> {
    const { data, error } = await this.supabase
      .from('businesses')
      .select('*')
      .eq('id', id)
      .single();
    if (error) {
      console.error('Error getting business:', error);
      return null;
    }
    return data;
  }
}

View file

@ -0,0 +1,285 @@
import axios from 'axios';
import EventEmitter from 'events';
import { Business } from '../types';
/** Business fields the LLM is expected to produce (no id/location yet). */
interface PartialBusiness {
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  rating?: number; // 1-5 stars, may be fractional
}
/**
 * Chat client for a local Ollama model that generates fictional business
 * listings as JSON — either streamed one business at a time (streamChat) or
 * as one batched response (chat).
 *
 * NOTE(review): this class extends EventEmitter and SearchService listens
 * for a 'progress' event on it, but nothing in this class emits 'progress'
 * — confirm whether that wiring is still intended.
 */
export class DeepSeekService extends EventEmitter {
  private readonly baseUrl: string; // Ollama server root URL
  private readonly model: string;   // model tag to run

  constructor() {
    super();
    this.baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434';
    this.model = process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b';
    console.log('DeepSeekService initialized with:', {
      baseUrl: this.baseUrl,
      model: this.model
    });
  }

  /**
   * Streams listings from the model; invokes `onResult` once per complete
   * JSON business object found in the byte stream. Resolves when the stream
   * ends, rejects on stream error.
   */
  async streamChat(messages: any[], onResult: (business: PartialBusiness) => Promise<void>): Promise<void> {
    try {
      console.log('\nStarting streaming chat request...');
      // Enhanced system prompt with more explicit instructions
      const enhancedMessages = [
        {
          role: "system",
          content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format.
When asked about businesses in a location, return business listings one at a time in this exact JSON format:
\`\`\`json
{
  "name": "Example Plumbing Co",
  "address": "123 Main St, Denver, CO 80202",
  "phone": "(303) 555-0123",
  "description": "Licensed plumbing contractor specializing in residential and commercial services",
  "website": "https://exampleplumbing.com",
  "rating": 4.8
}
\`\`\`
Important rules:
1. Return ONE business at a time in JSON format
2. Generate realistic but fictional business data
3. Use proper formatting for phone numbers and addresses
4. Include ratings from 1-5 stars (can use decimals)
5. When sorting by rating, return highest rated first
6. Make each business unique with different names, addresses, and phone numbers
7. Keep descriptions concise and professional
8. Use realistic website URLs based on business names
9. Return exactly the number of businesses requested`
        },
        ...messages
      ];
      console.log('Sending streaming request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2));
      // NOTE(review): Ollama's /api/chat puts sampling parameters under an
      // `options` object (e.g. options.temperature, options.num_predict);
      // these top-level temperature/max_tokens/system fields may be ignored
      // by the server — verify against the Ollama API docs.
      const response = await axios.post(`${this.baseUrl}/api/chat`, {
        model: this.model,
        messages: enhancedMessages,
        stream: true,
        temperature: 0.7,
        max_tokens: 1000,
        system: "You are a business search assistant that returns one business at a time in JSON format."
      }, {
        responseType: 'stream'
      });
      // Accumulate chunks until a complete business object can be parsed,
      // then reset the buffer and hand the business to the caller.
      let currentJson = '';
      response.data.on('data', async (chunk: Buffer) => {
        const text = chunk.toString();
        currentJson += text;
        // Try to find and process complete JSON objects
        try {
          const business = await this.extractNextBusiness(currentJson);
          if (business) {
            currentJson = ''; // Reset for next business
            await onResult(business);
          }
        } catch (error) {
          // Continue collecting more data if JSON is incomplete
          console.debug('Collecting more data for complete JSON');
        }
      });
      return new Promise((resolve, reject) => {
        response.data.on('end', () => resolve());
        response.data.on('error', (error: Error) => reject(error));
      });
    } catch (error) {
      console.error('\nDeepseek streaming chat error:', error);
      if (error instanceof Error) {
        console.error('Error stack:', error.stack);
        throw new Error(`AI model streaming error: ${error.message}`);
      }
      throw new Error('Failed to get streaming response from AI model');
    }
  }

  /**
   * Pulls the first complete, flat JSON object out of buffered stream text.
   * Returns null when the buffer has no parseable object or required fields
   * (name/address/phone/description) are missing.
   */
  private async extractNextBusiness(text: string): Promise<PartialBusiness | null> {
    // Try to find a complete JSON object (flat — no nested braces expected)
    const jsonMatch = text.match(/\{[^{]*\}/);
    if (!jsonMatch) return null;
    try {
      const jsonStr = jsonMatch[0];
      const business = JSON.parse(jsonStr);
      // Validate required fields
      if (!business.name || !business.address || !business.phone || !business.description) {
        return null;
      }
      return business;
    } catch (e) {
      return null;
    }
  }

  /**
   * Non-streaming variant: sends one request and parses the model's reply
   * into an array of businesses.
   * @throws Error on transport failure, empty reply, or unparseable content.
   */
  async chat(messages: any[]): Promise<any> {
    try {
      console.log('\nStarting chat request...');
      // Enhanced system prompt with more explicit instructions
      const enhancedMessages = [
        {
          role: "system",
          content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format.
When asked about businesses in a location, return business listings in this exact JSON format, with no additional text or comments:
\`\`\`json
[
  {
    "name": "Example Plumbing Co",
    "address": "123 Main St, Denver, CO 80202",
    "phone": "(303) 555-0123",
    "description": "Licensed plumbing contractor specializing in residential and commercial services",
    "website": "https://exampleplumbing.com",
    "rating": 4.8
  }
]
\`\`\`
Important rules:
1. Return ONLY the JSON array inside code blocks - no explanations or comments
2. Generate realistic but fictional business data
3. Use proper formatting for phone numbers (e.g., "(303) 555-XXXX") and addresses
4. Include ratings from 1-5 stars (can use decimals, e.g., 4.8)
5. When sorting by rating, sort from highest to lowest rating
6. When asked for a specific number of results, always return exactly that many
7. Make each business unique with different names, addresses, and phone numbers
8. Keep descriptions concise and professional
9. Use realistic website URLs based on business names`
        },
        ...messages
      ];
      console.log('Sending request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2));
      // NOTE(review): same caveat as streamChat — sampling params likely
      // belong under `options` for Ollama's /api/chat.
      const response = await axios.post(`${this.baseUrl}/api/chat`, {
        model: this.model,
        messages: enhancedMessages,
        stream: false,
        temperature: 0.7,
        max_tokens: 1000,
        system: "You are a business search assistant that always responds with JSON data."
      });
      if (!response.data) {
        throw new Error('Empty response from AI model');
      }
      console.log('\nRaw response data:', JSON.stringify(response.data, null, 2));
      if (!response.data.message?.content) {
        throw new Error('No content in AI model response');
      }
      console.log('\nParsing AI response...');
      const results = await this.sanitizeJsonResponse(response.data.message.content);
      console.log('Parsed results:', JSON.stringify(results, null, 2));
      return results;
    } catch (error) {
      console.error('\nDeepseek chat error:', error);
      if (error instanceof Error) {
        console.error('Error stack:', error.stack);
        throw new Error(`AI model error: ${error.message}`);
      }
      throw new Error('Failed to get response from AI model');
    }
  }

  /**
   * Best-effort JSON extraction from model text, in order of preference:
   * fenced ```json block, then any bare array/object, then regex-based
   * field scraping. Always returns an array.
   * @throws Error when nothing usable can be extracted.
   */
  private async sanitizeJsonResponse(text: string): Promise<PartialBusiness[]> {
    console.log('Attempting to parse response:', text);
    // First try to find JSON blocks
    const jsonBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
    if (jsonBlockMatch) {
      try {
        const jsonStr = jsonBlockMatch[1].trim();
        console.log('Found JSON block:', jsonStr);
        const parsed = JSON.parse(jsonStr);
        return Array.isArray(parsed) ? parsed : [parsed];
      } catch (e) {
        console.error('Failed to parse JSON block:', e);
      }
    }
    // Then try to find any JSON-like structure
    const jsonPatterns = [
      /\[\s*\{[\s\S]*\}\s*\]/,  // Array of objects
      /\{[\s\S]*\}/             // Single object
    ];
    for (const pattern of jsonPatterns) {
      const match = text.match(pattern);
      if (match) {
        try {
          const jsonStr = match[0].trim();
          console.log('Found JSON pattern:', jsonStr);
          const parsed = JSON.parse(jsonStr);
          return Array.isArray(parsed) ? parsed : [parsed];
        } catch (e) {
          console.error('Failed to parse JSON pattern:', e);
          continue;
        }
      }
    }
    // If no valid JSON found, try to extract structured data
    try {
      const extractedData = this.extractBusinessData(text);
      if (extractedData) {
        console.log('Extracted business data:', extractedData);
        return [extractedData];
      }
    } catch (e) {
      console.error('Failed to extract business data:', e);
    }
    throw new Error('No valid JSON or business information found in response');
  }

  /**
   * Last-resort extraction: scrapes "field: value" pairs out of free text,
   * filling placeholders for anything missing.
   */
  private extractBusinessData(text: string): PartialBusiness {
    // Extract business information using regex patterns
    const businessInfo: PartialBusiness = {
      name: this.extractField(text, 'name', '[^"\\n]+') || 'Unknown Business',
      address: this.extractField(text, 'address', '[^"\\n]+') || 'Address not available',
      phone: this.extractField(text, 'phone', '[^"\\n]+') || 'Phone not available',
      description: this.extractField(text, 'description', '[^"\\n]+') || 'No description available'
    };
    const website = this.extractField(text, 'website', '[^"\\n]+');
    if (website) {
      businessInfo.website = website;
    }
    const rating = this.extractField(text, 'rating', '[0-9.]+');
    if (rating) {
      businessInfo.rating = parseFloat(rating);
    }
    return businessInfo;
  }

  // Matches `field: value` or `"field": "value"` (case-insensitive);
  // returns '' when the field is absent.
  private extractField(text: string, field: string, pattern: string): string {
    const regex = new RegExp(`"?${field}"?\\s*[:=]\\s*"?(${pattern})"?`, 'i');
    const match = text.match(regex);
    return match ? match[1].trim() : '';
  }
}

View file

@ -0,0 +1,63 @@
import axios from 'axios';
import { sleep } from '../utils/helpers';
/** Result of a successful Nominatim geocoding lookup. */
interface GeocodingResult {
  lat: number;               // WGS84 latitude
  lng: number;               // WGS84 longitude
  formattedAddress: string;  // Nominatim's display_name for the match
}
/**
 * Address → coordinates via the public Nominatim (OpenStreetMap) API.
 * Lookups are cached in-process and outbound requests are spaced at least
 * one second apart per Nominatim's usage policy.
 */
export class GeocodingService {
  private static cache = new Map<string, GeocodingResult>();
  private static lastRequestTime = 0;
  private static RATE_LIMIT_MS = 1000; // 1 second between requests (Nominatim requirement)

  /** Geocodes one address; returns null when nothing matches or on error. */
  static async geocode(address: string): Promise<GeocodingResult | null> {
    const hit = this.cache.get(address);
    if (hit) return hit;

    try {
      // Sleep off whatever remains of the 1 req/sec window.
      const elapsed = Date.now() - this.lastRequestTime;
      if (elapsed < this.RATE_LIMIT_MS) {
        await sleep(this.RATE_LIMIT_MS - elapsed);
      }
      this.lastRequestTime = Date.now();

      const response = await axios.get(
        'https://nominatim.openstreetmap.org/search',
        {
          params: {
            q: address,
            format: 'json',
            limit: 1,
            addressdetails: 1
          },
          headers: {
            'User-Agent': 'BusinessFinder/1.0'
          }
        }
      );

      if (!(response.data?.length > 0)) {
        return null;
      }
      const first = response.data[0];
      const geocoded = {
        lat: parseFloat(first.lat),
        lng: parseFloat(first.lon),
        formattedAddress: first.display_name
      };
      this.cache.set(address, geocoded);
      return geocoded;
    } catch (error) {
      console.error('Geocoding error:', error);
      return null;
    }
  }
}

View file

@ -0,0 +1,40 @@
import axios from 'axios';
import { supabase } from '../supabase';
import { env } from '../../config/env';
/** Liveness probes for the app's two external dependencies. */
export class HealthCheckService {
  /** True when a trivial Supabase query succeeds. */
  private static async checkSupabase(): Promise<boolean> {
    try {
      const { error } = await supabase.from('searches').select('count');
      return !error;
    } catch (error) {
      console.error('Supabase health check failed:', error);
      return false;
    }
  }

  /** True when the SearxNG endpoint answers with HTTP 200. */
  private static async checkSearx(): Promise<boolean> {
    try {
      const response = await axios.get(env.SEARXNG_URL);
      return response.status === 200;
    } catch (error) {
      console.error('SearxNG health check failed:', error);
      return false;
    }
  }

  /** Runs both probes concurrently and reports each result. */
  public static async checkHealth(): Promise<{
    supabase: boolean;
    searx: boolean;
  }> {
    const [supabaseUp, searxUp] = await Promise.all([
      this.checkSupabase(),
      this.checkSearx()
    ]);
    return {
      supabase: supabaseUp,
      searx: searxUp
    };
  }
}

View file

@ -0,0 +1,45 @@
import axios from 'axios';
import { env } from '../../config/env';
/** Minimal static client for a local Ollama server (completion + chat). */
export class OllamaService {
  private static readonly baseUrl = env.ollama.url;
  private static readonly model = env.ollama.model;

  /** One-shot, non-streaming text completion. Rethrows after logging. */
  static async complete(prompt: string): Promise<string> {
    try {
      const { data } = await axios.post(`${this.baseUrl}/api/generate`, {
        model: this.model,
        prompt: prompt,
        stream: false
      });
      if (!data?.response) {
        throw new Error('No response from Ollama');
      }
      return data.response;
    } catch (error) {
      console.error('Ollama error:', error);
      throw error;
    }
  }

  /** Non-streaming chat completion over a message history. Rethrows after logging. */
  static async chat(messages: { role: 'user' | 'assistant'; content: string }[]): Promise<string> {
    try {
      const { data } = await axios.post(`${this.baseUrl}/api/chat`, {
        model: this.model,
        messages: messages,
        stream: false
      });
      if (!data?.message?.content) {
        throw new Error('No response from Ollama chat');
      }
      return data.message.content;
    } catch (error) {
      console.error('Ollama chat error:', error);
      throw error;
    }
  }
}

View file

@ -0,0 +1,135 @@
import EventEmitter from 'events';
import { DeepSeekService } from './deepseekService';
import { DatabaseService } from './databaseService';
import { Business } from '../types';
/** Business fields produced by the model before persistence assigns an id. */
interface PartialBusiness {
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  rating?: number;   // 1-5 stars; may be fractional
  source?: string;   // origin tag, e.g. 'deepseek'
  location?: {       // WGS84 coordinates
    lat: number;
    lng: number;
  };
}
/**
 * Orchestrates business search: serves cached rows from the database when
 * available, otherwise asks the DeepSeek service to generate listings,
 * persists them, and returns/emits them sorted by rating (descending).
 *
 * Events: 'result' (one saved Business), 'complete', 'error', plus relayed
 * 'progress' events from the underlying DeepSeekService.
 */
export class SearchService extends EventEmitter {
  private deepseekService: DeepSeekService;
  private databaseService: DatabaseService;

  constructor() {
    super();
    this.deepseekService = new DeepSeekService();
    this.databaseService = new DatabaseService();
    // Relay model progress to our own listeners.
    this.deepseekService.on('progress', (data) => {
      this.emit('progress', data);
    });
  }

  /**
   * Streaming variant: emits 'result' per business and 'complete' at the
   * end. Cached hits are emitted with a small delay to simulate streaming.
   */
  async streamSearch(query: string, location: string, limit: number = 10): Promise<void> {
    try {
      // First, try to find cached results in database
      const cachedResults = await this.databaseService.findBusinessesByQuery(query, location);
      if (cachedResults.length > 0) {
        // Emit cached results one by one
        for (const result of this.sortByRating(cachedResults).slice(0, limit)) {
          this.emit('result', result);
          await new Promise(resolve => setTimeout(resolve, 100)); // Small delay between results
        }
        this.emit('complete');
        return;
      }
      // If no cached results, use DeepSeek to generate new results.
      // FIX: streamChat resolves to void, so its result is no longer bound
      // to an unused variable.
      await this.deepseekService.streamChat([{
        role: "user",
        content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON.`
      }], async (business: PartialBusiness) => {
        try {
          // Extract lat/lng from address using a geocoding service
          const coords = await this.geocodeAddress(business.address);
          // Save to database and emit result
          const savedBusiness = await this.databaseService.saveBusiness({
            ...business,
            source: 'deepseek',
            location: coords || {
              lat: 39.7392, // Denver's default coordinates
              lng: -104.9903
            }
          });
          this.emit('result', savedBusiness);
        } catch (error) {
          console.error('Error processing business:', error);
          this.emit('error', error);
        }
      });
      this.emit('complete');
    } catch (error) {
      console.error('Search error:', error);
      this.emit('error', error);
      throw error;
    }
  }

  /** Batch variant: returns up to `limit` cached rows, or freshly generated ones. */
  async search(query: string, location: string, limit: number = 10): Promise<Business[]> {
    try {
      // First, try to find cached results in database
      const cachedResults = await this.databaseService.findBusinessesByQuery(query, location);
      if (cachedResults.length > 0) {
        return this.sortByRating(cachedResults).slice(0, limit);
      }
      // If no cached results, use DeepSeek to generate new results
      const aiResults = await this.deepseekService.chat([{
        role: "user",
        content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON.`
      }]);
      // Persist every generated business (geocoding falls back to Denver).
      const savedResults = await Promise.all(
        (aiResults as PartialBusiness[]).map(async (business: PartialBusiness) => {
          const coords = await this.geocodeAddress(business.address);
          return this.databaseService.saveBusiness({
            ...business,
            source: 'deepseek',
            location: coords || {
              lat: 39.7392, // Denver's default coordinates
              lng: -104.9903
            }
          });
        })
      );
      return this.sortByRating(savedResults);
    } catch (error) {
      console.error('Search error:', error);
      throw error;
    }
  }

  /**
   * Returns a NEW array sorted by rating, highest first.
   * FIX: previously called .sort() on the caller's array, mutating it in
   * place as a hidden side effect.
   */
  private sortByRating(businesses: Business[]): Business[] {
    return [...businesses].sort((a, b) => b.rating - a.rating);
  }

  /** TODO: real geocoding; null means "use the default coordinates". */
  private async geocodeAddress(address: string): Promise<{ lat: number; lng: number } | null> {
    return null;
  }

  /** Looks up one business by primary key. */
  async getBusinessById(id: string): Promise<Business | null> {
    return this.databaseService.getBusinessById(id);
  }
}

View file

@ -0,0 +1,93 @@
import { createClient } from '@supabase/supabase-js';
import { env } from '../../config/env';
import { BusinessData } from '../searxng';
/**
 * Persistence layer that mirrors scraped business records into the
 * Supabase `businesses` table.
 */
export class SupabaseService {
  // Supabase client handle; created once per service instance.
  private supabase;

  constructor() {
    // FIX: config/env exports flat `SUPABASE_URL` / `SUPABASE_KEY` fields
    // (see src/config/env.ts and src/lib/supabase.ts); the previous
    // `env.supabase.url` / `env.supabase.anonKey` accessors do not exist on
    // that object, so createClient received `undefined` and failed at startup.
    this.supabase = createClient(env.SUPABASE_URL, env.SUPABASE_KEY);
  }

  /**
   * Upsert a batch of businesses, one row per deterministic identifier
   * derived from name / phone / address / website.
   *
   * Existing rows keep the higher of the two ratings and have their
   * `search_count` incremented. Per-row failures are logged and skipped so a
   * single bad record cannot abort the batch; only batch-level failures are
   * rethrown.
   */
  async upsertBusinesses(businesses: BusinessData[]): Promise<void> {
    try {
      console.log('Upserting businesses to Supabase:', businesses.length);

      for (const business of businesses) {
        try {
          // Build a stable id from quasi-unique fields so the same business
          // scraped twice collapses onto one row.
          const identifier = [
            business.name.toLowerCase(),
            business.phone?.replace(/\D/g, ''),
            business.address?.toLowerCase(),
            business.website?.toLowerCase()
          ]
            .filter(Boolean) // Remove empty values
            .join('_') // Join with underscore
            .replace(/[^a-z0-9]/g, '_'); // Replace non-alphanumeric chars

          // Log the data being inserted
          console.log('Upserting business:', {
            id: identifier,
            name: business.name,
            phone: business.phone,
            email: business.email,
            address: business.address,
            rating: business.rating,
            website: business.website,
            location: business.location
          });

          // Fetch the existing row (if any) so rating/search_count merge.
          const { data: existing, error: selectError } = await this.supabase
            .from('businesses')
            .select('rating, search_count')
            .eq('id', identifier)
            .single();

          // PGRST116 = "no rows returned" — expected for a new business.
          if (selectError && selectError.code !== 'PGRST116') {
            console.error('Error checking existing business:', selectError);
          }

          // Prepare upsert data
          const upsertData = {
            id: identifier,
            name: business.name,
            phone: business.phone || null,
            email: business.email || null,
            address: business.address || null,
            // Never downgrade a rating we already have.
            rating: existing ? Math.max(business.rating, existing.rating) : business.rating,
            website: business.website || null,
            logo: business.logo || null,
            source: business.source || null,
            description: business.description || null,
            latitude: business.location?.lat || null,
            longitude: business.location?.lng || null,
            last_updated: new Date().toISOString(),
            search_count: existing ? existing.search_count + 1 : 1
          };

          console.log('Upserting with data:', upsertData);

          const { error: upsertError } = await this.supabase
            .from('businesses')
            .upsert(upsertData, {
              onConflict: 'id'
            });

          if (upsertError) {
            console.error('Error upserting business:', upsertError);
            console.error('Failed business data:', upsertData);
          } else {
            console.log(`Successfully upserted business: ${business.name}`);
          }
        } catch (businessError) {
          console.error('Error processing business:', business.name, businessError);
        }
      }
    } catch (error) {
      console.error('Error saving businesses to Supabase:', error);
      throw error;
    }
  }
}

35
src/lib/supabase.ts Normal file
View file

@ -0,0 +1,35 @@
import { createClient } from '@supabase/supabase-js';
import { env } from '../config/env';
// Fail fast at module load when credentials are absent — every downstream
// query would fail anyway, and an early throw gives a clearer error.
if (!env.SUPABASE_URL || !env.SUPABASE_KEY) {
  throw new Error('Missing Supabase configuration');
}

// Shared Supabase client for the whole process.
export const supabase = createClient(
  env.SUPABASE_URL,
  env.SUPABASE_KEY,
  {
    auth: {
      autoRefreshToken: true, // refresh JWTs automatically before expiry
      persistSession: true, // keep the session across restarts/reloads
      detectSessionInUrl: true // pick up OAuth redirect fragments
    }
  }
);
// Test connection function
/**
 * Lightweight connectivity probe: runs a trivial query against the
 * `searches` table and reports success/failure without throwing.
 */
export async function testConnection() {
  try {
    console.log('Testing Supabase connection...');
    console.log('URL:', env.SUPABASE_URL);
    const { data, error } = await supabase.from('searches').select('count');
    if (error) throw error;
    console.log('Supabase connection successful');
    return true;
  } catch (err) {
    console.error('Supabase connection failed:', err);
    return false;
  }
}

16
src/lib/types.ts Normal file
View file

@ -0,0 +1,16 @@
/**
 * Canonical business record shared across search, scraping and storage.
 *
 * Core contact fields are required; enrichment fields gathered
 * opportunistically by the scrapers are optional. The optional members were
 * previously missing even though consumers such as
 * `calculateReliabilityScore()` read `email`, `hours`, `services` and
 * `reviewCount` from this type.
 */
export interface Business {
  id: string;
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  source: string;
  rating: number;
  /** Geographic coordinates (WGS84). */
  location: {
    lat: number;
    lng: number;
  };
  /** Optional enrichment data — populated best-effort by scrapers. */
  email?: string;
  logo?: string;
  hours?: string[];
  services?: string[];
  reviewCount?: number;
}

/** Alias kept for backward compatibility with older imports. */
export type BusinessData = Business;

39
src/lib/utils.ts Normal file
View file

@ -0,0 +1,39 @@
import crypto from 'crypto';
/**
 * Loose shape accepted when deriving a stable business id; scraped results
 * use title/url while stored records use name/website.
 */
interface BusinessIdentifier {
  title?: string;
  name?: string;
  phone?: string;
  address?: string;
  url?: string;
  website?: string;
}

/**
 * Derive a deterministic id for a business by hashing its identifying
 * fields; records sharing name/phone/address/website map to the same id.
 */
export function generateBusinessId(business: BusinessIdentifier): string {
  const label = business.title || business.name;
  const link = business.url || business.website;
  const fingerprint = [label, business.phone, business.address, link]
    .filter(Boolean)
    .join('|');
  const digest = crypto.createHash('md5').update(fingerprint).digest('hex');
  return `hash_${digest}`;
}
/**
 * Extract a Google Maps place id (the "0x<hex>:0x<hex>" pair) from a URL.
 *
 * Matches URLs like:
 *   https://www.google.com/maps/place/.../data=!3m1!...!1s0x876c7ed0cb78d6d3:0x2cd0c4490736f7c!8m2!
 *   https://maps.google.com/maps?q=...&ftid=0x876c7ed0cb78d6d3:0x2cd0c4490736f7c
 *
 * FIX: the previous pattern required exactly 16 hex chars immediately after
 * '!' or '/', but real ids carry a "0x" prefix and are preceded by "1s" or
 * "=", so it never matched its own documented examples and always returned
 * null.
 *
 * @returns The place id, or null when the URL contains none.
 */
export function extractPlaceIdFromUrl(url: string): string | null {
  try {
    const placeIdRegex = /(0x[0-9a-f]+:0x[0-9a-f]+)/i;
    const match = url.match(placeIdRegex);
    return match ? match[1] : null;
  } catch (error) {
    console.warn('Error extracting place ID from URL:', error);
    return null;
  }
}

36
src/lib/utils/cache.ts Normal file
View file

@ -0,0 +1,36 @@
/** A cached value together with the time it was stored. */
interface CacheItem<T> {
  data: T;
  timestamp: number;
}

/**
 * Minimal in-memory TTL cache. Expiry is lazy: a stale entry is discarded
 * the next time it is read.
 */
export class Cache<T> {
  private store = new Map<string, CacheItem<T>>();
  private ttl: number;

  constructor(ttlMinutes: number = 60) {
    // Convert minutes to the millisecond resolution of Date.now().
    this.ttl = ttlMinutes * 60 * 1000;
  }

  /** Store `value` under `key`, stamped with the current time. */
  set(key: string, value: T): void {
    this.store.set(key, { data: value, timestamp: Date.now() });
  }

  /** Return the cached value, or null when absent or older than the TTL. */
  get(key: string): T | null {
    const entry = this.store.get(key);
    if (entry === undefined) {
      return null;
    }
    const age = Date.now() - entry.timestamp;
    if (age > this.ttl) {
      this.store.delete(key);
      return null;
    }
    return entry.data;
  }

  /** Drop every entry. */
  clear(): void {
    this.store.clear();
  }
}

View file

@ -0,0 +1,67 @@
import { Business } from '../types';
/** Strip every non-digit character from a phone number. */
export function normalizePhoneNumber(phone: string): string {
  return phone
    .split('')
    .filter(ch => ch >= '0' && ch <= '9')
    .join('');
}
/**
 * Lower-case an address and drop common street-type words so
 * "123 Main St." and "123 main street" normalize identically.
 *
 * FIX: the previous pattern had no word boundaries, so it also mangled
 * words merely containing "st"/"ave"/"rd" (e.g. "Castle" -> "Cale"), and it
 * left double spaces behind after removal.
 */
export function normalizeAddress(address: string): string {
  return address
    .toLowerCase()
    .replace(/\b(street|st|avenue|ave|road|rd)\b\.?/g, '')
    .replace(/\s+/g, ' ') // collapse gaps left by removed words
    .trim();
}
/** First US ZIP (5-digit, optionally ZIP+4) found in `text`, or null. */
export function extractZipCode(text: string): string | null {
  const hit = /\b\d{5}(?:-\d{4})?\b/.exec(text);
  return hit === null ? null : hit[0];
}
/**
 * Heuristic 0-9 completeness score: richer records (phone, website, email,
 * hours, services, a meaningful review count) score higher.
 */
export function calculateReliabilityScore(business: Business): number {
  const checks: Array<[boolean, number]> = [
    [Boolean(business.phone), 2],
    [Boolean(business.website), 1],
    [Boolean(business.email), 1],
    [Boolean(business.hours?.length), 2],
    [Boolean(business.services && business.services.length > 0), 1],
    [Boolean(business.reviewCount && business.reviewCount > 10), 2],
  ];
  return checks.reduce((total, [passed, points]) => (passed ? total + points : total), 0);
}
/** Strip LLM-style preambles ("Sure! Here is the address: ...") and newlines. */
export function cleanAddress(address: string): string {
  const preamble = /^(Sure!|Here is |The business address( is| found in the text is)?:?\n?\s*)/i;
  const flattened = address.replace(preamble, '').replace(/\n/g, ' ');
  return flattened.trim();
}
/**
 * Format a 10-digit US number as (XXX) XXX-XXXX; any other digit count is
 * returned unchanged.
 */
export function formatPhoneNumber(phone: string): string {
  const digits = phone.replace(/\D/g, '');
  if (digits.length !== 10) {
    // Not a plain 10-digit number — leave the caller's formatting alone.
    return phone;
  }
  const area = digits.slice(0, 3);
  const prefix = digits.slice(3, 6);
  const line = digits.slice(6);
  return `(${area}) ${prefix}-${line}`;
}
/** Remove phone-number fragments that scrapers sometimes glue onto emails. */
export function cleanEmail(email: string): string {
  let cleaned = email.replace(/\d{3}-\d{4}/, '');
  cleaned = cleaned.replace(/\d{10}/, '');
  return cleaned.trim();
}
/** Drop leading "Description:"/"About:"/"Info:" labels and collapse whitespace. */
export function cleanDescription(description: string): string {
  const withoutLabel = description.replace(/^(Description:|About:|Info:)/i, '');
  return withoutLabel.replace(/\s+/g, ' ').trim();
}

18
src/lib/utils/helpers.ts Normal file
View file

@ -0,0 +1,18 @@
/** Promise-based delay of `ms` milliseconds. */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => {
    setTimeout(resolve, ms);
  });
}
/** Collapse whitespace and strip everything but word chars, '-', '.', ','. */
export function cleanText(text: string): string {
  const collapsed = text.replace(/\s+/g, ' ');
  const stripped = collapsed.replace(/[^\w\s-.,]/g, '');
  return stripped.trim();
}
/** True when `phone` looks like a dialable number (>= 10 phone-ish chars). */
export function isValidPhone(phone: string): boolean {
  const shape = /^\+?[\d-.()\s]{10,}$/;
  return shape.test(phone);
}
/** Rough email shape check: one '@', no whitespace, a dotted domain. */
export function isValidEmail(email: string): boolean {
  const shape = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
  return shape.test(email);
}

View file

@ -0,0 +1,23 @@
/**
 * Sliding-window rate limiter: at most `maxRequests` calls per `windowMs`
 * milliseconds, delaying callers once the window is full.
 */
export class RateLimiter {
  private timestamps: number[] = [];
  private readonly windowMs: number;
  private readonly maxRequests: number;

  constructor(windowMs: number = 60000, maxRequests: number = 30) {
    this.windowMs = windowMs;
    this.maxRequests = maxRequests;
  }

  /**
   * Resolve as soon as a request slot is free, sleeping exactly long enough
   * for the oldest in-window request to age out.
   */
  async waitForSlot(): Promise<void> {
    const now = Date.now();
    // Keep only timestamps that are still inside the sliding window.
    this.timestamps = this.timestamps.filter(t => now - t < this.windowMs);
    if (this.timestamps.length >= this.maxRequests) {
      const oldest = this.timestamps[0];
      const delay = this.windowMs - (now - oldest);
      await new Promise<void>(resolve => setTimeout(resolve, delay));
    }
    this.timestamps.push(now);
  }
}

168
src/lib/utils/scraper.ts Normal file
View file

@ -0,0 +1,168 @@
import axios from 'axios';
import * as cheerio from 'cheerio';
import { OllamaService } from '../services/ollamaService';
import { sleep } from './helpers';
// Minimum spacing between outbound scrape requests.
const RATE_LIMIT_MS = 1000; // 1 second between requests
// Timestamp of the most recent request (module-wide, shared by all callers).
let lastRequestTime = 0;

/**
 * GET `url` with a polite crawler UA, enforcing a global gap of at least
 * RATE_LIMIT_MS between consecutive requests.
 */
async function rateLimitedRequest(url: string) {
  const sinceLast = Date.now() - lastRequestTime;
  if (sinceLast < RATE_LIMIT_MS) {
    // Too soon after the previous request: wait out the remainder.
    await sleep(RATE_LIMIT_MS - sinceLast);
  }
  lastRequestTime = Date.now();

  const headers = {
    'User-Agent': 'Mozilla/5.0 (compatible; BusinessFinder/1.0; +http://example.com/bot)',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5'
  };
  return axios.get(url, { timeout: 5000, headers });
}
/** Contact details scraped from a business web page; every field is best-effort. */
export interface ContactInfo {
  phone?: string;
  email?: string;
  address?: string;
  description?: string;
  openingHours?: string[]; // free-form strings as found on the page
}
/**
 * Fetch `url` (rate limited) and pull contact details out of the HTML.
 *
 * Prefers schema.org JSON-LD (LocalBusiness / Restaurant) when present,
 * otherwise falls back to regex/selector scraping of the raw page. Returns
 * an empty object on any fetch or parse failure rather than throwing.
 */
export async function extractContactFromHtml(url: string): Promise<ContactInfo> {
  try {
    const response = await rateLimitedRequest(url);
    const $ = cheerio.load(response.data);

    // Collect every JSON-LD block that parses cleanly; ignore broken ones.
    const structuredData = $('script[type="application/ld+json"]')
      .map((_, el) => {
        try {
          return JSON.parse($(el).html() || '');
        } catch {
          return null;
        }
      })
      .get()
      .filter(Boolean);

    // Look for LocalBusiness or Restaurant schema
    const businessData = structuredData.find(data =>
      data['@type'] === 'LocalBusiness' ||
      data['@type'] === 'Restaurant'
    );

    if (businessData) {
      // Structured data wins: it is explicit and far less noisy than text.
      return {
        phone: businessData.telephone,
        email: businessData.email,
        address: businessData.address?.streetAddress,
        description: businessData.description,
        openingHours: businessData.openingHours
      };
    }

    // Fallback to regular HTML parsing
    return {
      phone: findPhone($),
      email: findEmail($),
      address: findAddress($),
      description: $('meta[name="description"]').attr('content'),
      openingHours: findOpeningHours($)
    };
  } catch (error) {
    // Network errors, timeouts and malformed HTML all land here.
    console.warn(`Error extracting contact info from ${url}:`, error);
    return {};
  }
}
/**
 * Ask the local LLM to pull a street address out of free text.
 *
 * @param text     Arbitrary scraped text that may contain an address.
 * @param location City/region hint the business should be in or near.
 * @returns The trimmed model output, or '' when nothing usable is found or
 *          the model call fails.
 */
export async function extractCleanAddress(text: string, location: string): Promise<string> {
  try {
    // FIX: `OllamaService.complete` is called statically; the previous code
    // also constructed an OllamaService instance that was never used.
    const prompt = `
      Extract a business address from this text. The business should be in or near ${location}.
      Only return the address, nothing else. If no valid address is found, return an empty string.
      Text: ${text}
    `;
    const response = await OllamaService.complete(prompt);
    return response.trim();
  } catch (error) {
    console.warn('Error extracting address:', error);
    return '';
  }
}
// Helper functions
/** Scan the page's full text for a US phone number; undefined when none. */
function findPhone($: cheerio.CheerioAPI): string | undefined {
  const pageText = $.text();
  // Bare (303) 555-0123-style numbers, then labelled "Phone:" variants.
  const patterns = [
    /\b\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})\b/,
    /\b(?:Phone|Tel|Contact):\s*([0-9-().+ ]{10,})\b/i
  ];
  for (const pattern of patterns) {
    const hit = pageText.match(pattern);
    if (hit) {
      return hit[0];
    }
  }
  return undefined;
}
/** First email-looking token in the page text, if any. */
function findEmail($: cheerio.CheerioAPI): string | undefined {
  const hit = $.text().match(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/);
  return hit?.[0];
}
/** Text of the first address-like element on the page, if any. */
function findAddress($: cheerio.CheerioAPI): string | undefined {
  // Ordered roughly from most to least semantic markup.
  const candidates = [
    'address',
    '[itemtype="http://schema.org/PostalAddress"]',
    '.address',
    '#address',
    '[class*="address"]',
    '[id*="address"]'
  ];
  const selector = candidates.find(sel => $(sel).first().length > 0);
  return selector ? $(selector).first().text().trim() : undefined;
}
/** Collect de-duplicated text snippets from hours-related page elements. */
function findOpeningHours($: cheerio.CheerioAPI): string[] {
  const hoursSelectors = [
    '[itemtype="http://schema.org/OpeningHoursSpecification"]',
    '.hours',
    '#hours',
    '[class*="hours"]',
    '[id*="hours"]'
  ];
  const collected: string[] = [];
  for (const selector of hoursSelectors) {
    const root = $(selector).first();
    if (!root.length) {
      continue;
    }
    root.find('*').each((_, child) => {
      const entry = $(child).text().trim();
      if (entry && !collected.includes(entry)) {
        collected.push(entry);
      }
    });
  }
  return collected;
}

View file

@ -0,0 +1,119 @@
import * as cheerio from 'cheerio';
/** Normalised organisation data extracted from JSON-LD / microdata / RDFa. */
interface StructuredData {
  name?: string;
  email?: string;
  phone?: string;
  address?: string; // single-line, comma-joined
  socialProfiles?: string[]; // sameAs / social links
  openingHours?: Record<string, string>; // day -> "HH:MM-HH:MM"
  description?: string;
}
/**
 * Extracts business/organisation data from the three common structured
 * markup flavours on a page: JSON-LD, microdata and RDFa.
 *
 * FIX: the original called extractMicrodataAddress, extractRdfaAddress and
 * extractSocialProfiles but never defined them (they were elided behind a
 * placeholder comment), so the class did not compile. Minimal
 * implementations are provided below.
 */
export class StructuredDataParser {
  /** Parse every recognised structured-data item on the page. */
  static parse($: cheerio.CheerioAPI): StructuredData[] {
    const results: StructuredData[] = [];

    // Parse JSON-LD <script> blocks; a block may hold one item or an array.
    $('script[type="application/ld+json"]').each((_, element) => {
      try {
        const data = JSON.parse($(element).html() || '{}');
        if (Array.isArray(data)) {
          data.forEach(item => this.parseStructuredItem(item, results));
        } else {
          this.parseStructuredItem(data, results);
        }
      } catch (e) {
        console.error('Error parsing JSON-LD:', e);
      }
    });

    // Parse microdata (itemtype/itemprop attributes).
    $('[itemtype]').each((_, element) => {
      const type = $(element).attr('itemtype');
      if (type?.includes('Organization') || type?.includes('LocalBusiness')) {
        const data: StructuredData = {
          name: $('[itemprop="name"]', element).text(),
          email: $('[itemprop="email"]', element).text(),
          phone: $('[itemprop="telephone"]', element).text(),
          address: this.extractMicrodataAddress($, element),
          socialProfiles: this.extractSocialProfiles($, element)
        };
        results.push(data);
      }
    });

    // Parse RDFa (typeof/property attributes).
    $('[typeof="Organization"], [typeof="LocalBusiness"]').each((_, element) => {
      const data: StructuredData = {
        name: $('[property="name"]', element).text(),
        email: $('[property="email"]', element).text(),
        phone: $('[property="telephone"]', element).text(),
        address: this.extractRdfaAddress($, element),
        socialProfiles: this.extractSocialProfiles($, element)
      };
      results.push(data);
    });

    return results;
  }

  /** Append a JSON-LD item to `results` when it describes a business. */
  private static parseStructuredItem(data: any, results: StructuredData[]): void {
    if (data['@type'] === 'Organization' || data['@type'] === 'LocalBusiness') {
      results.push({
        name: data.name,
        email: data.email,
        phone: data.telephone,
        address: this.formatAddress(data.address),
        socialProfiles: this.extractSocialUrls(data),
        openingHours: this.parseOpeningHours(data.openingHours),
        description: data.description
      });
    }
  }

  /** Normalise a schema.org PostalAddress (string or object) to one line. */
  private static formatAddress(address: any): string | undefined {
    if (typeof address === 'string') return address;
    // Guard against null: typeof null === 'object', and the previous code
    // would have thrown on property access.
    if (address && typeof address === 'object') {
      const parts = [
        address.streetAddress,
        address.addressLocality,
        address.addressRegion,
        address.postalCode,
        address.addressCountry
      ].filter(Boolean);
      return parts.join(', ');
    }
    return undefined;
  }

  /** Collect `sameAs` social links from a JSON-LD item. */
  private static extractSocialUrls(data: any): string[] {
    const urls: string[] = [];
    if (data.sameAs) {
      if (Array.isArray(data.sameAs)) {
        urls.push(...data.sameAs);
      } else if (typeof data.sameAs === 'string') {
        urls.push(data.sameAs);
      }
    }
    return urls;
  }

  /** Parse "Mon 09:00-17:00"-style specs into a day -> range map. */
  private static parseOpeningHours(hours: any): Record<string, string> | undefined {
    if (!hours) return undefined;
    if (Array.isArray(hours)) {
      const schedule: Record<string, string> = {};
      hours.forEach(spec => {
        if (typeof spec !== 'string') return; // tolerate malformed entries
        const match = spec.match(/^(\w+)(-\w+)?\s+(\d\d:\d\d)-(\d\d:\d\d)$/);
        if (match) {
          schedule[match[1]] = `${match[3]}-${match[4]}`;
        }
      });
      return schedule;
    }
    return undefined;
  }

  /**
   * Microdata address: prefer the nested PostalAddress parts, falling back
   * to the raw text of the address node. (Previously missing.)
   */
  private static extractMicrodataAddress($: cheerio.CheerioAPI, element: any): string | undefined {
    const node = $('[itemprop="address"]', element).first();
    if (!node.length) return undefined;
    const parts = [
      $('[itemprop="streetAddress"]', node).text(),
      $('[itemprop="addressLocality"]', node).text(),
      $('[itemprop="addressRegion"]', node).text(),
      $('[itemprop="postalCode"]', node).text()
    ]
      .map(part => part.trim())
      .filter(Boolean);
    if (parts.length) return parts.join(', ');
    const raw = node.text().trim();
    return raw || undefined;
  }

  /** RDFa address via property="address". (Previously missing.) */
  private static extractRdfaAddress($: cheerio.CheerioAPI, element: any): string | undefined {
    const node = $('[property="address"]', element).first();
    if (!node.length) return undefined;
    const raw = node.text().trim();
    return raw || undefined;
  }

  /**
   * Links under `element` that point at well-known social networks.
   * (Previously missing.)
   */
  private static extractSocialProfiles($: cheerio.CheerioAPI, element: any): string[] {
    const socialHosts = [
      'facebook.com', 'instagram.com', 'twitter.com', 'x.com',
      'linkedin.com', 'youtube.com', 'tiktok.com'
    ];
    const profiles: string[] = [];
    $('a[href]', element).each((_, anchor) => {
      const href = $(anchor).attr('href');
      if (href && socialHosts.some(host => href.includes(host)) && !profiles.includes(href)) {
        profiles.push(href);
      }
    });
    return profiles;
  }
}

47
src/middleware/auth.ts Normal file
View file

@ -0,0 +1,47 @@
import { Request, Response, NextFunction } from 'express';
import { supabase } from '../lib/supabase';
// Extend Express Request type to include user
// Augment Express's Request with the authenticated user attached by
// authenticateUser() below.
declare global {
  namespace Express {
    interface Request {
      user?: {
        id: string; // Supabase auth user id
        email: string;
        role: string; // app_metadata role; defaults to 'user'
      };
    }
  }
}
/**
 * Express middleware that validates a Supabase bearer token and attaches
 * the resolved user (id, email, role) to `req.user`.
 *
 * Responds 401 when the header is missing, the token is rejected, or the
 * lookup itself throws.
 */
export async function authenticateUser(
  req: Request,
  res: Response,
  next: NextFunction
) {
  try {
    const authHeader = req.headers.authorization;
    if (!authHeader) {
      return res.status(401).json({ error: 'No authorization header' });
    }

    // Accept "Bearer <token>"; a bare token passes through unchanged.
    const token = authHeader.replace('Bearer ', '');
    const result = await supabase.auth.getUser(token);
    const user = result.data.user;
    if (result.error || !user) {
      return res.status(401).json({ error: 'Invalid token' });
    }

    req.user = {
      id: user.id,
      email: user.email!,
      role: (user.app_metadata?.role as string) || 'user'
    };
    next();
  } catch (error) {
    console.error('Authentication error:', error);
    res.status(401).json({ error: 'Authentication failed' });
  }
}

148
src/routes/api.ts Normal file
View file

@ -0,0 +1,148 @@
import express from 'express';
import { SearchService } from '../lib/services/searchService';
import { Business } from '../lib/types';
const router = express.Router();
const searchService = new SearchService();
// Error-handling middleware: turn body-parser JSON syntax errors into a 400
// response; forward every other error downstream.
router.use((err: Error, req: express.Request, res: express.Response, next: express.NextFunction) => {
  if (err instanceof SyntaxError && 'body' in err) {
    return res.status(400).json({
      success: false,
      error: 'Invalid JSON'
    });
  }
  // FIX: the previous code called next() without the error, which silently
  // dropped it and resumed the normal middleware chain.
  next(err);
});
// Static list of supported business categories.
router.get('/categories', (req, res) => {
  const categories = [
    'Restaurant', 'Retail', 'Service', 'Healthcare', 'Professional',
    'Entertainment', 'Education', 'Technology', 'Manufacturing',
    'Construction', 'Transportation', 'Real Estate', 'Financial',
    'Legal', 'Other'
  ];
  res.json(categories);
});
// Server-sent-events search endpoint: streams results and progress as they
// arrive instead of waiting for the full search to finish.
router.post('/search/stream', (req, res) => {
  const { query, location } = req.body;

  if (!query || !location) {
    return res.status(400).json({
      success: false,
      error: 'Query and location are required'
    });
  }

  // Set headers for SSE
  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');

  // Send initial message
  res.write('data: {"type":"start","message":"Starting search..."}\n\n');

  // Create search service instance for this request
  const search = new SearchService();

  // FIX: guard all writes so we never touch a finished/aborted response.
  // The previous version kept writing after errors and client disconnects,
  // and could call res.end() twice.
  let done = false;
  const finish = () => {
    if (!done) {
      done = true;
      res.end();
    }
  };
  const send = (payload: string) => {
    if (!done) {
      res.write(payload);
    }
  };

  // Listen for individual results
  search.on('result', (business: Business) => {
    send(`data: {"type":"result","business":${JSON.stringify(business)}}\n\n`);
  });

  // Listen for progress updates
  search.on('progress', (data: any) => {
    send(`data: {"type":"progress","data":${JSON.stringify(data)}}\n\n`);
  });

  // Listen for completion
  search.on('complete', () => {
    send('data: {"type":"complete","message":"Search complete"}\n\n');
    finish();
  });

  // Listen for errors
  search.on('error', (error: Error) => {
    send(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`);
    finish();
  });

  // Stop emitting when the client goes away.
  req.on('close', finish);

  // Start the search
  search.streamSearch(query, location).catch(error => {
    console.error('Search error:', error);
    send(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`);
    finish();
  });
});
// Non-streaming search: run the full query and return all results at once.
router.post('/search', async (req, res) => {
  const { query, location } = req.body;

  if (!query || !location) {
    return res.status(400).json({
      success: false,
      error: 'Query and location are required'
    });
  }

  try {
    const results = await searchService.search(query, location);
    res.json({ success: true, results });
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'An error occurred during search';
    console.error('Search error:', error);
    res.status(500).json({ success: false, error: errorMessage });
  }
});
// Look up a single business by its id.
router.get('/business/:id', async (req, res) => {
  const { id } = req.params;

  try {
    const business = await searchService.getBusinessById(id);
    if (!business) {
      return res.status(404).json({
        success: false,
        error: 'Business not found'
      });
    }
    res.json({ success: true, business });
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Failed to fetch business details';
    console.error('Error fetching business:', error);
    res.status(500).json({ success: false, error: errorMessage });
  }
});
export default router;

413
src/routes/business.ts Normal file
View file

@ -0,0 +1,413 @@
import { Router } from 'express';
import { z } from 'zod';
import { supabase } from '../lib/supabase';
import { authenticateUser } from '../middleware/auth';
const router = Router();
// Initialize database tables
/**
 * Idempotent schema bootstrap: creates the businesses / business_profiles /
 * business_claims tables (plus indexes and RLS policies) when missing, then
 * seeds one test business and profile.
 *
 * Missing tables are detected by probing with a SELECT and treating error
 * code PGRST204 as "relation not found". All failures are logged, never
 * thrown, so the server still boots even with a broken schema.
 */
async function initializeTables() {
  try {
    // Probe the businesses table; create it on PGRST204.
    const { error: businessError } = await supabase.from('businesses').select('id').limit(1);
    if (businessError?.code === 'PGRST204') {
      // Raw DDL runs through an `execute_sql` RPC — assumed to be a
      // SECURITY DEFINER function installed in the database.
      const { error } = await supabase.rpc('execute_sql', {
        sql_string: `
          CREATE TABLE IF NOT EXISTS public.businesses (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            phone TEXT,
            email TEXT,
            address TEXT,
            rating NUMERIC,
            website TEXT,
            description TEXT,
            source TEXT,
            logo TEXT,
            latitude NUMERIC,
            longitude NUMERIC,
            last_updated TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()),
            search_count INTEGER DEFAULT 1,
            created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()),
            place_id TEXT
          );
        `
      });
      if (error) console.error('Error creating businesses table:', error);
    }

    // Probe the profile table; on PGRST204 create profiles, claims,
    // their indexes, and the row-level-security policies in one script.
    const { error: profileError } = await supabase.from('business_profiles').select('business_id').limit(1);
    if (profileError?.code === 'PGRST204') {
      const { error } = await supabase.rpc('execute_sql', {
        sql_string: `
          CREATE TABLE IF NOT EXISTS public.business_profiles (
            business_id TEXT PRIMARY KEY REFERENCES public.businesses(id),
            claimed_by UUID REFERENCES auth.users(id),
            claimed_at TIMESTAMP WITH TIME ZONE,
            verification_status TEXT NOT NULL DEFAULT 'unverified',
            social_links JSONB DEFAULT '{}',
            hours_of_operation JSONB DEFAULT '{}',
            additional_photos TEXT[] DEFAULT '{}',
            tags TEXT[] DEFAULT '{}',
            updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT valid_verification_status CHECK (verification_status IN ('unverified', 'pending', 'verified', 'rejected'))
          );

          CREATE TABLE IF NOT EXISTS public.business_claims (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            business_id TEXT NOT NULL REFERENCES public.businesses(id),
            user_id UUID NOT NULL REFERENCES auth.users(id),
            status TEXT NOT NULL DEFAULT 'pending',
            proof_documents TEXT[] DEFAULT '{}',
            submitted_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
            reviewed_at TIMESTAMP WITH TIME ZONE,
            reviewed_by UUID REFERENCES auth.users(id),
            notes TEXT,
            CONSTRAINT valid_claim_status CHECK (status IN ('pending', 'approved', 'rejected'))
          );

          CREATE INDEX IF NOT EXISTS idx_business_profiles_claimed_by ON public.business_profiles(claimed_by);
          CREATE INDEX IF NOT EXISTS idx_business_claims_business_id ON public.business_claims(business_id);
          CREATE INDEX IF NOT EXISTS idx_business_claims_user_id ON public.business_claims(user_id);
          CREATE INDEX IF NOT EXISTS idx_business_claims_status ON public.business_claims(status);

          ALTER TABLE public.business_profiles ENABLE ROW LEVEL SECURITY;
          ALTER TABLE public.business_claims ENABLE ROW LEVEL SECURITY;

          DROP POLICY IF EXISTS "Public profiles are viewable by everyone" ON public.business_profiles;
          CREATE POLICY "Public profiles are viewable by everyone"
            ON public.business_profiles FOR SELECT
            USING (true);

          DROP POLICY IF EXISTS "Profiles can be updated by verified owners" ON public.business_profiles;
          CREATE POLICY "Profiles can be updated by verified owners"
            ON public.business_profiles FOR UPDATE
            USING (auth.uid() = claimed_by AND verification_status = 'verified');

          DROP POLICY IF EXISTS "Users can view their own claims" ON public.business_claims;
          CREATE POLICY "Users can view their own claims"
            ON public.business_claims FOR SELECT
            USING (auth.uid() = user_id);

          DROP POLICY IF EXISTS "Users can create claims" ON public.business_claims;
          CREATE POLICY "Users can create claims"
            ON public.business_claims FOR INSERT
            WITH CHECK (auth.uid() = user_id);

          DROP POLICY IF EXISTS "Only admins can review claims" ON public.business_claims;
          CREATE POLICY "Only admins can review claims"
            ON public.business_claims FOR UPDATE
            USING (EXISTS (
              SELECT 1 FROM auth.users
              WHERE auth.uid() = id
              AND raw_app_meta_data->>'role' = 'admin'
            ));
        `
      });
      if (error) console.error('Error creating profile tables:', error);
    }

    // Insert test data
    // NOTE(review): this insert runs on EVERY boot, not only when the table
    // was just created; after the first boot it fails with a duplicate-key
    // error that is merely logged — consider gating it or using upsert.
    const { error: testDataError } = await supabase
      .from('businesses')
      .insert([
        {
          id: 'test-business-1',
          name: 'Test Coffee Shop',
          phone: '303-555-0123',
          email: 'contact@testcoffee.com',
          address: '123 Test St, Denver, CO 80202',
          rating: 4.5,
          website: 'https://testcoffee.com',
          description: 'A cozy coffee shop in downtown Denver serving artisanal coffee and pastries.',
          source: 'manual'
        }
      ])
      .select()
      .single();

    if (testDataError) {
      console.error('Error inserting test data:', testDataError);
    }

    // Create test business profile
    const { error: testProfileError } = await supabase
      .from('business_profiles')
      .insert([
        {
          business_id: 'test-business-1',
          verification_status: 'unverified',
          social_links: {
            facebook: 'https://facebook.com/testcoffee',
            instagram: 'https://instagram.com/testcoffee'
          },
          hours_of_operation: {
            monday: ['7:00', '19:00'],
            tuesday: ['7:00', '19:00'],
            wednesday: ['7:00', '19:00'],
            thursday: ['7:00', '19:00'],
            friday: ['7:00', '20:00'],
            saturday: ['8:00', '20:00'],
            sunday: ['8:00', '18:00']
          },
          tags: ['coffee', 'pastries', 'breakfast', 'lunch']
        }
      ])
      .select()
      .single();

    if (testProfileError) {
      console.error('Error creating test profile:', testProfileError);
    }
  } catch (error) {
    console.error('Error initializing tables:', error);
  }
}

// Call initialization on startup (fire-and-forget; errors are logged inside).
initializeTables();
// Schema for business profile updates — every field optional; callers may
// patch any subset of the profile.
const profileUpdateSchema = z.object({
  social_links: z.record(z.string()).optional(),
  hours_of_operation: z.record(z.array(z.string())).optional(),
  additional_photos: z.array(z.string()).optional(),
  tags: z.array(z.string()).optional(),
});

// Schema for claim submissions — payload required to claim ownership of a
// business listing.
const claimSubmissionSchema = z.object({
  business_id: z.string(),
  proof_documents: z.array(z.string()),
  notes: z.string().optional(),
});
// Public endpoint: business details joined with its (optional) profile.
router.get('/:businessId', async (req, res) => {
  const { businessId } = req.params;
  try {
    const { data: business, error: businessError } = await supabase
      .from('businesses')
      .select(`
        *,
        business_profiles (*)
      `)
      .eq('id', businessId)
      .single();

    if (businessError) throw businessError;
    if (!business) {
      return res.status(404).json({ error: 'Business not found' });
    }
    res.json(business);
  } catch (error) {
    console.error('Error fetching business profile:', error);
    res.status(500).json({ error: 'Failed to fetch business profile' });
  }
});
// Update business profile (requires authentication)
router.patch('/:businessId/profile', authenticateUser, async (req, res) => {
try {
const { businessId } = req.params;
if (!req.user) {
return res.status(401).json({ error: 'User not authenticated' });
}
const userId = req.user.id;
const updates = profileUpdateSchema.parse(req.body);
// Check if user owns this profile
const { data: profile } = await supabase
.from('business_profiles')
.select('claimed_by, verification_status')
.eq('business_id', businessId)
.single();
if (!profile || profile.claimed_by !== userId || profile.verification_status !== 'verified') {
return res.status(403).json({ error: 'Not authorized to update this profile' });
}
// Update profile
const { error: updateError } = await supabase
.from('business_profiles')
.update({
...updates,
updated_at: new Date().toISOString(),
})
.eq('business_id', businessId);
if (updateError) throw updateError;
res.json({ message: 'Profile updated successfully' });
} catch (error) {
console.error('Error updating business profile:', error);
res.status(500).json({ error: 'Failed to update profile' });
}
});
// Submit a claim for a business
router.post('/claim', authenticateUser, async (req, res) => {
try {
if (!req.user) {
return res.status(401).json({ error: 'User not authenticated' });
}
const userId = req.user.id;
const claim = claimSubmissionSchema.parse(req.body);
// Check if business exists
const { data: business } = await supabase
.from('businesses')
.select('id')
.eq('id', claim.business_id)
.single();
if (!business) {
return res.status(404).json({ error: 'Business not found' });
}
// Check if business is already claimed
const { data: existingProfile } = await supabase
.from('business_profiles')
.select('claimed_by')
.eq('business_id', claim.business_id)
.single();
if (existingProfile?.claimed_by) {
return res.status(400).json({ error: 'Business is already claimed' });
}
// Check for existing pending claims
const { data: existingClaim } = await supabase
.from('business_claims')
.select('id')
.eq('business_id', claim.business_id)
.eq('status', 'pending')
.single();
if (existingClaim) {
return res.status(400).json({ error: 'A pending claim already exists for this business' });
}
// Create claim
const { error: claimError } = await supabase
.from('business_claims')
.insert({
business_id: claim.business_id,
user_id: userId,
proof_documents: claim.proof_documents,
notes: claim.notes,
});
if (claimError) throw claimError;
res.json({ message: 'Claim submitted successfully' });
} catch (error) {
console.error('Error submitting business claim:', error);
res.status(500).json({ error: 'Failed to submit claim' });
}
});
// Get claims for a business (admin only)
router.get('/:businessId/claims', authenticateUser, async (req, res) => {
  try {
    const { businessId } = req.params;
    if (!req.user) {
      return res.status(401).json({ error: 'User not authenticated' });
    }
    const userId = req.user.id;

    // Admin check via the users table.
    // NOTE(review): this reads `users` through the public PostgREST schema;
    // Supabase keeps auth users in `auth.users`, which is not normally
    // exposed — confirm a mirroring table/view exists, otherwise this check
    // fails closed (403) for everyone.
    const { data: user } = await supabase
      .from('users')
      .select('raw_app_meta_data')
      .eq('id', userId)
      .single();

    if (user?.raw_app_meta_data?.role !== 'admin') {
      return res.status(403).json({ error: 'Not authorized' });
    }

    // Claims plus the claimant's email, newest first.
    const { data: claims, error } = await supabase
      .from('business_claims')
      .select(`
        *,
        user:user_id (
          email
        )
      `)
      .eq('business_id', businessId)
      .order('submitted_at', { ascending: false });

    if (error) throw error;

    res.json(claims);
  } catch (error) {
    console.error('Error fetching business claims:', error);
    res.status(500).json({ error: 'Failed to fetch claims' });
  }
});
// Approve or reject a pending claim. Admin only.
router.post('/claims/:claimId/review', authenticateUser, async (req, res) => {
  try {
    const { claimId } = req.params;
    if (!req.user) {
      return res.status(401).json({ error: 'User not authenticated' });
    }
    const userId = req.user.id;

    // Throws ZodError on malformed payloads (mapped to 400 below).
    const { status, notes } = z.object({
      status: z.enum(['approved', 'rejected']),
      notes: z.string().optional(),
    }).parse(req.body);

    // Admin check.
    // NOTE(review): assumes a readable `users` table mirrors auth metadata —
    // confirm, since `auth.users` is not normally exposed via PostgREST.
    const { data: user } = await supabase
      .from('users')
      .select('raw_app_meta_data')
      .eq('id', userId)
      .single();

    if (user?.raw_app_meta_data?.role !== 'admin') {
      return res.status(403).json({ error: 'Not authorized' });
    }

    // Get claim details
    const { data: claim } = await supabase
      .from('business_claims')
      .select('business_id, status')
      .eq('id', claimId)
      .single();

    if (!claim) {
      return res.status(404).json({ error: 'Claim not found' });
    }
    if (claim.status !== 'pending') {
      return res.status(400).json({ error: 'Claim has already been reviewed' });
    }

    // Single RPC so claim status and profile ownership update atomically.
    const { error: updateError } = await supabase.rpc('review_business_claim', {
      p_claim_id: claimId,
      p_business_id: claim.business_id,
      p_user_id: userId,
      p_status: status,
      p_notes: notes
    });

    if (updateError) throw updateError;

    res.json({ message: 'Claim reviewed successfully' });
  } catch (error) {
    // FIX: validation failures return 400 instead of a generic 500.
    if (error instanceof z.ZodError) {
      return res.status(400).json({ error: 'Invalid review data', details: error.errors });
    }
    console.error('Error reviewing business claim:', error);
    res.status(500).json({ error: 'Failed to review claim' });
  }
});
export default router;

View file

@ -1,160 +1,310 @@
import express from 'express';
import logger from '../utils/logger';
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
import type { Embeddings } from '@langchain/core/embeddings';
import { ChatOpenAI } from '@langchain/openai';
import {
getAvailableChatModelProviders,
getAvailableEmbeddingModelProviders,
} from '../lib/providers';
import { searchHandlers } from '../websocket/messageHandler';
import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
import { MetaSearchAgentType } from '../search/metaSearchAgent';
import { Router, Response as ExpressResponse } from 'express';
import { z } from 'zod';
import fetch from 'node-fetch';
import { Response as FetchResponse } from 'node-fetch';
import { supabase } from '../lib/supabase';
import { env } from '../config/env';
const router = express.Router();
const router = Router();
interface chatModel {
provider: string;
model: string;
customOpenAIBaseURL?: string;
customOpenAIKey?: string;
const searchSchema = z.object({
query: z.string().min(1),
});
interface Business {
id: string;
name: string;
description: string;
website: string;
phone: string | null;
address: string | null;
}
interface embeddingModel {
provider: string;
model: string;
interface SearxResult {
url: string;
title: string;
content: string;
engine: string;
score: number;
}
interface ChatRequestBody {
optimizationMode: 'speed' | 'balanced';
focusMode: string;
chatModel?: chatModel;
embeddingModel?: embeddingModel;
interface SearxResponse {
query: string;
history: Array<[string, string]>;
results: SearxResult[];
}
router.post('/', async (req, res) => {
/**
 * Look up previously cached search results for `query`.
 *
 * Strategy: try an exact match on the normalized query first; fall back to
 * a fuzzy OR-of-ILIKE search across the individual terms, then merge,
 * de-duplicate (by business id) and rank the combined results by how many
 * terms hit the name or description.
 *
 * @returns matching businesses, or an empty array on cache miss / error.
 */
async function getCachedResults(query: string): Promise<Business[]> {
  console.log('Fetching cached results for query:', query);

  const normalizedQuery = query
    .toLowerCase()
    .trim()
    .replace(/,/g, '') // Remove commas
    .replace(/\s+/g, ' '); // Normalize whitespace

  const searchTerms = normalizedQuery.split(' ').filter((term) => term.length > 0);
  console.log('Normalized search terms:', searchTerms);

  // First try exact match (single() returns no data when no row exists;
  // that error is intentionally ignored and we fall through to fuzzy search).
  const { data: exactMatch } = await supabase
    .from('search_cache')
    .select('*')
    .eq('query', normalizedQuery)
    .single();

  if (exactMatch) {
    console.log('Found exact match in cache');
    return exactMatch.results as Business[];
  }

  // Then try fuzzy search. Terms are embedded into a PostgREST `.or()`
  // filter string, so strip characters that are meaningful in that syntax
  // (parentheses, commas, dots) and escape SQL LIKE wildcards — otherwise a
  // crafted query could break or widen the filter.
  const safeTerms = searchTerms
    .map((term) => term.replace(/[(),.]/g, '').replace(/[%_]/g, '\\$&'))
    .filter((term) => term.length > 0);

  if (safeTerms.length === 0) {
    console.log('No usable search terms after sanitizing');
    return [];
  }

  console.log('Trying fuzzy search with terms:', safeTerms);
  const searchConditions = safeTerms.map((term) => `query.ilike.%${term}%`);

  const { data: cachedResults, error } = await supabase
    .from('search_cache')
    .select('*')
    .or(searchConditions.join(','));

  if (error) {
    console.error('Error fetching cached results:', error);
    return [];
  }

  if (!cachedResults || cachedResults.length === 0) {
    console.log('No cached results found');
    return [];
  }

  console.log(`Found ${cachedResults.length} cached searches`);

  // Combine and deduplicate results from all matching searches
  // (later occurrences of an id win, per Map construction order).
  const allResults = cachedResults.flatMap((cache) => cache.results as Business[]);
  const uniqueResults = Array.from(
    new Map(allResults.map((item) => [item.id, item] as [string, Business])).values(),
  );
  console.log(`Combined into ${uniqueResults.length} unique businesses`);

  // Sort by relevance: number of original terms matching name/description.
  const relevance = (b: Business): number =>
    searchTerms.filter(
      (term) =>
        b.name.toLowerCase().includes(term) ||
        b.description.toLowerCase().includes(term),
    ).length;

  return uniqueResults.sort((a, b) => relevance(b) - relevance(a));
}
async function searchSearxNG(query: string): Promise<Business[]> {
console.log('Starting SearxNG search for query:', query);
try {
const body: ChatRequestBody = req.body;
const params = new URLSearchParams({
q: `${query} denver business`,
format: 'json',
language: 'en',
time_range: '',
safesearch: '1',
engines: 'google,bing,duckduckgo'
});
if (!body.focusMode || !body.query) {
return res.status(400).json({ message: 'Missing focus mode or query' });
}
const searchUrl = `${env.SEARXNG_URL}/search?${params.toString()}`;
console.log('Searching SearxNG at URL:', searchUrl);
body.history = body.history || [];
body.optimizationMode = body.optimizationMode || 'balanced';
const history: BaseMessage[] = body.history.map((msg) => {
if (msg[0] === 'human') {
return new HumanMessage({
content: msg[1],
});
} else {
return new AIMessage({
content: msg[1],
});
const response: FetchResponse = await fetch(searchUrl, {
method: 'GET',
headers: {
'Accept': 'application/json',
}
});
const [chatModelProviders, embeddingModelProviders] = await Promise.all([
getAvailableChatModelProviders(),
getAvailableEmbeddingModelProviders(),
]);
const chatModelProvider =
body.chatModel?.provider || Object.keys(chatModelProviders)[0];
const chatModel =
body.chatModel?.model ||
Object.keys(chatModelProviders[chatModelProvider])[0];
const embeddingModelProvider =
body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0];
const embeddingModel =
body.embeddingModel?.model ||
Object.keys(embeddingModelProviders[embeddingModelProvider])[0];
let llm: BaseChatModel | undefined;
let embeddings: Embeddings | undefined;
if (body.chatModel?.provider === 'custom_openai') {
if (
!body.chatModel?.customOpenAIBaseURL ||
!body.chatModel?.customOpenAIKey
) {
return res
.status(400)
.json({ message: 'Missing custom OpenAI base URL or key' });
}
llm = new ChatOpenAI({
modelName: body.chatModel.model,
openAIApiKey: body.chatModel.customOpenAIKey,
temperature: 0.7,
configuration: {
baseURL: body.chatModel.customOpenAIBaseURL,
},
}) as unknown as BaseChatModel;
} else if (
chatModelProviders[chatModelProvider] &&
chatModelProviders[chatModelProvider][chatModel]
) {
llm = chatModelProviders[chatModelProvider][chatModel]
.model as unknown as BaseChatModel | undefined;
if (!response.ok) {
throw new Error(`SearxNG search failed: ${response.statusText} (${response.status})`);
}
if (
embeddingModelProviders[embeddingModelProvider] &&
embeddingModelProviders[embeddingModelProvider][embeddingModel]
) {
embeddings = embeddingModelProviders[embeddingModelProvider][
embeddingModel
].model as Embeddings | undefined;
const data = await response.json() as SearxResponse;
console.log(`Got ${data.results?.length || 0} raw results from SearxNG`);
console.log('Sample result:', data.results?.[0]);
if (!data.results || data.results.length === 0) {
return [];
}
if (!llm || !embeddings) {
return res.status(400).json({ message: 'Invalid model selected' });
}
const filteredResults = data.results
.filter(result =>
result.title &&
result.url &&
!result.url.includes('yelp.com/search') &&
!result.url.includes('google.com/search') &&
!result.url.includes('bbb.org/search') &&
!result.url.includes('thumbtack.com/search') &&
!result.url.includes('angi.com/search') &&
!result.url.includes('yellowpages.com/search')
);
const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode];
console.log(`Filtered to ${filteredResults.length} relevant results`);
console.log('Sample filtered result:', filteredResults[0]);
if (!searchHandler) {
return res.status(400).json({ message: 'Invalid focus mode' });
}
const searchTerms = query.toLowerCase().split(' ');
const businesses = filteredResults
.map(result => {
const business = {
id: result.url,
name: cleanBusinessName(result.title),
description: result.content || '',
website: result.url,
phone: extractPhone(result.content || '') || extractPhone(result.title),
address: extractAddress(result.content || '') || extractAddress(result.title),
score: result.score || 0
};
console.log('Processed business:', business);
return business;
})
.filter(business => {
// Check if business name contains any of the search terms
const nameMatches = searchTerms.some(term =>
business.name.toLowerCase().includes(term)
);
// Check if description contains any of the search terms
const descriptionMatches = searchTerms.some(term =>
business.description.toLowerCase().includes(term)
);
return business.name.length > 2 && (nameMatches || descriptionMatches);
})
.sort((a, b) => {
// Score based on how many search terms match the name and description
const aScore = searchTerms.filter(term =>
a.name.toLowerCase().includes(term) ||
a.description.toLowerCase().includes(term)
).length;
const bScore = searchTerms.filter(term =>
b.name.toLowerCase().includes(term) ||
b.description.toLowerCase().includes(term)
).length;
return bScore - aScore;
})
.slice(0, 10);
const emitter = await searchHandler.searchAndAnswer(
body.query,
history,
llm,
embeddings,
body.optimizationMode,
[],
);
console.log(`Transformed into ${businesses.length} business entries`);
return businesses;
} catch (error) {
console.error('SearxNG search error:', error);
return [];
}
}
let message = '';
let sources = [];
async function cacheResults(query: string, results: Business[]): Promise<void> {
if (!results.length) return;
emitter.on('data', (data) => {
const parsedData = JSON.parse(data);
if (parsedData.type === 'response') {
message += parsedData.data;
} else if (parsedData.type === 'sources') {
sources = parsedData.data;
}
});
console.log(`Caching ${results.length} results for query:`, query);
const normalizedQuery = query.toLowerCase().trim();
const { data: existing } = await supabase
.from('search_cache')
.select('id, results')
.eq('query', normalizedQuery)
.single();
emitter.on('end', () => {
res.status(200).json({ message, sources });
});
if (existing) {
console.log('Updating existing cache entry');
// Merge new results with existing ones, removing duplicates
const allResults = [...existing.results, ...results];
const uniqueResults = Array.from(new Map(allResults.map(item => [item.id, item])).values());
emitter.on('error', (data) => {
const parsedData = JSON.parse(data);
res.status(500).json({ message: parsedData.data });
});
} catch (err: any) {
logger.error(`Error in getting search results: ${err.message}`);
res.status(500).json({ message: 'An error has occurred.' });
await supabase
.from('search_cache')
.update({
results: uniqueResults,
updated_at: new Date().toISOString()
})
.eq('id', existing.id);
} else {
console.log('Creating new cache entry');
await supabase
.from('search_cache')
.insert({
query: normalizedQuery,
results,
location: 'denver', // Default location
category: 'business', // Default category
created_at: new Date().toISOString(),
updated_at: new Date().toISOString(),
expires_at: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000).toISOString() // 7 days from now
});
}
}
/**
 * Strip marketing and location noise from a raw result title to get a
 * plain business name: drops a leading "The", everything after the first
 * dash or pipe, and trailing "in/near Denver ..." phrases.
 */
function cleanBusinessName(title: string): string {
  const cleanups: Array<[RegExp, string]> = [
    [/^(the\s+)?/i, ''],          // leading "The "
    [/\s*[-|]\s*.+$/i, ''],       // anything after the first dash or pipe
    [/\s*\|.*$/i, ''],            // any remaining pipe suffix
    [/\s*in\s+denver.*$/i, ''],   // "... in Denver ..."
    [/\s*near\s+denver.*$/i, ''], // "... near Denver ..."
    [/\s*-\s*.*denver.*$/i, ''],  // "- ... Denver ..." suffix
  ];
  let name = title;
  for (const [pattern, replacement] of cleanups) {
    name = name.replace(pattern, replacement);
  }
  return name.trim();
}
/**
 * Pull the first US-style phone number out of free text.
 * Accepts an optional +1 / 1 prefix, optional parentheses around the area
 * code, and dash/dot/space separators.
 * @returns the matched number as written, or null when nothing matches.
 */
function extractPhone(text: string): string | null {
  const usPhonePattern = /(\+?1?\s*\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4})/;
  const found = usPhonePattern.exec(text);
  return found === null ? null : found[1];
}
/**
 * Pull the first Colorado street address out of free text.
 * Requires a street number, a recognized street-type suffix (Street/Ave/
 * Blvd/...), a CO/Colorado state token, and a 5-digit (optionally ZIP+4)
 * postal code.
 * @returns the matched address, or null when nothing matches.
 */
function extractAddress(text: string): string | null {
  const coAddressPattern =
    /\d+\s+[A-Za-z0-9\s,]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Way|Court|Ct|Circle|Cir)[,\s]+(?:[A-Za-z\s]+,\s*)?(?:CO|Colorado)[,\s]+\d{5}(?:-\d{4})?/i;
  const found = coAddressPattern.exec(text);
  return found ? found[0] : null;
}
// POST /search — validate the JSON body against searchSchema, then run the
// shared cached-plus-background search flow.
router.post('/search', async (req, res) => {
  try {
    console.log('Received search request:', req.body);
    const parsed = searchSchema.parse(req.body);
    await handleSearch(parsed.query, res);
  } catch (error) {
    console.error('Search error:', error);
    res.status(400).json({ error: 'Search failed. Please try again.' });
  }
});
// Also support GET requests for easier testing
// GET /search?q=... — mirrors the POST endpoint so searches can be run
// straight from a browser or curl.
router.get('/search', async (req, res) => {
  try {
    const rawQuery = req.query.q as string;
    if (!rawQuery) {
      return res.status(400).json({ error: 'Query parameter "q" is required' });
    }
    console.log('Received search request:', { query: rawQuery });
    await handleSearch(rawQuery, res);
  } catch (error) {
    console.error('Search error:', error);
    res.status(400).json({ error: 'Search failed. Please try again.' });
  }
});
// Helper function to handle search logic
/**
 * Shared search flow for both /search endpoints.
 *
 * Responds immediately with whatever the cache holds, then refreshes the
 * cache from SearxNG in the background (fire-and-forget); background
 * failures are logged, never surfaced to the client.
 */
async function handleSearch(query: string, res: ExpressResponse) {
  // Serve cached results right away.
  const cachedResults = await getCachedResults(query);
  console.log(`Returning ${cachedResults.length} cached results to client`);
  res.json({ results: cachedResults });

  // Kick off the background refresh after the response has been sent.
  console.log('Starting background search');
  void searchSearxNG(query)
    .then(async (freshResults) => {
      console.log(`Found ${freshResults.length} new results from SearxNG`);
      if (freshResults.length > 0) {
        await cacheResults(query, freshResults);
      }
    })
    .catch((error) => {
      console.error('Background search error:', error);
    });
}
export default router;

21
src/server.ts Normal file
View file

@ -0,0 +1,21 @@
import express from 'express';
import cors from 'cors';
import { env } from './config/env';
import app from './app';
import { HealthCheckService } from './lib/services/healthCheck';
const port = env.PORT || 3000;
// Health check endpoint
app.get('/health', async (req, res) => {
const health = await HealthCheckService.checkHealth();
res.json(health);
});
/**
 * Start the HTTP server on the configured port (env.PORT, defaulting to
 * 3000 per the module-level `port` constant).
 * @returns the listening server handle so callers/tests can close it.
 */
export function startServer() {
  return app.listen(port, () => {
    console.log(`Server is running on port ${port}`);
  });
}
export default app;

3
src/styles/input.css Normal file
View file

@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;

102
src/test-supabase.ts Normal file
View file

@ -0,0 +1,102 @@
import { createClient } from '@supabase/supabase-js';
import dotenv from 'dotenv';
// Load environment variables
dotenv.config();
async function testSupabaseConnection() {
console.log('Testing Supabase connection...');
console.log('URL:', process.env.SUPABASE_URL);
console.log('Key length:', process.env.SUPABASE_KEY?.length || 0);
try {
const supabase = createClient(
process.env.SUPABASE_URL!,
process.env.SUPABASE_KEY!,
{
auth: {
autoRefreshToken: true,
persistSession: true
}
}
);
// Test businesses table
console.log('\nTesting businesses table:');
const testBusiness = {
id: 'test_' + Date.now(),
name: 'Test Business',
phone: '123-456-7890',
email: 'test@example.com',
address: '123 Test St',
rating: 5,
website: 'https://test.com',
source: 'test',
description: 'Test description',
latitude: 39.7392,
longitude: -104.9903,
search_count: 1,
created_at: new Date().toISOString()
};
const { error: insertBusinessError } = await supabase
.from('businesses')
.insert([testBusiness])
.select();
if (insertBusinessError) {
console.error('❌ INSERT business error:', insertBusinessError);
} else {
console.log('✅ INSERT business OK');
// Clean up
await supabase.from('businesses').delete().eq('id', testBusiness.id);
}
// Test searches table
console.log('\nTesting searches table:');
const testSearch = {
query: 'test query',
location: 'test location',
results_count: 0,
timestamp: new Date().toISOString()
};
const { error: insertSearchError } = await supabase
.from('searches')
.insert([testSearch])
.select();
if (insertSearchError) {
console.error('❌ INSERT search error:', insertSearchError);
} else {
console.log('✅ INSERT search OK');
}
// Test cache table
console.log('\nTesting cache table:');
const testCache = {
key: 'test_key_' + Date.now(),
value: { test: true },
created_at: new Date().toISOString(),
expires_at: new Date(Date.now() + 3600000).toISOString()
};
const { error: insertCacheError } = await supabase
.from('cache')
.insert([testCache])
.select();
if (insertCacheError) {
console.error('❌ INSERT cache error:', insertCacheError);
} else {
console.log('✅ INSERT cache OK');
// Clean up
await supabase.from('cache').delete().eq('key', testCache.key);
}
} catch (error: any) {
console.error('❌ Unexpected error:', error);
}
}
testSupabaseConnection().catch(console.error);

View file

@ -0,0 +1,139 @@
import { createClient } from '@supabase/supabase-js';
// Mock data type
type MockData = {
businesses: { id: string; name: string };
cache: { key: string; value: { test: boolean } };
};
// Mock Supabase client
jest.mock('@supabase/supabase-js', () => ({
createClient: jest.fn(() => ({
from: jest.fn((table: keyof MockData) => {
const mockData: MockData = {
businesses: { id: 'test_1', name: 'Test Business' },
cache: { key: 'test_key', value: { test: true } }
};
return {
insert: jest.fn(() => ({
select: jest.fn().mockResolvedValue({
data: [mockData[table]],
error: null
})
})),
select: jest.fn(() => ({
eq: jest.fn(() => ({
single: jest.fn().mockResolvedValue({
data: mockData[table],
error: null
}),
gt: jest.fn(() => ({
single: jest.fn().mockResolvedValue({
data: null,
error: null
})
}))
}))
})),
update: jest.fn(() => ({
eq: jest.fn().mockResolvedValue({
error: null
})
})),
delete: jest.fn(() => ({
eq: jest.fn().mockResolvedValue({
error: null
})
}))
};
})
}))
}));
describe('Database Operations', () => {
const supabase = createClient('test-url', 'test-key');
const testBusiness = {
id: `test_${Date.now()}`,
name: 'Test Business',
phone: '(303) 555-1234',
email: 'test@example.com',
address: '123 Test St, Denver, CO 80202',
rating: 5,
website: 'https://test.com',
source: 'test',
description: 'Test description',
location: { lat: 39.7392, lng: -104.9903 },
search_count: 1,
created_at: new Date().toISOString()
};
beforeEach(() => {
jest.clearAllMocks();
});
describe('Business Operations', () => {
it('should insert a business successfully', async () => {
const { data, error } = await supabase
.from('businesses')
.insert([testBusiness])
.select();
expect(error).toBeNull();
expect(data).toBeTruthy();
expect(data![0].name).toBe('Test Business');
});
it('should retrieve a business by id', async () => {
const { data, error } = await supabase
.from('businesses')
.select()
.eq('id', testBusiness.id)
.single();
expect(error).toBeNull();
expect(data).toBeTruthy();
expect(data.name).toBe('Test Business');
});
it('should update a business', async () => {
const { error } = await supabase
.from('businesses')
.update({ name: 'Updated Test Business' })
.eq('id', testBusiness.id);
expect(error).toBeNull();
});
});
describe('Cache Operations', () => {
const testCache = {
key: `test_key_${Date.now()}`,
value: { test: true },
created_at: new Date().toISOString(),
expires_at: new Date(Date.now() + 3600000).toISOString()
};
it('should insert cache entry', async () => {
const { data, error } = await supabase
.from('cache')
.insert([testCache])
.select();
expect(error).toBeNull();
expect(data).toBeTruthy();
});
it('should retrieve cache entry', async () => {
const { data, error } = await supabase
.from('cache')
.select()
.eq('key', testCache.key)
.single();
expect(error).toBeNull();
expect(data.value).toEqual({ test: true });
});
});
});

View file

@ -0,0 +1,92 @@
import { DeepSeekService } from '../../lib/services/deepseekService';
import { Business } from '../../lib/types';
// Mock the DeepSeek service
jest.mock('../../lib/services/deepseekService', () => {
const mockCleanedBusiness = {
name: "Denver's Best Plumbing & Repair",
address: "1234 Main Street, Denver, CO 80202",
phone: "(720) 555-1234",
email: "support@denverplumbing.com",
description: "Professional plumbing services in Denver metro area"
};
return {
DeepSeekService: {
chat: jest.fn().mockResolvedValue(JSON.stringify({
business_info: mockCleanedBusiness
})),
detectBusinessType: jest.fn().mockReturnValue('service'),
sanitizeJsonResponse: jest.fn().mockReturnValue(mockCleanedBusiness),
manualClean: jest.fn().mockReturnValue(mockCleanedBusiness),
cleanBusinessData: jest.fn().mockResolvedValue(mockCleanedBusiness)
}
};
});
describe('DeepSeekService', () => {
describe('cleanBusinessData', () => {
const testBusiness: Business = {
id: 'test_1',
name: "Denver's Best Plumbing & Repair [LLC] (A Family Business)",
address: "Suite 200-B, 1234 Main Street, Denver, Colorado 80202",
phone: "(720) 555-1234",
email: "support@denverplumbing.com",
description: "Professional plumbing services in Denver metro area",
source: 'test',
website: 'https://example.com',
rating: 4.8,
location: { lat: 39.7392, lng: -104.9903 },
openingHours: []
};
beforeEach(() => {
jest.clearAllMocks();
});
it('should clean business name correctly', async () => {
const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
expect(cleaned.name).not.toMatch(/[\[\]{}()]/);
expect(cleaned.name).toBeTruthy();
});
it('should format phone number correctly', async () => {
const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
expect(cleaned.phone).toMatch(/^\(\d{3}\) \d{3}-\d{4}$/);
});
it('should clean email address', async () => {
const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
expect(cleaned.email).not.toMatch(/[\[\]<>()]|mailto:|click|schedule/i);
expect(cleaned.email).toMatch(/^[^\s@]+@[^\s@]+\.[^\s@]+$/);
});
it('should clean description', async () => {
const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
expect(cleaned.description).not.toMatch(/[\$\d]+%?\s*off|\$/i);
expect(cleaned.description).not.toMatch(/\b(?:call|email|visit|contact|text|www\.|http|@)\b/i);
expect(cleaned.description).not.toMatch(/[📞📧🌐💳☎️📱]/);
expect(cleaned.description).not.toMatch(/#\w+/);
});
});
describe('chat', () => {
it('should return a response from the model', async () => {
const response = await DeepSeekService['chat']([{
role: 'user',
content: 'Test message'
}]);
expect(response).toBeTruthy();
expect(typeof response).toBe('string');
});
it('should handle errors gracefully', async () => {
(DeepSeekService['chat'] as jest.Mock).mockRejectedValueOnce(new Error('Test error'));
await expect(DeepSeekService['chat']([{
role: 'user',
content: 'Test message'
}])).rejects.toThrow('Test error');
});
});
});

View file

@ -0,0 +1,145 @@
import express from 'express';
import request from 'supertest';
import { SearchService } from '../../../lib/services/searchService';
import { Business } from '../../../lib/types';
// Mock SearchService
jest.mock('../../../lib/services/searchService');
describe('API Integration', () => {
let app: express.Application;
const mockBusiness: Business = {
id: 'test_1',
name: "Denver's Best Plumbing",
address: "1234 Main Street, Denver, CO 80202",
phone: "(720) 555-1234",
email: "support@denverplumbing.com",
description: "Professional plumbing services",
source: 'test',
website: 'https://example.com',
rating: 4.8,
location: { lat: 39.7392, lng: -104.9903 },
openingHours: []
};
beforeAll(() => {
app = express();
app.use(express.json());
// Mock SearchService methods
(SearchService.prototype.search as jest.Mock).mockResolvedValue([mockBusiness]);
(SearchService.prototype.getBusinessById as jest.Mock).mockResolvedValue(mockBusiness);
// Add error handling middleware
app.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => {
if (err instanceof SyntaxError && 'body' in err) {
return res.status(400).json({ error: 'Invalid JSON' });
}
next(err);
});
// Add routes
app.use('/api', require('../../../routes/api').default);
});
beforeEach(() => {
jest.clearAllMocks();
});
describe('Search Endpoints', () => {
it('should handle search requests', async () => {
const response = await request(app)
.post('/api/search')
.send({
query: 'plumber in Denver',
location: 'Denver, CO'
});
expect(response.status).toBe(200);
expect(response.body).toHaveProperty('results');
expect(Array.isArray(response.body.results)).toBe(true);
expect(response.body.results[0]).toEqual(mockBusiness);
});
it('should handle missing parameters', async () => {
const response = await request(app)
.post('/api/search')
.send({
query: 'plumber in Denver'
// missing location
});
expect(response.status).toBe(400);
expect(response.body).toHaveProperty('error');
});
it('should handle search errors', async () => {
// Mock search error
(SearchService.prototype.search as jest.Mock)
.mockRejectedValueOnce(new Error('Search failed'));
const response = await request(app)
.post('/api/search')
.send({
query: 'plumber in Denver',
location: 'Denver, CO'
});
expect(response.status).toBe(500);
expect(response.body).toHaveProperty('error');
});
});
describe('Business Details Endpoint', () => {
it('should retrieve business details', async () => {
const response = await request(app)
.get('/api/business/test_1');
expect(response.status).toBe(200);
expect(response.body).toEqual(mockBusiness);
});
it('should handle non-existent business', async () => {
// Mock not found
(SearchService.prototype.getBusinessById as jest.Mock)
.mockResolvedValueOnce(null);
const response = await request(app)
.get('/api/business/non_existent');
expect(response.status).toBe(404);
expect(response.body).toHaveProperty('error');
});
});
describe('Error Handling', () => {
it('should handle invalid JSON', async () => {
const response = await request(app)
.post('/api/search')
.set('Content-Type', 'application/json')
.send('{"invalid json"}');
expect(response.status).toBe(400);
expect(response.body).toHaveProperty('error');
expect(response.body.error).toBe('Invalid JSON');
});
it('should handle rate limiting', async () => {
// Mock rate limit error
(SearchService.prototype.search as jest.Mock)
.mockRejectedValueOnce({ response: { status: 429 } });
const response = await request(app)
.post('/api/search')
.send({
query: 'plumber in Denver',
location: 'Denver, CO'
});
expect(response.status).toBe(429);
expect(response.body).toHaveProperty('error');
expect(response.body.error).toBe('Rate limit exceeded');
});
});
});

View file

@ -0,0 +1,162 @@
import { DeepSeekService } from '../../../lib/services/deepseekService';
import { createClient } from '@supabase/supabase-js';
import { SearchService } from '../../../lib/services/searchService';
import { Business } from '../../../lib/types';
// Mock external services
jest.mock('@supabase/supabase-js');
jest.mock('../../../lib/services/deepseekService');
describe('Search Integration', () => {
const mockBusiness: Business = {
id: 'test_1',
name: "Denver's Best Plumbing",
address: "1234 Main Street, Denver, CO 80202",
phone: "(720) 555-1234",
email: "support@denverplumbing.com",
description: "Professional plumbing services",
source: 'test',
website: 'https://example.com',
rating: 4.8,
location: { lat: 39.7392, lng: -104.9903 },
openingHours: []
};
// Mock Supabase responses
const mockSupabase = {
from: jest.fn().mockReturnValue({
insert: jest.fn().mockReturnValue({
select: jest.fn().mockResolvedValue({
data: [mockBusiness],
error: null
})
}),
select: jest.fn().mockReturnValue({
eq: jest.fn().mockReturnValue({
single: jest.fn().mockResolvedValue({
data: null,
error: null
})
})
})
})
};
beforeEach(() => {
jest.clearAllMocks();
(createClient as jest.Mock).mockReturnValue(mockSupabase);
});
describe('Search and Store Flow', () => {
it('should search, clean, and store business data', async () => {
const searchService = new SearchService();
const query = 'plumber in Denver';
const location = 'Denver, CO';
// Mock performSearch to return results
const performSearchSpy = jest.spyOn(searchService as any, 'performSearch')
.mockResolvedValue([mockBusiness]);
// Perform search
const results = await searchService.search(query, location);
// Verify search results
expect(results).toBeTruthy();
expect(Array.isArray(results)).toBe(true);
expect(results[0]).toEqual(mockBusiness);
// Verify cache was checked first
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
// Verify results were cached
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
expect(mockSupabase.from().insert).toHaveBeenCalled();
});
it('should handle search errors gracefully', async () => {
const searchService = new SearchService();
// Mock performSearch to throw error
jest.spyOn(searchService as any, 'performSearch')
.mockRejectedValue(new Error('Search failed'));
await expect(searchService.search('invalid query', 'invalid location'))
.rejects.toThrow('Search failed');
});
it('should use cache when available', async () => {
const searchService = new SearchService();
const query = 'plumber in Denver';
const location = 'Denver, CO';
// Mock cache hit
mockSupabase.from.mockReturnValueOnce({
select: jest.fn().mockReturnValue({
eq: jest.fn().mockReturnValue({
single: jest.fn().mockResolvedValue({
data: { value: [mockBusiness] },
error: null
})
})
})
});
const results = await searchService.search(query, location);
// Verify cache was checked
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
expect(results).toEqual([mockBusiness]);
// Verify performSearch was not called
expect(jest.spyOn(searchService as any, 'performSearch')).not.toHaveBeenCalled();
});
it('should handle rate limiting', async () => {
const searchService = new SearchService();
// Mock performSearch to throw rate limit error
jest.spyOn(searchService as any, 'performSearch')
.mockRejectedValue({ response: { status: 429 } });
const query = 'plumber in Denver';
const location = 'Denver, CO';
await expect(searchService.search(query, location))
.rejects.toThrow('Rate limit exceeded');
});
});
describe('Data Consistency', () => {
it('should maintain data consistency between search and retrieval', async () => {
const searchService = new SearchService();
const query = 'plumber in Denver';
const location = 'Denver, CO';
// Mock performSearch to return results
jest.spyOn(searchService as any, 'performSearch')
.mockResolvedValue([mockBusiness]);
// Perform search
const searchResults = await searchService.search(query, location);
const firstResult = searchResults[0];
// Mock database retrieval
mockSupabase.from.mockReturnValueOnce({
select: jest.fn().mockReturnValue({
eq: jest.fn().mockReturnValue({
single: jest.fn().mockResolvedValue({
data: firstResult,
error: null
})
})
})
});
// Retrieve the same business
const retrieved = await searchService.getBusinessById(firstResult.id);
// Verify data consistency
expect(retrieved).toEqual(firstResult);
});
});
});

22
src/tests/setup.ts Normal file
View file

@ -0,0 +1,22 @@
import dotenv from 'dotenv';
// Load environment variables for testing
dotenv.config({ path: '.env.test' });
// Set default timeout for all tests
jest.setTimeout(10000);
// Global setup
beforeAll(() => {
// Add any global setup here
});
// Global teardown
afterAll(() => {
// Add any global cleanup here
});
// Reset mocks between tests
afterEach(() => {
jest.clearAllMocks();
});

94
src/tests/supabaseTest.ts Normal file
View file

@ -0,0 +1,94 @@
import '../config/env'; // Load env vars first
import { CacheService } from '../lib/services/cacheService';
import type { PostgrestError } from '@supabase/supabase-js';
import { env } from '../config/env';
async function testSupabaseConnection() {
console.log('\n🔍 Testing Supabase Connection...');
console.log('Using Supabase URL:', env.supabase.url);
try {
// Test data
const testData = {
category: 'test_category',
location: 'test_location',
results: [{
name: 'Test Business',
phone: '123-456-7890',
email: 'test@example.com',
address: '123 Test St, Test City, TS 12345',
rating: 95,
website: 'https://test.com',
logo: '',
source: 'test',
description: 'Test business description'
}]
};
console.log('\n1⃣ Testing write operation...');
await CacheService.cacheResults(
testData.category,
testData.location,
testData.results,
env.cache.durationDays
);
console.log('✅ Write successful');
console.log('\n2⃣ Testing read operation...');
const cachedResults = await CacheService.getCachedResults(
testData.category,
testData.location
);
if (cachedResults && cachedResults.length > 0) {
console.log('✅ Read successful');
console.log('\nCached data:', JSON.stringify(cachedResults[0], null, 2));
} else {
throw new Error('No results found in cache');
}
console.log('\n3⃣ Testing update operation...');
const updatedResults = [...testData.results];
updatedResults[0].rating = 98;
await CacheService.updateCache(
testData.category,
testData.location,
updatedResults
);
console.log('✅ Update successful');
console.log('\n✨ All tests passed! Supabase connection is working properly.\n');
} catch (error: unknown) {
console.error('\n❌ Test failed:');
if (error instanceof Error) {
console.error('Error message:', error.message);
// Check if it's a Supabase error by looking at the shape of the error object
const isSupabaseError = (err: any): err is PostgrestError =>
'code' in err && 'details' in err && 'hint' in err && 'message' in err;
if (error.message.includes('connection') || isSupabaseError(error)) {
console.log('\n📋 Troubleshooting steps:');
console.log('1. Check if your SUPABASE_URL and SUPABASE_ANON_KEY are correct in .env');
console.log('2. Verify that the search_cache table exists in your Supabase project');
console.log('3. Check if RLS policies are properly configured');
if (isSupabaseError(error)) {
console.log('\nSupabase error details:');
console.log('Code:', error.code);
console.log('Details:', error.details);
console.log('Hint:', error.hint);
}
}
} else {
console.error('Unknown error:', error);
}
process.exit(1);
}
}
// Run the test
testSupabaseConnection();

43
src/tests/testDeepseek.ts Normal file
View file

@ -0,0 +1,43 @@
import { DeepSeekService } from '../lib/services/deepseekService';
import dotenv from 'dotenv';
dotenv.config();
/**
 * Smoke test for DeepSeekService.chat(): sends one prompt through the
 * locally running model and prints the outcome.
 * NOTE(review): the success log says "Parsed response" but `response` is
 * printed as-is from `service.chat` — confirm whether the service returns
 * a parsed object or a raw string.
 */
async function testDeepseekService() {
  const service = new DeepSeekService();
  try {
    console.log('Starting DeepSeek test...');
    // Mirrors the default base URL used elsewhere for the Ollama backend.
    console.log('Base URL:', process.env.OLLAMA_URL || 'http://localhost:11434');
    const testQuery = {
      role: "user",
      content: "Find plumbers in Denver, CO. You must return exactly 10 results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON."
    };
    console.log('Sending test query:', testQuery);
    const response = await service.chat([testQuery]);
    console.log('\nTest successful!');
    console.log('Parsed response:', JSON.stringify(response, null, 2));
  } catch (error) {
    console.error('\nTest failed!');
    if (error instanceof Error) {
      console.error('Error message:', error.message);
      console.error('Stack trace:', error.stack);
    } else {
      console.error('Unknown error:', error);
    }
  }
}
// Run the test
console.log('=== Starting DeepSeek Service Test ===\n');
testDeepseekService().then(() => {
console.log('\n=== Test Complete ===');
}).catch(error => {
console.error('\n=== Test Failed ===');
console.error(error);
});

47
src/tests/testOllama.ts Normal file
View file

@ -0,0 +1,47 @@
import axios from 'axios';
import dotenv from 'dotenv';
dotenv.config();
/**
 * Standalone connectivity check for the local Ollama HTTP API.
 * Sends a single non-streaming chat request and dumps status + body;
 * logs (rather than throws) on failure so it can be run ad hoc.
 */
async function testOllamaConnection() {
  const baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434';
  console.log('Testing Ollama connection...');
  console.log('Base URL:', baseUrl);
  try {
    // Simple test request
    const response = await axios.post(`${baseUrl}/api/chat`, {
      model: 'deepseek-coder:6.7b',
      messages: [{
        role: 'user',
        content: 'Return a simple JSON array with one object: {"test": "success"}'
      }],
      stream: false // single JSON response rather than a token stream
    });
    console.log('\nResponse received:');
    console.log('Status:', response.status);
    console.log('Data:', JSON.stringify(response.data, null, 2));
  } catch (error) {
    console.error('Connection test failed:');
    if (axios.isAxiosError(error)) {
      // Axios errors carry the HTTP response (if any) separately.
      console.error('Network error:', error.message);
      if (error.response) {
        console.error('Response status:', error.response.status);
        console.error('Response data:', error.response.data);
      }
    } else {
      console.error('Error:', error);
    }
  }
}
console.log('=== Starting Ollama Connection Test ===\n');
testOllamaConnection().then(() => {
console.log('\n=== Test Complete ===');
}).catch(error => {
console.error('\n=== Test Failed ===');
console.error(error);
});

26
src/tests/testSearch.ts Normal file
View file

@ -0,0 +1,26 @@
import { searchSearxng } from '../lib/searxng';
/**
 * Smoke-tests the SearxNG wrapper with a sample business query and
 * prints the first hit, or a hint to start SearxNG on failure.
 */
async function testSearchEngine() {
  try {
    console.log('Testing SearxNG connection...');
    const searchOptions = {
      engines: ['google', 'bing', 'duckduckgo'],
      pageno: 1
    };
    const response = await searchSearxng('plumbers in Denver', searchOptions);
    const hits = response?.results;
    if (hits && hits.length > 0) {
      console.log('✅ Search successful!');
      console.log('Number of results:', hits.length);
      console.log('First result:', hits[0]);
    } else {
      console.log('❌ No results found');
    }
  } catch (error) {
    console.error('❌ Search test failed:', error);
    console.error('Make sure SearxNG is running on http://localhost:4000');
  }
}

// Entry point: run the search smoke test.
testSearchEngine();

28
src/types/business.ts Normal file
View file

@ -0,0 +1,28 @@
/**
 * A local service business record.
 *
 * NOTE(review): field semantics below are inferred from names and from
 * how the test scripts in this commit use them — confirm against the
 * data layer before relying on them.
 */
export interface Business {
  id: string;
  name: string;
  phone: string;
  address: string;
  city: string;
  state: string;
  zip: string;
  // One or more category labels for the business (array, not a single string).
  category: string[];
  // Rating — presumably 1-5 stars, as requested by the DeepSeek test query; TODO confirm.
  rating: number;
  reviewCount: number;
  // Optional license identifier, when the business reported one.
  license?: string;
  services: string[];
  // Opening hours keyed by day label; values are free-form strings.
  hours: Record<string, string>;
  website?: string;
  email?: string;
  verified: boolean;
  lastUpdated: Date;
}
/**
 * Filter and sort options for a business search.
 */
export interface SearchParams {
  // Free-form location string — presumably "City, ST" (e.g. "Denver, CO"); confirm with callers.
  location: string;
  category?: string;
  // Search radius — units not specified here; TODO confirm (miles vs km).
  radius?: number;
  // Minimum acceptable rating threshold.
  minRating?: number;
  sortBy?: 'rating' | 'distance' | 'reviewCount';
  // Presumably restricts results to verified businesses when true.
  verified?: boolean;
}

18
src/utils/portCheck.ts Normal file
View file

@ -0,0 +1,18 @@
import net from 'net';
/**
 * Checks whether a TCP port can be bound on this machine.
 *
 * Probes by attempting to bind a throwaway server: a successful
 * 'listening' event means the port is free, while an 'error'
 * (e.g. EADDRINUSE) means it is taken.
 *
 * @param port - Port to probe, in any form accepted by `server.listen`.
 * @returns Promise resolving to `true` when the port is free; never rejects.
 */
export function isPortAvailable(port: number | string): Promise<boolean> {
  return new Promise((resolve) => {
    const probe = net
      .createServer()
      .once('error', () => resolve(false))
      .once('listening', () => {
        probe.close();
        resolve(true);
      });
    probe.listen(port);
  });
}