Merge 9f4ae1baac into 7ec201d011
This commit is contained in:
commit
c616072732
103 changed files with 31225 additions and 1370 deletions
38
src/app.ts
38
src/app.ts
|
|
@ -1,38 +1,16 @@
|
|||
import { startWebSocketServer } from './websocket';
|
||||
import express from 'express';
|
||||
import cors from 'cors';
|
||||
import http from 'http';
|
||||
import routes from './routes';
|
||||
import { getPort } from './config';
|
||||
import logger from './utils/logger';
|
||||
|
||||
const port = getPort();
|
||||
import searchRoutes from './routes/search';
|
||||
import businessRoutes from './routes/business';
|
||||
|
||||
const app = express();
|
||||
const server = http.createServer(app);
|
||||
|
||||
const corsOptions = {
|
||||
origin: '*',
|
||||
};
|
||||
|
||||
app.use(cors(corsOptions));
|
||||
// Middleware
|
||||
app.use(cors());
|
||||
app.use(express.json());
|
||||
|
||||
app.use('/api', routes);
|
||||
app.get('/api', (_, res) => {
|
||||
res.status(200).json({ status: 'ok' });
|
||||
});
|
||||
// Routes
|
||||
app.use('/api/search', searchRoutes);
|
||||
app.use('/api/business', businessRoutes);
|
||||
|
||||
server.listen(port, () => {
|
||||
logger.info(`Server is running on port ${port}`);
|
||||
});
|
||||
|
||||
startWebSocketServer(server);
|
||||
|
||||
process.on('uncaughtException', (err, origin) => {
|
||||
logger.error(`Uncaught Exception at ${origin}: ${err}`);
|
||||
});
|
||||
|
||||
process.on('unhandledRejection', (reason, promise) => {
|
||||
logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`);
|
||||
});
|
||||
export default app;
|
||||
|
|
|
|||
|
|
@ -77,3 +77,16 @@ export const updateConfig = (config: RecursivePartial<Config>) => {
|
|||
toml.stringify(config),
|
||||
);
|
||||
};
|
||||
|
||||
export const config = {
|
||||
ollama: {
|
||||
url: process.env.OLLAMA_URL || 'http://localhost:11434',
|
||||
model: process.env.OLLAMA_MODEL || 'mistral',
|
||||
options: {
|
||||
temperature: 0.1,
|
||||
top_p: 0.9,
|
||||
timeout: 30000 // 30 seconds timeout
|
||||
}
|
||||
},
|
||||
// ... other config
|
||||
};
|
||||
|
|
|
|||
40
src/config/env.ts
Normal file
40
src/config/env.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import dotenv from 'dotenv';
|
||||
|
||||
// Load environment variables
|
||||
dotenv.config();
|
||||
|
||||
// Environment configuration
|
||||
const env = {
|
||||
// Supabase Configuration
|
||||
SUPABASE_URL: process.env.SUPABASE_URL || '',
|
||||
SUPABASE_KEY: process.env.SUPABASE_KEY || '',
|
||||
|
||||
// Server Configuration
|
||||
PORT: parseInt(process.env.PORT || '3001', 10),
|
||||
NODE_ENV: process.env.NODE_ENV || 'development',
|
||||
|
||||
// Search Configuration
|
||||
MAX_RESULTS_PER_QUERY: parseInt(process.env.MAX_RESULTS_PER_QUERY || '50', 10),
|
||||
CACHE_DURATION_HOURS: parseInt(process.env.CACHE_DURATION_HOURS || '24', 10),
|
||||
CACHE_DURATION_DAYS: parseInt(process.env.CACHE_DURATION_DAYS || '7', 10),
|
||||
|
||||
// SearxNG Configuration
|
||||
SEARXNG_URL: process.env.SEARXNG_URL || 'http://localhost:4000',
|
||||
|
||||
// Ollama Configuration
|
||||
OLLAMA_URL: process.env.OLLAMA_URL || 'http://localhost:11434',
|
||||
OLLAMA_MODEL: process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b',
|
||||
|
||||
// Hugging Face Configuration
|
||||
HUGGING_FACE_API_KEY: process.env.HUGGING_FACE_API_KEY || ''
|
||||
};
|
||||
|
||||
// Validate required environment variables
|
||||
const requiredEnvVars = ['SUPABASE_URL', 'SUPABASE_KEY', 'SEARXNG_URL'];
|
||||
for (const envVar of requiredEnvVars) {
|
||||
if (!env[envVar as keyof typeof env]) {
|
||||
throw new Error(`Missing required environment variable: ${envVar}`);
|
||||
}
|
||||
}
|
||||
|
||||
export { env };
|
||||
77
src/config/index.ts
Normal file
77
src/config/index.ts
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import dotenv from 'dotenv';
|
||||
import path from 'path';
|
||||
|
||||
// Load .env file
|
||||
dotenv.config({ path: path.resolve(__dirname, '../../.env') });
|
||||
|
||||
export interface Config {
|
||||
supabase: {
|
||||
url: string;
|
||||
anonKey: string;
|
||||
};
|
||||
server: {
|
||||
port: number;
|
||||
nodeEnv: string;
|
||||
};
|
||||
search: {
|
||||
maxResultsPerQuery: number;
|
||||
cacheDurationHours: number;
|
||||
searxngUrl?: string;
|
||||
};
|
||||
rateLimit: {
|
||||
windowMs: number;
|
||||
maxRequests: number;
|
||||
};
|
||||
security: {
|
||||
corsOrigin: string;
|
||||
jwtSecret: string;
|
||||
};
|
||||
proxy?: {
|
||||
http?: string;
|
||||
https?: string;
|
||||
};
|
||||
logging: {
|
||||
level: string;
|
||||
};
|
||||
}
|
||||
|
||||
const config: Config = {
|
||||
supabase: {
|
||||
url: process.env.SUPABASE_URL || '',
|
||||
anonKey: process.env.SUPABASE_ANON_KEY || '',
|
||||
},
|
||||
server: {
|
||||
port: parseInt(process.env.PORT || '3000', 10),
|
||||
nodeEnv: process.env.NODE_ENV || 'development',
|
||||
},
|
||||
search: {
|
||||
maxResultsPerQuery: parseInt(process.env.MAX_RESULTS_PER_QUERY || '20', 10),
|
||||
cacheDurationHours: parseInt(process.env.CACHE_DURATION_HOURS || '24', 10),
|
||||
searxngUrl: process.env.SEARXNG_URL
|
||||
},
|
||||
rateLimit: {
|
||||
windowMs: parseInt(process.env.RATE_LIMIT_WINDOW_MS || '900000', 10),
|
||||
maxRequests: parseInt(process.env.RATE_LIMIT_MAX_REQUESTS || '100', 10),
|
||||
},
|
||||
security: {
|
||||
corsOrigin: process.env.CORS_ORIGIN || 'http://localhost:3000',
|
||||
jwtSecret: process.env.JWT_SECRET || 'your_jwt_secret_key',
|
||||
},
|
||||
logging: {
|
||||
level: process.env.LOG_LEVEL || 'info',
|
||||
},
|
||||
};
|
||||
|
||||
// Validate required configuration
|
||||
const validateConfig = () => {
|
||||
if (!config.supabase.url) {
|
||||
throw new Error('SUPABASE_URL is required');
|
||||
}
|
||||
if (!config.supabase.anonKey) {
|
||||
throw new Error('SUPABASE_ANON_KEY is required');
|
||||
}
|
||||
};
|
||||
|
||||
validateConfig();
|
||||
|
||||
export { config };
|
||||
24
src/index.ts
Normal file
24
src/index.ts
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import './config/env'; // Load environment variables first
|
||||
import { startServer } from './server';
|
||||
import { isPortAvailable } from './utils/portCheck';
|
||||
import { testConnection } from './lib/supabase';
|
||||
|
||||
const PORT = process.env.PORT || 3001;
|
||||
|
||||
const init = async () => {
|
||||
if (!await isPortAvailable(PORT)) {
|
||||
console.error(`Port ${PORT} is in use. Please try a different port or free up the current one.`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Test Supabase connection
|
||||
const isConnected = await testConnection();
|
||||
if (!isConnected) {
|
||||
console.error('Failed to connect to Supabase. Please check your configuration.');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
startServer();
|
||||
};
|
||||
|
||||
init().catch(console.error);
|
||||
116
src/lib/categories.ts
Normal file
116
src/lib/categories.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
export interface Category {
|
||||
id: string;
|
||||
name: string;
|
||||
icon: string;
|
||||
subcategories: SubCategory[];
|
||||
}
|
||||
|
||||
export interface SubCategory {
|
||||
id: string;
|
||||
name: string;
|
||||
}
|
||||
|
||||
export const categories: Category[] = [
|
||||
{
|
||||
id: 'real-estate-pros',
|
||||
name: 'Real Estate Professionals',
|
||||
icon: '🏢',
|
||||
subcategories: [
|
||||
{ id: 'wholesalers', name: 'Real Estate Wholesalers' },
|
||||
{ id: 'agents', name: 'Real Estate Agents' },
|
||||
{ id: 'attorneys', name: 'Real Estate Attorneys' },
|
||||
{ id: 'scouts', name: 'Property Scouts' },
|
||||
{ id: 'brokers', name: 'Real Estate Brokers' },
|
||||
{ id: 'consultants', name: 'Real Estate Consultants' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'legal-title',
|
||||
name: 'Legal & Title Services',
|
||||
icon: '⚖️',
|
||||
subcategories: [
|
||||
{ id: 'title-companies', name: 'Title Companies' },
|
||||
{ id: 'closing-attorneys', name: 'Closing Attorneys' },
|
||||
{ id: 'zoning-consultants', name: 'Zoning Consultants' },
|
||||
{ id: 'probate-specialists', name: 'Probate Specialists' },
|
||||
{ id: 'eviction-specialists', name: 'Eviction Specialists' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'financial',
|
||||
name: 'Financial Services',
|
||||
icon: '💰',
|
||||
subcategories: [
|
||||
{ id: 'hard-money', name: 'Hard Money Lenders' },
|
||||
{ id: 'private-equity', name: 'Private Equity Investors' },
|
||||
{ id: 'mortgage-brokers', name: 'Mortgage Brokers' },
|
||||
{ id: 'tax-advisors', name: 'Tax Advisors' },
|
||||
{ id: 'appraisers', name: 'Appraisers' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'contractors',
|
||||
name: 'Specialist Contractors',
|
||||
icon: '🔨',
|
||||
subcategories: [
|
||||
{ id: 'general', name: 'General Contractors' },
|
||||
{ id: 'plumbers', name: 'Plumbers' },
|
||||
{ id: 'electricians', name: 'Electricians' },
|
||||
{ id: 'hvac', name: 'HVAC Technicians' },
|
||||
{ id: 'roofers', name: 'Roofers' },
|
||||
{ id: 'foundation', name: 'Foundation Specialists' },
|
||||
{ id: 'asbestos', name: 'Asbestos Removal' },
|
||||
{ id: 'mold', name: 'Mold Remediation' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'property-services',
|
||||
name: 'Property Services',
|
||||
icon: '🏠',
|
||||
subcategories: [
|
||||
{ id: 'surveyors', name: 'Surveyors' },
|
||||
{ id: 'inspectors', name: 'Inspectors' },
|
||||
{ id: 'property-managers', name: 'Property Managers' },
|
||||
{ id: 'environmental', name: 'Environmental Consultants' },
|
||||
{ id: 'junk-removal', name: 'Junk Removal Services' },
|
||||
{ id: 'cleaning', name: 'Property Cleaning' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'marketing',
|
||||
name: 'Marketing & Lead Gen',
|
||||
icon: '📢',
|
||||
subcategories: [
|
||||
{ id: 'direct-mail', name: 'Direct Mail Services' },
|
||||
{ id: 'social-media', name: 'Social Media Marketing' },
|
||||
{ id: 'seo', name: 'SEO Specialists' },
|
||||
{ id: 'ppc', name: 'PPC Advertising' },
|
||||
{ id: 'lead-gen', name: 'Lead Generation' },
|
||||
{ id: 'skip-tracing', name: 'Skip Tracing Services' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'data-tech',
|
||||
name: 'Data & Technology',
|
||||
icon: '💻',
|
||||
subcategories: [
|
||||
{ id: 'data-providers', name: 'Property Data Providers' },
|
||||
{ id: 'crm', name: 'CRM Systems' },
|
||||
{ id: 'valuation', name: 'Valuation Tools' },
|
||||
{ id: 'virtual-tours', name: 'Virtual Tour Services' },
|
||||
{ id: 'automation', name: 'Automation Tools' }
|
||||
]
|
||||
},
|
||||
{
|
||||
id: 'specialty',
|
||||
name: 'Specialty Services',
|
||||
icon: '🎯',
|
||||
subcategories: [
|
||||
{ id: 'auction', name: 'Auction Companies' },
|
||||
{ id: 'relocation', name: 'Relocation Services' },
|
||||
{ id: 'staging', name: 'Home Staging' },
|
||||
{ id: 'photography', name: 'Real Estate Photography' },
|
||||
{ id: 'virtual-assistant', name: 'Virtual Assistants' }
|
||||
]
|
||||
}
|
||||
];
|
||||
51
src/lib/db/optOutDb.ts
Normal file
51
src/lib/db/optOutDb.ts
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import { Database } from 'better-sqlite3';
|
||||
import path from 'path';
|
||||
|
||||
interface OptOutEntry {
|
||||
domain: string;
|
||||
email: string;
|
||||
reason?: string;
|
||||
timestamp: Date;
|
||||
}
|
||||
|
||||
export class OptOutDatabase {
|
||||
private db: Database;
|
||||
|
||||
constructor() {
|
||||
this.db = new Database(path.join(__dirname, '../../../data/optout.db'));
|
||||
this.initializeDatabase();
|
||||
}
|
||||
|
||||
private initializeDatabase() {
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS opt_outs (
|
||||
domain TEXT PRIMARY KEY,
|
||||
email TEXT NOT NULL,
|
||||
reason TEXT,
|
||||
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_domain ON opt_outs(domain);
|
||||
`);
|
||||
}
|
||||
|
||||
async addOptOut(entry: OptOutEntry): Promise<void> {
|
||||
const stmt = this.db.prepare(
|
||||
'INSERT OR REPLACE INTO opt_outs (domain, email, reason, timestamp) VALUES (?, ?, ?, ?)'
|
||||
);
|
||||
stmt.run(entry.domain, entry.email, entry.reason, entry.timestamp.toISOString());
|
||||
}
|
||||
|
||||
isOptedOut(domain: string): boolean {
|
||||
const stmt = this.db.prepare('SELECT 1 FROM opt_outs WHERE domain = ?');
|
||||
return stmt.get(domain) !== undefined;
|
||||
}
|
||||
|
||||
removeOptOut(domain: string): void {
|
||||
const stmt = this.db.prepare('DELETE FROM opt_outs WHERE domain = ?');
|
||||
stmt.run(domain);
|
||||
}
|
||||
|
||||
getOptOutList(): OptOutEntry[] {
|
||||
return this.db.prepare('SELECT * FROM opt_outs').all();
|
||||
}
|
||||
}
|
||||
74
src/lib/db/supabase.ts
Normal file
74
src/lib/db/supabase.ts
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
import { BusinessData } from '../searxng';
|
||||
import { env } from '../../config/env';
|
||||
|
||||
// Create the Supabase client with validated environment variables
|
||||
export const supabase = createClient(
|
||||
env.supabase.url,
|
||||
env.supabase.anonKey,
|
||||
{
|
||||
auth: {
|
||||
persistSession: false // Since this is a server environment
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Define the cache record type
|
||||
export interface CacheRecord {
|
||||
id: string;
|
||||
query: string;
|
||||
results: BusinessData[];
|
||||
location: string;
|
||||
category: string;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
expires_at: string;
|
||||
}
|
||||
|
||||
// Export database helper functions
|
||||
export async function getCacheEntry(
|
||||
category: string,
|
||||
location: string
|
||||
): Promise<CacheRecord | null> {
|
||||
const { data, error } = await supabase
|
||||
.from('search_cache')
|
||||
.select('*')
|
||||
.eq('category', category.toLowerCase())
|
||||
.eq('location', location.toLowerCase())
|
||||
.gt('expires_at', new Date().toISOString())
|
||||
.order('created_at', { ascending: false })
|
||||
.limit(1)
|
||||
.single();
|
||||
|
||||
if (error) {
|
||||
console.error('Cache lookup failed:', error);
|
||||
return null;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
export async function saveCacheEntry(
|
||||
category: string,
|
||||
location: string,
|
||||
results: BusinessData[],
|
||||
expiresInDays: number = 7
|
||||
): Promise<void> {
|
||||
const expiresAt = new Date();
|
||||
expiresAt.setDate(expiresAt.getDate() + expiresInDays);
|
||||
|
||||
const { error } = await supabase
|
||||
.from('search_cache')
|
||||
.insert({
|
||||
query: `${category} in ${location}`,
|
||||
category: category.toLowerCase(),
|
||||
location: location.toLowerCase(),
|
||||
results,
|
||||
expires_at: expiresAt.toISOString()
|
||||
});
|
||||
|
||||
if (error) {
|
||||
console.error('Failed to save cache entry:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
195
src/lib/emailScraper.ts
Normal file
195
src/lib/emailScraper.ts
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { Cache } from './utils/cache';
|
||||
import { RateLimiter } from './utils/rateLimiter';
|
||||
import robotsParser from 'robots-parser';
|
||||
|
||||
interface ScrapingResult {
|
||||
emails: string[];
|
||||
phones: string[];
|
||||
addresses: string[];
|
||||
socialLinks: string[];
|
||||
source: string;
|
||||
timestamp: Date;
|
||||
attribution: string;
|
||||
}
|
||||
|
||||
export class EmailScraper {
|
||||
private cache: Cache<ScrapingResult>;
|
||||
private rateLimiter: RateLimiter;
|
||||
private robotsCache = new Map<string, any>();
|
||||
|
||||
constructor(private options = {
|
||||
timeout: 5000,
|
||||
cacheTTL: 60,
|
||||
rateLimit: { windowMs: 60000, maxRequests: 10 }, // More conservative rate limiting
|
||||
userAgent: 'BizSearch/1.0 (+https://your-domain.com/about) - Business Directory Service'
|
||||
}) {
|
||||
this.cache = new Cache<ScrapingResult>(options.cacheTTL);
|
||||
this.rateLimiter = new RateLimiter(options.rateLimit.windowMs, options.rateLimit.maxRequests);
|
||||
}
|
||||
|
||||
private async checkRobotsPermission(url: string): Promise<boolean> {
|
||||
try {
|
||||
const { protocol, host } = new URL(url);
|
||||
const robotsUrl = `${protocol}//${host}/robots.txt`;
|
||||
|
||||
let parser = this.robotsCache.get(host);
|
||||
if (!parser) {
|
||||
const response = await axios.get(robotsUrl);
|
||||
parser = robotsParser(robotsUrl, response.data);
|
||||
this.robotsCache.set(host, parser);
|
||||
}
|
||||
|
||||
return parser.isAllowed(url, this.options.userAgent);
|
||||
} catch (error) {
|
||||
console.warn(`Could not check robots.txt for ${url}:`, error);
|
||||
return true; // Assume allowed if robots.txt is unavailable
|
||||
}
|
||||
}
|
||||
|
||||
async scrapeEmails(url: string): Promise<ScrapingResult> {
|
||||
// Check cache first
|
||||
const cached = this.cache.get(url);
|
||||
if (cached) return cached;
|
||||
|
||||
// Check robots.txt
|
||||
const allowed = await this.checkRobotsPermission(url);
|
||||
if (!allowed) {
|
||||
console.log(`Respecting robots.txt disallow for ${url}`);
|
||||
return {
|
||||
emails: [],
|
||||
phones: [],
|
||||
addresses: [],
|
||||
socialLinks: [],
|
||||
source: url,
|
||||
timestamp: new Date(),
|
||||
attribution: 'Restricted by robots.txt'
|
||||
};
|
||||
}
|
||||
|
||||
// Wait for rate limiting slot
|
||||
await this.rateLimiter.waitForSlot();
|
||||
|
||||
try {
|
||||
const response = await axios.get(url, {
|
||||
timeout: this.options.timeout,
|
||||
headers: {
|
||||
'User-Agent': this.options.userAgent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
}
|
||||
});
|
||||
|
||||
// Check for noindex meta tag
|
||||
const $ = cheerio.load(response.data);
|
||||
if ($('meta[name="robots"][content*="noindex"]').length > 0) {
|
||||
return {
|
||||
emails: [],
|
||||
phones: [],
|
||||
addresses: [],
|
||||
socialLinks: [],
|
||||
source: url,
|
||||
timestamp: new Date(),
|
||||
attribution: 'Respecting noindex directive'
|
||||
};
|
||||
}
|
||||
|
||||
// Only extract contact information from public contact pages or structured data
|
||||
const isContactPage = /contact|about/i.test(url) ||
|
||||
$('h1, h2').text().toLowerCase().includes('contact');
|
||||
|
||||
const result = {
|
||||
emails: new Set<string>(),
|
||||
phones: new Set<string>(),
|
||||
addresses: new Set<string>(),
|
||||
socialLinks: new Set<string>(),
|
||||
source: url,
|
||||
timestamp: new Date(),
|
||||
attribution: `Data from public business listing at ${new URL(url).hostname}`
|
||||
};
|
||||
|
||||
// Extract from structured data (Schema.org)
|
||||
$('script[type="application/ld+json"]').each((_, element) => {
|
||||
try {
|
||||
const data = JSON.parse($(element).html() || '{}');
|
||||
if (data['@type'] === 'LocalBusiness' || data['@type'] === 'Organization') {
|
||||
if (data.email) result.emails.add(data.email.toLowerCase());
|
||||
if (data.telephone) result.phones.add(this.formatPhoneNumber(data.telephone));
|
||||
if (data.address) {
|
||||
const fullAddress = this.formatAddress(data.address);
|
||||
if (fullAddress) result.addresses.add(fullAddress);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error parsing JSON-LD:', e);
|
||||
}
|
||||
});
|
||||
|
||||
// Only scrape additional info if it's a contact page
|
||||
if (isContactPage) {
|
||||
// Extract clearly marked contact information
|
||||
$('[itemprop="email"], .contact-email, .email').each((_, element) => {
|
||||
const email = $(element).text().trim();
|
||||
if (this.isValidEmail(email)) {
|
||||
result.emails.add(email.toLowerCase());
|
||||
}
|
||||
});
|
||||
|
||||
$('[itemprop="telephone"], .phone, .contact-phone').each((_, element) => {
|
||||
const phone = $(element).text().trim();
|
||||
const formatted = this.formatPhoneNumber(phone);
|
||||
if (formatted) result.phones.add(formatted);
|
||||
});
|
||||
}
|
||||
|
||||
const finalResult = {
|
||||
...result,
|
||||
emails: Array.from(result.emails),
|
||||
phones: Array.from(result.phones),
|
||||
addresses: Array.from(result.addresses),
|
||||
socialLinks: Array.from(result.socialLinks)
|
||||
};
|
||||
|
||||
this.cache.set(url, finalResult);
|
||||
return finalResult;
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Failed to scrape ${url}:`, error);
|
||||
return {
|
||||
emails: [],
|
||||
phones: [],
|
||||
addresses: [],
|
||||
socialLinks: [],
|
||||
source: url,
|
||||
timestamp: new Date(),
|
||||
attribution: 'Error accessing page'
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private isValidEmail(email: string): boolean {
|
||||
return /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/.test(email);
|
||||
}
|
||||
|
||||
private formatPhoneNumber(phone: string): string {
|
||||
const digits = phone.replace(/\D/g, '');
|
||||
if (digits.length === 10) {
|
||||
return `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`;
|
||||
}
|
||||
return phone;
|
||||
}
|
||||
|
||||
private formatAddress(address: any): string | null {
|
||||
if (typeof address === 'string') return address;
|
||||
if (typeof address === 'object') {
|
||||
const parts = [
|
||||
address.streetAddress,
|
||||
address.addressLocality,
|
||||
address.addressRegion,
|
||||
address.postalCode
|
||||
].filter(Boolean);
|
||||
if (parts.length > 0) return parts.join(', ');
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
19
src/lib/providers/business/index.ts
Normal file
19
src/lib/providers/business/index.ts
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
import { Business, SearchParams } from '../../../types/business';
|
||||
import { WebScraperProvider } from './webScraper';
|
||||
|
||||
export class BusinessProvider {
|
||||
private scraper: WebScraperProvider;
|
||||
|
||||
constructor() {
|
||||
this.scraper = new WebScraperProvider();
|
||||
}
|
||||
|
||||
async search(params: SearchParams): Promise<Business[]> {
|
||||
return this.scraper.search(params);
|
||||
}
|
||||
|
||||
async getDetails(businessId: string): Promise<Business | null> {
|
||||
// Implement detailed business lookup using stored data or additional scraping
|
||||
return null;
|
||||
}
|
||||
}
|
||||
111
src/lib/providers/business/webScraper.ts
Normal file
111
src/lib/providers/business/webScraper.ts
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
import { Business, SearchParams } from '../../../types/business';
|
||||
import { searchWeb } from '../search'; // This is Perplexica's existing search function
|
||||
import { parseHTML } from '../utils/parser';
|
||||
|
||||
export class WebScraperProvider {
|
||||
async search(params: SearchParams): Promise<Business[]> {
|
||||
const searchQueries = this.generateQueries(params);
|
||||
const businesses: Business[] = [];
|
||||
|
||||
for (const query of searchQueries) {
|
||||
// Use Perplexica's existing search functionality
|
||||
const results = await searchWeb(query, {
|
||||
maxResults: 20,
|
||||
type: 'general' // or 'news' depending on what we want
|
||||
});
|
||||
|
||||
for (const result of results) {
|
||||
try {
|
||||
const html = await fetch(result.url).then(res => res.text());
|
||||
const businessData = await this.extractBusinessData(html, result.url);
|
||||
if (businessData) {
|
||||
businesses.push(businessData);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Failed to extract data from ${result.url}:`, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return this.deduplicateBusinesses(businesses);
|
||||
}
|
||||
|
||||
private generateQueries(params: SearchParams): string[] {
|
||||
const { location, category } = params;
|
||||
return [
|
||||
`${category} in ${location}`,
|
||||
`${category} business ${location}`,
|
||||
`best ${category} near ${location}`,
|
||||
`${category} services ${location} reviews`
|
||||
];
|
||||
}
|
||||
|
||||
private async extractBusinessData(html: string, sourceUrl: string): Promise<Business | null> {
|
||||
const $ = parseHTML(html);
|
||||
|
||||
// Different extraction logic based on source
|
||||
if (sourceUrl.includes('yelp.com')) {
|
||||
return this.extractYelpData($);
|
||||
} else if (sourceUrl.includes('yellowpages.com')) {
|
||||
return this.extractYellowPagesData($);
|
||||
}
|
||||
// ... other source-specific extractors
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private extractYelpData($: any): Business | null {
|
||||
try {
|
||||
return {
|
||||
id: crypto.randomUUID(),
|
||||
name: $('.business-name').text().trim(),
|
||||
phone: $('.phone-number').text().trim(),
|
||||
address: $('.address').text().trim(),
|
||||
city: $('.city').text().trim(),
|
||||
state: $('.state').text().trim(),
|
||||
zip: $('.zip').text().trim(),
|
||||
category: $('.category-str-list').text().split(',').map(s => s.trim()),
|
||||
rating: parseFloat($('.rating').text()),
|
||||
reviewCount: parseInt($('.review-count').text()),
|
||||
services: $('.services-list').text().split(',').map(s => s.trim()),
|
||||
hours: this.extractHours($),
|
||||
website: $('.website-link').attr('href'),
|
||||
verified: false,
|
||||
lastUpdated: new Date()
|
||||
};
|
||||
} catch (error) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private deduplicateBusinesses(businesses: Business[]): Business[] {
|
||||
// Group by phone number and address to identify duplicates
|
||||
const uniqueBusinesses = new Map<string, Business>();
|
||||
|
||||
for (const business of businesses) {
|
||||
const key = `${business.phone}-${business.address}`.toLowerCase();
|
||||
if (!uniqueBusinesses.has(key)) {
|
||||
uniqueBusinesses.set(key, business);
|
||||
} else {
|
||||
// Merge data if we have additional information
|
||||
const existing = uniqueBusinesses.get(key)!;
|
||||
uniqueBusinesses.set(key, this.mergeBusinessData(existing, business));
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(uniqueBusinesses.values());
|
||||
}
|
||||
|
||||
private mergeBusinessData(existing: Business, newData: Business): Business {
|
||||
return {
|
||||
...existing,
|
||||
services: [...new Set([...existing.services, ...newData.services])],
|
||||
rating: (existing.rating + newData.rating) / 2,
|
||||
reviewCount: existing.reviewCount + newData.reviewCount,
|
||||
// Keep the most complete data for other fields
|
||||
website: existing.website || newData.website,
|
||||
email: existing.email || newData.email,
|
||||
hours: existing.hours || newData.hours
|
||||
};
|
||||
}
|
||||
}
|
||||
54
src/lib/search.ts
Normal file
54
src/lib/search.ts
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
import axios from 'axios';
|
||||
import { config } from '../config';
|
||||
|
||||
interface SearchOptions {
|
||||
maxResults?: number;
|
||||
type?: 'general' | 'news';
|
||||
engines?: string[];
|
||||
}
|
||||
|
||||
interface SearchResult {
|
||||
url: string;
|
||||
title: string;
|
||||
content: string;
|
||||
score?: number;
|
||||
}
|
||||
|
||||
export async function searchWeb(
|
||||
query: string,
|
||||
options: SearchOptions = {}
|
||||
): Promise<SearchResult[]> {
|
||||
const {
|
||||
maxResults = 20,
|
||||
type = 'general',
|
||||
engines = ['google', 'bing', 'duckduckgo']
|
||||
} = options;
|
||||
|
||||
try {
|
||||
const response = await axios.get(`${config.search.searxngUrl || process.env.SEARXNG_URL}/search`, {
|
||||
params: {
|
||||
q: query,
|
||||
format: 'json',
|
||||
categories: type,
|
||||
engines: engines.join(','),
|
||||
limit: maxResults
|
||||
}
|
||||
});
|
||||
|
||||
if (!response.data || !response.data.results) {
|
||||
console.error('Invalid response from SearxNG:', response.data);
|
||||
return [];
|
||||
}
|
||||
|
||||
return response.data.results.map((result: any) => ({
|
||||
url: result.url,
|
||||
title: result.title,
|
||||
content: result.content || result.snippet || '',
|
||||
score: result.score
|
||||
}));
|
||||
|
||||
} catch (error) {
|
||||
console.error('Search failed:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,47 +1,313 @@
|
|||
import axios from 'axios';
|
||||
import { getSearxngApiEndpoint } from '../config';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { createWorker } from 'tesseract.js';
|
||||
import { env } from '../config/env';
|
||||
import { OllamaService } from './services/ollamaService';
|
||||
import { BusinessData } from './types';
|
||||
import { db } from './services/databaseService';
|
||||
import { generateBusinessId } from './utils';
|
||||
import { extractContactFromHtml, extractCleanAddress } from './utils/scraper';
|
||||
import { GeocodingService } from './services/geocodingService';
|
||||
import { cleanAddress, formatPhoneNumber, cleanEmail, cleanDescription } from './utils/dataCleanup';
|
||||
import { CleanupService } from './services/cleanupService';
|
||||
|
||||
interface SearxngSearchOptions {
|
||||
categories?: string[];
|
||||
engines?: string[];
|
||||
language?: string;
|
||||
pageno?: number;
|
||||
// Define interfaces used only in this file
|
||||
interface SearchResult {
|
||||
url: string;
|
||||
title: string;
|
||||
content: string;
|
||||
phone?: string;
|
||||
email?: string;
|
||||
address?: string;
|
||||
website?: string;
|
||||
rating?: number;
|
||||
coordinates?: {
|
||||
lat: number;
|
||||
lng: number;
|
||||
};
|
||||
}
|
||||
|
||||
interface SearxngSearchResult {
|
||||
title: string;
|
||||
url: string;
|
||||
img_src?: string;
|
||||
thumbnail_src?: string;
|
||||
thumbnail?: string;
|
||||
content?: string;
|
||||
author?: string;
|
||||
iframe_src?: string;
|
||||
interface ContactInfo {
|
||||
phone?: string;
|
||||
email?: string;
|
||||
address?: string;
|
||||
description?: string;
|
||||
openingHours?: string[];
|
||||
}
|
||||
|
||||
export const searchSearxng = async (
|
||||
query: string,
|
||||
opts?: SearxngSearchOptions,
|
||||
) => {
|
||||
const searxngURL = getSearxngApiEndpoint();
|
||||
// Export the main search function
|
||||
export async function searchBusinesses(
|
||||
query: string,
|
||||
options: { onProgress?: (status: string, progress: number) => void } = {}
|
||||
): Promise<BusinessData[]> {
|
||||
try {
|
||||
console.log('Processing search query:', query);
|
||||
const [searchTerm, location] = query.split(' in ').map(s => s.trim());
|
||||
if (!searchTerm || !location) {
|
||||
throw new Error('Invalid search query format. Use: "search term in location"');
|
||||
}
|
||||
|
||||
const url = new URL(`${searxngURL}/search?format=json`);
|
||||
url.searchParams.append('q', query);
|
||||
options.onProgress?.('Checking cache', 0);
|
||||
|
||||
if (opts) {
|
||||
Object.keys(opts).forEach((key) => {
|
||||
if (Array.isArray(opts[key])) {
|
||||
url.searchParams.append(key, opts[key].join(','));
|
||||
return;
|
||||
}
|
||||
url.searchParams.append(key, opts[key]);
|
||||
});
|
||||
}
|
||||
// Check cache first
|
||||
const cacheKey = `search:${searchTerm}:${location}`;
|
||||
let results = await db.getFromCache(cacheKey);
|
||||
|
||||
if (!results) {
|
||||
// Check database for existing businesses
|
||||
console.log('Searching database for:', searchTerm, 'in', location);
|
||||
const existingBusinesses = await db.searchBusinesses(searchTerm, location);
|
||||
|
||||
// Start search immediately
|
||||
console.log('Starting web search');
|
||||
const searchPromise = performSearch(searchTerm, location, options);
|
||||
|
||||
if (existingBusinesses.length > 0) {
|
||||
console.log(`Found ${existingBusinesses.length} existing businesses`);
|
||||
options.onProgress?.('Retrieved from database', 50);
|
||||
}
|
||||
|
||||
const res = await axios.get(url.toString());
|
||||
// Wait for new results
|
||||
const newResults = await searchPromise;
|
||||
console.log(`Got ${newResults.length} new results from search`);
|
||||
|
||||
// Merge results, removing duplicates by ID
|
||||
const allResults = [...existingBusinesses];
|
||||
for (const result of newResults) {
|
||||
if (!allResults.some(b => b.id === result.id)) {
|
||||
allResults.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Total unique results: ${allResults.length}`);
|
||||
|
||||
// Cache combined results
|
||||
await db.saveToCache(cacheKey, allResults, env.cache.durationHours * 60 * 60 * 1000);
|
||||
|
||||
console.log(`Returning ${allResults.length} total results (${existingBusinesses.length} existing + ${newResults.length} new)`);
|
||||
results = allResults;
|
||||
}
|
||||
|
||||
const results: SearxngSearchResult[] = res.data.results;
|
||||
const suggestions: string[] = res.data.suggestions;
|
||||
// Clean all results using LLM
|
||||
options.onProgress?.('Cleaning data', 75);
|
||||
const cleanedResults = await CleanupService.cleanBusinessRecords(results);
|
||||
|
||||
return { results, suggestions };
|
||||
};
|
||||
options.onProgress?.('Search complete', 100);
|
||||
return cleanedResults;
|
||||
} catch (error) {
|
||||
console.error('Search error:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async function performSearch(
|
||||
searchTerm: string,
|
||||
location: string,
|
||||
options: any
|
||||
): Promise<BusinessData[]> {
|
||||
const queries = [
|
||||
searchTerm + ' ' + location,
|
||||
searchTerm + ' business near ' + location,
|
||||
searchTerm + ' services ' + location,
|
||||
'local ' + searchTerm + ' ' + location
|
||||
];
|
||||
|
||||
options.onProgress?.('Searching multiple sources', 25);
|
||||
|
||||
let allResults: SearchResult[] = [];
|
||||
const seenUrls = new Set<string>();
|
||||
|
||||
for (const q of queries) {
|
||||
try {
|
||||
const response = await axios.get(`${env.searxng.currentUrl}/search`, {
|
||||
params: {
|
||||
q,
|
||||
format: 'json',
|
||||
engines: 'google,google_maps',
|
||||
language: 'en-US',
|
||||
time_range: '',
|
||||
safesearch: 1
|
||||
}
|
||||
});
|
||||
|
||||
if (response.data?.results) {
|
||||
// Deduplicate results
|
||||
const newResults = response.data.results.filter((result: SearchResult) => {
|
||||
if (seenUrls.has(result.url)) {
|
||||
return false;
|
||||
}
|
||||
seenUrls.add(result.url);
|
||||
return true;
|
||||
});
|
||||
|
||||
console.log(`Found ${newResults.length} unique results from ${response.data.results[0]?.engine}`);
|
||||
allResults = allResults.concat(newResults);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Search failed for query "${q}":`, error);
|
||||
}
|
||||
}
|
||||
|
||||
options.onProgress?.('Processing results', 50);
|
||||
|
||||
const filteredResults = allResults.filter(isValidBusinessResult);
|
||||
const processedResults = await processResults(filteredResults, location);
|
||||
|
||||
// Save results to database
|
||||
for (const result of processedResults) {
|
||||
await db.saveBusiness(result).catch(console.error);
|
||||
}
|
||||
|
||||
options.onProgress?.('Search complete', 100);
|
||||
return processedResults;
|
||||
}
|
||||
|
||||
// Add other necessary functions (isValidBusinessResult, processResults, etc.)
|
||||
function isValidBusinessResult(result: SearchResult): boolean {
|
||||
// Skip listing/directory pages and search results
|
||||
const skipPatterns = [
|
||||
'tripadvisor.com',
|
||||
'yelp.com',
|
||||
'opentable.com',
|
||||
'restaurants-for-sale',
|
||||
'guide.michelin.com',
|
||||
'denver.org',
|
||||
'/blog/',
|
||||
'/maps/',
|
||||
'search?',
|
||||
'features/',
|
||||
'/lists/',
|
||||
'reddit.com',
|
||||
'eater.com'
|
||||
];
|
||||
|
||||
if (skipPatterns.some(pattern => result.url.toLowerCase().includes(pattern))) {
|
||||
console.log(`Skipping listing page: ${result.url}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Must have a title
|
||||
if (!result.title || result.title.length < 2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Skip results that look like articles or lists
|
||||
const articlePatterns = [
|
||||
'Best',
|
||||
'Top',
|
||||
'Guide',
|
||||
'Where to',
|
||||
'Welcome to',
|
||||
'Updated',
|
||||
'Near',
|
||||
'Restaurants in'
|
||||
];
|
||||
|
||||
if (articlePatterns.some(pattern => result.title.includes(pattern))) {
|
||||
console.log(`Skipping article: ${result.title}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Only accept results that look like actual business pages
|
||||
const businessPatterns = [
|
||||
'menu',
|
||||
'reservation',
|
||||
'location',
|
||||
'contact',
|
||||
'about-us',
|
||||
'home'
|
||||
];
|
||||
|
||||
const hasBusinessPattern = businessPatterns.some(pattern =>
|
||||
result.url.toLowerCase().includes(pattern) ||
|
||||
result.content.toLowerCase().includes(pattern)
|
||||
);
|
||||
|
||||
if (!hasBusinessPattern) {
|
||||
console.log(`Skipping non-business page: ${result.url}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async function processResults(results: SearchResult[], location: string): Promise<BusinessData[]> {
|
||||
const processedResults: BusinessData[] = [];
|
||||
|
||||
// Get coordinates for the location
|
||||
const locationGeo = await GeocodingService.geocode(location);
|
||||
const defaultCoords = locationGeo || { lat: 39.7392, lng: -104.9903 };
|
||||
|
||||
for (const result of results) {
|
||||
try {
|
||||
// Extract contact info from webpage
|
||||
const contactInfo = await extractContactFromHtml(result.url);
|
||||
|
||||
// Create initial business record
|
||||
const business: BusinessData = {
|
||||
id: generateBusinessId(result),
|
||||
name: cleanBusinessName(result.title),
|
||||
phone: result.phone || contactInfo.phone || '',
|
||||
email: result.email || contactInfo.email || '',
|
||||
address: result.address || contactInfo.address || '',
|
||||
rating: result.rating || 0,
|
||||
website: result.website || result.url || '',
|
||||
logo: '',
|
||||
source: 'web',
|
||||
description: result.content || contactInfo.description || '',
|
||||
location: defaultCoords,
|
||||
openingHours: contactInfo.openingHours
|
||||
};
|
||||
|
||||
// Clean up the record using LLM
|
||||
const cleanedBusiness = await CleanupService.cleanBusinessRecord(business);
|
||||
|
||||
// Get coordinates for cleaned address
|
||||
if (cleanedBusiness.address) {
|
||||
const addressGeo = await GeocodingService.geocode(cleanedBusiness.address);
|
||||
if (addressGeo) {
|
||||
cleanedBusiness.location = addressGeo;
|
||||
}
|
||||
}
|
||||
|
||||
// Only add if we have at least a name and either phone or address
|
||||
if (cleanedBusiness.name && (cleanedBusiness.phone || cleanedBusiness.address)) {
|
||||
processedResults.push(cleanedBusiness);
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`Error processing result ${result.title}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
return processedResults;
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
function cleanBusinessName(name: string): string {
|
||||
// Remove common suffixes and prefixes
|
||||
const cleanName = name
|
||||
.replace(/^(The|A|An)\s+/i, '')
|
||||
.replace(/\s+(-|–|—|:).*$/, '')
|
||||
.replace(/\s*\([^)]*\)/g, '')
|
||||
.trim();
|
||||
|
||||
return cleanName;
|
||||
}
|
||||
|
||||
async function getLocationCoordinates(address: string): Promise<{lat: number, lng: number}> {
|
||||
// Implement geocoding here
|
||||
// For now, return default coordinates for Denver
|
||||
return { lat: 39.7392, lng: -104.9903 };
|
||||
}
|
||||
|
||||
async function searchAndUpdateInBackground(searchTerm: string, location: string) {
|
||||
try {
|
||||
const results = await performSearch(searchTerm, location, {});
|
||||
console.log(`Updated ${results.length} businesses in background`);
|
||||
} catch (error) {
|
||||
console.error('Background search error:', error);
|
||||
}
|
||||
}
|
||||
|
||||
// ... rest of the file remains the same
|
||||
|
|
|
|||
111
src/lib/services/businessCrawler.ts
Normal file
111
src/lib/services/businessCrawler.ts
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { Cache } from '../utils/cache';
|
||||
import { RateLimiter } from '../utils/rateLimiter';
|
||||
|
||||
interface CrawlResult {
|
||||
mainContent: string;
|
||||
contactInfo: string;
|
||||
aboutInfo: string;
|
||||
structuredData: any;
|
||||
}
|
||||
|
||||
export class BusinessCrawler {
|
||||
private cache: Cache<CrawlResult>;
|
||||
private rateLimiter: RateLimiter;
|
||||
|
||||
constructor() {
|
||||
this.cache = new Cache<CrawlResult>(60); // 1 hour cache
|
||||
this.rateLimiter = new RateLimiter();
|
||||
}
|
||||
|
||||
async crawlBusinessSite(url: string): Promise<CrawlResult> {
|
||||
// Check cache first
|
||||
const cached = this.cache.get(url);
|
||||
if (cached) return cached;
|
||||
|
||||
await this.rateLimiter.waitForSlot();
|
||||
|
||||
try {
|
||||
const mainPage = await this.fetchPage(url);
|
||||
const $ = cheerio.load(mainPage);
|
||||
|
||||
// Get all important URLs
|
||||
const contactUrl = this.findContactPage($, url);
|
||||
const aboutUrl = this.findAboutPage($, url);
|
||||
|
||||
// Crawl additional pages
|
||||
const [contactPage, aboutPage] = await Promise.all([
|
||||
contactUrl ? this.fetchPage(contactUrl) : '',
|
||||
aboutUrl ? this.fetchPage(aboutUrl) : ''
|
||||
]);
|
||||
|
||||
// Extract structured data
|
||||
const structuredData = this.extractStructuredData($);
|
||||
|
||||
const result = {
|
||||
mainContent: $('body').text(),
|
||||
contactInfo: contactPage,
|
||||
aboutInfo: aboutPage,
|
||||
structuredData
|
||||
};
|
||||
|
||||
this.cache.set(url, result);
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error(`Failed to crawl ${url}:`, error);
|
||||
return {
|
||||
mainContent: '',
|
||||
contactInfo: '',
|
||||
aboutInfo: '',
|
||||
structuredData: {}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
private async fetchPage(url: string): Promise<string> {
|
||||
try {
|
||||
const response = await axios.get(url, {
|
||||
timeout: 10000,
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; BizSearch/1.0; +http://localhost:3000/about)',
|
||||
}
|
||||
});
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error(`Failed to fetch ${url}:`, error);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
private findContactPage($: cheerio.CheerioAPI, baseUrl: string): string | null {
|
||||
const contactLinks = $('a[href*="contact"], a:contains("Contact")');
|
||||
if (contactLinks.length > 0) {
|
||||
const href = contactLinks.first().attr('href');
|
||||
return href ? new URL(href, baseUrl).toString() : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private findAboutPage($: cheerio.CheerioAPI, baseUrl: string): string | null {
|
||||
const aboutLinks = $('a[href*="about"], a:contains("About")');
|
||||
if (aboutLinks.length > 0) {
|
||||
const href = aboutLinks.first().attr('href');
|
||||
return href ? new URL(href, baseUrl).toString() : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private extractStructuredData($: cheerio.CheerioAPI): any {
|
||||
const structuredData: any[] = [];
|
||||
$('script[type="application/ld+json"]').each((_, element) => {
|
||||
try {
|
||||
const data = JSON.parse($(element).html() || '{}');
|
||||
structuredData.push(data);
|
||||
} catch (error) {
|
||||
console.error('Failed to parse structured data:', error);
|
||||
}
|
||||
});
|
||||
return structuredData;
|
||||
}
|
||||
}
|
||||
71
src/lib/services/cacheService.ts
Normal file
71
src/lib/services/cacheService.ts
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import { supabase } from '../supabase';
|
||||
import { BusinessData } from '../searxng';
|
||||
|
||||
export class CacheService {
|
||||
static async getCachedResults(category: string, location: string): Promise<BusinessData[] | null> {
|
||||
try {
|
||||
const { data, error } = await supabase
|
||||
.from('search_cache')
|
||||
.select('results')
|
||||
.eq('category', category.toLowerCase())
|
||||
.eq('location', location.toLowerCase())
|
||||
.gt('expires_at', new Date().toISOString())
|
||||
.order('created_at', { ascending: false })
|
||||
.limit(1)
|
||||
.single();
|
||||
|
||||
if (error) throw error;
|
||||
return data ? data.results : null;
|
||||
} catch (error) {
|
||||
console.error('Cache lookup failed:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
static async cacheResults(
|
||||
category: string,
|
||||
location: string,
|
||||
results: BusinessData[],
|
||||
expiresInDays: number = 7
|
||||
): Promise<void> {
|
||||
try {
|
||||
const expiresAt = new Date();
|
||||
expiresAt.setDate(expiresAt.getDate() + expiresInDays);
|
||||
|
||||
const { error } = await supabase
|
||||
.from('search_cache')
|
||||
.insert({
|
||||
query: `${category} in ${location}`,
|
||||
category: category.toLowerCase(),
|
||||
location: location.toLowerCase(),
|
||||
results,
|
||||
expires_at: expiresAt.toISOString()
|
||||
});
|
||||
|
||||
if (error) throw error;
|
||||
} catch (error) {
|
||||
console.error('Failed to cache results:', error);
|
||||
}
|
||||
}
|
||||
|
||||
static async updateCache(
|
||||
category: string,
|
||||
location: string,
|
||||
newResults: BusinessData[]
|
||||
): Promise<void> {
|
||||
try {
|
||||
const { error } = await supabase
|
||||
.from('search_cache')
|
||||
.update({
|
||||
results: newResults,
|
||||
updated_at: new Date().toISOString()
|
||||
})
|
||||
.eq('category', category.toLowerCase())
|
||||
.eq('location', location.toLowerCase());
|
||||
|
||||
if (error) throw error;
|
||||
} catch (error) {
|
||||
console.error('Failed to update cache:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
235
src/lib/services/cleanupService.ts
Normal file
235
src/lib/services/cleanupService.ts
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
import { DeepSeekService } from './deepseekService';
|
||||
import { Business } from '../types';
|
||||
import { db } from './databaseService';
|
||||
|
||||
// Constants for validation and scoring
const BATCH_SIZE = 3; // Process businesses in small batches to avoid overwhelming LLM
const LLM_TIMEOUT = 30000; // 30 second timeout for LLM requests
const MIN_CONFIDENCE_SCORE = 0.7; // Minimum score required to cache results
// Basic email shape: local-part @ domain . TLD (2+ letters).
const VALID_EMAIL_REGEX = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;
// US phone in exactly "(NNN) NNN-NNNN" form, as produced by validateAndClean.
const VALID_PHONE_REGEX = /^\(\d{3}\) \d{3}-\d{4}$/;
// US street address: number + street-type keyword, then "city, ST 12345".
const VALID_ADDRESS_REGEX = /^\d+.*(?:street|st|avenue|ave|road|rd|boulevard|blvd|lane|ln|drive|dr|court|ct|circle|cir|way|parkway|pkwy|place|pl),?\s+[a-z ]+,\s*[a-z]{2}\s+\d{5}$/i;
|
||||
|
||||
/**
 * Normalizes scraped business records: LLM-assisted cleanup (via
 * DeepSeekService) followed by deterministic regex validation, with a
 * confidence score gating which cleaned records get cached.
 */
export class CleanupService {
  /**
   * Attempts to clean business data using LLM with timeout protection.
   * Falls back to original data if LLM fails or times out.
   */
  private static async cleanWithLLM(prompt: string, originalBusiness: Business): Promise<string> {
    try {
      // NOTE(review): the timer is never cleared when the LLM wins the
      // race, so a pending timeout lingers for up to LLM_TIMEOUT ms —
      // harmless, but worth knowing.
      const timeoutPromise = new Promise((_, reject) => {
        setTimeout(() => reject(new Error('LLM timeout')), LLM_TIMEOUT);
      });

      const llmPromise = DeepSeekService.chat([{
        role: 'user',
        content: prompt
      }]);

      const response = await Promise.race([llmPromise, timeoutPromise]);
      return (response as string).trim();
    } catch (error) {
      console.error('LLM cleanup error:', error);
      // On timeout, return the original values in the same "Field: value"
      // layout parseResponse expects.
      return `
        Address: ${originalBusiness.address}
        Phone: ${originalBusiness.phone}
        Email: ${originalBusiness.email}
        Description: ${originalBusiness.description}
      `;
    }
  }

  /**
   * Calculates a confidence score (0-1) for the cleaned business data.
   * Score is based on:
   * - Valid email format (0.25)
   * - Valid phone format (0.25)
   * - Valid address format (0.25)
   * - Description quality (0.25)
   */
  private static calculateConfidenceScore(business: Business): number {
    let score = 0;

    // Valid email adds 0.25
    if (business.email && VALID_EMAIL_REGEX.test(business.email)) {
      score += 0.25;
    }

    // Valid phone adds 0.25
    if (business.phone && VALID_PHONE_REGEX.test(business.phone)) {
      score += 0.25;
    }

    // Valid address adds 0.25
    if (business.address && VALID_ADDRESS_REGEX.test(business.address)) {
      score += 0.25;
    }

    // Description quality checks (0.25 max: 0.1 + 0.1 + 0.05)
    if (business.description) {
      // Length check (0.1)
      if (business.description.length > 30 && business.description.length < 200) {
        score += 0.1;
      }

      // Relevance check (0.1) — description mentions the inferred type
      const businessType = this.getBusinessType(business.name);
      if (business.description.toLowerCase().includes(businessType)) {
        score += 0.1;
      }

      // No HTML/markdown (0.05)
      if (!/[<>[\]()]/.test(business.description)) {
        score += 0.05;
      }
    }

    return score;
  }

  /**
   * Determines the type of business based on name keywords.
   * Used for validating and generating descriptions.
   * Returns 'business' when no keyword matches.
   */
  private static getBusinessType(name: string): string {
    const types = [
      'restaurant', 'plumber', 'electrician', 'cafe', 'bar',
      'salon', 'shop', 'store', 'service'
    ];

    const nameLower = name.toLowerCase();
    return types.find(type => nameLower.includes(type)) || 'business';
  }

  /**
   * Parses LLM response into structured business data.
   * Expects format: "field: value" for each line.
   * Unrecognized lines are ignored.
   */
  private static parseResponse(response: string): Partial<Business> {
    const cleaned: Partial<Business> = {};
    const lines = response.split('\n');

    for (const line of lines) {
      const [field, ...values] = line.split(':');
      // Re-join so values that themselves contain ':' (URLs, times)
      // survive intact.
      const value = values.join(':').trim();

      switch (field.toLowerCase().trim()) {
        case 'address':
          cleaned.address = value;
          break;
        case 'phone':
          cleaned.phone = value;
          break;
        case 'email':
          cleaned.email = value;
          break;
        case 'description':
          cleaned.description = value;
          break;
      }
    }

    return cleaned;
  }

  /**
   * Applies validation rules and cleaning to each field.
   * - Standardizes formats
   * - Removes invalid data
   * - Ensures consistent formatting
   */
  private static validateAndClean(business: Business): Business {
    const cleaned = { ...business };

    // Email validation and cleaning
    if (cleaned.email) {
      cleaned.email = cleaned.email
        .toLowerCase()
        .replace(/\[|\]|\(mailto:.*?\)/g, '') // strip markdown-link artifacts
        .replace(/^\d+-\d+/, '') // strip leading "NN-NN" scrape noise
        .trim();

      // Reject anything malformed or a known placeholder value.
      if (!VALID_EMAIL_REGEX.test(cleaned.email) ||
          ['none', 'n/a', 'union office', ''].includes(cleaned.email.toLowerCase())) {
        cleaned.email = '';
      }
    }

    // Phone validation and cleaning: normalize to "(NNN) NNN-NNNN";
    // anything that is not exactly 10 digits is discarded.
    if (cleaned.phone) {
      const digits = cleaned.phone.replace(/\D/g, '');
      if (digits.length === 10) {
        cleaned.phone = `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`;
      } else {
        cleaned.phone = '';
      }
    }

    // Address validation and cleaning
    if (cleaned.address) {
      cleaned.address = cleaned.address
        .replace(/^.*?(?=\d|[A-Z])/s, '') // drop lead-in before street number / capital
        .replace(/^(Sure!.*?:|The business.*?:|.*?address.*?:)(?:\s*\\n)*\s*/si, '') // drop LLM preamble
        .replace(/\s+/g, ' ')
        .trim();

      // Standardize state abbreviations
      cleaned.address = cleaned.address.replace(/\b(Colorado|Colo|Col)\b/gi, 'CO');
    }

    // Description validation and cleaning
    if (cleaned.description) {
      cleaned.description = cleaned.description
        .replace(/\$\d+(\.\d{2})?/g, '') // Remove prices
        .replace(/\b(call|email|website|click|visit)\b.*$/i, '') // Remove calls to action
        .replace(/\s+/g, ' ')
        .trim();

      // If the description never mentions the inferred type, replace it
      // with a generic one-liner for that type.
      const businessType = this.getBusinessType(cleaned.name);
      if (businessType !== 'business' &&
          !cleaned.description.toLowerCase().includes(businessType)) {
        cleaned.description = `${businessType.charAt(0).toUpperCase() + businessType.slice(1)} services in the Denver area.`;
      }
    }

    return cleaned;
  }

  /**
   * Cleans a single business record. Uses a per-business cache entry
   * (24h TTL); only results whose confidence score reaches
   * MIN_CONFIDENCE_SCORE are cached.
   */
  static async cleanBusinessRecord(business: Business): Promise<Business> {
    // Check cache first
    const cacheKey = `clean:${business.id}`;
    const cached = await db.getFromCache(cacheKey);
    if (cached) {
      console.log('Using cached clean data for:', business.name);
      return cached;
    }

    // Clean using DeepSeek
    const cleaned = await DeepSeekService.cleanBusinessData(business);
    const validated = this.validateAndClean({ ...business, ...cleaned });

    // Only cache if confidence score is high enough
    const confidence = this.calculateConfidenceScore(validated);
    if (confidence >= MIN_CONFIDENCE_SCORE) {
      await db.saveToCache(cacheKey, validated, 24 * 60 * 60 * 1000);
    }

    return validated;
  }

  /**
   * Cleans many records, BATCH_SIZE at a time, so the LLM backend is not
   * flooded with concurrent requests. Input order is preserved.
   */
  static async cleanBusinessRecords(businesses: Business[]): Promise<Business[]> {
    const cleanedBusinesses: Business[] = [];

    // Process in batches
    for (let i = 0; i < businesses.length; i += BATCH_SIZE) {
      const batch = businesses.slice(i, i + BATCH_SIZE);
      const cleanedBatch = await Promise.all(
        batch.map(business => this.cleanBusinessRecord(business))
      );
      cleanedBusinesses.push(...cleanedBatch);
    }

    return cleanedBusinesses;
  }
}
|
||||
107
src/lib/services/dataValidation.ts
Normal file
107
src/lib/services/dataValidation.ts
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
import { OllamaService } from './ollamaService';
|
||||
|
||||
/** Shape returned by DataValidationService.validateAndCleanData. */
interface ValidatedBusinessData {
  name: string;        // verified business name, 'Unknown' on failure
  phone: string;       // "(XXX) XXX-XXXX" or 'N/A'
  email: string;       // verified email or 'N/A'
  address: string;     // physical address or 'N/A'
  description: string;
  hours?: string;
  isValid: boolean;    // true only when name + one contact method found
}

/**
 * Extracts and validates business fields from free text by prompting a
 * local Ollama model for a JSON object, then normalizing the result.
 * Any failure degrades to an all-N/A record with isValid=false.
 */
export class DataValidationService {
  private ollama: OllamaService;

  constructor() {
    this.ollama = new OllamaService();
  }

  /**
   * Asks the LLM to pull structured business data out of rawText.
   * The model is told to return ONLY a JSON object; the first {...} span
   * in its reply is parsed and re-validated locally.
   */
  async validateAndCleanData(rawText: string): Promise<ValidatedBusinessData> {
    try {
      const prompt = `
        You are a business data validation expert. Extract and validate business information from the following text.
        Return ONLY a JSON object with the following format, nothing else:
        {
          "name": "verified business name",
          "phone": "formatted phone number or N/A",
          "email": "verified email address or N/A",
          "address": "verified physical address or N/A",
          "description": "short business description",
          "hours": "business hours if available",
          "isValid": boolean
        }

        Rules:
        1. Phone numbers should be in (XXX) XXX-XXXX format
        2. Addresses should be properly formatted with street, city, state, zip
        3. Remove any irrelevant text from descriptions
        4. Set isValid to true only if name and at least one contact method is found
        5. Clean up any obvious formatting issues
        6. Validate email addresses for proper format

        Text to analyze:
        ${rawText}
      `;

      const response = await this.ollama.generateResponse(prompt);

      try {
        // Find the JSON object in the response — models often wrap it in
        // prose or code fences despite the instructions.
        const jsonMatch = response.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
          throw new Error('No JSON found in response');
        }

        const result = JSON.parse(jsonMatch[0]);
        return this.validateResult(result);
      } catch (parseError) {
        console.error('Failed to parse Ollama response:', parseError);
        throw parseError;
      }
    } catch (error) {
      console.error('Data validation failed:', error);
      // Fallback: an explicitly invalid, all-placeholder record.
      return {
        name: 'Unknown',
        phone: 'N/A',
        email: 'N/A',
        address: 'N/A',
        description: '',
        hours: '',
        isValid: false
      };
    }
  }

  /**
   * Coerces the model's raw JSON into ValidatedBusinessData, filling
   * defaults for missing fields and normalizing the phone number.
   */
  private validateResult(result: any): ValidatedBusinessData {
    // Ensure all required fields are present
    const validated: ValidatedBusinessData = {
      name: this.cleanField(result.name) || 'Unknown',
      phone: this.formatPhone(result.phone) || 'N/A',
      email: this.cleanField(result.email) || 'N/A',
      address: this.cleanField(result.address) || 'N/A',
      description: this.cleanField(result.description) || '',
      hours: this.cleanField(result.hours),
      isValid: Boolean(result.isValid)
    };

    return validated;
  }

  /** Returns the trimmed, whitespace-collapsed string, or '' for non-strings. */
  private cleanField(value: any): string {
    if (!value || typeof value !== 'string') return '';
    return value.trim().replace(/\s+/g, ' ');
  }

  /**
   * Formats a 10-digit phone number as "(NNN) NNN-NNNN"; other inputs are
   * returned unchanged (including 'N/A').
   */
  private formatPhone(phone: string): string {
    if (!phone || phone === 'N/A') return 'N/A';

    // Extract digits
    const digits = phone.replace(/\D/g, '');
    if (digits.length === 10) {
      return `(${digits.slice(0,3)}) ${digits.slice(3,6)}-${digits.slice(6)}`;
    }

    return phone;
  }
}
|
||||
80
src/lib/services/databaseService.ts
Normal file
80
src/lib/services/databaseService.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
import { Business } from '../types';
|
||||
import env from '../../config/env';
|
||||
|
||||
interface PartialBusiness {
|
||||
name: string;
|
||||
address: string;
|
||||
phone: string;
|
||||
description: string;
|
||||
website?: string;
|
||||
rating?: number;
|
||||
source?: string;
|
||||
location?: {
|
||||
lat: number;
|
||||
lng: number;
|
||||
};
|
||||
}
|
||||
|
||||
export class DatabaseService {
|
||||
private supabase;
|
||||
|
||||
constructor() {
|
||||
this.supabase = createClient(env.SUPABASE_URL, env.SUPABASE_KEY);
|
||||
}
|
||||
|
||||
async saveBusiness(business: PartialBusiness): Promise<Business> {
|
||||
const { data, error } = await this.supabase
|
||||
.from('businesses')
|
||||
.upsert({
|
||||
name: business.name,
|
||||
address: business.address,
|
||||
phone: business.phone,
|
||||
description: business.description,
|
||||
website: business.website,
|
||||
source: business.source || 'deepseek',
|
||||
rating: business.rating || 4.5,
|
||||
location: business.location ? `(${business.location.lng},${business.location.lat})` : '(0,0)'
|
||||
})
|
||||
.select()
|
||||
.single();
|
||||
|
||||
if (error) {
|
||||
console.error('Error saving business:', error);
|
||||
throw new Error('Failed to save business');
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
async findBusinessesByQuery(query: string, location: string): Promise<Business[]> {
|
||||
const { data, error } = await this.supabase
|
||||
.from('businesses')
|
||||
.select('*')
|
||||
.or(`name.ilike.%${query}%,description.ilike.%${query}%`)
|
||||
.ilike('address', `%${location}%`)
|
||||
.order('rating', { ascending: false });
|
||||
|
||||
if (error) {
|
||||
console.error('Error finding businesses:', error);
|
||||
throw new Error('Failed to find businesses');
|
||||
}
|
||||
|
||||
return data || [];
|
||||
}
|
||||
|
||||
async getBusinessById(id: string): Promise<Business | null> {
|
||||
const { data, error } = await this.supabase
|
||||
.from('businesses')
|
||||
.select('*')
|
||||
.eq('id', id)
|
||||
.single();
|
||||
|
||||
if (error) {
|
||||
console.error('Error getting business:', error);
|
||||
return null;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
}
|
||||
285
src/lib/services/deepseekService.ts
Normal file
285
src/lib/services/deepseekService.ts
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
import axios from 'axios';
|
||||
import EventEmitter from 'events';
|
||||
import { Business } from '../types';
|
||||
|
||||
/**
 * Minimal business listing shape produced by the DeepSeek model's JSON
 * output before it is enriched into a full Business record.
 */
interface PartialBusiness {
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  rating?: number; // 1-5 stars, decimals allowed (e.g. 4.8)
}
|
||||
|
||||
export class DeepSeekService extends EventEmitter {
|
||||
private readonly baseUrl: string;
|
||||
private readonly model: string;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434';
|
||||
this.model = process.env.OLLAMA_MODEL || 'deepseek-coder:6.7b';
|
||||
console.log('DeepSeekService initialized with:', {
|
||||
baseUrl: this.baseUrl,
|
||||
model: this.model
|
||||
});
|
||||
}
|
||||
|
||||
async streamChat(messages: any[], onResult: (business: PartialBusiness) => Promise<void>): Promise<void> {
|
||||
try {
|
||||
console.log('\nStarting streaming chat request...');
|
||||
|
||||
// Enhanced system prompt with more explicit instructions
|
||||
const enhancedMessages = [
|
||||
{
|
||||
role: "system",
|
||||
content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format.
|
||||
|
||||
When asked about businesses in a location, return business listings one at a time in this exact JSON format:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"name": "Example Plumbing Co",
|
||||
"address": "123 Main St, Denver, CO 80202",
|
||||
"phone": "(303) 555-0123",
|
||||
"description": "Licensed plumbing contractor specializing in residential and commercial services",
|
||||
"website": "https://exampleplumbing.com",
|
||||
"rating": 4.8
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
Important rules:
|
||||
1. Return ONE business at a time in JSON format
|
||||
2. Generate realistic but fictional business data
|
||||
3. Use proper formatting for phone numbers and addresses
|
||||
4. Include ratings from 1-5 stars (can use decimals)
|
||||
5. When sorting by rating, return highest rated first
|
||||
6. Make each business unique with different names, addresses, and phone numbers
|
||||
7. Keep descriptions concise and professional
|
||||
8. Use realistic website URLs based on business names
|
||||
9. Return exactly the number of businesses requested`
|
||||
},
|
||||
...messages
|
||||
];
|
||||
|
||||
console.log('Sending streaming request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2));
|
||||
|
||||
const response = await axios.post(`${this.baseUrl}/api/chat`, {
|
||||
model: this.model,
|
||||
messages: enhancedMessages,
|
||||
stream: true,
|
||||
temperature: 0.7,
|
||||
max_tokens: 1000,
|
||||
system: "You are a business search assistant that returns one business at a time in JSON format."
|
||||
}, {
|
||||
responseType: 'stream'
|
||||
});
|
||||
|
||||
let currentJson = '';
|
||||
response.data.on('data', async (chunk: Buffer) => {
|
||||
const text = chunk.toString();
|
||||
currentJson += text;
|
||||
|
||||
// Try to find and process complete JSON objects
|
||||
try {
|
||||
const business = await this.extractNextBusiness(currentJson);
|
||||
if (business) {
|
||||
currentJson = ''; // Reset for next business
|
||||
await onResult(business);
|
||||
}
|
||||
} catch (error) {
|
||||
// Continue collecting more data if JSON is incomplete
|
||||
console.debug('Collecting more data for complete JSON');
|
||||
}
|
||||
});
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
response.data.on('end', () => resolve());
|
||||
response.data.on('error', (error: Error) => reject(error));
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('\nDeepseek streaming chat error:', error);
|
||||
if (error instanceof Error) {
|
||||
console.error('Error stack:', error.stack);
|
||||
throw new Error(`AI model streaming error: ${error.message}`);
|
||||
}
|
||||
throw new Error('Failed to get streaming response from AI model');
|
||||
}
|
||||
}
|
||||
|
||||
private async extractNextBusiness(text: string): Promise<PartialBusiness | null> {
|
||||
// Try to find a complete JSON object
|
||||
const jsonMatch = text.match(/\{[^{]*\}/);
|
||||
if (!jsonMatch) return null;
|
||||
|
||||
try {
|
||||
const jsonStr = jsonMatch[0];
|
||||
const business = JSON.parse(jsonStr);
|
||||
|
||||
// Validate required fields
|
||||
if (!business.name || !business.address || !business.phone || !business.description) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return business;
|
||||
} catch (e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Non-streaming chat: prepends a detailed system prompt, sends the
     * conversation to Ollama's /api/chat endpoint, and parses the reply
     * into an array of business listings via sanitizeJsonResponse.
     *
     * @param messages conversation turns ({role, content} objects)
     * @returns parsed business listings (see sanitizeJsonResponse)
     * @throws Error when the model returns no content or the request fails
     */
    async chat(messages: any[]): Promise<any> {
        try {
            console.log('\nStarting chat request...');

            // Enhanced system prompt with more explicit instructions
            const enhancedMessages = [
                {
                    role: "system",
                    content: `You are a business search assistant powered by Deepseek Coder. Your task is to generate sample business listings in JSON format.

When asked about businesses in a location, return business listings in this exact JSON format, with no additional text or comments:

\`\`\`json
[
  {
    "name": "Example Plumbing Co",
    "address": "123 Main St, Denver, CO 80202",
    "phone": "(303) 555-0123",
    "description": "Licensed plumbing contractor specializing in residential and commercial services",
    "website": "https://exampleplumbing.com",
    "rating": 4.8
  }
]
\`\`\`

Important rules:
1. Return ONLY the JSON array inside code blocks - no explanations or comments
2. Generate realistic but fictional business data
3. Use proper formatting for phone numbers (e.g., "(303) 555-XXXX") and addresses
4. Include ratings from 1-5 stars (can use decimals, e.g., 4.8)
5. When sorting by rating, sort from highest to lowest rating
6. When asked for a specific number of results, always return exactly that many
7. Make each business unique with different names, addresses, and phone numbers
8. Keep descriptions concise and professional
9. Use realistic website URLs based on business names`
                },
                ...messages
            ];

            console.log('Sending request to Ollama with messages:', JSON.stringify(enhancedMessages, null, 2));

            // NOTE(review): Ollama's /api/chat expects sampling settings under
            // `options` (e.g. options.temperature, options.num_predict); the
            // top-level `temperature`/`max_tokens` here are likely ignored, and
            // `system` duplicates the system message above — confirm against the
            // Ollama API docs before relying on these knobs.
            const response = await axios.post(`${this.baseUrl}/api/chat`, {
                model: this.model,
                messages: enhancedMessages,
                stream: false,
                temperature: 0.7,
                max_tokens: 1000,
                system: "You are a business search assistant that always responds with JSON data."
            });

            if (!response.data) {
                throw new Error('Empty response from AI model');
            }

            console.log('\nRaw response data:', JSON.stringify(response.data, null, 2));

            if (!response.data.message?.content) {
                throw new Error('No content in AI model response');
            }

            console.log('\nParsing AI response...');
            const results = await this.sanitizeJsonResponse(response.data.message.content);
            console.log('Parsed results:', JSON.stringify(results, null, 2));

            return results;

        } catch (error) {
            console.error('\nDeepseek chat error:', error);
            if (error instanceof Error) {
                console.error('Error stack:', error.stack);
                // Re-wrap so callers see a consistent error type/message.
                throw new Error(`AI model error: ${error.message}`);
            }
            throw new Error('Failed to get response from AI model');
        }
    }
|
||||
|
||||
private async sanitizeJsonResponse(text: string): Promise<PartialBusiness[]> {
|
||||
console.log('Attempting to parse response:', text);
|
||||
|
||||
// First try to find JSON blocks
|
||||
const jsonBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
|
||||
if (jsonBlockMatch) {
|
||||
try {
|
||||
const jsonStr = jsonBlockMatch[1].trim();
|
||||
console.log('Found JSON block:', jsonStr);
|
||||
const parsed = JSON.parse(jsonStr);
|
||||
return Array.isArray(parsed) ? parsed : [parsed];
|
||||
} catch (e) {
|
||||
console.error('Failed to parse JSON block:', e);
|
||||
}
|
||||
}
|
||||
|
||||
// Then try to find any JSON-like structure
|
||||
const jsonPatterns = [
|
||||
/\[\s*\{[\s\S]*\}\s*\]/, // Array of objects
|
||||
/\{[\s\S]*\}/ // Single object
|
||||
];
|
||||
|
||||
for (const pattern of jsonPatterns) {
|
||||
const match = text.match(pattern);
|
||||
if (match) {
|
||||
try {
|
||||
const jsonStr = match[0].trim();
|
||||
console.log('Found JSON pattern:', jsonStr);
|
||||
const parsed = JSON.parse(jsonStr);
|
||||
return Array.isArray(parsed) ? parsed : [parsed];
|
||||
} catch (e) {
|
||||
console.error('Failed to parse JSON pattern:', e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no valid JSON found, try to extract structured data
|
||||
try {
|
||||
const extractedData = this.extractBusinessData(text);
|
||||
if (extractedData) {
|
||||
console.log('Extracted business data:', extractedData);
|
||||
return [extractedData];
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to extract business data:', e);
|
||||
}
|
||||
|
||||
throw new Error('No valid JSON or business information found in response');
|
||||
}
|
||||
|
||||
private extractBusinessData(text: string): PartialBusiness {
|
||||
// Extract business information using regex patterns
|
||||
const businessInfo: PartialBusiness = {
|
||||
name: this.extractField(text, 'name', '[^"\\n]+') || 'Unknown Business',
|
||||
address: this.extractField(text, 'address', '[^"\\n]+') || 'Address not available',
|
||||
phone: this.extractField(text, 'phone', '[^"\\n]+') || 'Phone not available',
|
||||
description: this.extractField(text, 'description', '[^"\\n]+') || 'No description available'
|
||||
};
|
||||
|
||||
const website = this.extractField(text, 'website', '[^"\\n]+');
|
||||
if (website) {
|
||||
businessInfo.website = website;
|
||||
}
|
||||
|
||||
const rating = this.extractField(text, 'rating', '[0-9.]+');
|
||||
if (rating) {
|
||||
businessInfo.rating = parseFloat(rating);
|
||||
}
|
||||
|
||||
return businessInfo;
|
||||
}
|
||||
|
||||
private extractField(text: string, field: string, pattern: string): string {
|
||||
const regex = new RegExp(`"?${field}"?\\s*[:=]\\s*"?(${pattern})"?`, 'i');
|
||||
const match = text.match(regex);
|
||||
return match ? match[1].trim() : '';
|
||||
}
|
||||
}
|
||||
63
src/lib/services/geocodingService.ts
Normal file
63
src/lib/services/geocodingService.ts
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
import axios from 'axios';
|
||||
import { sleep } from '../utils/helpers';
|
||||
|
||||
interface GeocodingResult {
|
||||
lat: number;
|
||||
lng: number;
|
||||
formattedAddress: string;
|
||||
}
|
||||
|
||||
export class GeocodingService {
|
||||
private static cache = new Map<string, GeocodingResult>();
|
||||
private static lastRequestTime = 0;
|
||||
private static RATE_LIMIT_MS = 1000; // 1 second between requests (Nominatim requirement)
|
||||
|
||||
static async geocode(address: string): Promise<GeocodingResult | null> {
|
||||
// Check cache first
|
||||
const cached = this.cache.get(address);
|
||||
if (cached) return cached;
|
||||
|
||||
try {
|
||||
// Rate limiting
|
||||
const now = Date.now();
|
||||
const timeSinceLastRequest = now - this.lastRequestTime;
|
||||
if (timeSinceLastRequest < this.RATE_LIMIT_MS) {
|
||||
await sleep(this.RATE_LIMIT_MS - timeSinceLastRequest);
|
||||
}
|
||||
this.lastRequestTime = Date.now();
|
||||
|
||||
const response = await axios.get(
|
||||
'https://nominatim.openstreetmap.org/search',
|
||||
{
|
||||
params: {
|
||||
q: address,
|
||||
format: 'json',
|
||||
limit: 1,
|
||||
addressdetails: 1
|
||||
},
|
||||
headers: {
|
||||
'User-Agent': 'BusinessFinder/1.0'
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
if (response.data?.length > 0) {
|
||||
const result = response.data[0];
|
||||
const geocoded = {
|
||||
lat: parseFloat(result.lat),
|
||||
lng: parseFloat(result.lon),
|
||||
formattedAddress: result.display_name
|
||||
};
|
||||
|
||||
// Cache the result
|
||||
this.cache.set(address, geocoded);
|
||||
return geocoded;
|
||||
}
|
||||
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.error('Geocoding error:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
40
src/lib/services/healthCheck.ts
Normal file
40
src/lib/services/healthCheck.ts
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import axios from 'axios';
|
||||
import { supabase } from '../supabase';
|
||||
import { env } from '../../config/env';
|
||||
|
||||
export class HealthCheckService {
|
||||
private static async checkSupabase(): Promise<boolean> {
|
||||
try {
|
||||
const { data, error } = await supabase.from('searches').select('count');
|
||||
return !error;
|
||||
} catch (error) {
|
||||
console.error('Supabase health check failed:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private static async checkSearx(): Promise<boolean> {
|
||||
try {
|
||||
const response = await axios.get(env.SEARXNG_URL);
|
||||
return response.status === 200;
|
||||
} catch (error) {
|
||||
console.error('SearxNG health check failed:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static async checkHealth(): Promise<{
|
||||
supabase: boolean;
|
||||
searx: boolean;
|
||||
}> {
|
||||
const [supabaseHealth, searxHealth] = await Promise.all([
|
||||
this.checkSupabase(),
|
||||
this.checkSearx()
|
||||
]);
|
||||
|
||||
return {
|
||||
supabase: supabaseHealth,
|
||||
searx: searxHealth
|
||||
};
|
||||
}
|
||||
}
|
||||
45
src/lib/services/ollamaService.ts
Normal file
45
src/lib/services/ollamaService.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import axios from 'axios';
|
||||
import { env } from '../../config/env';
|
||||
|
||||
export class OllamaService {
|
||||
private static readonly baseUrl = env.ollama.url;
|
||||
private static readonly model = env.ollama.model;
|
||||
|
||||
static async complete(prompt: string): Promise<string> {
|
||||
try {
|
||||
const response = await axios.post(`${this.baseUrl}/api/generate`, {
|
||||
model: this.model,
|
||||
prompt: prompt,
|
||||
stream: false
|
||||
});
|
||||
|
||||
if (response.data?.response) {
|
||||
return response.data.response;
|
||||
}
|
||||
|
||||
throw new Error('No response from Ollama');
|
||||
} catch (error) {
|
||||
console.error('Ollama error:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static async chat(messages: { role: 'user' | 'assistant'; content: string }[]): Promise<string> {
|
||||
try {
|
||||
const response = await axios.post(`${this.baseUrl}/api/chat`, {
|
||||
model: this.model,
|
||||
messages: messages,
|
||||
stream: false
|
||||
});
|
||||
|
||||
if (response.data?.message?.content) {
|
||||
return response.data.message.content;
|
||||
}
|
||||
|
||||
throw new Error('No response from Ollama chat');
|
||||
} catch (error) {
|
||||
console.error('Ollama chat error:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
135
src/lib/services/searchService.ts
Normal file
135
src/lib/services/searchService.ts
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
import EventEmitter from 'events';
|
||||
import { DeepSeekService } from './deepseekService';
|
||||
import { DatabaseService } from './databaseService';
|
||||
import { Business } from '../types';
|
||||
|
||||
/**
 * Business record as produced by the AI search pipeline before
 * persistence; optional fields may be filled in later (e.g. location
 * via geocoding, source when saved).
 */
interface PartialBusiness {
    name: string;
    address: string;
    phone: string;
    description: string;
    website?: string;
    rating?: number;   // 1-5 stars (may be fractional, per the generation prompt)
    source?: string;   // origin of the record, e.g. 'deepseek'
    location?: {
        lat: number;
        lng: number;
    };
}
|
||||
|
||||
export class SearchService extends EventEmitter {
|
||||
private deepseekService: DeepSeekService;
|
||||
private databaseService: DatabaseService;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.deepseekService = new DeepSeekService();
|
||||
this.databaseService = new DatabaseService();
|
||||
|
||||
this.deepseekService.on('progress', (data) => {
|
||||
this.emit('progress', data);
|
||||
});
|
||||
}
|
||||
|
||||
async streamSearch(query: string, location: string, limit: number = 10): Promise<void> {
|
||||
try {
|
||||
// First, try to find cached results in database
|
||||
const cachedResults = await this.databaseService.findBusinessesByQuery(query, location);
|
||||
if (cachedResults.length > 0) {
|
||||
// Emit cached results one by one
|
||||
for (const result of this.sortByRating(cachedResults).slice(0, limit)) {
|
||||
this.emit('result', result);
|
||||
await new Promise(resolve => setTimeout(resolve, 100)); // Small delay between results
|
||||
}
|
||||
this.emit('complete');
|
||||
return;
|
||||
}
|
||||
|
||||
// If no cached results, use DeepSeek to generate new results
|
||||
const aiResults = await this.deepseekService.streamChat([{
|
||||
role: "user",
|
||||
content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON.`
|
||||
}], async (business: PartialBusiness) => {
|
||||
try {
|
||||
// Extract lat/lng from address using a geocoding service
|
||||
const coords = await this.geocodeAddress(business.address);
|
||||
|
||||
// Save to database and emit result
|
||||
const savedBusiness = await this.databaseService.saveBusiness({
|
||||
...business,
|
||||
source: 'deepseek',
|
||||
location: coords || {
|
||||
lat: 39.7392, // Denver's default coordinates
|
||||
lng: -104.9903
|
||||
}
|
||||
});
|
||||
|
||||
this.emit('result', savedBusiness);
|
||||
} catch (error) {
|
||||
console.error('Error processing business:', error);
|
||||
this.emit('error', error);
|
||||
}
|
||||
});
|
||||
|
||||
this.emit('complete');
|
||||
|
||||
} catch (error) {
|
||||
console.error('Search error:', error);
|
||||
this.emit('error', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async search(query: string, location: string, limit: number = 10): Promise<Business[]> {
|
||||
try {
|
||||
// First, try to find cached results in database
|
||||
const cachedResults = await this.databaseService.findBusinessesByQuery(query, location);
|
||||
if (cachedResults.length > 0) {
|
||||
return this.sortByRating(cachedResults).slice(0, limit);
|
||||
}
|
||||
|
||||
// If no cached results, use DeepSeek to generate new results
|
||||
const aiResults = await this.deepseekService.chat([{
|
||||
role: "user",
|
||||
content: `Find ${query} in ${location}. You must return exactly ${limit} results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON.`
|
||||
}]);
|
||||
|
||||
// Save the results to database
|
||||
const savedResults = await Promise.all(
|
||||
(aiResults as PartialBusiness[]).map(async (business: PartialBusiness) => {
|
||||
// Extract lat/lng from address using a geocoding service
|
||||
const coords = await this.geocodeAddress(business.address);
|
||||
|
||||
return this.databaseService.saveBusiness({
|
||||
...business,
|
||||
source: 'deepseek',
|
||||
location: coords || {
|
||||
lat: 39.7392, // Denver's default coordinates
|
||||
lng: -104.9903
|
||||
}
|
||||
});
|
||||
})
|
||||
);
|
||||
|
||||
return this.sortByRating(savedResults);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Search error:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private sortByRating(businesses: Business[]): Business[] {
|
||||
return businesses.sort((a, b) => b.rating - a.rating);
|
||||
}
|
||||
|
||||
private async geocodeAddress(address: string): Promise<{ lat: number; lng: number } | null> {
|
||||
// TODO: Implement real geocoding service
|
||||
// For now, return null to use default coordinates
|
||||
return null;
|
||||
}
|
||||
|
||||
async getBusinessById(id: string): Promise<Business | null> {
|
||||
return this.databaseService.getBusinessById(id);
|
||||
}
|
||||
}
|
||||
93
src/lib/services/supabaseService.ts
Normal file
93
src/lib/services/supabaseService.ts
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
import { env } from '../../config/env';
|
||||
import { BusinessData } from '../searxng';
|
||||
|
||||
/**
 * Persists business records to the Supabase `businesses` table,
 * deduplicating via a deterministic identifier derived from
 * name/phone/address/website.
 */
export class SupabaseService {
  private supabase;

  constructor() {
    this.supabase = createClient(env.supabase.url, env.supabase.anonKey);
  }

  /**
   * Upserts each business individually; per-record failures are logged
   * and skipped so one bad row does not abort the whole batch.
   *
   * @param businesses records to insert or update
   * @throws only on a failure outside the per-business loop
   */
  async upsertBusinesses(businesses: BusinessData[]): Promise<void> {
    try {
      console.log('Upserting businesses to Supabase:', businesses.length);

      for (const business of businesses) {
        try {
          // Create a unique identifier based on multiple properties
          const identifier = [
            business.name.toLowerCase(),
            business.phone?.replace(/\D/g, ''),
            business.address?.toLowerCase(),
            business.website?.toLowerCase()
          ]
            .filter(Boolean) // Remove empty values
            .join('_') // Join with underscore
            .replace(/[^a-z0-9]/g, '_'); // Replace non-alphanumeric chars

          // Log the data being inserted
          console.log('Upserting business:', {
            id: identifier,
            name: business.name,
            phone: business.phone,
            email: business.email,
            address: business.address,
            rating: business.rating,
            website: business.website,
            location: business.location
          });

          // Check if business exists (need its rating and search_count
          // to merge rather than overwrite).
          const { data: existing, error: selectError } = await this.supabase
            .from('businesses')
            .select('rating, search_count')
            .eq('id', identifier)
            .single();

          // PGRST116 = "no rows returned" from .single(); expected for
          // a brand-new business, so only log other error codes.
          if (selectError && selectError.code !== 'PGRST116') {
            console.error('Error checking existing business:', selectError);
          }

          // Prepare upsert data: keep the best rating seen so far and
          // bump the search counter on repeat sightings.
          const upsertData = {
            id: identifier,
            name: business.name,
            phone: business.phone || null,
            email: business.email || null,
            address: business.address || null,
            rating: existing ? Math.max(business.rating, existing.rating) : business.rating,
            website: business.website || null,
            logo: business.logo || null,
            source: business.source || null,
            description: business.description || null,
            latitude: business.location?.lat || null,
            longitude: business.location?.lng || null,
            last_updated: new Date().toISOString(),
            search_count: existing ? existing.search_count + 1 : 1
          };

          console.log('Upserting with data:', upsertData);

          const { error: upsertError } = await this.supabase
            .from('businesses')
            .upsert(upsertData, {
              onConflict: 'id'
            });

          if (upsertError) {
            console.error('Error upserting business:', upsertError);
            console.error('Failed business data:', upsertData);
          } else {
            console.log(`Successfully upserted business: ${business.name}`);
          }
        } catch (businessError) {
          // Per-business failures are swallowed on purpose (best-effort batch).
          console.error('Error processing business:', business.name, businessError);
        }
      }
    } catch (error) {
      console.error('Error saving businesses to Supabase:', error);
      throw error;
    }
  }
}
|
||||
35
src/lib/supabase.ts
Normal file
35
src/lib/supabase.ts
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
import { env } from '../config/env';
|
||||
|
||||
// Validate Supabase configuration
// Fail fast at module load if either credential is missing, rather than
// failing later on the first query.
if (!env.SUPABASE_URL || !env.SUPABASE_KEY) {
  throw new Error('Missing Supabase configuration');
}

// Create Supabase client — a single shared, module-level instance.
export const supabase = createClient(
  env.SUPABASE_URL,
  env.SUPABASE_KEY,
  {
    auth: {
      autoRefreshToken: true,
      persistSession: true,
      detectSessionInUrl: true
    }
  }
);
|
||||
|
||||
// Test connection function
|
||||
export async function testConnection() {
|
||||
try {
|
||||
console.log('Testing Supabase connection...');
|
||||
console.log('URL:', env.SUPABASE_URL);
|
||||
const { data, error } = await supabase.from('searches').select('count');
|
||||
if (error) throw error;
|
||||
console.log('Supabase connection successful');
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('Supabase connection failed:', error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
16
src/lib/types.ts
Normal file
16
src/lib/types.ts
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
/** Canonical business record as stored and returned by the API. */
export interface Business {
  id: string;
  name: string;
  address: string;
  phone: string;
  description: string;
  website?: string;
  source: string;   // where the record came from, e.g. 'deepseek'
  rating: number;   // 1-5 stars (per the generation prompt elsewhere in this codebase)
  location: {
    lat: number;
    lng: number;
  };
}

// Alias kept for modules that import the same shape under this name.
export type BusinessData = Business;
|
||||
39
src/lib/utils.ts
Normal file
39
src/lib/utils.ts
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import crypto from 'crypto';
|
||||
|
||||
interface BusinessIdentifier {
|
||||
title?: string;
|
||||
name?: string;
|
||||
phone?: string;
|
||||
address?: string;
|
||||
url?: string;
|
||||
website?: string;
|
||||
}
|
||||
|
||||
export function generateBusinessId(business: BusinessIdentifier): string {
|
||||
const components = [
|
||||
business.title || business.name,
|
||||
business.phone,
|
||||
business.address,
|
||||
business.url || business.website
|
||||
].filter(Boolean);
|
||||
|
||||
const hash = crypto.createHash('md5')
|
||||
.update(components.join('|'))
|
||||
.digest('hex');
|
||||
|
||||
return `hash_${hash}`;
|
||||
}
|
||||
|
||||
export function extractPlaceIdFromUrl(url: string): string | null {
|
||||
try {
|
||||
// Match patterns like:
|
||||
// https://www.google.com/maps/place/.../.../data=!3m1!4b1!4m5!3m4!1s0x876c7ed0cb78d6d3:0x2cd0c4490736f7c!8m2!
|
||||
// https://maps.google.com/maps?q=...&ftid=0x876c7ed0cb78d6d3:0x2cd0c4490736f7c
|
||||
const placeIdRegex = /[!\/]([0-9a-f]{16}:[0-9a-f]{16})/i;
|
||||
const match = url.match(placeIdRegex);
|
||||
return match ? match[1] : null;
|
||||
} catch (error) {
|
||||
console.warn('Error extracting place ID from URL:', error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
36
src/lib/utils/cache.ts
Normal file
36
src/lib/utils/cache.ts
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
interface CacheItem<T> {
|
||||
data: T;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
export class Cache<T> {
|
||||
private store = new Map<string, CacheItem<T>>();
|
||||
private ttl: number;
|
||||
|
||||
constructor(ttlMinutes: number = 60) {
|
||||
this.ttl = ttlMinutes * 60 * 1000;
|
||||
}
|
||||
|
||||
set(key: string, value: T): void {
|
||||
this.store.set(key, {
|
||||
data: value,
|
||||
timestamp: Date.now()
|
||||
});
|
||||
}
|
||||
|
||||
get(key: string): T | null {
|
||||
const item = this.store.get(key);
|
||||
if (!item) return null;
|
||||
|
||||
if (Date.now() - item.timestamp > this.ttl) {
|
||||
this.store.delete(key);
|
||||
return null;
|
||||
}
|
||||
|
||||
return item.data;
|
||||
}
|
||||
|
||||
clear(): void {
|
||||
this.store.clear();
|
||||
}
|
||||
}
|
||||
67
src/lib/utils/dataCleanup.ts
Normal file
67
src/lib/utils/dataCleanup.ts
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import { Business } from '../types';
|
||||
|
||||
export function normalizePhoneNumber(phone: string): string {
|
||||
return phone.replace(/[^\d]/g, '');
|
||||
}
|
||||
|
||||
export function normalizeAddress(address: string): string {
|
||||
// Remove common suffixes and standardize format
|
||||
return address
|
||||
.toLowerCase()
|
||||
.replace(/(street|st\.?|avenue|ave\.?|road|rd\.?)/g, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function extractZipCode(text: string): string | null {
|
||||
const match = text.match(/\b\d{5}(?:-\d{4})?\b/);
|
||||
return match ? match[0] : null;
|
||||
}
|
||||
|
||||
export function calculateReliabilityScore(business: Business): number {
|
||||
let score = 0;
|
||||
|
||||
// More complete data = higher score
|
||||
if (business.phone) score += 2;
|
||||
if (business.website) score += 1;
|
||||
if (business.email) score += 1;
|
||||
if (business.hours?.length) score += 2;
|
||||
if (business.services && business.services.length > 0) score += 1;
|
||||
if (business.reviewCount && business.reviewCount > 10) score += 2;
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
export function cleanAddress(address: string): string {
|
||||
return address
|
||||
.replace(/^(Sure!|Here is |The business address( is| found in the text is)?:?\n?\s*)/i, '')
|
||||
.replace(/\n/g, ' ')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function formatPhoneNumber(phone: string): string {
|
||||
// Remove all non-numeric characters
|
||||
const cleaned = phone.replace(/\D/g, '');
|
||||
|
||||
// Format as (XXX) XXX-XXXX
|
||||
if (cleaned.length === 10) {
|
||||
return `(${cleaned.slice(0,3)}) ${cleaned.slice(3,6)}-${cleaned.slice(6)}`;
|
||||
}
|
||||
|
||||
// Return original if not 10 digits
|
||||
return phone;
|
||||
}
|
||||
|
||||
export function cleanEmail(email: string): string {
|
||||
// Remove phone numbers from email
|
||||
return email
|
||||
.replace(/\d{3}-\d{4}/, '')
|
||||
.replace(/\d{10}/, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function cleanDescription(description: string): string {
|
||||
return description
|
||||
.replace(/^(Description:|About:|Info:)/i, '')
|
||||
.replace(/\s+/g, ' ')
|
||||
.trim();
|
||||
}
|
||||
18
src/lib/utils/helpers.ts
Normal file
18
src/lib/utils/helpers.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
export function sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
export function cleanText(text: string): string {
|
||||
return text
|
||||
.replace(/\s+/g, ' ')
|
||||
.replace(/[^\w\s-.,]/g, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function isValidPhone(phone: string): boolean {
|
||||
return /^\+?[\d-.()\s]{10,}$/.test(phone);
|
||||
}
|
||||
|
||||
export function isValidEmail(email: string): boolean {
|
||||
return /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email);
|
||||
}
|
||||
23
src/lib/utils/rateLimiter.ts
Normal file
23
src/lib/utils/rateLimiter.ts
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
export class RateLimiter {
|
||||
private timestamps: number[] = [];
|
||||
private readonly windowMs: number;
|
||||
private readonly maxRequests: number;
|
||||
|
||||
constructor(windowMs: number = 60000, maxRequests: number = 30) {
|
||||
this.windowMs = windowMs;
|
||||
this.maxRequests = maxRequests;
|
||||
}
|
||||
|
||||
async waitForSlot(): Promise<void> {
|
||||
const now = Date.now();
|
||||
this.timestamps = this.timestamps.filter(time => now - time < this.windowMs);
|
||||
|
||||
if (this.timestamps.length >= this.maxRequests) {
|
||||
const oldestRequest = this.timestamps[0];
|
||||
const waitTime = this.windowMs - (now - oldestRequest);
|
||||
await new Promise(resolve => setTimeout(resolve, waitTime));
|
||||
}
|
||||
|
||||
this.timestamps.push(now);
|
||||
}
|
||||
}
|
||||
168
src/lib/utils/scraper.ts
Normal file
168
src/lib/utils/scraper.ts
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
import axios from 'axios';
|
||||
import * as cheerio from 'cheerio';
|
||||
import { OllamaService } from '../services/ollamaService';
|
||||
import { sleep } from './helpers';
|
||||
|
||||
const RATE_LIMIT_MS = 1000; // 1 second between requests
|
||||
let lastRequestTime = 0;
|
||||
|
||||
async function rateLimitedRequest(url: string) {
|
||||
const now = Date.now();
|
||||
const timeSinceLastRequest = now - lastRequestTime;
|
||||
|
||||
if (timeSinceLastRequest < RATE_LIMIT_MS) {
|
||||
await sleep(RATE_LIMIT_MS - timeSinceLastRequest);
|
||||
}
|
||||
|
||||
lastRequestTime = Date.now();
|
||||
return axios.get(url, {
|
||||
timeout: 5000,
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (compatible; BusinessFinder/1.0; +http://example.com/bot)',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5'
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Contact details scraped from a business web page. Every field is
 * optional — pages rarely expose all of them.
 */
export interface ContactInfo {
    phone?: string;
    email?: string;
    address?: string;
    description?: string;
    openingHours?: string[];
}
|
||||
|
||||
/**
 * Fetches a page (rate-limited) and extracts contact info, preferring
 * schema.org JSON-LD (LocalBusiness/Restaurant) and falling back to
 * heuristic HTML scraping. Returns an empty object on any failure.
 *
 * @param url page to scrape
 */
export async function extractContactFromHtml(url: string): Promise<ContactInfo> {
    try {
        const response = await rateLimitedRequest(url);

        const $ = cheerio.load(response.data);

        // Extract structured data if available
        const structuredData = $('script[type="application/ld+json"]')
            .map((_, el) => {
                try {
                    return JSON.parse($(el).html() || '');
                } catch {
                    // Malformed JSON-LD blocks are simply skipped.
                    return null;
                }
            })
            .get()
            .filter(Boolean);

        // Look for LocalBusiness or Restaurant schema
        const businessData = structuredData.find(data =>
            data['@type'] === 'LocalBusiness' ||
            data['@type'] === 'Restaurant'
        );

        if (businessData) {
            // Structured data wins when present — it's machine-authored.
            return {
                phone: businessData.telephone,
                email: businessData.email,
                address: businessData.address?.streetAddress,
                description: businessData.description,
                openingHours: businessData.openingHours
            };
        }

        // Fallback to regular HTML parsing
        return {
            phone: findPhone($),
            email: findEmail($),
            address: findAddress($),
            description: $('meta[name="description"]').attr('content'),
            openingHours: findOpeningHours($)
        };
    } catch (error) {
        // Scraping is best-effort: log and return an empty result.
        console.warn(`Error extracting contact info from ${url}:`, error);
        return {};
    }
}
|
||||
|
||||
export async function extractCleanAddress(text: string, location: string): Promise<string> {
|
||||
try {
|
||||
const ollama = new OllamaService();
|
||||
const prompt = `
|
||||
Extract a business address from this text. The business should be in or near ${location}.
|
||||
Only return the address, nothing else. If no valid address is found, return an empty string.
|
||||
|
||||
Text: ${text}
|
||||
`;
|
||||
|
||||
const response = await OllamaService.complete(prompt);
|
||||
return response.trim();
|
||||
} catch (error) {
|
||||
console.warn('Error extracting address:', error);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
function findPhone($: cheerio.CheerioAPI): string | undefined {
|
||||
// Common phone patterns
|
||||
const phonePatterns = [
|
||||
/\b\(?([0-9]{3})\)?[-. ]?([0-9]{3})[-. ]?([0-9]{4})\b/,
|
||||
/\b(?:Phone|Tel|Contact):\s*([0-9-().+ ]{10,})\b/i
|
||||
];
|
||||
|
||||
for (const pattern of phonePatterns) {
|
||||
const match = $.text().match(pattern);
|
||||
if (match) return match[0];
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function findEmail($: cheerio.CheerioAPI): string | undefined {
|
||||
const emailPattern = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/;
|
||||
const match = $.text().match(emailPattern);
|
||||
return match ? match[0] : undefined;
|
||||
}
|
||||
|
||||
function findAddress($: cheerio.CheerioAPI): string | undefined {
|
||||
// Look for address in common elements
|
||||
const addressSelectors = [
|
||||
'address',
|
||||
'[itemtype="http://schema.org/PostalAddress"]',
|
||||
'.address',
|
||||
'#address',
|
||||
'[class*="address"]',
|
||||
'[id*="address"]'
|
||||
];
|
||||
|
||||
for (const selector of addressSelectors) {
|
||||
const element = $(selector).first();
|
||||
if (element.length) {
|
||||
return element.text().trim();
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function findOpeningHours($: cheerio.CheerioAPI): string[] {
|
||||
const hours: string[] = [];
|
||||
const hoursSelectors = [
|
||||
'[itemtype="http://schema.org/OpeningHoursSpecification"]',
|
||||
'.hours',
|
||||
'#hours',
|
||||
'[class*="hours"]',
|
||||
'[id*="hours"]'
|
||||
];
|
||||
|
||||
for (const selector of hoursSelectors) {
|
||||
const element = $(selector).first();
|
||||
if (element.length) {
|
||||
element.find('*').each((_, el) => {
|
||||
const text = $(el).text().trim();
|
||||
if (text && !hours.includes(text)) {
|
||||
hours.push(text);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return hours;
|
||||
}
|
||||
119
src/lib/utils/structuredDataParser.ts
Normal file
119
src/lib/utils/structuredDataParser.ts
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import * as cheerio from 'cheerio';
|
||||
|
||||
/**
 * Normalized business facts extracted from a page's structured data
 * (JSON-LD, microdata, or RDFa). All fields are optional because each
 * markup source may supply only a subset.
 */
interface StructuredData {
  name?: string;
  email?: string;
  phone?: string;
  // Single formatted address line (see StructuredDataParser.formatAddress).
  address?: string;
  // URLs of social-media profiles (e.g. from schema.org `sameAs`).
  socialProfiles?: string[];
  // Keyed by the leading day token of an openingHours spec, value "HH:MM-HH:MM".
  openingHours?: Record<string, string>;
  description?: string;
}
|
||||
|
||||
/**
 * Extracts business/organization facts from the three common structured-data
 * formats: JSON-LD <script> blocks, microdata (`itemtype`/`itemprop`), and
 * RDFa (`typeof`/`property`). Each recognized entity becomes one
 * StructuredData record; one page can yield several.
 *
 * NOTE(review): the microdata/RDFa address and social-profile helpers
 * (extractMicrodataAddress, extractRdfaAddress, extractSocialProfiles) are
 * elided below ("... helper methods ...") — confirm they exist before use.
 */
export class StructuredDataParser {
  /**
   * Scans the whole document and returns every Organization/LocalBusiness
   * entity found in JSON-LD, microdata, or RDFa markup.
   */
  static parse($: cheerio.CheerioAPI): StructuredData[] {
    const results: StructuredData[] = [];

    // Parse JSON-LD: each script tag may hold one object or an array of them.
    $('script[type="application/ld+json"]').each((_, element) => {
      try {
        // '{}' fallback keeps JSON.parse from throwing on empty script tags.
        const data = JSON.parse($(element).html() || '{}');
        if (Array.isArray(data)) {
          data.forEach(item => this.parseStructuredItem(item, results));
        } else {
          this.parseStructuredItem(data, results);
        }
      } catch (e) {
        // Malformed JSON-LD on third-party pages is common; log and continue.
        console.error('Error parsing JSON-LD:', e);
      }
    });

    // Parse microdata: only Organization/LocalBusiness itemtypes are kept.
    $('[itemtype]').each((_, element) => {
      const type = $(element).attr('itemtype');
      if (type?.includes('Organization') || type?.includes('LocalBusiness')) {
        const data: StructuredData = {
          name: $('[itemprop="name"]', element).text(),
          email: $('[itemprop="email"]', element).text(),
          phone: $('[itemprop="telephone"]', element).text(),
          address: this.extractMicrodataAddress($, element),
          socialProfiles: this.extractSocialProfiles($, element)
        };
        results.push(data);
      }
    });

    // Parse RDFa: same fields, addressed via `property` attributes.
    $('[typeof="Organization"], [typeof="LocalBusiness"]').each((_, element) => {
      const data: StructuredData = {
        name: $('[property="name"]', element).text(),
        email: $('[property="email"]', element).text(),
        phone: $('[property="telephone"]', element).text(),
        address: this.extractRdfaAddress($, element),
        socialProfiles: this.extractSocialProfiles($, element)
      };
      results.push(data);
    });

    return results;
  }

  /**
   * Maps one JSON-LD node into a StructuredData record, appending to
   * `results`. Nodes whose @type is not Organization/LocalBusiness are
   * ignored (note: an exact string match — subtypes are not matched).
   */
  private static parseStructuredItem(data: any, results: StructuredData[]): void {
    if (data['@type'] === 'Organization' || data['@type'] === 'LocalBusiness') {
      results.push({
        name: data.name,
        email: data.email,
        phone: data.telephone,
        address: this.formatAddress(data.address),
        socialProfiles: this.extractSocialUrls(data),
        openingHours: this.parseOpeningHours(data.openingHours),
        description: data.description
      });
    }
  }

  /**
   * Normalizes a schema.org address (plain string or PostalAddress object)
   * into one comma-joined line; undefined for anything else.
   */
  private static formatAddress(address: any): string | undefined {
    if (typeof address === 'string') return address;
    if (typeof address === 'object') {
      const parts = [
        address.streetAddress,
        address.addressLocality,
        address.addressRegion,
        address.postalCode,
        address.addressCountry
      ].filter(Boolean);  // drop missing components before joining
      return parts.join(', ');
    }
    return undefined;
  }

  /**
   * Collects social-profile URLs from `sameAs`, which schema.org allows to
   * be either a single string or an array of strings.
   */
  private static extractSocialUrls(data: any): string[] {
    const urls: string[] = [];
    if (data.sameAs) {
      if (Array.isArray(data.sameAs)) {
        urls.push(...data.sameAs);
      } else if (typeof data.sameAs === 'string') {
        urls.push(data.sameAs);
      }
    }
    return urls;
  }

  /**
   * Parses `openingHours` specs of the form "Mo 09:00-17:00" (optionally
   * "Mo-Fr ...") into a { day: "HH:MM-HH:MM" } map. Only array input is
   * handled; other shapes yield undefined.
   * NOTE(review): day ranges ("Mo-Fr") are keyed by the first day only —
   * the "-Fr" part (match[2]) is captured but discarded.
   */
  private static parseOpeningHours(hours: any): Record<string, string> | undefined {
    if (!hours) return undefined;

    if (Array.isArray(hours)) {
      const schedule: Record<string, string> = {};
      hours.forEach(spec => {
        const match = spec.match(/^(\w+)(-\w+)?\s+(\d\d:\d\d)-(\d\d:\d\d)$/);
        if (match) {
          schedule[match[1]] = `${match[3]}-${match[4]}`;
        }
      });
      return schedule;
    }
    return undefined;
  }

  // ... helper methods for microdata and RDFa parsing ...
}
|
||||
47
src/middleware/auth.ts
Normal file
47
src/middleware/auth.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import { Request, Response, NextFunction } from 'express';
|
||||
import { supabase } from '../lib/supabase';
|
||||
|
||||
// Extend Express Request type to include user
// Populated by the authenticateUser middleware below; `role` defaults to
// 'user' when the Supabase account carries no app_metadata.role.
declare global {
  namespace Express {
    interface Request {
      user?: {
        id: string;
        email: string;
        role: string;
      };
    }
  }
}
|
||||
|
||||
export async function authenticateUser(
|
||||
req: Request,
|
||||
res: Response,
|
||||
next: NextFunction
|
||||
) {
|
||||
try {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader) {
|
||||
return res.status(401).json({ error: 'No authorization header' });
|
||||
}
|
||||
|
||||
const token = authHeader.replace('Bearer ', '');
|
||||
const { data: { user }, error } = await supabase.auth.getUser(token);
|
||||
|
||||
if (error || !user) {
|
||||
return res.status(401).json({ error: 'Invalid token' });
|
||||
}
|
||||
|
||||
// Add user info to request
|
||||
req.user = {
|
||||
id: user.id,
|
||||
email: user.email!,
|
||||
role: (user.app_metadata?.role as string) || 'user'
|
||||
};
|
||||
|
||||
next();
|
||||
} catch (error) {
|
||||
console.error('Authentication error:', error);
|
||||
res.status(401).json({ error: 'Authentication failed' });
|
||||
}
|
||||
}
|
||||
148
src/routes/api.ts
Normal file
148
src/routes/api.ts
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
import express from 'express';
|
||||
import { SearchService } from '../lib/services/searchService';
|
||||
import { Business } from '../lib/types';
|
||||
|
||||
const router = express.Router();
|
||||
const searchService = new SearchService();
|
||||
|
||||
// Error handling middleware for JSON parsing errors
|
||||
router.use((err: Error, req: express.Request, res: express.Response, next: express.NextFunction) => {
|
||||
if (err instanceof SyntaxError && 'body' in err) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'Invalid JSON'
|
||||
});
|
||||
}
|
||||
next();
|
||||
});
|
||||
|
||||
// Business categories endpoint
|
||||
router.get('/categories', (req, res) => {
|
||||
const categories = [
|
||||
'Restaurant',
|
||||
'Retail',
|
||||
'Service',
|
||||
'Healthcare',
|
||||
'Professional',
|
||||
'Entertainment',
|
||||
'Education',
|
||||
'Technology',
|
||||
'Manufacturing',
|
||||
'Construction',
|
||||
'Transportation',
|
||||
'Real Estate',
|
||||
'Financial',
|
||||
'Legal',
|
||||
'Other'
|
||||
];
|
||||
res.json(categories);
|
||||
});
|
||||
|
||||
// Streaming search endpoint
|
||||
router.post('/search/stream', (req, res) => {
|
||||
const { query, location } = req.body;
|
||||
|
||||
if (!query || !location) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'Query and location are required'
|
||||
});
|
||||
}
|
||||
|
||||
// Set headers for SSE
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
// Send initial message
|
||||
res.write('data: {"type":"start","message":"Starting search..."}\n\n');
|
||||
|
||||
// Create search service instance for this request
|
||||
const search = new SearchService();
|
||||
|
||||
// Listen for individual results
|
||||
search.on('result', (business: Business) => {
|
||||
res.write(`data: {"type":"result","business":${JSON.stringify(business)}}\n\n`);
|
||||
});
|
||||
|
||||
// Listen for progress updates
|
||||
search.on('progress', (data: any) => {
|
||||
res.write(`data: {"type":"progress","data":${JSON.stringify(data)}}\n\n`);
|
||||
});
|
||||
|
||||
// Listen for completion
|
||||
search.on('complete', () => {
|
||||
res.write('data: {"type":"complete","message":"Search complete"}\n\n');
|
||||
res.end();
|
||||
});
|
||||
|
||||
// Listen for errors
|
||||
search.on('error', (error: Error) => {
|
||||
res.write(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`);
|
||||
res.end();
|
||||
});
|
||||
|
||||
// Start the search
|
||||
search.streamSearch(query, location).catch(error => {
|
||||
console.error('Search error:', error);
|
||||
res.write(`data: {"type":"error","message":${JSON.stringify(error.message)}}\n\n`);
|
||||
res.end();
|
||||
});
|
||||
});
|
||||
|
||||
// Regular search endpoint (non-streaming)
|
||||
router.post('/search', async (req, res) => {
|
||||
const { query, location } = req.body;
|
||||
|
||||
if (!query || !location) {
|
||||
return res.status(400).json({
|
||||
success: false,
|
||||
error: 'Query and location are required'
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
const results = await searchService.search(query, location);
|
||||
res.json({
|
||||
success: true,
|
||||
results
|
||||
});
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'An error occurred during search';
|
||||
console.error('Search error:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: errorMessage
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Get business by ID
|
||||
router.get('/business/:id', async (req, res) => {
|
||||
const { id } = req.params;
|
||||
|
||||
try {
|
||||
const business = await searchService.getBusinessById(id);
|
||||
|
||||
if (!business) {
|
||||
return res.status(404).json({
|
||||
success: false,
|
||||
error: 'Business not found'
|
||||
});
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
business
|
||||
});
|
||||
} catch (error) {
|
||||
const errorMessage = error instanceof Error ? error.message : 'Failed to fetch business details';
|
||||
console.error('Error fetching business:', error);
|
||||
res.status(500).json({
|
||||
success: false,
|
||||
error: errorMessage
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
413
src/routes/business.ts
Normal file
413
src/routes/business.ts
Normal file
|
|
@ -0,0 +1,413 @@
|
|||
import { Router } from 'express';
|
||||
import { z } from 'zod';
|
||||
import { supabase } from '../lib/supabase';
|
||||
import { authenticateUser } from '../middleware/auth';
|
||||
|
||||
const router = Router();
|
||||
|
||||
// Initialize database tables
/**
 * Best-effort, idempotent-ish schema bootstrap run once at module load.
 *
 * Strategy: probe each table with a cheap SELECT; when the probe fails with
 * PostgREST code 'PGRST204' the table is assumed missing and is created via
 * the `execute_sql` RPC. Then seed one test business + profile.
 *
 * NOTE(review): verify that PGRST204 is really the "missing relation" code
 * against the PostgREST error reference — if the code differs, the CREATE
 * branch never runs.
 * NOTE(review): the test-data INSERT runs on EVERY startup; after the first
 * run it fails on the duplicate 'test-business-1' key and only logs the
 * error. Presumably intentional for dev, but confirm.
 *
 * All failures are logged and swallowed — this function never throws.
 */
async function initializeTables() {
  try {
    // Create businesses table if it doesn't exist
    const { error: businessError } = await supabase.from('businesses').select('id').limit(1);

    if (businessError?.code === 'PGRST204') {
      const { error } = await supabase.rpc('execute_sql', {
        sql_string: `
          CREATE TABLE IF NOT EXISTS public.businesses (
            id TEXT PRIMARY KEY,
            name TEXT NOT NULL,
            phone TEXT,
            email TEXT,
            address TEXT,
            rating NUMERIC,
            website TEXT,
            description TEXT,
            source TEXT,
            logo TEXT,
            latitude NUMERIC,
            longitude NUMERIC,
            last_updated TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()),
            search_count INTEGER DEFAULT 1,
            created_at TIMESTAMP WITH TIME ZONE DEFAULT timezone('utc'::text, now()),
            place_id TEXT
          );
        `
      });
      if (error) console.error('Error creating businesses table:', error);
    }

    // Create business_profiles table if it doesn't exist
    const { error: profileError } = await supabase.from('business_profiles').select('business_id').limit(1);

    if (profileError?.code === 'PGRST204') {
      // One RPC creates profiles + claims tables, indexes, RLS and policies.
      const { error } = await supabase.rpc('execute_sql', {
        sql_string: `
          CREATE TABLE IF NOT EXISTS public.business_profiles (
            business_id TEXT PRIMARY KEY REFERENCES public.businesses(id),
            claimed_by UUID REFERENCES auth.users(id),
            claimed_at TIMESTAMP WITH TIME ZONE,
            verification_status TEXT NOT NULL DEFAULT 'unverified',
            social_links JSONB DEFAULT '{}',
            hours_of_operation JSONB DEFAULT '{}',
            additional_photos TEXT[] DEFAULT '{}',
            tags TEXT[] DEFAULT '{}',
            updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
            CONSTRAINT valid_verification_status CHECK (verification_status IN ('unverified', 'pending', 'verified', 'rejected'))
          );

          CREATE TABLE IF NOT EXISTS public.business_claims (
            id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
            business_id TEXT NOT NULL REFERENCES public.businesses(id),
            user_id UUID NOT NULL REFERENCES auth.users(id),
            status TEXT NOT NULL DEFAULT 'pending',
            proof_documents TEXT[] DEFAULT '{}',
            submitted_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
            reviewed_at TIMESTAMP WITH TIME ZONE,
            reviewed_by UUID REFERENCES auth.users(id),
            notes TEXT,
            CONSTRAINT valid_claim_status CHECK (status IN ('pending', 'approved', 'rejected'))
          );

          CREATE INDEX IF NOT EXISTS idx_business_profiles_claimed_by ON public.business_profiles(claimed_by);
          CREATE INDEX IF NOT EXISTS idx_business_claims_business_id ON public.business_claims(business_id);
          CREATE INDEX IF NOT EXISTS idx_business_claims_user_id ON public.business_claims(user_id);
          CREATE INDEX IF NOT EXISTS idx_business_claims_status ON public.business_claims(status);

          ALTER TABLE public.business_profiles ENABLE ROW LEVEL SECURITY;
          ALTER TABLE public.business_claims ENABLE ROW LEVEL SECURITY;

          DROP POLICY IF EXISTS "Public profiles are viewable by everyone" ON public.business_profiles;
          CREATE POLICY "Public profiles are viewable by everyone"
            ON public.business_profiles FOR SELECT
            USING (true);

          DROP POLICY IF EXISTS "Profiles can be updated by verified owners" ON public.business_profiles;
          CREATE POLICY "Profiles can be updated by verified owners"
            ON public.business_profiles FOR UPDATE
            USING (auth.uid() = claimed_by AND verification_status = 'verified');

          DROP POLICY IF EXISTS "Users can view their own claims" ON public.business_claims;
          CREATE POLICY "Users can view their own claims"
            ON public.business_claims FOR SELECT
            USING (auth.uid() = user_id);

          DROP POLICY IF EXISTS "Users can create claims" ON public.business_claims;
          CREATE POLICY "Users can create claims"
            ON public.business_claims FOR INSERT
            WITH CHECK (auth.uid() = user_id);

          DROP POLICY IF EXISTS "Only admins can review claims" ON public.business_claims;
          CREATE POLICY "Only admins can review claims"
            ON public.business_claims FOR UPDATE
            USING (EXISTS (
              SELECT 1 FROM auth.users
              WHERE auth.uid() = id
              AND raw_app_meta_data->>'role' = 'admin'
            ));
        `
      });
      if (error) console.error('Error creating profile tables:', error);
    }

    // Insert test data
    const { error: testDataError } = await supabase
      .from('businesses')
      .insert([
        {
          id: 'test-business-1',
          name: 'Test Coffee Shop',
          phone: '303-555-0123',
          email: 'contact@testcoffee.com',
          address: '123 Test St, Denver, CO 80202',
          rating: 4.5,
          website: 'https://testcoffee.com',
          description: 'A cozy coffee shop in downtown Denver serving artisanal coffee and pastries.',
          source: 'manual'
        }
      ])
      .select()
      .single();

    if (testDataError) {
      console.error('Error inserting test data:', testDataError);
    }

    // Create test business profile
    const { error: testProfileError } = await supabase
      .from('business_profiles')
      .insert([
        {
          business_id: 'test-business-1',
          verification_status: 'unverified',
          social_links: {
            facebook: 'https://facebook.com/testcoffee',
            instagram: 'https://instagram.com/testcoffee'
          },
          hours_of_operation: {
            monday: ['7:00', '19:00'],
            tuesday: ['7:00', '19:00'],
            wednesday: ['7:00', '19:00'],
            thursday: ['7:00', '19:00'],
            friday: ['7:00', '20:00'],
            saturday: ['8:00', '20:00'],
            sunday: ['8:00', '18:00']
          },
          tags: ['coffee', 'pastries', 'breakfast', 'lunch']
        }
      ])
      .select()
      .single();

    if (testProfileError) {
      console.error('Error creating test profile:', testProfileError);
    }
  } catch (error) {
    console.error('Error initializing tables:', error);
  }
}

// Call initialization on startup
// (fire-and-forget: the returned promise is intentionally not awaited)
initializeTables();
|
||||
|
||||
// Schema for business profile updates
// All fields optional: PATCH payloads may update any subset.
const profileUpdateSchema = z.object({
  // platform name -> profile URL
  social_links: z.record(z.string()).optional(),
  // day name -> array of time strings (e.g. ['7:00', '19:00'])
  hours_of_operation: z.record(z.array(z.string())).optional(),
  additional_photos: z.array(z.string()).optional(),
  tags: z.array(z.string()).optional(),
});

// Schema for claim submissions
const claimSubmissionSchema = z.object({
  business_id: z.string(),
  // URLs/paths of uploaded ownership-proof documents
  proof_documents: z.array(z.string()),
  notes: z.string().optional(),
});
|
||||
|
||||
// Get business profile
|
||||
router.get('/:businessId', async (req, res) => {
|
||||
try {
|
||||
const { businessId } = req.params;
|
||||
|
||||
// Get business details and profile
|
||||
const { data: business, error: businessError } = await supabase
|
||||
.from('businesses')
|
||||
.select(`
|
||||
*,
|
||||
business_profiles (*)
|
||||
`)
|
||||
.eq('id', businessId)
|
||||
.single();
|
||||
|
||||
if (businessError) throw businessError;
|
||||
if (!business) {
|
||||
return res.status(404).json({ error: 'Business not found' });
|
||||
}
|
||||
|
||||
res.json(business);
|
||||
} catch (error) {
|
||||
console.error('Error fetching business profile:', error);
|
||||
res.status(500).json({ error: 'Failed to fetch business profile' });
|
||||
}
|
||||
});
|
||||
|
||||
// Update business profile (requires authentication)
|
||||
router.patch('/:businessId/profile', authenticateUser, async (req, res) => {
|
||||
try {
|
||||
const { businessId } = req.params;
|
||||
if (!req.user) {
|
||||
return res.status(401).json({ error: 'User not authenticated' });
|
||||
}
|
||||
const userId = req.user.id;
|
||||
const updates = profileUpdateSchema.parse(req.body);
|
||||
|
||||
// Check if user owns this profile
|
||||
const { data: profile } = await supabase
|
||||
.from('business_profiles')
|
||||
.select('claimed_by, verification_status')
|
||||
.eq('business_id', businessId)
|
||||
.single();
|
||||
|
||||
if (!profile || profile.claimed_by !== userId || profile.verification_status !== 'verified') {
|
||||
return res.status(403).json({ error: 'Not authorized to update this profile' });
|
||||
}
|
||||
|
||||
// Update profile
|
||||
const { error: updateError } = await supabase
|
||||
.from('business_profiles')
|
||||
.update({
|
||||
...updates,
|
||||
updated_at: new Date().toISOString(),
|
||||
})
|
||||
.eq('business_id', businessId);
|
||||
|
||||
if (updateError) throw updateError;
|
||||
|
||||
res.json({ message: 'Profile updated successfully' });
|
||||
} catch (error) {
|
||||
console.error('Error updating business profile:', error);
|
||||
res.status(500).json({ error: 'Failed to update profile' });
|
||||
}
|
||||
});
|
||||
|
||||
// Submit a claim for a business
|
||||
router.post('/claim', authenticateUser, async (req, res) => {
|
||||
try {
|
||||
if (!req.user) {
|
||||
return res.status(401).json({ error: 'User not authenticated' });
|
||||
}
|
||||
const userId = req.user.id;
|
||||
const claim = claimSubmissionSchema.parse(req.body);
|
||||
|
||||
// Check if business exists
|
||||
const { data: business } = await supabase
|
||||
.from('businesses')
|
||||
.select('id')
|
||||
.eq('id', claim.business_id)
|
||||
.single();
|
||||
|
||||
if (!business) {
|
||||
return res.status(404).json({ error: 'Business not found' });
|
||||
}
|
||||
|
||||
// Check if business is already claimed
|
||||
const { data: existingProfile } = await supabase
|
||||
.from('business_profiles')
|
||||
.select('claimed_by')
|
||||
.eq('business_id', claim.business_id)
|
||||
.single();
|
||||
|
||||
if (existingProfile?.claimed_by) {
|
||||
return res.status(400).json({ error: 'Business is already claimed' });
|
||||
}
|
||||
|
||||
// Check for existing pending claims
|
||||
const { data: existingClaim } = await supabase
|
||||
.from('business_claims')
|
||||
.select('id')
|
||||
.eq('business_id', claim.business_id)
|
||||
.eq('status', 'pending')
|
||||
.single();
|
||||
|
||||
if (existingClaim) {
|
||||
return res.status(400).json({ error: 'A pending claim already exists for this business' });
|
||||
}
|
||||
|
||||
// Create claim
|
||||
const { error: claimError } = await supabase
|
||||
.from('business_claims')
|
||||
.insert({
|
||||
business_id: claim.business_id,
|
||||
user_id: userId,
|
||||
proof_documents: claim.proof_documents,
|
||||
notes: claim.notes,
|
||||
});
|
||||
|
||||
if (claimError) throw claimError;
|
||||
|
||||
res.json({ message: 'Claim submitted successfully' });
|
||||
} catch (error) {
|
||||
console.error('Error submitting business claim:', error);
|
||||
res.status(500).json({ error: 'Failed to submit claim' });
|
||||
}
|
||||
});
|
||||
|
||||
// Get claims for a business (admin only)
|
||||
router.get('/:businessId/claims', authenticateUser, async (req, res) => {
|
||||
try {
|
||||
const { businessId } = req.params;
|
||||
if (!req.user) {
|
||||
return res.status(401).json({ error: 'User not authenticated' });
|
||||
}
|
||||
const userId = req.user.id;
|
||||
|
||||
// Check if user is admin
|
||||
const { data: user } = await supabase
|
||||
.from('users')
|
||||
.select('raw_app_meta_data')
|
||||
.eq('id', userId)
|
||||
.single();
|
||||
|
||||
if (user?.raw_app_meta_data?.role !== 'admin') {
|
||||
return res.status(403).json({ error: 'Not authorized' });
|
||||
}
|
||||
|
||||
const { data: claims, error } = await supabase
|
||||
.from('business_claims')
|
||||
.select(`
|
||||
*,
|
||||
user:user_id (
|
||||
email
|
||||
)
|
||||
`)
|
||||
.eq('business_id', businessId)
|
||||
.order('submitted_at', { ascending: false });
|
||||
|
||||
if (error) throw error;
|
||||
|
||||
res.json(claims);
|
||||
} catch (error) {
|
||||
console.error('Error fetching business claims:', error);
|
||||
res.status(500).json({ error: 'Failed to fetch claims' });
|
||||
}
|
||||
});
|
||||
|
||||
// Review a claim (admin only)
|
||||
router.post('/claims/:claimId/review', authenticateUser, async (req, res) => {
|
||||
try {
|
||||
const { claimId } = req.params;
|
||||
if (!req.user) {
|
||||
return res.status(401).json({ error: 'User not authenticated' });
|
||||
}
|
||||
const userId = req.user.id;
|
||||
const { status, notes } = z.object({
|
||||
status: z.enum(['approved', 'rejected']),
|
||||
notes: z.string().optional(),
|
||||
}).parse(req.body);
|
||||
|
||||
// Check if user is admin
|
||||
const { data: user } = await supabase
|
||||
.from('users')
|
||||
.select('raw_app_meta_data')
|
||||
.eq('id', userId)
|
||||
.single();
|
||||
|
||||
if (user?.raw_app_meta_data?.role !== 'admin') {
|
||||
return res.status(403).json({ error: 'Not authorized' });
|
||||
}
|
||||
|
||||
// Get claim details
|
||||
const { data: claim } = await supabase
|
||||
.from('business_claims')
|
||||
.select('business_id, status')
|
||||
.eq('id', claimId)
|
||||
.single();
|
||||
|
||||
if (!claim) {
|
||||
return res.status(404).json({ error: 'Claim not found' });
|
||||
}
|
||||
|
||||
if (claim.status !== 'pending') {
|
||||
return res.status(400).json({ error: 'Claim has already been reviewed' });
|
||||
}
|
||||
|
||||
// Start a transaction
|
||||
const { error: updateError } = await supabase.rpc('review_business_claim', {
|
||||
p_claim_id: claimId,
|
||||
p_business_id: claim.business_id,
|
||||
p_user_id: userId,
|
||||
p_status: status,
|
||||
p_notes: notes
|
||||
});
|
||||
|
||||
if (updateError) throw updateError;
|
||||
|
||||
res.json({ message: 'Claim reviewed successfully' });
|
||||
} catch (error) {
|
||||
console.error('Error reviewing business claim:', error);
|
||||
res.status(500).json({ error: 'Failed to review claim' });
|
||||
}
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
|
@ -1,160 +1,310 @@
|
|||
import express from 'express';
|
||||
import logger from '../utils/logger';
|
||||
import type { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
||||
import type { Embeddings } from '@langchain/core/embeddings';
|
||||
import { ChatOpenAI } from '@langchain/openai';
|
||||
import {
|
||||
getAvailableChatModelProviders,
|
||||
getAvailableEmbeddingModelProviders,
|
||||
} from '../lib/providers';
|
||||
import { searchHandlers } from '../websocket/messageHandler';
|
||||
import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
|
||||
import { MetaSearchAgentType } from '../search/metaSearchAgent';
|
||||
import { Router, Response as ExpressResponse } from 'express';
|
||||
import { z } from 'zod';
|
||||
import fetch from 'node-fetch';
|
||||
import { Response as FetchResponse } from 'node-fetch';
|
||||
import { supabase } from '../lib/supabase';
|
||||
import { env } from '../config/env';
|
||||
|
||||
const router = express.Router();
|
||||
const router = Router();
|
||||
|
||||
interface chatModel {
|
||||
provider: string;
|
||||
model: string;
|
||||
customOpenAIBaseURL?: string;
|
||||
customOpenAIKey?: string;
|
||||
const searchSchema = z.object({
|
||||
query: z.string().min(1),
|
||||
});
|
||||
|
||||
interface Business {
|
||||
id: string;
|
||||
name: string;
|
||||
description: string;
|
||||
website: string;
|
||||
phone: string | null;
|
||||
address: string | null;
|
||||
}
|
||||
|
||||
interface embeddingModel {
|
||||
provider: string;
|
||||
model: string;
|
||||
interface SearxResult {
|
||||
url: string;
|
||||
title: string;
|
||||
content: string;
|
||||
engine: string;
|
||||
score: number;
|
||||
}
|
||||
|
||||
interface ChatRequestBody {
|
||||
optimizationMode: 'speed' | 'balanced';
|
||||
focusMode: string;
|
||||
chatModel?: chatModel;
|
||||
embeddingModel?: embeddingModel;
|
||||
interface SearxResponse {
|
||||
query: string;
|
||||
history: Array<[string, string]>;
|
||||
results: SearxResult[];
|
||||
}
|
||||
|
||||
router.post('/', async (req, res) => {
|
||||
async function getCachedResults(query: string): Promise<Business[]> {
|
||||
console.log('Fetching cached results for query:', query);
|
||||
const normalizedQuery = query.toLowerCase()
|
||||
.trim()
|
||||
.replace(/,/g, '') // Remove commas
|
||||
.replace(/\s+/g, ' '); // Normalize whitespace
|
||||
|
||||
const searchTerms = normalizedQuery.split(' ').filter(term => term.length > 0);
|
||||
console.log('Normalized search terms:', searchTerms);
|
||||
|
||||
// First try exact match
|
||||
const { data: exactMatch } = await supabase
|
||||
.from('search_cache')
|
||||
.select('*')
|
||||
.eq('query', normalizedQuery)
|
||||
.single();
|
||||
|
||||
if (exactMatch) {
|
||||
console.log('Found exact match in cache');
|
||||
return exactMatch.results as Business[];
|
||||
}
|
||||
|
||||
// Then try fuzzy search
|
||||
console.log('Trying fuzzy search with terms:', searchTerms);
|
||||
const searchConditions = searchTerms.map(term => `query.ilike.%${term}%`);
|
||||
const { data: cachedResults, error } = await supabase
|
||||
.from('search_cache')
|
||||
.select('*')
|
||||
.or(searchConditions.join(','));
|
||||
|
||||
if (error) {
|
||||
console.error('Error fetching cached results:', error);
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!cachedResults || cachedResults.length === 0) {
|
||||
console.log('No cached results found');
|
||||
return [];
|
||||
}
|
||||
|
||||
console.log(`Found ${cachedResults.length} cached searches`);
|
||||
|
||||
// Combine and deduplicate results from all matching searches
|
||||
const allResults = cachedResults.flatMap(cache => cache.results as Business[]);
|
||||
const uniqueResults = Array.from(new Map(allResults.map(item => [item.id, item])).values());
|
||||
|
||||
console.log(`Combined into ${uniqueResults.length} unique businesses`);
|
||||
|
||||
// Sort by relevance to search terms
|
||||
const sortedResults = uniqueResults.sort((a, b) => {
|
||||
const aScore = searchTerms.filter(term =>
|
||||
a.name.toLowerCase().includes(term) ||
|
||||
a.description.toLowerCase().includes(term)
|
||||
).length;
|
||||
const bScore = searchTerms.filter(term =>
|
||||
b.name.toLowerCase().includes(term) ||
|
||||
b.description.toLowerCase().includes(term)
|
||||
).length;
|
||||
return bScore - aScore;
|
||||
});
|
||||
|
||||
return sortedResults;
|
||||
}
|
||||
|
||||
async function searchSearxNG(query: string): Promise<Business[]> {
|
||||
console.log('Starting SearxNG search for query:', query);
|
||||
try {
|
||||
const body: ChatRequestBody = req.body;
|
||||
const params = new URLSearchParams({
|
||||
q: `${query} denver business`,
|
||||
format: 'json',
|
||||
language: 'en',
|
||||
time_range: '',
|
||||
safesearch: '1',
|
||||
engines: 'google,bing,duckduckgo'
|
||||
});
|
||||
|
||||
if (!body.focusMode || !body.query) {
|
||||
return res.status(400).json({ message: 'Missing focus mode or query' });
|
||||
}
|
||||
const searchUrl = `${env.SEARXNG_URL}/search?${params.toString()}`;
|
||||
console.log('Searching SearxNG at URL:', searchUrl);
|
||||
|
||||
body.history = body.history || [];
|
||||
body.optimizationMode = body.optimizationMode || 'balanced';
|
||||
|
||||
const history: BaseMessage[] = body.history.map((msg) => {
|
||||
if (msg[0] === 'human') {
|
||||
return new HumanMessage({
|
||||
content: msg[1],
|
||||
});
|
||||
} else {
|
||||
return new AIMessage({
|
||||
content: msg[1],
|
||||
});
|
||||
const response: FetchResponse = await fetch(searchUrl, {
|
||||
method: 'GET',
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
}
|
||||
});
|
||||
|
||||
const [chatModelProviders, embeddingModelProviders] = await Promise.all([
|
||||
getAvailableChatModelProviders(),
|
||||
getAvailableEmbeddingModelProviders(),
|
||||
]);
|
||||
|
||||
const chatModelProvider =
|
||||
body.chatModel?.provider || Object.keys(chatModelProviders)[0];
|
||||
const chatModel =
|
||||
body.chatModel?.model ||
|
||||
Object.keys(chatModelProviders[chatModelProvider])[0];
|
||||
|
||||
const embeddingModelProvider =
|
||||
body.embeddingModel?.provider || Object.keys(embeddingModelProviders)[0];
|
||||
const embeddingModel =
|
||||
body.embeddingModel?.model ||
|
||||
Object.keys(embeddingModelProviders[embeddingModelProvider])[0];
|
||||
|
||||
let llm: BaseChatModel | undefined;
|
||||
let embeddings: Embeddings | undefined;
|
||||
|
||||
if (body.chatModel?.provider === 'custom_openai') {
|
||||
if (
|
||||
!body.chatModel?.customOpenAIBaseURL ||
|
||||
!body.chatModel?.customOpenAIKey
|
||||
) {
|
||||
return res
|
||||
.status(400)
|
||||
.json({ message: 'Missing custom OpenAI base URL or key' });
|
||||
}
|
||||
|
||||
llm = new ChatOpenAI({
|
||||
modelName: body.chatModel.model,
|
||||
openAIApiKey: body.chatModel.customOpenAIKey,
|
||||
temperature: 0.7,
|
||||
configuration: {
|
||||
baseURL: body.chatModel.customOpenAIBaseURL,
|
||||
},
|
||||
}) as unknown as BaseChatModel;
|
||||
} else if (
|
||||
chatModelProviders[chatModelProvider] &&
|
||||
chatModelProviders[chatModelProvider][chatModel]
|
||||
) {
|
||||
llm = chatModelProviders[chatModelProvider][chatModel]
|
||||
.model as unknown as BaseChatModel | undefined;
|
||||
if (!response.ok) {
|
||||
throw new Error(`SearxNG search failed: ${response.statusText} (${response.status})`);
|
||||
}
|
||||
|
||||
if (
|
||||
embeddingModelProviders[embeddingModelProvider] &&
|
||||
embeddingModelProviders[embeddingModelProvider][embeddingModel]
|
||||
) {
|
||||
embeddings = embeddingModelProviders[embeddingModelProvider][
|
||||
embeddingModel
|
||||
].model as Embeddings | undefined;
|
||||
const data = await response.json() as SearxResponse;
|
||||
console.log(`Got ${data.results?.length || 0} raw results from SearxNG`);
|
||||
console.log('Sample result:', data.results?.[0]);
|
||||
|
||||
if (!data.results || data.results.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (!llm || !embeddings) {
|
||||
return res.status(400).json({ message: 'Invalid model selected' });
|
||||
}
|
||||
const filteredResults = data.results
|
||||
.filter(result =>
|
||||
result.title &&
|
||||
result.url &&
|
||||
!result.url.includes('yelp.com/search') &&
|
||||
!result.url.includes('google.com/search') &&
|
||||
!result.url.includes('bbb.org/search') &&
|
||||
!result.url.includes('thumbtack.com/search') &&
|
||||
!result.url.includes('angi.com/search') &&
|
||||
!result.url.includes('yellowpages.com/search')
|
||||
);
|
||||
|
||||
const searchHandler: MetaSearchAgentType = searchHandlers[body.focusMode];
|
||||
console.log(`Filtered to ${filteredResults.length} relevant results`);
|
||||
console.log('Sample filtered result:', filteredResults[0]);
|
||||
|
||||
if (!searchHandler) {
|
||||
return res.status(400).json({ message: 'Invalid focus mode' });
|
||||
}
|
||||
const searchTerms = query.toLowerCase().split(' ');
|
||||
const businesses = filteredResults
|
||||
.map(result => {
|
||||
const business = {
|
||||
id: result.url,
|
||||
name: cleanBusinessName(result.title),
|
||||
description: result.content || '',
|
||||
website: result.url,
|
||||
phone: extractPhone(result.content || '') || extractPhone(result.title),
|
||||
address: extractAddress(result.content || '') || extractAddress(result.title),
|
||||
score: result.score || 0
|
||||
};
|
||||
console.log('Processed business:', business);
|
||||
return business;
|
||||
})
|
||||
.filter(business => {
|
||||
// Check if business name contains any of the search terms
|
||||
const nameMatches = searchTerms.some(term =>
|
||||
business.name.toLowerCase().includes(term)
|
||||
);
|
||||
|
||||
// Check if description contains any of the search terms
|
||||
const descriptionMatches = searchTerms.some(term =>
|
||||
business.description.toLowerCase().includes(term)
|
||||
);
|
||||
|
||||
return business.name.length > 2 && (nameMatches || descriptionMatches);
|
||||
})
|
||||
.sort((a, b) => {
|
||||
// Score based on how many search terms match the name and description
|
||||
const aScore = searchTerms.filter(term =>
|
||||
a.name.toLowerCase().includes(term) ||
|
||||
a.description.toLowerCase().includes(term)
|
||||
).length;
|
||||
const bScore = searchTerms.filter(term =>
|
||||
b.name.toLowerCase().includes(term) ||
|
||||
b.description.toLowerCase().includes(term)
|
||||
).length;
|
||||
return bScore - aScore;
|
||||
})
|
||||
.slice(0, 10);
|
||||
|
||||
const emitter = await searchHandler.searchAndAnswer(
|
||||
body.query,
|
||||
history,
|
||||
llm,
|
||||
embeddings,
|
||||
body.optimizationMode,
|
||||
[],
|
||||
);
|
||||
console.log(`Transformed into ${businesses.length} business entries`);
|
||||
return businesses;
|
||||
} catch (error) {
|
||||
console.error('SearxNG search error:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
let message = '';
|
||||
let sources = [];
|
||||
async function cacheResults(query: string, results: Business[]): Promise<void> {
|
||||
if (!results.length) return;
|
||||
|
||||
emitter.on('data', (data) => {
|
||||
const parsedData = JSON.parse(data);
|
||||
if (parsedData.type === 'response') {
|
||||
message += parsedData.data;
|
||||
} else if (parsedData.type === 'sources') {
|
||||
sources = parsedData.data;
|
||||
}
|
||||
});
|
||||
console.log(`Caching ${results.length} results for query:`, query);
|
||||
const normalizedQuery = query.toLowerCase().trim();
|
||||
|
||||
const { data: existing } = await supabase
|
||||
.from('search_cache')
|
||||
.select('id, results')
|
||||
.eq('query', normalizedQuery)
|
||||
.single();
|
||||
|
||||
emitter.on('end', () => {
|
||||
res.status(200).json({ message, sources });
|
||||
});
|
||||
if (existing) {
|
||||
console.log('Updating existing cache entry');
|
||||
// Merge new results with existing ones, removing duplicates
|
||||
const allResults = [...existing.results, ...results];
|
||||
const uniqueResults = Array.from(new Map(allResults.map(item => [item.id, item])).values());
|
||||
|
||||
emitter.on('error', (data) => {
|
||||
const parsedData = JSON.parse(data);
|
||||
res.status(500).json({ message: parsedData.data });
|
||||
});
|
||||
} catch (err: any) {
|
||||
logger.error(`Error in getting search results: ${err.message}`);
|
||||
res.status(500).json({ message: 'An error has occurred.' });
|
||||
await supabase
|
||||
.from('search_cache')
|
||||
.update({
|
||||
results: uniqueResults,
|
||||
updated_at: new Date().toISOString()
|
||||
})
|
||||
.eq('id', existing.id);
|
||||
} else {
|
||||
console.log('Creating new cache entry');
|
||||
await supabase
|
||||
.from('search_cache')
|
||||
.insert({
|
||||
query: normalizedQuery,
|
||||
results,
|
||||
location: 'denver', // Default location
|
||||
category: 'business', // Default category
|
||||
created_at: new Date().toISOString(),
|
||||
updated_at: new Date().toISOString(),
|
||||
expires_at: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000).toISOString() // 7 days from now
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function cleanBusinessName(title: string): string {
|
||||
return title
|
||||
.replace(/^(the\s+)?/i, '')
|
||||
.replace(/\s*[-|]\s*.+$/i, '')
|
||||
.replace(/\s*\|.*$/i, '')
|
||||
.replace(/\s*in\s+denver.*$/i, '')
|
||||
.replace(/\s*near\s+denver.*$/i, '')
|
||||
.replace(/\s*-\s*.*denver.*$/i, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
function extractPhone(text: string): string | null {
|
||||
const phoneRegex = /(\+?1?\s*\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4})/;
|
||||
const match = text.match(phoneRegex);
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
function extractAddress(text: string): string | null {
|
||||
const addressRegex = /\d+\s+[A-Za-z0-9\s,]+(?:Street|St|Avenue|Ave|Road|Rd|Boulevard|Blvd|Lane|Ln|Drive|Dr|Way|Court|Ct|Circle|Cir)[,\s]+(?:[A-Za-z\s]+,\s*)?(?:CO|Colorado)[,\s]+\d{5}(?:-\d{4})?/i;
|
||||
const match = text.match(addressRegex);
|
||||
return match ? match[0] : null;
|
||||
}
|
||||
|
||||
router.post('/search', async (req, res) => {
|
||||
try {
|
||||
console.log('Received search request:', req.body);
|
||||
const { query } = searchSchema.parse(req.body);
|
||||
await handleSearch(query, res);
|
||||
} catch (error) {
|
||||
console.error('Search error:', error);
|
||||
res.status(400).json({ error: 'Search failed. Please try again.' });
|
||||
}
|
||||
});
|
||||
|
||||
// Also support GET requests for easier testing
|
||||
router.get('/search', async (req, res) => {
|
||||
try {
|
||||
const query = req.query.q as string;
|
||||
if (!query) {
|
||||
return res.status(400).json({ error: 'Query parameter "q" is required' });
|
||||
}
|
||||
console.log('Received search request:', { query });
|
||||
await handleSearch(query, res);
|
||||
} catch (error) {
|
||||
console.error('Search error:', error);
|
||||
res.status(400).json({ error: 'Search failed. Please try again.' });
|
||||
}
|
||||
});
|
||||
|
||||
// Helper function to handle search logic
|
||||
async function handleSearch(query: string, res: ExpressResponse) {
|
||||
// Get cached results immediately
|
||||
const cachedResults = await getCachedResults(query);
|
||||
console.log(`Returning ${cachedResults.length} cached results to client`);
|
||||
|
||||
// Send cached results to client
|
||||
res.json({ results: cachedResults });
|
||||
|
||||
// Search for new results in the background
|
||||
console.log('Starting background search');
|
||||
searchSearxNG(query).then(async newResults => {
|
||||
console.log(`Found ${newResults.length} new results from SearxNG`);
|
||||
if (newResults.length > 0) {
|
||||
await cacheResults(query, newResults);
|
||||
}
|
||||
}).catch(error => {
|
||||
console.error('Background search error:', error);
|
||||
});
|
||||
}
|
||||
|
||||
export default router;
|
||||
|
|
|
|||
21
src/server.ts
Normal file
21
src/server.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import express from 'express';
|
||||
import cors from 'cors';
|
||||
import { env } from './config/env';
|
||||
import app from './app';
|
||||
import { HealthCheckService } from './lib/services/healthCheck';
|
||||
|
||||
const port = env.PORT || 3000;
|
||||
|
||||
// Health check endpoint
|
||||
app.get('/health', async (req, res) => {
|
||||
const health = await HealthCheckService.checkHealth();
|
||||
res.json(health);
|
||||
});
|
||||
|
||||
export function startServer() {
|
||||
return app.listen(port, () => {
|
||||
console.log(`Server is running on port ${port}`);
|
||||
});
|
||||
}
|
||||
|
||||
export default app;
|
||||
3
src/styles/input.css
Normal file
3
src/styles/input.css
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/* Tailwind CSS entry stylesheet. At build time each directive is replaced
   with Tailwind's generated styles; the order (base → components →
   utilities) controls the cascade and must not change. */
@tailwind base;
@tailwind components;
@tailwind utilities;
|
||||
102
src/test-supabase.ts
Normal file
102
src/test-supabase.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
// Load environment variables
|
||||
dotenv.config();
|
||||
|
||||
/**
 * Smoke-tests the Supabase connection by inserting (and cleaning up) one
 * row in each of the `businesses`, `searches`, and `cache` tables.
 * Results are reported to the console; errors are logged, not thrown.
 * Requires SUPABASE_URL and SUPABASE_KEY in the environment.
 */
async function testSupabaseConnection() {
  console.log('Testing Supabase connection...');
  console.log('URL:', process.env.SUPABASE_URL);
  // Only the key *length* is printed so the secret never reaches the log.
  console.log('Key length:', process.env.SUPABASE_KEY?.length || 0);

  try {
    // Non-null assertions: this script intentionally crashes early if the
    // env vars are missing rather than guarding each call.
    const supabase = createClient(
      process.env.SUPABASE_URL!,
      process.env.SUPABASE_KEY!,
      {
        auth: {
          autoRefreshToken: true,
          persistSession: true
        }
      }
    );

    // Test businesses table: insert a throwaway row, then delete it.
    console.log('\nTesting businesses table:');
    const testBusiness = {
      id: 'test_' + Date.now(),
      name: 'Test Business',
      phone: '123-456-7890',
      email: 'test@example.com',
      address: '123 Test St',
      rating: 5,
      website: 'https://test.com',
      source: 'test',
      description: 'Test description',
      latitude: 39.7392,
      longitude: -104.9903,
      search_count: 1,
      created_at: new Date().toISOString()
    };

    const { error: insertBusinessError } = await supabase
      .from('businesses')
      .insert([testBusiness])
      .select();

    if (insertBusinessError) {
      console.error('❌ INSERT business error:', insertBusinessError);
    } else {
      console.log('✅ INSERT business OK');
      // Clean up
      await supabase.from('businesses').delete().eq('id', testBusiness.id);
    }

    // Test searches table — insert only; search rows are presumably
    // append-only analytics, so no cleanup is attempted here.
    console.log('\nTesting searches table:');
    const testSearch = {
      query: 'test query',
      location: 'test location',
      results_count: 0,
      timestamp: new Date().toISOString()
    };

    const { error: insertSearchError } = await supabase
      .from('searches')
      .insert([testSearch])
      .select();

    if (insertSearchError) {
      console.error('❌ INSERT search error:', insertSearchError);
    } else {
      console.log('✅ INSERT search OK');
    }

    // Test cache table: insert a row expiring in one hour, then delete it.
    console.log('\nTesting cache table:');
    const testCache = {
      key: 'test_key_' + Date.now(),
      value: { test: true },
      created_at: new Date().toISOString(),
      expires_at: new Date(Date.now() + 3600000).toISOString()
    };

    const { error: insertCacheError } = await supabase
      .from('cache')
      .insert([testCache])
      .select();

    if (insertCacheError) {
      console.error('❌ INSERT cache error:', insertCacheError);
    } else {
      console.log('✅ INSERT cache OK');
      // Clean up
      await supabase.from('cache').delete().eq('key', testCache.key);
    }

  } catch (error: any) {
    console.error('❌ Unexpected error:', error);
  }
}

// Entry point: run immediately when this script is executed.
testSupabaseConnection().catch(console.error);
|
||||
139
src/tests/__tests__/database.test.ts
Normal file
139
src/tests/__tests__/database.test.ts
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
import { createClient } from '@supabase/supabase-js';
|
||||
|
||||
// Mock data type — the table names double as keys into the canned rows
// returned by the mocked client below.
type MockData = {
  businesses: { id: string; name: string };
  cache: { key: string; value: { test: boolean } };
};

// Mock Supabase client. NOTE: jest.mock calls are hoisted above imports by
// the Jest transform, so this factory must be fully self-contained. Each
// query-builder method returns a fresh chainable stub whose terminal call
// resolves to a { data, error } pair keyed by the requested table.
jest.mock('@supabase/supabase-js', () => ({
  createClient: jest.fn(() => ({
    from: jest.fn((table: keyof MockData) => {
      const mockData: MockData = {
        businesses: { id: 'test_1', name: 'Test Business' },
        cache: { key: 'test_key', value: { test: true } }
      };

      return {
        // insert(...).select() resolves with the canned row for the table.
        insert: jest.fn(() => ({
          select: jest.fn().mockResolvedValue({
            data: [mockData[table]],
            error: null
          })
        })),
        // select().eq().single() hits the canned row; the .gt() branch
        // models an expiry filter that finds nothing.
        select: jest.fn(() => ({
          eq: jest.fn(() => ({
            single: jest.fn().mockResolvedValue({
              data: mockData[table],
              error: null
            }),
            gt: jest.fn(() => ({
              single: jest.fn().mockResolvedValue({
                data: null,
                error: null
              })
            }))
          }))
        })),
        // update/delete only report success or failure — no data payload.
        update: jest.fn(() => ({
          eq: jest.fn().mockResolvedValue({
            error: null
          })
        })),
        delete: jest.fn(() => ({
          eq: jest.fn().mockResolvedValue({
            error: null
          })
        }))
      };
    })
  }))
}));

// Exercises the CRUD paths the app uses against the mocked client above.
// These tests validate call shapes, not real database behavior.
describe('Database Operations', () => {
  const supabase = createClient('test-url', 'test-key');

  // Representative business row; Date.now() keeps the id unique per run.
  const testBusiness = {
    id: `test_${Date.now()}`,
    name: 'Test Business',
    phone: '(303) 555-1234',
    email: 'test@example.com',
    address: '123 Test St, Denver, CO 80202',
    rating: 5,
    website: 'https://test.com',
    source: 'test',
    description: 'Test description',
    location: { lat: 39.7392, lng: -104.9903 },
    search_count: 1,
    created_at: new Date().toISOString()
  };

  beforeEach(() => {
    jest.clearAllMocks();
  });

  describe('Business Operations', () => {
    it('should insert a business successfully', async () => {
      const { data, error } = await supabase
        .from('businesses')
        .insert([testBusiness])
        .select();

      expect(error).toBeNull();
      expect(data).toBeTruthy();
      // The mock echoes its canned row, so the name is fixed.
      expect(data![0].name).toBe('Test Business');
    });

    it('should retrieve a business by id', async () => {
      const { data, error } = await supabase
        .from('businesses')
        .select()
        .eq('id', testBusiness.id)
        .single();

      expect(error).toBeNull();
      expect(data).toBeTruthy();
      expect(data.name).toBe('Test Business');
    });

    it('should update a business', async () => {
      const { error } = await supabase
        .from('businesses')
        .update({ name: 'Updated Test Business' })
        .eq('id', testBusiness.id);

      expect(error).toBeNull();
    });
  });

  describe('Cache Operations', () => {
    // Cache entry with a one-hour TTL.
    const testCache = {
      key: `test_key_${Date.now()}`,
      value: { test: true },
      created_at: new Date().toISOString(),
      expires_at: new Date(Date.now() + 3600000).toISOString()
    };

    it('should insert cache entry', async () => {
      const { data, error } = await supabase
        .from('cache')
        .insert([testCache])
        .select();

      expect(error).toBeNull();
      expect(data).toBeTruthy();
    });

    it('should retrieve cache entry', async () => {
      const { data, error } = await supabase
        .from('cache')
        .select()
        .eq('key', testCache.key)
        .single();

      expect(error).toBeNull();
      expect(data.value).toEqual({ test: true });
    });
  });
});
|
||||
92
src/tests/__tests__/deepseek.test.ts
Normal file
92
src/tests/__tests__/deepseek.test.ts
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import { DeepSeekService } from '../../lib/services/deepseekService';
|
||||
import { Business } from '../../lib/types';
|
||||
|
||||
// Mock the DeepSeek service. The whole module is replaced, so these tests
// verify the *contract* (shape of cleaned data) against canned output,
// not the real model. jest.mock is hoisted, so the factory is self-contained.
jest.mock('../../lib/services/deepseekService', () => {
  // The "ideal" cleaned record every mocked method funnels back to.
  const mockCleanedBusiness = {
    name: "Denver's Best Plumbing & Repair",
    address: "1234 Main Street, Denver, CO 80202",
    phone: "(720) 555-1234",
    email: "support@denverplumbing.com",
    description: "Professional plumbing services in Denver metro area"
  };

  return {
    DeepSeekService: {
      // chat returns a JSON string, mirroring the real LLM transport.
      chat: jest.fn().mockResolvedValue(JSON.stringify({
        business_info: mockCleanedBusiness
      })),
      detectBusinessType: jest.fn().mockReturnValue('service'),
      sanitizeJsonResponse: jest.fn().mockReturnValue(mockCleanedBusiness),
      manualClean: jest.fn().mockReturnValue(mockCleanedBusiness),
      cleanBusinessData: jest.fn().mockResolvedValue(mockCleanedBusiness)
    }
  };
});

describe('DeepSeekService', () => {
  describe('cleanBusinessData', () => {
    // Deliberately "dirty" input: bracketed suffixes in the name and a
    // suite prefix in the address, which cleaning should strip.
    const testBusiness: Business = {
      id: 'test_1',
      name: "Denver's Best Plumbing & Repair [LLC] (A Family Business)",
      address: "Suite 200-B, 1234 Main Street, Denver, Colorado 80202",
      phone: "(720) 555-1234",
      email: "support@denverplumbing.com",
      description: "Professional plumbing services in Denver metro area",
      source: 'test',
      website: 'https://example.com',
      rating: 4.8,
      location: { lat: 39.7392, lng: -104.9903 },
      openingHours: []
    };

    beforeEach(() => {
      jest.clearAllMocks();
    });

    it('should clean business name correctly', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // No brackets/braces/parens may survive cleaning.
      expect(cleaned.name).not.toMatch(/[\[\]{}()]/);
      expect(cleaned.name).toBeTruthy();
    });

    it('should format phone number correctly', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // Canonical US format: (NNN) NNN-NNNN.
      expect(cleaned.phone).toMatch(/^\(\d{3}\) \d{3}-\d{4}$/);
    });

    it('should clean email address', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // No markup, mailto: prefixes, or call-to-action words in the email.
      expect(cleaned.email).not.toMatch(/[\[\]<>()]|mailto:|click|schedule/i);
      expect(cleaned.email).toMatch(/^[^\s@]+@[^\s@]+\.[^\s@]+$/);
    });

    it('should clean description', async () => {
      const cleaned = await DeepSeekService.cleanBusinessData(testBusiness);
      // Descriptions must be free of pricing, contact prompts, emoji,
      // and hashtags.
      expect(cleaned.description).not.toMatch(/[\$\d]+%?\s*off|\$/i);
      expect(cleaned.description).not.toMatch(/\b(?:call|email|visit|contact|text|www\.|http|@)\b/i);
      expect(cleaned.description).not.toMatch(/[📞📧🌐💳☎️📱]/);
      expect(cleaned.description).not.toMatch(/#\w+/);
    });
  });

  describe('chat', () => {
    // Bracket access reaches the (conceptually) private chat method.
    it('should return a response from the model', async () => {
      const response = await DeepSeekService['chat']([{
        role: 'user',
        content: 'Test message'
      }]);
      expect(response).toBeTruthy();
      expect(typeof response).toBe('string');
    });

    it('should handle errors gracefully', async () => {
      (DeepSeekService['chat'] as jest.Mock).mockRejectedValueOnce(new Error('Test error'));

      await expect(DeepSeekService['chat']([{
        role: 'user',
        content: 'Test message'
      }])).rejects.toThrow('Test error');
    });
  });
});
|
||||
145
src/tests/__tests__/integration/api.test.ts
Normal file
145
src/tests/__tests__/integration/api.test.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import express from 'express';
|
||||
import request from 'supertest';
|
||||
import { SearchService } from '../../../lib/services/searchService';
|
||||
import { Business } from '../../../lib/types';
|
||||
|
||||
// Mock SearchService — the route handlers are exercised end-to-end with
// supertest while the service layer is replaced with jest mocks.
jest.mock('../../../lib/services/searchService');

describe('API Integration', () => {
  let app: express.Application;

  // Canonical business fixture shared by every test below.
  const mockBusiness: Business = {
    id: 'test_1',
    name: "Denver's Best Plumbing",
    address: "1234 Main Street, Denver, CO 80202",
    phone: "(720) 555-1234",
    email: "support@denverplumbing.com",
    description: "Professional plumbing services",
    source: 'test',
    website: 'https://example.com',
    rating: 4.8,
    location: { lat: 39.7392, lng: -104.9903 },
    openingHours: []
  };

  beforeAll(() => {
    // A minimal app mirroring production wiring: JSON body parsing,
    // a JSON-syntax error handler, then the real /api routes.
    app = express();
    app.use(express.json());

    // Mock SearchService methods
    (SearchService.prototype.search as jest.Mock).mockResolvedValue([mockBusiness]);
    (SearchService.prototype.getBusinessById as jest.Mock).mockResolvedValue(mockBusiness);

    // Add error handling middleware
    app.use((err: any, req: express.Request, res: express.Response, next: express.NextFunction) => {
      if (err instanceof SyntaxError && 'body' in err) {
        return res.status(400).json({ error: 'Invalid JSON' });
      }
      next(err);
    });

    // Add routes — require()d here (not imported) so it runs after the
    // jest.mock above has taken effect.
    app.use('/api', require('../../../routes/api').default);
  });

  beforeEach(() => {
    jest.clearAllMocks();
  });

  describe('Search Endpoints', () => {
    it('should handle search requests', async () => {
      const response = await request(app)
        .post('/api/search')
        .send({
          query: 'plumber in Denver',
          location: 'Denver, CO'
        });

      expect(response.status).toBe(200);
      expect(response.body).toHaveProperty('results');
      expect(Array.isArray(response.body.results)).toBe(true);
      expect(response.body.results[0]).toEqual(mockBusiness);
    });

    it('should handle missing parameters', async () => {
      const response = await request(app)
        .post('/api/search')
        .send({
          query: 'plumber in Denver'
          // missing location
        });

      expect(response.status).toBe(400);
      expect(response.body).toHaveProperty('error');
    });

    it('should handle search errors', async () => {
      // Mock search error
      (SearchService.prototype.search as jest.Mock)
        .mockRejectedValueOnce(new Error('Search failed'));

      const response = await request(app)
        .post('/api/search')
        .send({
          query: 'plumber in Denver',
          location: 'Denver, CO'
        });

      expect(response.status).toBe(500);
      expect(response.body).toHaveProperty('error');
    });
  });

  describe('Business Details Endpoint', () => {
    it('should retrieve business details', async () => {
      const response = await request(app)
        .get('/api/business/test_1');

      expect(response.status).toBe(200);
      expect(response.body).toEqual(mockBusiness);
    });

    it('should handle non-existent business', async () => {
      // Mock not found
      (SearchService.prototype.getBusinessById as jest.Mock)
        .mockResolvedValueOnce(null);

      const response = await request(app)
        .get('/api/business/non_existent');

      expect(response.status).toBe(404);
      expect(response.body).toHaveProperty('error');
    });
  });

  describe('Error Handling', () => {
    it('should handle invalid JSON', async () => {
      // Malformed body should be caught by the JSON-syntax middleware.
      const response = await request(app)
        .post('/api/search')
        .set('Content-Type', 'application/json')
        .send('{"invalid json"}');

      expect(response.status).toBe(400);
      expect(response.body).toHaveProperty('error');
      expect(response.body.error).toBe('Invalid JSON');
    });

    it('should handle rate limiting', async () => {
      // Mock rate limit error — presumably the route maps a 429 from the
      // service into its own 429 response; verify against the route code.
      (SearchService.prototype.search as jest.Mock)
        .mockRejectedValueOnce({ response: { status: 429 } });

      const response = await request(app)
        .post('/api/search')
        .send({
          query: 'plumber in Denver',
          location: 'Denver, CO'
        });

      expect(response.status).toBe(429);
      expect(response.body).toHaveProperty('error');
      expect(response.body.error).toBe('Rate limit exceeded');
    });
  });
});
|
||||
162
src/tests/__tests__/integration/search.test.ts
Normal file
162
src/tests/__tests__/integration/search.test.ts
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
import { DeepSeekService } from '../../../lib/services/deepseekService';
|
||||
import { createClient } from '@supabase/supabase-js';
|
||||
import { SearchService } from '../../../lib/services/searchService';
|
||||
import { Business } from '../../../lib/types';
|
||||
|
||||
// Mock external services
|
||||
jest.mock('@supabase/supabase-js');
|
||||
jest.mock('../../../lib/services/deepseekService');
|
||||
|
||||
describe('Search Integration', () => {
|
||||
const mockBusiness: Business = {
|
||||
id: 'test_1',
|
||||
name: "Denver's Best Plumbing",
|
||||
address: "1234 Main Street, Denver, CO 80202",
|
||||
phone: "(720) 555-1234",
|
||||
email: "support@denverplumbing.com",
|
||||
description: "Professional plumbing services",
|
||||
source: 'test',
|
||||
website: 'https://example.com',
|
||||
rating: 4.8,
|
||||
location: { lat: 39.7392, lng: -104.9903 },
|
||||
openingHours: []
|
||||
};
|
||||
|
||||
// Mock Supabase responses
|
||||
const mockSupabase = {
|
||||
from: jest.fn().mockReturnValue({
|
||||
insert: jest.fn().mockReturnValue({
|
||||
select: jest.fn().mockResolvedValue({
|
||||
data: [mockBusiness],
|
||||
error: null
|
||||
})
|
||||
}),
|
||||
select: jest.fn().mockReturnValue({
|
||||
eq: jest.fn().mockReturnValue({
|
||||
single: jest.fn().mockResolvedValue({
|
||||
data: null,
|
||||
error: null
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
(createClient as jest.Mock).mockReturnValue(mockSupabase);
|
||||
});
|
||||
|
||||
describe('Search and Store Flow', () => {
|
||||
it('should search, clean, and store business data', async () => {
|
||||
const searchService = new SearchService();
|
||||
const query = 'plumber in Denver';
|
||||
const location = 'Denver, CO';
|
||||
|
||||
// Mock performSearch to return results
|
||||
const performSearchSpy = jest.spyOn(searchService as any, 'performSearch')
|
||||
.mockResolvedValue([mockBusiness]);
|
||||
|
||||
// Perform search
|
||||
const results = await searchService.search(query, location);
|
||||
|
||||
// Verify search results
|
||||
expect(results).toBeTruthy();
|
||||
expect(Array.isArray(results)).toBe(true);
|
||||
expect(results[0]).toEqual(mockBusiness);
|
||||
|
||||
// Verify cache was checked first
|
||||
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
|
||||
|
||||
// Verify results were cached
|
||||
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
|
||||
expect(mockSupabase.from().insert).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle search errors gracefully', async () => {
|
||||
const searchService = new SearchService();
|
||||
|
||||
// Mock performSearch to throw error
|
||||
jest.spyOn(searchService as any, 'performSearch')
|
||||
.mockRejectedValue(new Error('Search failed'));
|
||||
|
||||
await expect(searchService.search('invalid query', 'invalid location'))
|
||||
.rejects.toThrow('Search failed');
|
||||
});
|
||||
|
||||
it('should use cache when available', async () => {
|
||||
const searchService = new SearchService();
|
||||
const query = 'plumber in Denver';
|
||||
const location = 'Denver, CO';
|
||||
|
||||
// Mock cache hit
|
||||
mockSupabase.from.mockReturnValueOnce({
|
||||
select: jest.fn().mockReturnValue({
|
||||
eq: jest.fn().mockReturnValue({
|
||||
single: jest.fn().mockResolvedValue({
|
||||
data: { value: [mockBusiness] },
|
||||
error: null
|
||||
})
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
const results = await searchService.search(query, location);
|
||||
|
||||
// Verify cache was checked
|
||||
expect(mockSupabase.from).toHaveBeenCalledWith('cache');
|
||||
expect(results).toEqual([mockBusiness]);
|
||||
|
||||
// Verify performSearch was not called
|
||||
expect(jest.spyOn(searchService as any, 'performSearch')).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle rate limiting', async () => {
|
||||
const searchService = new SearchService();
|
||||
|
||||
// Mock performSearch to throw rate limit error
|
||||
jest.spyOn(searchService as any, 'performSearch')
|
||||
.mockRejectedValue({ response: { status: 429 } });
|
||||
|
||||
const query = 'plumber in Denver';
|
||||
const location = 'Denver, CO';
|
||||
|
||||
await expect(searchService.search(query, location))
|
||||
.rejects.toThrow('Rate limit exceeded');
|
||||
});
|
||||
});
|
||||
|
||||
describe('Data Consistency', () => {
|
||||
it('should maintain data consistency between search and retrieval', async () => {
|
||||
const searchService = new SearchService();
|
||||
const query = 'plumber in Denver';
|
||||
const location = 'Denver, CO';
|
||||
|
||||
// Mock performSearch to return results
|
||||
jest.spyOn(searchService as any, 'performSearch')
|
||||
.mockResolvedValue([mockBusiness]);
|
||||
|
||||
// Perform search
|
||||
const searchResults = await searchService.search(query, location);
|
||||
const firstResult = searchResults[0];
|
||||
|
||||
// Mock database retrieval
|
||||
mockSupabase.from.mockReturnValueOnce({
|
||||
select: jest.fn().mockReturnValue({
|
||||
eq: jest.fn().mockReturnValue({
|
||||
single: jest.fn().mockResolvedValue({
|
||||
data: firstResult,
|
||||
error: null
|
||||
})
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
// Retrieve the same business
|
||||
const retrieved = await searchService.getBusinessById(firstResult.id);
|
||||
|
||||
// Verify data consistency
|
||||
expect(retrieved).toEqual(firstResult);
|
||||
});
|
||||
});
|
||||
});
|
||||
22
src/tests/setup.ts
Normal file
22
src/tests/setup.ts
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import dotenv from 'dotenv';
|
||||
|
||||
// Jest global setup file: configures the environment and lifecycle hooks
// shared by every test suite (wired via jest's setupFilesAfterEnv —
// confirm against jest.config).

// Load environment variables for testing
dotenv.config({ path: '.env.test' });

// Set default timeout for all tests (10s — some suites hit async services)
jest.setTimeout(10000);

// Global setup
beforeAll(() => {
  // Add any global setup here
});

// Global teardown
afterAll(() => {
  // Add any global cleanup here
});

// Reset mocks between tests so call counts never leak across cases
afterEach(() => {
  jest.clearAllMocks();
});
|
||||
94
src/tests/supabaseTest.ts
Normal file
94
src/tests/supabaseTest.ts
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
import '../config/env'; // Load env vars first
|
||||
import { CacheService } from '../lib/services/cacheService';
|
||||
import type { PostgrestError } from '@supabase/supabase-js';
|
||||
import { env } from '../config/env';
|
||||
|
||||
/**
 * Smoke-tests the CacheService-backed Supabase integration end to end:
 * write → read → update against the search_cache table. On any failure it
 * prints troubleshooting guidance and exits the process with code 1.
 */
async function testSupabaseConnection() {
  console.log('\n🔍 Testing Supabase Connection...');
  console.log('Using Supabase URL:', env.supabase.url);

  try {
    // Test data — a single synthetic business row under a throwaway
    // category/location pair.
    const testData = {
      category: 'test_category',
      location: 'test_location',
      results: [{
        name: 'Test Business',
        phone: '123-456-7890',
        email: 'test@example.com',
        address: '123 Test St, Test City, TS 12345',
        rating: 95,
        website: 'https://test.com',
        logo: '',
        source: 'test',
        description: 'Test business description'
      }]
    };

    // Step 1: write through CacheService with the configured TTL.
    console.log('\n1️⃣ Testing write operation...');
    await CacheService.cacheResults(
      testData.category,
      testData.location,
      testData.results,
      env.cache.durationDays
    );
    console.log('✅ Write successful');

    // Step 2: read the row back by the same category/location key.
    console.log('\n2️⃣ Testing read operation...');
    const cachedResults = await CacheService.getCachedResults(
      testData.category,
      testData.location
    );

    if (cachedResults && cachedResults.length > 0) {
      console.log('✅ Read successful');
      console.log('\nCached data:', JSON.stringify(cachedResults[0], null, 2));
    } else {
      throw new Error('No results found in cache');
    }

    // Step 3: mutate the rating and push the update.
    console.log('\n3️⃣ Testing update operation...');
    // Note: the spread copies the array but not the row objects, so this
    // also mutates testData.results[0] — harmless in this one-shot script.
    const updatedResults = [...testData.results];
    updatedResults[0].rating = 98;
    await CacheService.updateCache(
      testData.category,
      testData.location,
      updatedResults
    );
    console.log('✅ Update successful');

    console.log('\n✨ All tests passed! Supabase connection is working properly.\n');

  } catch (error: unknown) {
    console.error('\n❌ Test failed:');

    if (error instanceof Error) {
      console.error('Error message:', error.message);

      // Check if it's a Supabase error by looking at the shape of the error object
      const isSupabaseError = (err: any): err is PostgrestError =>
        'code' in err && 'details' in err && 'hint' in err && 'message' in err;

      if (error.message.includes('connection') || isSupabaseError(error)) {
        console.log('\n📋 Troubleshooting steps:');
        console.log('1. Check if your SUPABASE_URL and SUPABASE_ANON_KEY are correct in .env');
        console.log('2. Verify that the search_cache table exists in your Supabase project');
        console.log('3. Check if RLS policies are properly configured');

        if (isSupabaseError(error)) {
          console.log('\nSupabase error details:');
          console.log('Code:', error.code);
          console.log('Details:', error.details);
          console.log('Hint:', error.hint);
        }
      }
    } else {
      console.error('Unknown error:', error);
    }

    // Non-zero exit so CI/scripts can detect the failure.
    process.exit(1);
  }
}
|
||||
|
||||
// Run the test
|
||||
testSupabaseConnection();
|
||||
43
src/tests/testDeepseek.ts
Normal file
43
src/tests/testDeepseek.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
import { DeepSeekService } from '../lib/services/deepseekService';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
async function testDeepseekService() {
|
||||
const service = new DeepSeekService();
|
||||
|
||||
try {
|
||||
console.log('Starting DeepSeek test...');
|
||||
console.log('Base URL:', process.env.OLLAMA_URL || 'http://localhost:11434');
|
||||
|
||||
const testQuery = {
|
||||
role: "user",
|
||||
content: "Find plumbers in Denver, CO. You must return exactly 10 results in valid JSON format, sorted by rating from highest to lowest. Each result must include a rating between 1-5 stars. Do not include any comments or explanations in the JSON."
|
||||
};
|
||||
|
||||
console.log('Sending test query:', testQuery);
|
||||
|
||||
const response = await service.chat([testQuery]);
|
||||
|
||||
console.log('\nTest successful!');
|
||||
console.log('Parsed response:', JSON.stringify(response, null, 2));
|
||||
|
||||
} catch (error) {
|
||||
console.error('\nTest failed!');
|
||||
if (error instanceof Error) {
|
||||
console.error('Error message:', error.message);
|
||||
console.error('Stack trace:', error.stack);
|
||||
} else {
|
||||
console.error('Unknown error:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run the test
|
||||
console.log('=== Starting DeepSeek Service Test ===\n');
|
||||
testDeepseekService().then(() => {
|
||||
console.log('\n=== Test Complete ===');
|
||||
}).catch(error => {
|
||||
console.error('\n=== Test Failed ===');
|
||||
console.error(error);
|
||||
});
|
||||
47
src/tests/testOllama.ts
Normal file
47
src/tests/testOllama.ts
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import axios from 'axios';
|
||||
import dotenv from 'dotenv';
|
||||
|
||||
dotenv.config();
|
||||
|
||||
async function testOllamaConnection() {
|
||||
const baseUrl = process.env.OLLAMA_URL || 'http://localhost:11434';
|
||||
|
||||
console.log('Testing Ollama connection...');
|
||||
console.log('Base URL:', baseUrl);
|
||||
|
||||
try {
|
||||
// Simple test request
|
||||
const response = await axios.post(`${baseUrl}/api/chat`, {
|
||||
model: 'deepseek-coder:6.7b',
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: 'Return a simple JSON array with one object: {"test": "success"}'
|
||||
}],
|
||||
stream: false
|
||||
});
|
||||
|
||||
console.log('\nResponse received:');
|
||||
console.log('Status:', response.status);
|
||||
console.log('Data:', JSON.stringify(response.data, null, 2));
|
||||
|
||||
} catch (error) {
|
||||
console.error('Connection test failed:');
|
||||
if (axios.isAxiosError(error)) {
|
||||
console.error('Network error:', error.message);
|
||||
if (error.response) {
|
||||
console.error('Response status:', error.response.status);
|
||||
console.error('Response data:', error.response.data);
|
||||
}
|
||||
} else {
|
||||
console.error('Error:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log('=== Starting Ollama Connection Test ===\n');
|
||||
testOllamaConnection().then(() => {
|
||||
console.log('\n=== Test Complete ===');
|
||||
}).catch(error => {
|
||||
console.error('\n=== Test Failed ===');
|
||||
console.error(error);
|
||||
});
|
||||
26
src/tests/testSearch.ts
Normal file
26
src/tests/testSearch.ts
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import { searchSearxng } from '../lib/searxng';
|
||||
|
||||
async function testSearchEngine() {
|
||||
try {
|
||||
console.log('Testing SearxNG connection...');
|
||||
|
||||
const results = await searchSearxng('plumbers in Denver', {
|
||||
engines: ['google', 'bing', 'duckduckgo'],
|
||||
pageno: 1
|
||||
});
|
||||
|
||||
if (results && results.results && results.results.length > 0) {
|
||||
console.log('✅ Search successful!');
|
||||
console.log('Number of results:', results.results.length);
|
||||
console.log('First result:', results.results[0]);
|
||||
} else {
|
||||
console.log('❌ No results found');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('❌ Search test failed:', error);
|
||||
console.error('Make sure SearxNG is running on http://localhost:4000');
|
||||
}
|
||||
}
|
||||
|
||||
testSearchEngine();
|
||||
28
src/types/business.ts
Normal file
28
src/types/business.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
/**
 * A single business listing as cached and returned by the API.
 */
export interface Business {
  id: string;                    // unique identifier for the listing
  name: string;
  phone: string;
  address: string;               // street address; city/state/zip are separate fields
  city: string;
  state: string;
  zip: string;
  category: string[];            // one business can belong to several categories
  rating: number;
  reviewCount: number;
  license?: string;              // professional license number, when known
  services: string[];
  hours: Record<string, string>; // presumably day-name -> hours string — TODO confirm against data source
  website?: string;
  email?: string;
  verified: boolean;             // whether the listing has been verified
  lastUpdated: Date;
}

/**
 * Filter/sort options accepted by business search endpoints.
 * All fields except `location` are optional refinements.
 */
export interface SearchParams {
  location: string;
  category?: string;
  radius?: number;               // search radius; unit not specified here — TODO confirm (miles vs km)
  minRating?: number;
  sortBy?: 'rating' | 'distance' | 'reviewCount';
  verified?: boolean;            // when set, restrict to verified listings only
}
|
||||
18
src/utils/portCheck.ts
Normal file
18
src/utils/portCheck.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import net from 'net';
|
||||
|
||||
export function isPortAvailable(port: number | string): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
const server = net.createServer();
|
||||
|
||||
server.once('error', () => {
|
||||
resolve(false);
|
||||
});
|
||||
|
||||
server.once('listening', () => {
|
||||
server.close();
|
||||
resolve(true);
|
||||
});
|
||||
|
||||
server.listen(port);
|
||||
});
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue