import { Actor, log } from 'apify'; import { GoogleGenerativeAI } from '@google/generative-ai'; import { createRequire } from 'module'; import { integrateActorData, SUPPORTED_ACTORS, USE_CASE_TEMPLATES, getTemplate, listSupportedActors, listTemplates } from './integrations.js'; import { addEmbeddingsToRecords, generateRandomEmbedding, EMBEDDING_MODELS } from './embeddings.js'; import { MemorySession, saveToMemorySession, loadFromMemorySession } from '../../../shared/memory-persistence.js'; // CJS import workaround for RuvLLM native extension const require = createRequire(import.meta.url); let ruvllm = null; let sonaCoordinator = null; let trajectoryBuilder = null; // Safe Actor.charge helper - gracefully handles cases where monetization isn't set up async function safeCharge(eventName, count = 1) { try { await Actor.charge({ eventName, count }); } catch (e) { // Silently ignore charging errors - monetization may not be configured log.debug?.(`Charge skipped for ${eventName}: ${e.message}`); } } try { ruvllm = require('@ruvector/ruvllm'); log.info('RuvLLM loaded successfully - TRM/SONA self-learning enabled'); } catch (e) { log.warning(`RuvLLM not available: ${e.message}. Using standard generation.`); } // Initialize Actor await Actor.init(); try { // Get input const input = await Actor.getInput() || {}; const { // Mode selection mode = 'generate', // Integration parameters integrateActorId, integrateRunId = 'latest', integrateDatasetId, memorizeFields = [], useTemplate, // Output options webhookUrl, generateEmbeddings = false, // Core parameters dataType = 'ecommerce', count = 100, schema = {}, timeSeriesConfig = {}, eventTypes = ['page_view', 'click', 'scroll', 'form_submit'], embeddingDimensions = 384, provider = 'openrouter', apiKey, openrouterApiKey, geminiApiKey, anthropicApiKey, model = 'deepseek/deepseek-chat', outputFormat = 'json', seed, quality = 0.8, // Web scraping specific options websiteType = 'ecommerce', apiEndpoint = '/api/products', simulationMode = false, batchSize = 100, delayBetweenBatches = 0, // SONA/TRM parameters sonaEnabled = true, ewcLambda = 2000, patternThreshold = 0.7, sonaLearningTiers = ['instant', 'background'], // ONNX Embedding parameters useOnnxEmbeddings = true, embeddingModel = 'all-MiniLM-L6-v2', // Crunchbase/Grounding parameters crunchbaseCompanies = [], crunchbaseIndustry = null, // Memory Session parameters memorySessionId = null, memorySessionEnabled = false, appendToSession = true } = input; log.info('AI Synthetic Data Generator v2.5 with ONNX Embeddings & TRM/SONA', { mode, dataType, count, provider, model, sonaEnabled, useOnnxEmbeddings, embeddingModel }); // Initialize SONA if available and enabled if (ruvllm && sonaEnabled) { try { if (ruvllm.SonaCoordinator) { sonaCoordinator = new ruvllm.SonaCoordinator({ tiers: sonaLearningTiers, ewcLambda, patternThreshold }); log.info('SONA Coordinator initialized', { tiers: sonaLearningTiers, ewcLambda }); } if (ruvllm.TrajectoryBuilder) { trajectoryBuilder = new ruvllm.TrajectoryBuilder({ maxSteps: 100 }); log.info('Trajectory Builder initialized'); } // Charge for SONA learning session await safeCharge('sona-learning-session', 1); } catch (e) { log.warning(`SONA initialization failed: ${e.message}`); } } // Check for API key based on provider - support both new separate fields and legacy apiKey // Gemini key also needed for Crunchbase grounding regardless of provider const geminiKey = (provider === 'gemini' || dataType === 'crunchbase') ? (geminiApiKey || apiKey || process.env.GEMINI_API_KEY) : null; const openRouterKey = provider === 'openrouter' ? (openrouterApiKey || apiKey || process.env.OPENROUTER_API_KEY) : null; const anthropicKey = provider === 'anthropic' ? (anthropicApiKey || apiKey || process.env.ANTHROPIC_API_KEY) : null; if (provider === 'gemini' && !geminiKey) { log.warning('No Gemini API key provided. Using algorithmic generation (still produces great data!)'); } if (provider === 'openrouter' && !openRouterKey) { log.warning('No OpenRouter API key provided. Using algorithmic generation.'); } if (provider === 'anthropic' && !anthropicKey) { log.warning('No Anthropic API key provided. Using algorithmic generation.'); } let generatedData = []; const startTime = Date.now(); // ============================================ // MODE HANDLING: generate, integrate, template // ============================================ if (mode === 'integrate' || mode === 'template') { // Integration mode - transform data from other Apify actors log.info(`Running in ${mode} mode`, { integrateActorId, useTemplate }); // Get template config if using template mode let templateConfig = null; let effectiveActorId = integrateActorId; let effectiveMemorizeFields = memorizeFields; if (mode === 'template' && useTemplate) { templateConfig = getTemplate(useTemplate); log.info(`Using template: ${templateConfig.name}`, { suggestedActors: templateConfig.suggestedActors }); // Use template defaults if not overridden if (!effectiveActorId && templateConfig.suggestedActors.length > 0) { effectiveActorId = templateConfig.suggestedActors[0]; log.info(`Using template's suggested actor: ${effectiveActorId}`); } if (effectiveMemorizeFields.length === 0) { effectiveMemorizeFields = templateConfig.memorizeFields || []; } // Charge for template execution await safeCharge('template-execution', 1); } // Fetch data from the actor's dataset let sourceData = []; if (integrateDatasetId) { // Direct dataset access log.info(`Fetching from dataset: ${integrateDatasetId}`); const dataset = await Actor.openDataset(integrateDatasetId, { forceCloud: true }); const { items } = await dataset.getData({ limit: count }); sourceData = items; } else if (effectiveActorId) { // Fetch from actor run log.info(`Fetching from actor: ${effectiveActorId}, run: ${integrateRunId}`); try { // Use Apify client to fetch last run's dataset const client = Actor.newClient(); let runInfo; if (integrateRunId === 'latest') { const runs = await client.actor(effectiveActorId).runs().list({ limit: 1 }); if (runs.items.length === 0) { throw new Error(`No runs found for actor ${effectiveActorId}`); } runInfo = runs.items[0]; } else { runInfo = await client.run(integrateRunId).get(); } if (runInfo && runInfo.defaultDatasetId) { const dataset = await client.dataset(runInfo.defaultDatasetId).listItems({ limit: count }); sourceData = dataset.items; log.info(`Fetched ${sourceData.length} items from ${effectiveActorId}`); } } catch (e) { log.error(`Failed to fetch data from ${effectiveActorId}: ${e.message}`); log.info('Generating synthetic data as fallback...'); // Fall back to synthetic data generation sourceData = []; } } if (sourceData.length > 0) { // Transform the data const result = await integrateActorData({ actorId: effectiveActorId, data: sourceData, memorizeFields: effectiveMemorizeFields, template: useTemplate, maxItems: count }); generatedData = result.data; // Charge for integration await safeCharge('actor-integration', 1); await safeCharge('integrated-record', generatedData.length); log.info(`Transformed ${generatedData.length} records from ${effectiveActorId}`); } else if (mode === 'template' && templateConfig) { // Generate synthetic data based on template output format log.info('No source data available, generating synthetic data based on template schema...'); const random = createSeededRandom(seed); generatedData = []; for (let i = 0; i < count; i++) { const record = generateFromTemplateSchema(templateConfig.outputFormat, random, i); generatedData.push(record); } } else { throw new Error('No data source specified. Provide integrateActorId or integrateDatasetId.'); } } else { // Generate mode - create synthetic data // Generate data based on type - optimized for web scraping use cases switch (dataType) { case 'demo': generatedData = await generateDemoData(count, geminiKey, model); break; case 'ecommerce': generatedData = await generateEcommerceData(count, seed); break; case 'social': generatedData = await generateSocialMediaData(count, seed); break; case 'api_response': generatedData = await generateApiResponseData(count, apiEndpoint, seed); break; case 'search_results': generatedData = await generateSearchResultsData(count, seed); break; case 'real_estate': generatedData = await generateRealEstateData(count, seed); break; case 'jobs': generatedData = await generateJobListingsData(count, seed); break; case 'news': generatedData = await generateNewsData(count, seed); break; case 'structured': generatedData = await generateStructuredData(count, schema, geminiKey || openRouterKey || anthropicKey, model, seed, provider); break; case 'timeseries': generatedData = await generateTimeSeriesData(count, timeSeriesConfig, seed); break; case 'events': generatedData = await generateEventData(count, eventTypes, seed); break; case 'embeddings': generatedData = await generateEmbeddingData(count, embeddingDimensions, seed); break; // Enterprise/Company Simulators case 'stock_trading': generatedData = await generateStockTradingData(count, seed); break; case 'medical': generatedData = await generateMedicalData(count, seed); break; case 'company': generatedData = await generateCompanyData(count, seed); break; case 'supply_chain': generatedData = await generateSupplyChainData(count, seed); break; case 'financial': generatedData = await generateFinancialData(count, seed); break; case 'bloomberg': generatedData = await generateBloombergData(count, seed); break; case 'zoominfo': generatedData = await generateZoomInfoData(count, seed); break; case 'factset': generatedData = await generateFactSetData(count, seed); break; case 'lseg': generatedData = await generateLSEGData(count, seed); break; case 'crunchbase': generatedData = await generateCrunchbaseData(count, geminiKey, crunchbaseCompanies, crunchbaseIndustry); break; // PRIORITY 1: High-Value Exotic Data Types case 'eeg': generatedData = await generateEEGData(count, seed); break; case 'cgm': generatedData = await generateCGMData(count, seed); break; case 'siem': generatedData = await generateSIEMData(count, seed); break; case 'threat_intel': generatedData = await generateThreatIntelData(count, seed); break; case 'netflow': generatedData = await generateNetFlowData(count, seed); break; // PRIORITY 2: Industrial & Scientific Data Types case 'scada': generatedData = await generateSCADAData(count, seed); break; case 'lidar': generatedData = await generateLiDARData(count, seed); break; case 'canbus': generatedData = await generateCANBusData(count, seed); break; case 'genomic_vcf': generatedData = await generateGenomicVCFData(count, seed); break; case 'satellite': generatedData = await generateSatelliteData(count, seed); break; // PRIORITY 3: Exotic/Niche Data Types case 'fmri': generatedData = await generateFMRIData(count, seed); break; case 'protein_pdb': generatedData = await generateProteinPDBData(count, seed); break; case 'power_grid': generatedData = await generatePowerGridData(count, seed); break; case 'ais': generatedData = await generateAISData(count, seed); break; case 'radar': generatedData = await generateRadarData(count, seed); break; default: throw new Error(`Unknown data type: ${dataType}. Available: ecommerce, social, api_response, search_results, real_estate, jobs, news, structured, timeseries, events, embeddings, stock_trading, medical, company, supply_chain, financial, bloomberg, zoominfo, factset, lseg, crunchbase, eeg, cgm, siem, threat_intel, netflow, scada, lidar, canbus, genomic_vcf, satellite, fmri, protein_pdb, power_grid, ais, radar, demo`); } } // End of generate mode else block const generationTime = Date.now() - startTime; // ============================================ // EMBEDDING GENERATION (optional) // ============================================ if (generateEmbeddings && generatedData.length > 0) { const modelConfig = EMBEDDING_MODELS[embeddingModel] || EMBEDDING_MODELS['all-MiniLM-L6-v2']; const effectiveDimensions = useOnnxEmbeddings ? modelConfig.dimensions : embeddingDimensions; log.info(`Generating embeddings with ${effectiveDimensions} dimensions...`, { useOnnx: useOnnxEmbeddings, model: useOnnxEmbeddings ? embeddingModel : 'random' }); if (useOnnxEmbeddings) { // Use ONNX-powered semantic embeddings try { generatedData = await addEmbeddingsToRecords(generatedData, { modelName: embeddingModel }); log.info(`Added ONNX embeddings using ${embeddingModel} model`); await safeCharge('onnx-embedding-generation', generatedData.length); } catch (e) { log.warning(`ONNX embedding failed: ${e.message}. Falling back to random embeddings.`); // Fall back to random embeddings const random = createSeededRandom(seed); generatedData = generatedData.map((item) => ({ ...item, embedding: generateRandomEmbedding(effectiveDimensions, random), embeddingModel: 'random', embeddingDimensions: effectiveDimensions })); } } else { // Use random embeddings (faster, for testing) const random = createSeededRandom(seed); generatedData = generatedData.map((item) => ({ ...item, embedding: generateRandomEmbedding(effectiveDimensions, random), embeddingModel: 'random', embeddingDimensions: effectiveDimensions })); } // Charge for embedding generation await safeCharge('embedding-generation', generatedData.length); log.info(`Added embeddings to ${generatedData.length} records`); } // Track generation trajectory for SONA learning if (trajectoryBuilder && sonaEnabled) { try { // Use correct TrajectoryBuilder API: startStep -> endStep -> complete const stepId = trajectoryBuilder.startStep('generate', { dataType, count: generatedData.length, quality, seed: seed || 'random' }); trajectoryBuilder.endStep(stepId, { duration: generationTime, success: true, recordsGenerated: generatedData.length }); log.info('Generation trajectory tracked for SONA learning'); } catch (e) { log.warning(`Trajectory tracking failed: ${e.message}`); } } // SONA pattern learning from generated data with data-type specific training if (sonaCoordinator && sonaEnabled && generatedData.length > 0) { try { const sampleSize = Math.min(10, generatedData.length); const sample = generatedData.slice(0, sampleSize); // Record data-type specific patterns for neural training const dataTypePatterns = extractDataTypePatterns(dataType, sample); // Use correct SonaCoordinator API: recordSignal for instant learning sonaCoordinator.recordSignal({ type: 'generation_complete', dataType, samples: sample, quality, generationTime, count: generatedData.length, patterns: dataTypePatterns }); // Process instant learning tier with data-type optimization if (sonaLearningTiers.includes('instant')) { await sonaCoordinator.processInstantLearning(); } // Train neural patterns for this data type (use safe method detection) if (trajectoryBuilder && sonaLearningTiers.includes('background')) { const trainingData = { action: `generate_${dataType}`, observation: { quality, count: generatedData.length, time: generationTime }, reward: quality * (generationTime < 100 ? 1.0 : 0.8), patterns: dataTypePatterns }; // Try available trajectory methods const method = trajectoryBuilder.track || trajectoryBuilder.recordTrajectory || trajectoryBuilder.add; if (typeof method === 'function') { method.call(trajectoryBuilder, trainingData); } } log.info(`SONA recorded signal from ${sampleSize} samples`, { stats: sonaCoordinator.stats(), patterns: Object.keys(dataTypePatterns).length }); } catch (e) { log.warning(`SONA pattern learning failed: ${e.message}`); } } // Helper function to extract data-type specific patterns for training function extractDataTypePatterns(type, samples) { const patterns = {}; if (!samples || samples.length === 0) return patterns; switch (type) { case 'ecommerce': patterns.priceRange = { min: Math.min(...samples.map(s => s.price || 0)), max: Math.max(...samples.map(s => s.price || 0)) }; patterns.ratingDistribution = samples.reduce((acc, s) => { acc[Math.floor(s.rating || 0)] = (acc[Math.floor(s.rating || 0)] || 0) + 1; return acc; }, {}); patterns.categoryFreq = samples.reduce((acc, s) => { acc[s.category] = (acc[s.category] || 0) + 1; return acc; }, {}); break; case 'bloomberg': patterns.sectorDistribution = samples.reduce((acc, s) => { acc[s.security?.sector] = (acc[s.security?.sector] || 0) + 1; return acc; }, {}); patterns.recommendationFreq = samples.reduce((acc, s) => { acc[s.consensus?.recommendation] = (acc[s.consensus?.recommendation] || 0) + 1; return acc; }, {}); patterns.avgVolume = samples.reduce((sum, s) => sum + (s.pricing?.volume || 0), 0) / samples.length; break; case 'medical': patterns.severityDistribution = samples.reduce((acc, s) => { acc[s.diagnosis?.severity] = (acc[s.diagnosis?.severity] || 0) + 1; return acc; }, {}); patterns.avgAge = samples.reduce((sum, s) => sum + (s.patient?.age || 0), 0) / samples.length; break; case 'supply_chain': patterns.statusDistribution = samples.reduce((acc, s) => { acc[s.order?.status] = (acc[s.order?.status] || 0) + 1; return acc; }, {}); patterns.avgLeadTime = samples.reduce((sum, s) => sum + (s.supplier?.leadTime || 0), 0) / samples.length; break; default: patterns.recordCount = samples.length; } return patterns; } log.info(`Generated ${generatedData.length} records in ${generationTime}ms`); // Charge custom events based on data type const eventMap = { 'ecommerce': 'ecommerce-product', 'social': 'social-media-post', 'jobs': 'job-listing', 'real_estate': 'real-estate-listing', 'search_results': 'search-result', 'api_response': 'api-mock-response', 'news': 'news-article', // Enterprise data types 'stock_trading': 'stock-trading-record', 'medical': 'medical-record', 'company': 'company-record', 'supply_chain': 'supply-chain-record', 'financial': 'financial-record', 'bloomberg': 'bloomberg-terminal-record' }; // Simulation mode - push in batches with delays if (simulationMode && delayBetweenBatches > 0) { log.info(`Simulation mode: pushing ${batchSize} records every ${delayBetweenBatches}ms`); // Charge for simulation session await safeCharge('simulation-session', 1); const totalBatches = Math.ceil(generatedData.length / batchSize); for (let i = 0; i < generatedData.length; i += batchSize) { const batch = generatedData.slice(i, i + batchSize); const batchNum = Math.floor(i / batchSize) + 1; await Actor.pushData(batch.map((item, idx) => ({ id: i + idx + 1, type: dataType, data: item, metadata: { generatedAt: new Date().toISOString(), provider, model, quality, seed: seed || 'random', batch: batchNum, totalBatches, simulationMode: true } }))); // Charge for simulation batch await safeCharge('simulation-batch', 1); log.info(`Pushed batch ${batchNum}/${totalBatches}`); if (i + batchSize < generatedData.length) { await new Promise(resolve => setTimeout(resolve, delayBetweenBatches)); } } } else { // Push all results at once await Actor.pushData(generatedData.map((item, index) => ({ id: index + 1, type: mode === 'generate' ? dataType : mode, data: item, metadata: { generatedAt: new Date().toISOString(), mode, dataType: mode === 'generate' ? dataType : null, actorId: integrateActorId || null, template: useTemplate || null, provider, model, quality, seed: seed || 'random', hasEmbedding: generateEmbeddings } }))); // Charge for data type specific events const eventName = eventMap[dataType]; if (eventName && mode === 'generate') { await safeCharge(eventName, generatedData.length); log.info(`Charged ${generatedData.length} ${eventName} events`); } // Charge for AI-enhanced records if using AI if ((geminiKey || openRouterKey || anthropicKey) && dataType === 'structured') { await safeCharge('ai-enhanced-record', generatedData.length); log.info(`Charged ${generatedData.length} AI-enhanced events`); } } log.info(`Pushed ${generatedData.length} records to dataset`); // ============================================ // MEMORY SESSION PERSISTENCE (optional) // ============================================ let memorySessionResult = null; if (memorySessionEnabled && memorySessionId) { try { log.info(`Saving to memory session: ${memorySessionId}`); const session = new MemorySession(memorySessionId, { actorName: 'agentic-synth' }); await session.init(); // Load existing memories if appending if (appendToSession) { await session.load(); log.info(`Loaded ${session.getMemories().length} existing memories`); } // Add generated data to session const memoryRecords = generatedData.map((item, index) => ({ id: `synth_${Date.now()}_${index}`, text: typeof item === 'string' ? item : JSON.stringify(item).substring(0, 500), data: item, type: mode === 'generate' ? dataType : mode, embedding: item.embedding || null, metadata: { generatedAt: new Date().toISOString(), mode, dataType: mode === 'generate' ? dataType : null, actorId: integrateActorId || null, template: useTemplate || null, provider, model } })); await session.addBatch(memoryRecords); await session.save(); memorySessionResult = { sessionId: memorySessionId, totalMemories: session.getMemories().length, addedMemories: memoryRecords.length, metadata: session.getMetadata() }; log.info(`Saved ${memoryRecords.length} records to memory session ${memorySessionId}`); log.info(`Total memories in session: ${session.getMemories().length}`); } catch (e) { log.warning(`Memory session save failed: ${e.message}`); memorySessionResult = { error: e.message }; } } // ============================================ // WEBHOOK NOTIFICATION (optional) // ============================================ if (webhookUrl) { log.info(`Sending webhook to: ${webhookUrl}`); try { const webhookPayload = { actorId: 'ruv/ai-synthetic-data-generator', runId: process.env.APIFY_ACTOR_RUN_ID, status: 'success', mode, dataType: mode === 'generate' ? dataType : null, template: useTemplate || null, integrateActorId: integrateActorId || null, totalRecords: generatedData.length, generationTime, hasEmbeddings: generateEmbeddings, datasetId: process.env.APIFY_DEFAULT_DATASET_ID, memorySession: memorySessionResult, timestamp: new Date().toISOString() }; const response = await fetch(webhookUrl, { method: 'POST', headers: { 'Content-Type': 'application/json', 'User-Agent': 'Apify-AI-Synthetic-Data-Generator/2.5' }, body: JSON.stringify(webhookPayload) }); if (response.ok) { log.info('Webhook notification sent successfully'); await safeCharge('webhook-notification', 1); } else { log.warning(`Webhook failed with status: ${response.status}`); } } catch (e) { log.warning(`Webhook notification failed: ${e.message}`); } } } catch (error) { log.error('Actor failed', { error: error.message }); throw error; } finally { await Actor.exit(); } // ============================================ // WEB SCRAPING FOCUSED GENERATORS // ============================================ async function generateEcommerceData(count, seed) { log.info('Generating e-commerce product data...'); const random = createSeededRandom(seed); const results = []; // Category-matched brands for realistic data const categoryBrands = { 'Electronics': ['Samsung', 'Sony', 'Apple', 'LG', 'Bose', 'JBL', 'Anker', 'Logitech'], 'Clothing': ['Nike', 'Adidas', 'Zara', 'H&M', 'Levi\'s', 'Gap', 'Uniqlo', 'Calvin Klein'], 'Home & Garden': ['IKEA', 'Pottery Barn', 'West Elm', 'Crate & Barrel', 'HomeGoods', 'Wayfair'], 'Sports': ['Nike', 'Under Armour', 'Adidas', 'Puma', 'Wilson', 'Spalding', 'Callaway'], 'Books': ['Penguin', 'HarperCollins', 'Simon & Schuster', 'Random House', 'Scholastic'], 'Toys': ['LEGO', 'Hasbro', 'Mattel', 'Fisher-Price', 'Melissa & Doug', 'Nerf'], 'Beauty': ['L\'Oreal', 'Maybelline', 'Neutrogena', 'Olay', 'Revlon', 'CeraVe', 'The Ordinary'], 'Automotive': ['Bosch', 'Michelin', 'Goodyear', 'Mobil', 'Castrol', 'WeatherTech', 'AutoZone'] }; const categories = Object.keys(categoryBrands); const conditions = ['New', 'Used - Like New', 'Used - Good', 'Refurbished']; for (let i = 0; i < count; i++) { const category = categories[Math.floor(random() * categories.length)]; const brandsForCategory = categoryBrands[category]; const brand = brandsForCategory[Math.floor(random() * brandsForCategory.length)]; const basePrice = 10 + random() * 990; const hasDiscount = random() > 0.6; // Consistent stock logic: if stockCount is 0, inStock is false const stockCount = Math.floor(random() * 500); const inStock = stockCount > 0 && random() > 0.1; // Consistent shipping logic: free shipping means price is 0 const isFreeShipping = random() > 0.4; const shippingPrice = isFreeShipping ? 0 : Math.round((5 + random() * 10) * 100) / 100; results.push({ url: `https://example-store.com/products/${generateSlug(random)}-${i}`, title: `${brand} ${generateProductName(category, random)}`, price: Math.round(basePrice * 100) / 100, originalPrice: hasDiscount ? Math.round(basePrice * (1.1 + random() * 0.4) * 100) / 100 : null, currency: 'USD', category, brand, rating: Math.round((3 + random() * 2) * 10) / 10, reviewCount: Math.floor(random() * 5000), inStock, stockCount: inStock ? stockCount : 0, condition: conditions[Math.floor(random() * conditions.length)], seller: { name: `Seller${Math.floor(random() * 1000)}`, rating: Math.round((3.5 + random() * 1.5) * 10) / 10, totalSales: Math.floor(random() * 50000) }, shipping: { free: isFreeShipping, estimatedDays: Math.floor(2 + random() * 8), price: shippingPrice }, images: Array.from({ length: Math.floor(1 + random() * 5) }, (_, j) => `https://example-store.com/images/product-${i}-${j}.jpg` ), specifications: generateSpecs(category, random), scrapedAt: new Date().toISOString() }); } return results; } async function generateSocialMediaData(count, seed) { log.info('Generating social media data...'); const random = createSeededRandom(seed); const results = []; const platforms = ['twitter', 'instagram', 'facebook', 'linkedin', 'tiktok']; const postTypes = ['text', 'image', 'video', 'link', 'poll']; for (let i = 0; i < count; i++) { const platform = platforms[Math.floor(random() * platforms.length)]; const postType = postTypes[Math.floor(random() * postTypes.length)]; const timestamp = new Date(Date.now() - random() * 30 * 24 * 60 * 60 * 1000); results.push({ url: `https://${platform}.com/post/${generateId(random)}`, platform, postType, author: { username: `user_${generateId(random)}`, displayName: generateName(random), verified: random() > 0.85, followers: Math.floor(random() * 1000000), following: Math.floor(random() * 5000), profileUrl: `https://${platform}.com/user_${generateId(random)}` }, content: { text: generateSocialText(random), hashtags: Array.from({ length: Math.floor(random() * 6) }, () => `#${generateHashtag(random)}`), mentions: Array.from({ length: Math.floor(random() * 3) }, () => `@user_${generateId(random)}`), mediaUrls: postType !== 'text' ? [`https://${platform}.com/media/${generateId(random)}.jpg`] : [] }, engagement: { likes: Math.floor(random() * 100000), comments: Math.floor(random() * 5000), shares: Math.floor(random() * 10000), views: Math.floor(random() * 1000000) }, timestamp: timestamp.toISOString(), scrapedAt: new Date().toISOString() }); } return results; } async function generateApiResponseData(count, endpoint, seed) { log.info('Generating API response data...', { endpoint }); const random = createSeededRandom(seed); const results = []; for (let i = 0; i < count; i++) { const statusCodes = [200, 200, 200, 200, 201, 400, 401, 404, 500]; const statusCode = statusCodes[Math.floor(random() * statusCodes.length)]; results.push({ endpoint: `${endpoint}/${i}`, method: 'GET', statusCode, headers: { 'content-type': 'application/json', 'x-request-id': generateId(random), 'x-rate-limit-remaining': Math.floor(random() * 1000), 'cache-control': random() > 0.5 ? 'max-age=3600' : 'no-cache' }, responseTime: Math.floor(50 + random() * 500), body: statusCode < 400 ? { id: generateId(random), data: generateRandomObject(random), pagination: { page: 1, perPage: 20, total: Math.floor(random() * 10000), hasMore: random() > 0.3 } } : { error: { code: `ERR_${statusCode}`, message: getErrorMessage(statusCode) } }, timestamp: new Date().toISOString() }); } return results; } async function generateSearchResultsData(count, seed) { log.info('Generating search results data...'); const random = createSeededRandom(seed); const results = []; const domains = ['example.com', 'blog.example.org', 'news.example.net', 'shop.example.io', 'docs.example.dev']; for (let i = 0; i < count; i++) { const domain = domains[Math.floor(random() * domains.length)]; results.push({ position: i + 1, url: `https://${domain}/${generateSlug(random)}`, title: generateSearchTitle(random), snippet: generateSnippet(random), domain, displayUrl: `${domain} > ${generateBreadcrumb(random)}`, type: random() > 0.8 ? 'featured' : 'organic', sitelinks: random() > 0.7 ? Array.from({ length: Math.floor(2 + random() * 4) }, () => ({ title: generateSearchTitle(random), url: `https://${domain}/${generateSlug(random)}` })) : null, rich_snippet: random() > 0.6 ? { rating: Math.round((3 + random() * 2) * 10) / 10, reviewCount: Math.floor(random() * 10000), price: random() > 0.5 ? `$${Math.floor(10 + random() * 500)}` : null } : null, scrapedAt: new Date().toISOString() }); } return results; } async function generateRealEstateData(count, seed) { log.info('Generating real estate listing data...'); const random = createSeededRandom(seed); const results = []; const propertyTypes = ['House', 'Apartment', 'Condo', 'Townhouse', 'Land', 'Commercial']; const cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'San Diego', 'Dallas', 'Austin']; const listingTypes = ['For Sale', 'For Rent', 'Auction']; for (let i = 0; i < count; i++) { const propertyType = propertyTypes[Math.floor(random() * propertyTypes.length)]; const city = cities[Math.floor(random() * cities.length)]; const listingType = listingTypes[Math.floor(random() * listingTypes.length)]; const bedrooms = Math.floor(1 + random() * 6); const sqft = Math.floor(500 + random() * 4500); results.push({ url: `https://realestate-example.com/listing/${generateId(random)}`, listingId: generateId(random), title: `${bedrooms} Bed ${propertyType} in ${city}`, price: Math.floor(100000 + random() * 2000000), listingType, propertyType, address: { street: `${Math.floor(100 + random() * 9900)} ${generateStreetName(random)}`, city, state: getState(city), zipCode: String(Math.floor(10000 + random() * 90000)), country: 'USA' }, details: { bedrooms, bathrooms: Math.floor(1 + random() * 4), sqft, lotSize: Math.floor(sqft * (1.5 + random() * 3)), yearBuilt: Math.floor(1950 + random() * 74), parking: Math.floor(random() * 4), stories: Math.floor(1 + random() * 3) }, features: generateRealEstateFeatures(random), agent: { name: generateName(random), phone: generatePhone(random), email: `agent${Math.floor(random() * 1000)}@realestate.com`, company: `${generateName(random)} Realty` }, images: Array.from({ length: Math.floor(5 + random() * 20) }, (_, j) => `https://realestate-example.com/images/listing-${i}-${j}.jpg` ), daysOnMarket: Math.floor(random() * 180), scrapedAt: new Date().toISOString() }); } return results; } async function generateJobListingsData(count, seed) { log.info('Generating job listings data...'); const random = createSeededRandom(seed); const results = []; const titles = ['Software Engineer', 'Product Manager', 'Data Scientist', 'UX Designer', 'DevOps Engineer', 'Marketing Manager', 'Sales Representative', 'Customer Success Manager']; const companies = ['TechCorp', 'InnovateLabs', 'DataDriven Inc', 'CloudScale', 'StartupXYZ', 'Enterprise Solutions', 'Digital Agency', 'Growth Partners']; const locations = ['Remote', 'New York, NY', 'San Francisco, CA', 'Austin, TX', 'Seattle, WA', 'Boston, MA', 'Chicago, IL', 'Los Angeles, CA']; const types = ['Full-time', 'Part-time', 'Contract', 'Internship']; for (let i = 0; i < count; i++) { const title = titles[Math.floor(random() * titles.length)]; const company = companies[Math.floor(random() * companies.length)]; const location = locations[Math.floor(random() * locations.length)]; const salaryMin = Math.floor(50000 + random() * 100000); results.push({ url: `https://jobs-example.com/job/${generateId(random)}`, jobId: generateId(random), title, company: { name: company, logo: `https://jobs-example.com/logos/${company.toLowerCase().replace(/\s/g, '-')}.png`, rating: Math.round((3 + random() * 2) * 10) / 10, reviewCount: Math.floor(random() * 5000), size: ['1-50', '51-200', '201-500', '501-1000', '1000+'][Math.floor(random() * 5)] }, location, remote: location === 'Remote' || random() > 0.7, type: types[Math.floor(random() * types.length)], salary: { min: salaryMin, max: salaryMin + Math.floor(random() * 50000), currency: 'USD', period: 'yearly' }, description: generateJobDescription(random), requirements: Array.from({ length: Math.floor(3 + random() * 5) }, () => generateRequirement(random)), benefits: generateBenefits(random), postedDate: new Date(Date.now() - random() * 30 * 24 * 60 * 60 * 1000).toISOString(), applicants: Math.floor(random() * 500), scrapedAt: new Date().toISOString() }); } return results; } async function generateNewsData(count, seed) { log.info('Generating news article data...'); const random = createSeededRandom(seed); const results = []; const sources = ['TechNews', 'BusinessDaily', 'WorldReport', 'ScienceToday', 'HealthWatch', 'SportsCentral']; const categories = ['Technology', 'Business', 'Politics', 'Science', 'Health', 'Sports', 'Entertainment']; const authors = ['John Smith', 'Sarah Johnson', 'Mike Williams', 'Emily Brown', 'David Lee', 'Lisa Chen']; for (let i = 0; i < count; i++) { const source = sources[Math.floor(random() * sources.length)]; const category = categories[Math.floor(random() * categories.length)]; const publishDate = new Date(Date.now() - random() * 7 * 24 * 60 * 60 * 1000); results.push({ url: `https://${source.toLowerCase()}.com/article/${generateSlug(random)}`, title: generateNewsTitle(category, random), subtitle: generateSubtitle(random), source, category, author: { name: authors[Math.floor(random() * authors.length)], url: `https://${source.toLowerCase()}.com/author/${generateSlug(random)}` }, publishedAt: publishDate.toISOString(), updatedAt: random() > 0.7 ? new Date(publishDate.getTime() + random() * 24 * 60 * 60 * 1000).toISOString() : null, content: { text: generateArticleContent(random), wordCount: Math.floor(300 + random() * 1500), readingTime: Math.floor(2 + random() * 10) }, images: [{ url: `https://${source.toLowerCase()}.com/images/article-${i}.jpg`, caption: generateCaption(random) }], tags: Array.from({ length: Math.floor(2 + random() * 5) }, () => generateTag(random)), engagement: { views: Math.floor(random() * 100000), comments: Math.floor(random() * 500), shares: Math.floor(random() * 2000) }, scrapedAt: new Date().toISOString() }); } return results; } // ============================================ // ORIGINAL GENERATORS (kept for compatibility) // ============================================ async function generateDemoData(count, apiKey, model) { log.info('Generating demo data with web scraping examples...'); const results = []; const perType = Math.ceil(count / 5); // E-commerce products const ecommerce = await generateEcommerceData(perType); results.push(...ecommerce.map(d => ({ ...d, _type: 'ecommerce' }))); // Social media posts const social = await generateSocialMediaData(perType); results.push(...social.map(d => ({ ...d, _type: 'social' }))); // Search results const search = await generateSearchResultsData(perType); results.push(...search.map(d => ({ ...d, _type: 'search_results' }))); // Job listings const jobs = await generateJobListingsData(perType); results.push(...jobs.map(d => ({ ...d, _type: 'jobs' }))); // News articles const news = await generateNewsData(perType); results.push(...news.map(d => ({ ...d, _type: 'news' }))); return results.slice(0, count); } async function generateStructuredData(count, schema, apiKey, model, seed, provider = 'gemini') { log.info('Generating structured data...', { count, schema, provider, model }); const results = []; const random = createSeededRandom(seed); if (apiKey && Object.keys(schema).length > 0) { try { const prompt = `Generate ${Math.min(count, 20)} unique records matching this schema: ${JSON.stringify(schema, null, 2)} Return ONLY a valid JSON array with no additional text. Each record should be realistic and diverse.`; let text; if (provider === 'openrouter') { // Use OpenRouter API (supports DeepSeek, GPT, Claude, Llama, etc.) const response = await fetch('https://openrouter.ai/api/v1/chat/completions', { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json', 'HTTP-Referer': 'https://apify.com', 'X-Title': 'AI Synthetic Data Generator' }, body: JSON.stringify({ model: model || 'deepseek/deepseek-chat', messages: [{ role: 'user', content: prompt }], temperature: 0.7 }) }); const data = await response.json(); text = data.choices?.[0]?.message?.content || ''; log.info('OpenRouter response received', { model }); } else if (provider === 'anthropic') { // Use Anthropic Claude API directly const response = await fetch('https://api.anthropic.com/v1/messages', { method: 'POST', headers: { 'x-api-key': apiKey, 'Content-Type': 'application/json', 'anthropic-version': '2023-06-01' }, body: JSON.stringify({ model: model || 'claude-3-5-haiku-20241022', max_tokens: 4096, messages: [{ role: 'user', content: prompt }] }) }); const data = await response.json(); text = data.content?.[0]?.text || ''; log.info('Anthropic response received', { model }); } else { // Use Gemini const genAI = new GoogleGenerativeAI(apiKey); const gemini = genAI.getGenerativeModel({ model: model || 'gemini-2.0-flash-exp' }); const result = await gemini.generateContent(prompt); text = result.response.text(); log.info('Gemini response received', { model }); } const jsonMatch = text.match(/\[[\s\S]*\]/); if (jsonMatch) { const parsed = JSON.parse(jsonMatch[0]); results.push(...parsed); log.info(`AI generated ${parsed.length} records`); } while (results.length < count) { results.push(generateFallbackStructured(schema, random)); } } catch (e) { log.warning(`AI generation failed: ${e.message}. Using fallback.`); for (let i = 0; i < count; i++) { results.push(generateFallbackStructured(schema, random)); } } } else { for (let i = 0; i < count; i++) { results.push(generateFallbackStructured(schema, random)); } } return results.slice(0, count); } function generateFallbackStructured(schema, random) { const record = {}; for (const [key, type] of Object.entries(schema)) { if (typeof type === 'string') { if (type.includes('url')) { record[key] = `https://example.com/${generateSlug(random)}`; } else if (type.includes('email')) { record[key] = `user${Math.floor(random() * 10000)}@example.com`; } else if (type.includes('fullName') || type.includes('name')) { record[key] = generateName(random); } else if (type.includes('number')) { const match = type.match(/\((\d+)-(\d+)\)/); if (match) { const min = parseInt(match[1]); const max = parseInt(match[2]); record[key] = min + Math.floor(random() * (max - min + 1)); } else { record[key] = Math.floor(random() * 100); } } else if (type.includes('boolean')) { record[key] = random() > 0.5; } else if (type.includes('(') && type.includes(',')) { const options = type.match(/\(([^)]+)\)/)?.[1].split(',').map(s => s.trim()) || ['Option1', 'Option2']; record[key] = options[Math.floor(random() * options.length)]; } else { record[key] = `value_${Math.floor(random() * 1000)}`; } } } return record; } async function generateTimeSeriesData(count, config, seed) { log.info('Generating time-series data...', { count, config }); const { interval = '1h', trend = 'flat', seasonality = false, noise = 0.1, startDate = '2024-01-01' } = config; const random = createSeededRandom(seed); const results = []; const start = new Date(startDate); const intervalMs = parseInterval(interval); let value = 100; const trendFactor = trend === 'upward' ? 0.01 : trend === 'downward' ? -0.01 : 0; for (let i = 0; i < count; i++) { const timestamp = new Date(start.getTime() + i * intervalMs); value *= (1 + trendFactor); let seasonalValue = value; if (seasonality) { const hour = timestamp.getHours(); const seasonalFactor = 1 + 0.2 * Math.sin((hour / 24) * 2 * Math.PI); seasonalValue = value * seasonalFactor; } const noiseValue = seasonalValue * (1 + (random() - 0.5) * 2 * noise); results.push({ timestamp: timestamp.toISOString(), value: Math.round(noiseValue * 100) / 100, open: Math.round(noiseValue * (1 - random() * 0.02) * 100) / 100, high: Math.round(noiseValue * (1 + random() * 0.03) * 100) / 100, low: Math.round(noiseValue * (1 - random() * 0.03) * 100) / 100, close: Math.round(noiseValue * (1 + (random() - 0.5) * 0.02) * 100) / 100, volume: Math.floor(random() * 1000000) }); } return results; } async function generateEventData(count, eventTypes, seed) { log.info('Generating web event data...', { count, eventTypes }); const random = createSeededRandom(seed); const results = []; const now = Date.now(); const dayMs = 24 * 60 * 60 * 1000; for (let i = 0; i < count; i++) { const eventType = eventTypes[Math.floor(random() * eventTypes.length)]; const timestamp = new Date(now - random() * 30 * dayMs); const event = { eventId: `evt_${Date.now()}_${i}`, type: eventType, timestamp: timestamp.toISOString(), userId: `user_${Math.floor(random() * 1000)}`, sessionId: `sess_${Math.floor(random() * 10000)}`, page: { url: `https://example.com/${generateSlug(random)}`, title: generateSearchTitle(random), referrer: random() > 0.3 ? 'https://google.com' : 'direct' }, device: { type: random() > 0.6 ? 'mobile' : 'desktop', browser: ['Chrome', 'Firefox', 'Safari', 'Edge'][Math.floor(random() * 4)], os: ['Windows', 'macOS', 'iOS', 'Android', 'Linux'][Math.floor(random() * 5)] }, properties: generateEventProperties(eventType, random) }; results.push(event); } results.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp)); return results; } function generateEventProperties(eventType, random) { switch (eventType) { case 'page_view': return { loadTime: Math.floor(100 + random() * 3000), scrollDepth: Math.floor(random() * 100) }; case 'click': return { element: ['button', 'link', 'image', 'card'][Math.floor(random() * 4)], elementId: `el_${Math.floor(random() * 1000)}`, x: Math.floor(random() * 1920), y: Math.floor(random() * 1080) }; case 'scroll': return { direction: random() > 0.8 ? 'up' : 'down', depth: Math.floor(random() * 100), velocity: Math.floor(random() * 500) }; case 'form_submit': return { formId: `form_${Math.floor(random() * 100)}`, formName: ['contact', 'signup', 'checkout', 'search'][Math.floor(random() * 4)], success: random() > 0.1, fieldCount: Math.floor(2 + random() * 10) }; case 'api_call': return { endpoint: `/api/${['users', 'products', 'orders', 'search'][Math.floor(random() * 4)]}`, method: ['GET', 'POST', 'PUT', 'DELETE'][Math.floor(random() * 4)], statusCode: random() > 0.9 ? 500 : random() > 0.1 ? 200 : 400, responseTime: Math.floor(50 + random() * 500) }; default: return { value: Math.floor(random() * 100) }; } } async function generateEmbeddingData(count, dimensions, seed) { log.info('Generating embedding data...', { count, dimensions }); const random = createSeededRandom(seed); const results = []; const topics = [ 'Product search optimization', 'Customer sentiment analysis', 'Price comparison algorithms', 'Inventory management', 'User behavior tracking', 'Market trend analysis', 'Competitor monitoring', 'Review aggregation', 'Category classification', 'Recommendation engines' ]; for (let i = 0; i < count; i++) { const embedding = []; let norm = 0; for (let j = 0; j < dimensions; j++) { const val = random() * 2 - 1; embedding.push(val); norm += val * val; } norm = Math.sqrt(norm); for (let j = 0; j < dimensions; j++) { embedding[j] = Math.round((embedding[j] / norm) * 1000000) / 1000000; } results.push({ id: `emb_${i}`, text: topics[i % topics.length] + ` - variant ${Math.floor(i / topics.length)}`, embedding, dimensions, model: 'synthetic' }); } return results; } // ============================================ // UTILITY FUNCTIONS // ============================================ function createSeededRandom(seed) { if (!seed) return Math.random; let s = hashCode(String(seed)); return function() { s = Math.sin(s) * 10000; return s - Math.floor(s); }; } function hashCode(str) { let hash = 0; for (let i = 0; i < str.length; i++) { const char = str.charCodeAt(i); hash = ((hash << 5) - hash) + char; hash = hash & hash; } return Math.abs(hash); } /** * Generate synthetic data based on a template output schema */ function generateFromTemplateSchema(outputFormat, random, index) { const record = {}; for (const [key, type] of Object.entries(outputFormat)) { if (type === 'string') { record[key] = generateTemplateString(key, random); } else if (type.startsWith('number')) { const match = type.match(/\((\d+)-(\d+)\)/); if (match) { const min = parseInt(match[1]); const max = parseInt(match[2]); record[key] = min + Math.floor(random() * (max - min + 1)); } else { record[key] = Math.floor(random() * 100); } } else if (type === 'object') { record[key] = { id: generateId(random), value: Math.floor(random() * 1000) }; } else if (type.startsWith('array')) { const itemType = type.match(/<(\w+)>/)?.[1] || 'string'; const count = Math.floor(2 + random() * 4); record[key] = Array.from({ length: count }, () => itemType === 'string' ? generateTemplateString(key, random) : itemType === 'object' ? { id: generateId(random), value: Math.floor(random() * 100) } : Math.floor(random() * 1000) ); } else { record[key] = `value_${index}_${Math.floor(random() * 1000)}`; } } record._templateId = `tpl_${index}`; record._generatedAt = new Date().toISOString(); return record; } /** * Generate context-aware string values based on field name */ function generateTemplateString(fieldName, random) { const lowerField = fieldName.toLowerCase(); if (lowerField.includes('id') || lowerField.includes('Id')) { return `id_${Math.floor(random() * 100000)}`; } if (lowerField.includes('name') || lowerField.includes('title')) { return generateName(random); } if (lowerField.includes('email')) { return `user${Math.floor(random() * 10000)}@example.com`; } if (lowerField.includes('phone')) { return generatePhone(random); } if (lowerField.includes('url') || lowerField.includes('website')) { return `https://example.com/${generateSlug(random)}`; } if (lowerField.includes('description') || lowerField.includes('content') || lowerField.includes('summary')) { return generateSnippet(random); } if (lowerField.includes('approach') || lowerField.includes('strategy')) { const approaches = ['Direct outreach', 'Email campaign', 'Social engagement', 'Referral network', 'Content marketing']; return approaches[Math.floor(random() * approaches.length)]; } if (lowerField.includes('insight') || lowerField.includes('finding')) { const insights = ['High growth potential', 'Active buyer signals', 'Recent funding round', 'Expanding market', 'Technology adoption']; return insights[Math.floor(random() * insights.length)]; } if (lowerField.includes('style') || lowerField.includes('type')) { const styles = ['Professional', 'Casual', 'Educational', 'Entertaining', 'Promotional']; return styles[Math.floor(random() * styles.length)]; } return `value_${Math.floor(random() * 1000)}`; } function parseInterval(interval) { const match = interval.match(/(\d+)([mhd])/); if (!match) return 3600000; const value = parseInt(match[1]); const unit = match[2]; switch (unit) { case 'm': return value * 60 * 1000; case 'h': return value * 60 * 60 * 1000; case 'd': return value * 24 * 60 * 60 * 1000; default: return 3600000; } } function generateId(random) { return Math.random().toString(36).substring(2, 15); } function generateSlug(random) { const words = ['best', 'top', 'new', 'amazing', 'premium', 'ultra', 'pro', 'max', 'elite', 'smart']; const nouns = ['product', 'item', 'deal', 'offer', 'guide', 'review', 'article', 'post']; return `${words[Math.floor(random() * words.length)]}-${nouns[Math.floor(random() * nouns.length)]}-${Math.floor(random() * 10000)}`; } function generateName(random) { const firstNames = ['John', 'Jane', 'Alex', 'Sarah', 'Mike', 'Emma', 'Chris', 'Lisa', 'David', 'Amy']; const lastNames = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson']; return `${firstNames[Math.floor(random() * firstNames.length)]} ${lastNames[Math.floor(random() * lastNames.length)]}`; } function generateProductName(category, random) { const adjectives = ['Premium', 'Ultra', 'Pro', 'Classic', 'Smart', 'Portable', 'Wireless', 'Advanced']; const products = { 'Electronics': ['Headphones', 'Speaker', 'Charger', 'Cable', 'Adapter', 'Mouse', 'Keyboard'], 'Clothing': ['T-Shirt', 'Jacket', 'Jeans', 'Sneakers', 'Hat', 'Sweater', 'Dress'], 'Home & Garden': ['Lamp', 'Planter', 'Organizer', 'Tool Set', 'Decoration', 'Rug'], 'Sports': ['Ball', 'Gloves', 'Bag', 'Mat', 'Weights', 'Bottle', 'Band'], 'Books': ['Guide', 'Novel', 'Textbook', 'Cookbook', 'Biography', 'Manual'], 'Toys': ['Figure', 'Game', 'Puzzle', 'Set', 'Doll', 'Car'], 'Beauty': ['Cream', 'Serum', 'Mask', 'Oil', 'Brush', 'Palette'], 'Automotive': ['Cover', 'Mat', 'Charger', 'Holder', 'Cleaner', 'Light'] }; const items = products[category] || products['Electronics']; return `${adjectives[Math.floor(random() * adjectives.length)]} ${items[Math.floor(random() * items.length)]}`; } function generateSpecs(category, random) { const specs = { 'Electronics': { battery: `${Math.floor(1000 + random() * 4000)}mAh`, connectivity: 'Bluetooth 5.0', warranty: '1 year' }, 'Clothing': { material: random() > 0.5 ? 'Cotton' : 'Polyester', size: ['S', 'M', 'L', 'XL'][Math.floor(random() * 4)] }, 'Home & Garden': { dimensions: `${Math.floor(10 + random() * 50)}x${Math.floor(10 + random() * 50)}cm`, weight: `${Math.floor(random() * 10)}kg` } }; return specs[category] || { general: 'Standard specifications' }; } function generateSocialText(random) { const texts = [ 'Just discovered this amazing product! Highly recommend', 'Working on something exciting today', 'Can\'t believe how good this turned out', 'Who else is enjoying this beautiful day?', 'Sharing my latest project with you all', 'This is a game changer for productivity', 'Thoughts on the latest industry trends?' ]; return texts[Math.floor(random() * texts.length)]; } function generateHashtag(random) { const tags = ['tech', 'innovation', 'business', 'startup', 'coding', 'design', 'marketing', 'growth', 'success', 'tips']; return tags[Math.floor(random() * tags.length)]; } function generateRandomObject(random) { return { name: generateName(random), value: Math.floor(random() * 1000), active: random() > 0.3, tags: ['tag1', 'tag2', 'tag3'].slice(0, Math.floor(1 + random() * 3)) }; } function getErrorMessage(code) { const messages = { 400: 'Bad Request - Invalid parameters', 401: 'Unauthorized - Invalid API key', 403: 'Forbidden - Access denied', 404: 'Not Found - Resource does not exist', 500: 'Internal Server Error' }; return messages[code] || 'Unknown error'; } function generateSearchTitle(random) { const templates = [ 'How to Get Started with {topic}', 'The Complete Guide to {topic}', 'Top 10 {topic} Tips for Beginners', 'Best {topic} Practices in 2024', '{topic}: Everything You Need to Know' ]; const topics = ['Web Scraping', 'Data Analysis', 'API Integration', 'Automation', 'Machine Learning']; const template = templates[Math.floor(random() * templates.length)]; const topic = topics[Math.floor(random() * topics.length)]; return template.replace('{topic}', topic); } function generateSnippet(random) { const snippets = [ 'Learn how to effectively implement solutions with our comprehensive guide. Discover best practices and expert tips.', 'This detailed tutorial walks you through step-by-step instructions for achieving optimal results.', 'Get started quickly with our beginner-friendly approach. No prior experience required.', 'Explore advanced techniques used by industry professionals to maximize efficiency.', 'Find out why thousands of users trust our methods for reliable, consistent outcomes.' ]; return snippets[Math.floor(random() * snippets.length)]; } function generateBreadcrumb(random) { const paths = ['guides', 'tutorials', 'blog', 'docs', 'resources']; return paths[Math.floor(random() * paths.length)]; } function generateStreetName(random) { const types = ['St', 'Ave', 'Blvd', 'Dr', 'Ln', 'Way', 'Ct']; const names = ['Oak', 'Main', 'Park', 'Cedar', 'Elm', 'Washington', 'Lake', 'Hill']; return `${names[Math.floor(random() * names.length)]} ${types[Math.floor(random() * types.length)]}`; } function getState(city) { const states = { 'New York': 'NY', 'Los Angeles': 'CA', 'Chicago': 'IL', 'Houston': 'TX', 'Phoenix': 'AZ', 'San Diego': 'CA', 'Dallas': 'TX', 'Austin': 'TX' }; return states[city] || 'CA'; } function generateRealEstateFeatures(random) { const allFeatures = ['Pool', 'Garage', 'Garden', 'Fireplace', 'Central AC', 'Hardwood Floors', 'Updated Kitchen', 'Smart Home', 'Solar Panels', 'Home Office']; const count = Math.floor(2 + random() * 5); return allFeatures.sort(() => random() - 0.5).slice(0, count); } function generatePhone(random) { return `(${Math.floor(200 + random() * 800)}) ${Math.floor(100 + random() * 900)}-${Math.floor(1000 + random() * 9000)}`; } function generateJobDescription(random) { return 'We are looking for a talented professional to join our growing team. You will work on challenging projects and collaborate with cross-functional teams to deliver exceptional results.'; } function generateRequirement(random) { const reqs = [ '3+ years of relevant experience', 'Strong communication skills', 'Bachelor\'s degree or equivalent', 'Experience with modern tools', 'Ability to work independently', 'Team collaboration experience', 'Problem-solving mindset' ]; return reqs[Math.floor(random() * reqs.length)]; } function generateBenefits(random) { const allBenefits = ['Health Insurance', '401k Match', 'Remote Work', 'Unlimited PTO', 'Stock Options', 'Learning Budget', 'Gym Membership', 'Free Lunch']; return allBenefits.sort(() => random() - 0.5).slice(0, Math.floor(3 + random() * 4)); } function generateNewsTitle(category, random) { const templates = { 'Technology': ['New AI Breakthrough Transforms {x}', 'Tech Giants Announce {x} Initiative', 'The Future of {x} is Here'], 'Business': ['Market Sees Record {x}', 'Company Reports {x} Growth', 'Industry Leaders Discuss {x}'], 'Politics': ['Government Announces {x} Policy', 'Leaders Meet to Discuss {x}', 'New {x} Legislation Proposed'], 'Science': ['Scientists Discover {x}', 'New Research Reveals {x}', 'Breakthrough in {x} Studies'], 'Health': ['Health Experts Recommend {x}', 'New Study Links {x} to Wellness', 'Medical Advances in {x}'], 'Sports': ['Team Wins {x} Championship', 'Athletes Break {x} Record', 'Sports World Reacts to {x}'], 'Entertainment': ['Celebrity Announces {x}', 'New {x} Series Premieres', 'Entertainment Industry Embraces {x}'] }; const words = ['Major', 'Surprising', 'Historic', 'Unprecedented', 'Exciting']; const catTemplates = templates[category] || templates['Technology']; const template = catTemplates[Math.floor(random() * catTemplates.length)]; const word = words[Math.floor(random() * words.length)]; return template.replace('{x}', word); } function generateSubtitle(random) { return 'Industry experts weigh in on the implications and what it means for the future.'; } function generateArticleContent(random) { return 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.'; } function generateCaption(random) { return 'Image: Illustration of the main topic covered in this article.'; } function generateTag(random) { const tags = ['trending', 'breaking', 'exclusive', 'analysis', 'opinion', 'featured', 'popular']; return tags[Math.floor(random() * tags.length)]; } // ============================================ // ENTERPRISE/COMPANY SIMULATORS // ============================================ async function generateStockTradingData(count, seed) { log.info('Generating stock trading data (Bloomberg-style)...'); const random = createSeededRandom(seed); const results = []; const symbols = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'NVDA', 'TSLA', 'JPM', 'V', 'WMT', 'UNH', 'JNJ', 'PG', 'HD', 'BAC']; const exchanges = ['NYSE', 'NASDAQ', 'LSE', 'TSE', 'HKEX']; const orderTypes = ['market', 'limit', 'stop', 'stop_limit', 'trailing_stop']; const sides = ['buy', 'sell']; for (let i = 0; i < count; i++) { const symbol = symbols[Math.floor(random() * symbols.length)]; const basePrice = 50 + random() * 500; const timestamp = new Date(Date.now() - random() * 24 * 60 * 60 * 1000); const volume = Math.floor(100 + random() * 100000); results.push({ tradeId: `TRD${Date.now()}${i}`, symbol, exchange: exchanges[Math.floor(random() * exchanges.length)], timestamp: timestamp.toISOString(), ohlcv: { open: Math.round(basePrice * (1 - random() * 0.02) * 100) / 100, high: Math.round(basePrice * (1 + random() * 0.03) * 100) / 100, low: Math.round(basePrice * (1 - random() * 0.03) * 100) / 100, close: Math.round(basePrice * 100) / 100, volume, vwap: Math.round(basePrice * (1 + (random() - 0.5) * 0.01) * 100) / 100 }, quote: { bid: Math.round(basePrice * 0.999 * 100) / 100, ask: Math.round(basePrice * 1.001 * 100) / 100, bidSize: Math.floor(100 + random() * 10000), askSize: Math.floor(100 + random() * 10000), spread: Math.round(basePrice * 0.002 * 100) / 100 }, order: { type: orderTypes[Math.floor(random() * orderTypes.length)], side: sides[Math.floor(random() * sides.length)], quantity: Math.floor(10 + random() * 1000), filledQuantity: Math.floor(10 + random() * 1000), status: random() > 0.1 ? 'filled' : random() > 0.5 ? 'partial' : 'pending' }, marketData: { marketCap: Math.floor(random() * 3000) + 'B', peRatio: Math.round((10 + random() * 40) * 10) / 10, dividendYield: Math.round(random() * 5 * 100) / 100, beta: Math.round((0.5 + random() * 1.5) * 100) / 100, fiftyTwoWeekHigh: Math.round(basePrice * 1.3 * 100) / 100, fiftyTwoWeekLow: Math.round(basePrice * 0.7 * 100) / 100 }, analytics: { rsi: Math.round((20 + random() * 60) * 10) / 10, macd: Math.round((random() - 0.5) * 10 * 100) / 100, movingAvg50: Math.round(basePrice * (1 + (random() - 0.5) * 0.1) * 100) / 100, movingAvg200: Math.round(basePrice * (1 + (random() - 0.5) * 0.15) * 100) / 100 }, scrapedAt: new Date().toISOString() }); } return results; } async function generateMedicalData(count, seed) { log.info('Generating medical/healthcare data...'); const random = createSeededRandom(seed); const results = []; const departments = ['Cardiology', 'Neurology', 'Orthopedics', 'Oncology', 'Pediatrics', 'Emergency', 'Radiology', 'Surgery']; const diagnoses = ['Hypertension', 'Type 2 Diabetes', 'Chronic Pain', 'Respiratory Infection', 'Anxiety Disorder', 'Cardiac Arrhythmia', 'Migraine', 'Osteoarthritis']; const procedures = ['Blood Test', 'MRI Scan', 'X-Ray', 'CT Scan', 'Ultrasound', 'ECG', 'Endoscopy', 'Biopsy']; const insurers = ['Blue Cross', 'Aetna', 'UnitedHealth', 'Cigna', 'Humana', 'Kaiser', 'Medicare', 'Medicaid']; const statuses = ['admitted', 'discharged', 'outpatient', 'emergency', 'scheduled']; for (let i = 0; i < count; i++) { const admitDate = new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000); const age = Math.floor(18 + random() * 70); results.push({ recordId: `MED${Date.now()}${i}`, patient: { id: `PAT${Math.floor(random() * 1000000)}`, age, gender: random() > 0.5 ? 'M' : 'F', bloodType: ['A+', 'A-', 'B+', 'B-', 'O+', 'O-', 'AB+', 'AB-'][Math.floor(random() * 8)], allergies: random() > 0.7 ? ['Penicillin', 'Sulfa', 'Latex'][Math.floor(random() * 3)] : null }, encounter: { type: statuses[Math.floor(random() * statuses.length)], department: departments[Math.floor(random() * departments.length)], admitDate: admitDate.toISOString(), dischargeDate: random() > 0.3 ? new Date(admitDate.getTime() + random() * 7 * 24 * 60 * 60 * 1000).toISOString() : null, lengthOfStay: Math.floor(1 + random() * 14) }, diagnosis: { primary: diagnoses[Math.floor(random() * diagnoses.length)], secondary: random() > 0.5 ? diagnoses[Math.floor(random() * diagnoses.length)] : null, icdCode: `I${Math.floor(10 + random() * 90)}.${Math.floor(random() * 10)}`, severity: ['mild', 'moderate', 'severe', 'critical'][Math.floor(random() * 4)] }, procedures: Array.from({ length: Math.floor(1 + random() * 3) }, () => ({ name: procedures[Math.floor(random() * procedures.length)], cptCode: `${Math.floor(10000 + random() * 90000)}`, date: new Date(admitDate.getTime() + random() * 3 * 24 * 60 * 60 * 1000).toISOString(), result: random() > 0.1 ? 'normal' : 'abnormal' })), vitals: { bloodPressure: `${Math.floor(100 + random() * 60)}/${Math.floor(60 + random() * 40)}`, heartRate: Math.floor(60 + random() * 40), temperature: Math.round((97 + random() * 4) * 10) / 10, oxygenSaturation: Math.floor(94 + random() * 6), weight: Math.floor(120 + random() * 150), height: Math.floor(60 + random() * 20) }, billing: { insurer: insurers[Math.floor(random() * insurers.length)], policyNumber: `POL${Math.floor(random() * 10000000)}`, totalCharges: Math.floor(1000 + random() * 50000), covered: Math.floor(800 + random() * 40000), patientResponsibility: Math.floor(100 + random() * 5000), claimStatus: random() > 0.2 ? 'approved' : random() > 0.5 ? 'pending' : 'denied' }, provider: { physician: generateName(random), npi: `${Math.floor(1000000000 + random() * 9000000000)}`, facility: `${['Metro', 'Central', 'Regional', 'University'][Math.floor(random() * 4)]} Medical Center` }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate Crunchbase-style company data using Gemini Grounding API * Uses Google Search grounding for real, up-to-date company information */ async function generateCrunchbaseData(count, apiKey, companyNames = [], industry = null) { log.info('Generating Crunchbase-style data with Gemini Grounding...', { count, industry }); const results = []; if (!apiKey) { log.warning('No Gemini API key - falling back to synthetic company data'); return generateCompanyData(count, 'crunchbase-fallback'); } const { GoogleGenerativeAI } = await import('@google/generative-ai'); const genAI = new GoogleGenerativeAI(apiKey); // Use Gemini 2.0 Flash with Google Search grounding const model = genAI.getGenerativeModel({ model: 'gemini-2.0-flash-exp', tools: [{ google_search: {} }] }); // Generate company names if not provided const targetCompanies = companyNames.length > 0 ? companyNames : await generateCompanyList(model, count, industry); for (let i = 0; i < Math.min(count, targetCompanies.length); i++) { const companyName = targetCompanies[i]; try { const prompt = `Research "${companyName}" company and provide current information in this exact JSON format: { "name": "Official company name", "description": "Brief company description (1-2 sentences)", "founded": 2010, "founders": ["Founder Name 1", "Founder Name 2"], "headquarters": {"city": "City", "state": "State", "country": "Country"}, "industry": "Primary industry", "subIndustry": "Sub-industry or sector", "employeeCount": "Range like 1001-5000 or exact number", "fundingTotal": "$X million/billion or 'Private/Not disclosed'", "lastFundingRound": {"type": "Series X or IPO", "amount": "$X", "date": "YYYY-MM"}, "valuation": "$X billion or 'Private'", "revenue": "$X million/billion or 'Not disclosed'", "website": "https://company.com", "linkedIn": "linkedin.com/company/name", "ceo": "CEO Name", "publicStatus": "Public (NASDAQ:TICK)" or "Private", "competitors": ["Competitor 1", "Competitor 2"], "keyProducts": ["Product 1", "Product 2"], "recentNews": "Brief recent news (1 sentence)" } Only return valid JSON, no markdown or explanation.`; const result = await model.generateContent(prompt); const text = result.response.text(); // Extract JSON from response const jsonMatch = text.match(/\{[\s\S]*\}/); if (jsonMatch) { const companyData = JSON.parse(jsonMatch[0]); results.push({ id: `crunchbase_${Date.now()}_${i}`, type: 'crunchbase', data: { ...companyData, dataSource: 'gemini-grounding', groundingUsed: true, lastUpdated: new Date().toISOString() }, metadata: { query: companyName, generatedAt: new Date().toISOString(), provider: 'gemini', model: 'gemini-2.0-flash-exp', grounded: true } }); log.info(`Grounded data for: ${companyName}`); } } catch (e) { log.warning(`Failed to get grounded data for ${companyName}: ${e.message}`); // Add fallback synthetic data results.push({ id: `crunchbase_${Date.now()}_${i}`, type: 'crunchbase', data: { name: companyName, description: 'Company information not available', dataSource: 'fallback', groundingUsed: false, error: e.message }, metadata: { query: companyName, generatedAt: new Date().toISOString(), grounded: false } }); } // Rate limiting - 15 RPM for Gemini free tier if (i < count - 1) { await new Promise(r => setTimeout(r, 4100)); } } return results; } /** * Generate a list of companies to research using Gemini Grounding */ async function generateCompanyList(model, count, industry = null) { const industryFilter = industry ? ` in the ${industry} industry` : ''; const prompt = `List ${Math.min(count, 20)} notable startup and tech companies${industryFilter} that are frequently covered on Crunchbase. Include a mix of: - Unicorns (valued over $1B) - Recently funded startups - Established tech companies Return only company names, one per line, no numbering or bullets.`; try { const result = await model.generateContent(prompt); const text = result.response.text(); return text.split('\n').filter(line => line.trim().length > 0).slice(0, count); } catch (e) { log.warning(`Failed to generate company list: ${e.message}`); // Fallback to well-known companies return ['OpenAI', 'Anthropic', 'Stripe', 'SpaceX', 'Databricks', 'Figma', 'Notion', 'Discord', 'Canva', 'Airtable'].slice(0, count); } } async function generateCompanyData(count, seed) { log.info('Generating company/corporate data...'); const random = createSeededRandom(seed); const results = []; const industries = ['Technology', 'Healthcare', 'Finance', 'Manufacturing', 'Retail', 'Energy', 'Telecommunications', 'Transportation']; const companyTypes = ['Corporation', 'LLC', 'Partnership', 'Sole Proprietorship', 'S-Corp', 'Non-Profit']; const departments = ['Engineering', 'Sales', 'Marketing', 'Finance', 'HR', 'Operations', 'Legal', 'R&D']; for (let i = 0; i < count; i++) { const founded = Math.floor(1950 + random() * 74); const employees = Math.floor(10 + random() * 100000); const revenue = Math.floor(100000 + random() * 50000000000); results.push({ companyId: `COM${Date.now()}${i}`, profile: { name: `${generateName(random).split(' ')[1]} ${['Industries', 'Corp', 'Inc', 'Holdings', 'Group', 'Technologies', 'Solutions'][Math.floor(random() * 7)]}`, ticker: random() > 0.5 ? `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}` : null, type: companyTypes[Math.floor(random() * companyTypes.length)], industry: industries[Math.floor(random() * industries.length)], founded, website: `https://example-company-${i}.com`, description: 'Leading provider of innovative solutions for modern enterprises.' }, headquarters: { address: `${Math.floor(100 + random() * 9900)} Corporate Blvd`, city: ['New York', 'San Francisco', 'Chicago', 'Boston', 'Austin', 'Seattle'][Math.floor(random() * 6)], state: ['NY', 'CA', 'IL', 'MA', 'TX', 'WA'][Math.floor(random() * 6)], country: 'USA', timezone: 'America/New_York' }, financials: { revenue, revenueGrowth: Math.round((random() * 40 - 10) * 10) / 10, netIncome: Math.floor(revenue * (0.05 + random() * 0.15)), grossMargin: Math.round((30 + random() * 40) * 10) / 10, operatingMargin: Math.round((10 + random() * 25) * 10) / 10, debtToEquity: Math.round(random() * 2 * 100) / 100, currentRatio: Math.round((1 + random() * 2) * 100) / 100, fiscalYearEnd: ['December', 'March', 'June', 'September'][Math.floor(random() * 4)] }, workforce: { totalEmployees: employees, fullTime: Math.floor(employees * 0.85), partTime: Math.floor(employees * 0.1), contractors: Math.floor(employees * 0.05), departments: departments.slice(0, Math.floor(3 + random() * 5)).map(dept => ({ name: dept, headcount: Math.floor(employees * (0.05 + random() * 0.2)), budget: Math.floor(revenue * (0.01 + random() * 0.1)) })), avgTenure: Math.round((2 + random() * 8) * 10) / 10, turnoverRate: Math.round((5 + random() * 20) * 10) / 10 }, leadership: Array.from({ length: Math.floor(3 + random() * 5) }, () => ({ name: generateName(random), title: ['CEO', 'CFO', 'CTO', 'COO', 'CMO', 'CHRO', 'CLO', 'CIO'][Math.floor(random() * 8)], since: Math.floor(2010 + random() * 14), compensation: Math.floor(500000 + random() * 10000000) })), metrics: { customerCount: Math.floor(100 + random() * 1000000), nps: Math.floor(-20 + random() * 100), marketShare: Math.round(random() * 30 * 10) / 10, brandValue: Math.floor(random() * 50) + 'B' }, scrapedAt: new Date().toISOString() }); } return results; } async function generateSupplyChainData(count, seed) { log.info('Generating supply chain data...'); const random = createSeededRandom(seed); const results = []; const productCategories = ['Electronics', 'Raw Materials', 'Components', 'Finished Goods', 'Packaging', 'Chemicals', 'Textiles', 'Machinery']; const statuses = ['in_transit', 'delivered', 'pending', 'delayed', 'customs_hold', 'processing', 'shipped', 'cancelled']; const transportModes = ['air', 'sea', 'rail', 'truck', 'multimodal']; const warehouses = ['WH-NYC-01', 'WH-LAX-02', 'WH-CHI-03', 'WH-HOU-04', 'WH-SEA-05', 'WH-MIA-06']; const countries = ['USA', 'China', 'Germany', 'Japan', 'Mexico', 'Vietnam', 'India', 'South Korea']; for (let i = 0; i < count; i++) { const orderDate = new Date(Date.now() - random() * 90 * 24 * 60 * 60 * 1000); const quantity = Math.floor(10 + random() * 10000); const unitPrice = Math.round((1 + random() * 500) * 100) / 100; results.push({ shipmentId: `SHP${Date.now()}${i}`, order: { orderId: `ORD${Math.floor(random() * 10000000)}`, orderDate: orderDate.toISOString(), priority: ['standard', 'express', 'critical'][Math.floor(random() * 3)], status: statuses[Math.floor(random() * statuses.length)] }, product: { sku: `SKU-${Math.floor(100000 + random() * 900000)}`, name: `${productCategories[Math.floor(random() * productCategories.length)]} Item ${Math.floor(random() * 1000)}`, category: productCategories[Math.floor(random() * productCategories.length)], quantity, unitPrice, totalValue: Math.round(quantity * unitPrice * 100) / 100, weight: Math.round((0.1 + random() * 100) * 10) / 10, dimensions: { length: Math.floor(10 + random() * 100), width: Math.floor(10 + random() * 100), height: Math.floor(10 + random() * 50) } }, supplier: { id: `SUP${Math.floor(random() * 10000)}`, name: `${generateName(random).split(' ')[1]} Supply Co`, country: countries[Math.floor(random() * countries.length)], leadTime: Math.floor(7 + random() * 60), rating: Math.round((3 + random() * 2) * 10) / 10, onTimeDelivery: Math.round((70 + random() * 30) * 10) / 10 }, logistics: { carrier: ['FedEx', 'UPS', 'DHL', 'Maersk', 'Expeditors', 'DB Schenker'][Math.floor(random() * 6)], mode: transportModes[Math.floor(random() * transportModes.length)], trackingNumber: `TRK${Math.floor(random() * 1000000000000)}`, origin: { facility: warehouses[Math.floor(random() * warehouses.length)], country: countries[Math.floor(random() * countries.length)], departureDate: orderDate.toISOString() }, destination: { facility: warehouses[Math.floor(random() * warehouses.length)], country: countries[Math.floor(random() * countries.length)], eta: new Date(orderDate.getTime() + (7 + random() * 30) * 24 * 60 * 60 * 1000).toISOString() }, currentLocation: { lat: 25 + random() * 25, lng: -120 + random() * 60, lastUpdate: new Date(orderDate.getTime() + random() * 7 * 24 * 60 * 60 * 1000).toISOString() } }, inventory: { warehouse: warehouses[Math.floor(random() * warehouses.length)], stockLevel: Math.floor(random() * 5000), reorderPoint: Math.floor(100 + random() * 500), safetyStock: Math.floor(50 + random() * 200), daysOfSupply: Math.floor(10 + random() * 90) }, costs: { productCost: Math.round(quantity * unitPrice * 100) / 100, shippingCost: Math.round(quantity * unitPrice * (0.05 + random() * 0.15) * 100) / 100, tariffs: Math.round(quantity * unitPrice * random() * 0.1 * 100) / 100, insurance: Math.round(quantity * unitPrice * 0.02 * 100) / 100, totalLandedCost: Math.round(quantity * unitPrice * (1.1 + random() * 0.2) * 100) / 100 }, compliance: { hsCode: `${Math.floor(1000 + random() * 9000)}.${Math.floor(10 + random() * 90)}`, countryOfOrigin: countries[Math.floor(random() * countries.length)], certificates: random() > 0.5 ? ['ISO 9001', 'CE', 'RoHS'][Math.floor(random() * 3)] : null, customsCleared: random() > 0.3 }, scrapedAt: new Date().toISOString() }); } return results; } async function generateFinancialData(count, seed) { log.info('Generating financial services data...'); const random = createSeededRandom(seed); const results = []; const accountTypes = ['checking', 'savings', 'investment', 'retirement', 'credit', 'loan', 'mortgage']; const transactionTypes = ['debit', 'credit', 'transfer', 'payment', 'withdrawal', 'deposit', 'fee', 'interest']; const categories = ['groceries', 'utilities', 'entertainment', 'dining', 'travel', 'shopping', 'healthcare', 'insurance', 'investment']; const institutions = ['Chase', 'Bank of America', 'Wells Fargo', 'Citi', 'Capital One', 'Goldman Sachs', 'Morgan Stanley', 'Fidelity']; for (let i = 0; i < count; i++) { const transactionDate = new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000); const amount = Math.round((1 + random() * 10000) * 100) / 100; results.push({ transactionId: `TXN${Date.now()}${i}`, account: { accountId: `ACC${Math.floor(random() * 100000000)}`, type: accountTypes[Math.floor(random() * accountTypes.length)], institution: institutions[Math.floor(random() * institutions.length)], balance: Math.round((1000 + random() * 500000) * 100) / 100, availableCredit: random() > 0.5 ? Math.round((5000 + random() * 50000) * 100) / 100 : null, interestRate: Math.round((random() * 25) * 100) / 100 }, transaction: { type: transactionTypes[Math.floor(random() * transactionTypes.length)], amount, currency: 'USD', date: transactionDate.toISOString(), description: `${categories[Math.floor(random() * categories.length)].toUpperCase()} - ${generateName(random).split(' ')[1]} Store`, category: categories[Math.floor(random() * categories.length)], status: random() > 0.05 ? 'completed' : random() > 0.5 ? 'pending' : 'failed', merchant: { name: `${generateName(random).split(' ')[1]} ${['Store', 'Shop', 'Market', 'Services'][Math.floor(random() * 4)]}`, category: categories[Math.floor(random() * categories.length)], mcc: `${Math.floor(1000 + random() * 9000)}` } }, card: random() > 0.3 ? { last4: `${Math.floor(1000 + random() * 9000)}`, brand: ['Visa', 'Mastercard', 'Amex', 'Discover'][Math.floor(random() * 4)], expiryMonth: Math.floor(1 + random() * 12), expiryYear: Math.floor(2025 + random() * 5) } : null, fraud: { score: Math.round(random() * 100), flagged: random() > 0.95, rules: random() > 0.9 ? ['unusual_location', 'high_amount', 'velocity_check'][Math.floor(random() * 3)] : null }, analytics: { dayOfWeek: transactionDate.getDay(), hourOfDay: transactionDate.getHours(), isRecurring: random() > 0.7, monthlyAverage: Math.round((100 + random() * 2000) * 100) / 100 }, scrapedAt: new Date().toISOString() }); } return results; } async function generateBloombergData(count, seed) { log.info('Generating Bloomberg terminal-style data...'); const random = createSeededRandom(seed); const results = []; const assetClasses = ['equity', 'fixed_income', 'commodity', 'fx', 'derivative', 'crypto']; const sectors = ['Technology', 'Healthcare', 'Financials', 'Consumer', 'Energy', 'Industrials', 'Materials', 'Utilities']; const ratings = ['AAA', 'AA+', 'AA', 'AA-', 'A+', 'A', 'A-', 'BBB+', 'BBB', 'BBB-', 'BB+', 'BB', 'B', 'CCC']; const newsCategories = ['earnings', 'merger', 'regulatory', 'analyst_upgrade', 'analyst_downgrade', 'dividend', 'lawsuit', 'executive']; for (let i = 0; i < count; i++) { const timestamp = new Date(Date.now() - random() * 24 * 60 * 60 * 1000); const basePrice = 10 + random() * 500; results.push({ terminalId: `BBG${Date.now()}${i}`, security: { ticker: `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}`, name: `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Ltd', 'Holdings', 'Group'][Math.floor(random() * 5)]}`, assetClass: assetClasses[Math.floor(random() * assetClasses.length)], sector: sectors[Math.floor(random() * sectors.length)], country: ['US', 'GB', 'JP', 'DE', 'CN', 'FR', 'CA', 'AU'][Math.floor(random() * 8)], currency: ['USD', 'EUR', 'GBP', 'JPY', 'CNY'][Math.floor(random() * 5)], isin: `US${Math.floor(1000000000 + random() * 9000000000)}`, cusip: `${Math.floor(100000000 + random() * 900000000)}` }, pricing: { last: Math.round(basePrice * 100) / 100, bid: Math.round(basePrice * 0.999 * 100) / 100, ask: Math.round(basePrice * 1.001 * 100) / 100, open: Math.round(basePrice * (1 - random() * 0.02) * 100) / 100, high: Math.round(basePrice * (1 + random() * 0.03) * 100) / 100, low: Math.round(basePrice * (1 - random() * 0.03) * 100) / 100, close: Math.round(basePrice * (1 + (random() - 0.5) * 0.02) * 100) / 100, change: Math.round((random() - 0.5) * 10 * 100) / 100, changePercent: Math.round((random() - 0.5) * 5 * 100) / 100, volume: Math.floor(random() * 50000000), avgVolume: Math.floor(random() * 30000000) }, fundamentals: { marketCap: Math.floor(random() * 3000) + 'B', enterpriseValue: Math.floor(random() * 3500) + 'B', peRatio: Math.round((5 + random() * 50) * 10) / 10, forwardPe: Math.round((5 + random() * 40) * 10) / 10, pbRatio: Math.round((0.5 + random() * 10) * 10) / 10, evEbitda: Math.round((5 + random() * 30) * 10) / 10, debtToEquity: Math.round(random() * 3 * 100) / 100, roe: Math.round((5 + random() * 30) * 10) / 10, eps: Math.round((random() * 20) * 100) / 100, dividend: Math.round(random() * 5 * 100) / 100, payoutRatio: Math.round((20 + random() * 60) * 10) / 10 }, credit: { rating: ratings[Math.floor(random() * ratings.length)], outlook: ['positive', 'stable', 'negative'][Math.floor(random() * 3)], agency: ['S&P', 'Moody\'s', 'Fitch'][Math.floor(random() * 3)], spread: Math.round((50 + random() * 500)), cds: Math.round((20 + random() * 300)) }, analytics: { beta: Math.round((0.5 + random() * 1.5) * 100) / 100, sharpeRatio: Math.round((random() * 3) * 100) / 100, volatility: Math.round((10 + random() * 40) * 10) / 10, correlation: Math.round((random() * 2 - 1) * 100) / 100, var95: Math.round((random() * 10) * 100) / 100, maxDrawdown: Math.round((5 + random() * 30) * 10) / 10 }, consensus: (() => { // Generate consistent analyst ratings const numAnalysts = Math.floor(5 + random() * 40); const buyPct = random(); const sellPct = random() * (1 - buyPct); const holdPct = 1 - buyPct - sellPct; const buyRatings = Math.floor(numAnalysts * buyPct); const sellRatings = Math.floor(numAnalysts * sellPct); const holdRatings = numAnalysts - buyRatings - sellRatings; // Derive recommendation from actual ratings const buyScore = buyRatings / numAnalysts; let recommendation; if (buyScore > 0.7) recommendation = 'strong_buy'; else if (buyScore > 0.5) recommendation = 'buy'; else if (buyScore > 0.3) recommendation = 'hold'; else if (buyScore > 0.15) recommendation = 'sell'; else recommendation = 'strong_sell'; return { recommendation, targetPrice: Math.round(basePrice * (1 + (random() - 0.3) * 0.5) * 100) / 100, numAnalysts, buyRatings, holdRatings, sellRatings }; })(), news: { headline: `${generateName(random).split(' ')[1]} Corp ${newsCategories[Math.floor(random() * newsCategories.length)].replace('_', ' ')} update`, source: ['Reuters', 'Bloomberg', 'WSJ', 'FT', 'CNBC'][Math.floor(random() * 5)], timestamp: timestamp.toISOString(), sentiment: ['positive', 'neutral', 'negative'][Math.floor(random() * 3)], relevance: Math.round(random() * 100) }, events: { nextEarnings: new Date(Date.now() + random() * 90 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], exDividendDate: random() > 0.5 ? new Date(Date.now() + random() * 30 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] : null, annualMeeting: new Date(Date.now() + random() * 180 * 24 * 60 * 60 * 1000).toISOString().split('T')[0] }, scrapedAt: new Date().toISOString() }); } return results; } async function generateZoomInfoData(count, seed) { log.info('Generating ZoomInfo-style B2B enrichment data...'); const random = createSeededRandom(seed); const results = []; const industries = ['Software', 'Healthcare', 'Financial Services', 'Manufacturing', 'Retail', 'Telecommunications', 'Professional Services', 'Real Estate']; const departments = ['Engineering', 'Sales', 'Marketing', 'Finance', 'Operations', 'Product', 'HR', 'Customer Success', 'Legal', 'IT']; const seniority = ['C-Level', 'VP', 'Director', 'Manager', 'Individual Contributor', 'Entry Level']; const technologies = ['Salesforce', 'AWS', 'Microsoft Azure', 'Google Cloud', 'HubSpot', 'SAP', 'Oracle', 'Workday', 'Tableau', 'Snowflake', 'MongoDB', 'PostgreSQL']; const fundingStages = ['Seed', 'Series A', 'Series B', 'Series C', 'Series D+', 'IPO', 'Acquired', 'Bootstrapped']; const intentSignals = ['product_research', 'competitor_analysis', 'pricing_page_visit', 'demo_request', 'content_download', 'job_posting', 'technology_install', 'budget_approval']; for (let i = 0; i < count; i++) { const companyName = `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Solutions', 'Technologies', 'Systems', 'Group'][Math.floor(random() * 6)]}`; const domain = companyName.toLowerCase().replace(/[^a-z]/g, '') + '.com'; const employees = Math.floor(10 + random() * 50000); const revenueM = Math.floor(1 + random() * 5000); const firstName = generateName(random).split(' ')[0]; const lastName = generateName(random).split(' ')[1]; const dept = departments[Math.floor(random() * departments.length)]; const level = seniority[Math.floor(random() * seniority.length)]; results.push({ recordId: `ZI${Date.now()}${i}`, company: { name: companyName, domain: domain, industry: industries[Math.floor(random() * industries.length)], subIndustry: `${industries[Math.floor(random() * industries.length)]} - ${['Enterprise', 'Mid-Market', 'SMB'][Math.floor(random() * 3)]}`, employees: employees, employeeRange: employees < 50 ? '1-50' : employees < 200 ? '51-200' : employees < 1000 ? '201-1000' : employees < 5000 ? '1001-5000' : '5000+', revenue: `$${revenueM}M`, revenueRange: revenueM < 10 ? '$1M-$10M' : revenueM < 50 ? '$10M-$50M' : revenueM < 200 ? '$50M-$200M' : revenueM < 1000 ? '$200M-$1B' : '$1B+', founded: Math.floor(1970 + random() * 50), headquarters: { street: `${Math.floor(100 + random() * 9900)} ${['Main', 'Market', 'Broadway', 'Park', 'Tech'][Math.floor(random() * 5)]} St`, city: ['San Francisco', 'New York', 'Boston', 'Austin', 'Seattle', 'Chicago', 'Denver'][Math.floor(random() * 7)], state: ['CA', 'NY', 'MA', 'TX', 'WA', 'IL', 'CO'][Math.floor(random() * 7)], country: 'USA', postalCode: String(Math.floor(10000 + random() * 90000)) }, phone: `+1-${Math.floor(200 + random() * 800)}-${Math.floor(100 + random() * 900)}-${Math.floor(1000 + random() * 9000)}`, website: `https://${domain}`, description: `Leading provider of ${industries[Math.floor(random() * industries.length)].toLowerCase()} solutions for enterprise customers`, fundingStage: fundingStages[Math.floor(random() * fundingStages.length)], totalFunding: `$${Math.floor(1 + random() * 500)}M`, lastFundingDate: new Date(Date.now() - random() * 1095 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], investors: Array.from({length: Math.floor(1 + random() * 5)}, () => `${generateName(random).split(' ')[1]} ${['Ventures', 'Capital', 'Partners'][Math.floor(random() * 3)]}` ) }, contact: { firstName: firstName, lastName: lastName, fullName: `${firstName} ${lastName}`, email: `${firstName.toLowerCase()}.${lastName.toLowerCase()}@${domain}`, directPhone: `+1-${Math.floor(200 + random() * 800)}-${Math.floor(100 + random() * 900)}-${Math.floor(1000 + random() * 9000)}`, mobilePhone: random() > 0.5 ? `+1-${Math.floor(200 + random() * 800)}-${Math.floor(100 + random() * 900)}-${Math.floor(1000 + random() * 9000)}` : null, title: `${level === 'C-Level' ? ['CEO', 'CTO', 'CFO', 'COO', 'CMO'][Math.floor(random() * 5)] : level === 'VP' ? `VP of ${dept}` : level === 'Director' ? `Director of ${dept}` : level === 'Manager' ? `${dept} Manager` : `${dept} ${['Specialist', 'Analyst', 'Associate'][Math.floor(random() * 3)]}`}`, department: dept, seniority: level, linkedIn: `https://linkedin.com/in/${firstName.toLowerCase()}-${lastName.toLowerCase()}-${Math.floor(random() * 99999)}`, twitter: random() > 0.6 ? `@${firstName.toLowerCase()}${lastName.toLowerCase()}` : null, yearsInRole: Math.floor(random() * 8), yearsAtCompany: Math.floor(random() * 12), previousCompanies: Array.from({length: Math.floor(1 + random() * 3)}, () => `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Technologies'][Math.floor(random() * 3)]}` ), education: { degree: ['Bachelor\'s', 'Master\'s', 'MBA', 'PhD'][Math.floor(random() * 4)], field: ['Computer Science', 'Business', 'Engineering', 'Marketing', 'Finance'][Math.floor(random() * 5)], school: ['Stanford', 'MIT', 'Harvard', 'Berkeley', 'Carnegie Mellon', 'Northwestern'][Math.floor(random() * 6)] } }, technographics: { installedTechnologies: Array.from({length: Math.floor(3 + random() * 8)}, () => technologies[Math.floor(random() * technologies.length)] ).filter((v, i, a) => a.indexOf(v) === i), technologySpend: `$${Math.floor(100 + random() * 10000)}K`, cloudProvider: ['AWS', 'Azure', 'Google Cloud', 'Multi-Cloud'][Math.floor(random() * 4)], crmSystem: ['Salesforce', 'HubSpot', 'Microsoft Dynamics', 'Zoho'][Math.floor(random() * 4)], marketingAutomation: ['HubSpot', 'Marketo', 'Pardot', 'Eloqua'][Math.floor(random() * 4)], analyticsTools: ['Google Analytics', 'Adobe Analytics', 'Mixpanel', 'Amplitude'][Math.floor(random() * 4)] }, intentSignals: { recentActivity: Array.from({length: Math.floor(1 + random() * 5)}, () => ({ signal: intentSignals[Math.floor(random() * intentSignals.length)], timestamp: new Date(Date.now() - random() * 30 * 24 * 60 * 60 * 1000).toISOString(), score: Math.floor(1 + random() * 100), source: ['website', 'content', 'events', 'social', 'search'][Math.floor(random() * 5)] })), buyingStage: ['awareness', 'consideration', 'decision', 'purchase'][Math.floor(random() * 4)], engagementScore: Math.floor(1 + random() * 100), lastEngagement: new Date(Date.now() - random() * 60 * 24 * 60 * 60 * 1000).toISOString() }, organizationChart: { reportsTo: random() > 0.3 ? `${generateName(random)}` : null, directReports: Math.floor(random() * 15), totalTeamSize: Math.floor(random() * 50), peers: Array.from({length: Math.floor(2 + random() * 5)}, () => generateName(random)) }, dataQuality: { emailVerified: random() > 0.2, phoneVerified: random() > 0.3, lastVerified: new Date(Date.now() - random() * 90 * 24 * 60 * 60 * 1000).toISOString(), confidenceScore: Math.floor(70 + random() * 30), dataFreshness: Math.floor(random() * 60) + ' days' }, scrapedAt: new Date().toISOString() }); } return results; } async function generateFactSetData(count, seed) { log.info('Generating FactSet-style financial analytics data...'); const random = createSeededRandom(seed); const results = []; const sectors = ['Technology', 'Healthcare', 'Financials', 'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Industrials', 'Materials', 'Real Estate', 'Utilities', 'Communication Services']; const exchanges = ['NYSE', 'NASDAQ', 'LSE', 'TSE', 'HKEX', 'Euronext', 'SSE']; const analystFirms = ['Goldman Sachs', 'Morgan Stanley', 'JP Morgan', 'Bank of America', 'Citi', 'Deutsche Bank', 'Barclays', 'UBS', 'Credit Suisse', 'Wells Fargo']; const institutionalTypes = ['Mutual Fund', 'Hedge Fund', 'Pension Fund', 'Sovereign Wealth', 'ETF', 'Private Equity', 'Insurance', 'Endowment']; for (let i = 0; i < count; i++) { const companyName = `${generateName(random).split(' ')[1]} ${['Corporation', 'Inc', 'Holdings', 'Group', 'International'][Math.floor(random() * 5)]}`; const ticker = `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}`; const basePrice = 10 + random() * 500; const revenue = Math.floor(100 + random() * 50000); const employees = Math.floor(100 + random() * 200000); results.push({ entityId: `FS${Date.now()}${i}`, company: { name: companyName, ticker: ticker, exchange: exchanges[Math.floor(random() * exchanges.length)], sector: sectors[Math.floor(random() * sectors.length)], industry: `${sectors[Math.floor(random() * sectors.length)]} - Specialized`, country: ['USA', 'UK', 'Japan', 'Germany', 'China', 'France', 'Canada'][Math.floor(random() * 7)], employees: employees, fiscalYearEnd: ['December', 'March', 'June', 'September'][Math.floor(random() * 4)], ipoDate: new Date(Date.now() - random() * 7300 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], description: `Global leader in ${sectors[Math.floor(random() * sectors.length)].toLowerCase()} with operations across multiple continents` }, fundamentals: { revenue: { current: revenue, yoy_growth: Math.round((random() - 0.3) * 30 * 10) / 10, trailing_12m: revenue, quarterly: [ Math.round(revenue * 0.24 * 100) / 100, Math.round(revenue * 0.25 * 100) / 100, Math.round(revenue * 0.26 * 100) / 100, Math.round(revenue * 0.25 * 100) / 100 ] }, profitability: { ebitda: Math.round(revenue * (0.1 + random() * 0.3)), ebitda_margin: Math.round((10 + random() * 30) * 10) / 10, operating_income: Math.round(revenue * (0.08 + random() * 0.25)), operating_margin: Math.round((8 + random() * 25) * 10) / 10, net_income: Math.round(revenue * (0.05 + random() * 0.20)), net_margin: Math.round((5 + random() * 20) * 10) / 10, roe: Math.round((5 + random() * 30) * 10) / 10, roa: Math.round((3 + random() * 15) * 10) / 10, roic: Math.round((5 + random() * 25) * 10) / 10 }, growth_rates: { revenue_1yr: Math.round((random() - 0.2) * 30 * 10) / 10, revenue_3yr_cagr: Math.round((random() - 0.1) * 25 * 10) / 10, revenue_5yr_cagr: Math.round((random() - 0.1) * 20 * 10) / 10, earnings_1yr: Math.round((random() - 0.3) * 40 * 10) / 10, earnings_3yr_cagr: Math.round((random() - 0.2) * 30 * 10) / 10, earnings_5yr_cagr: Math.round((random() - 0.1) * 25 * 10) / 10 }, balance_sheet: { total_assets: Math.round(revenue * (1.5 + random() * 3)), total_liabilities: Math.round(revenue * (0.8 + random() * 2)), stockholders_equity: Math.round(revenue * (0.5 + random() * 1.5)), cash: Math.round(revenue * (0.1 + random() * 0.5)), debt: Math.round(revenue * (0.2 + random() * 1.2)), working_capital: Math.round(revenue * (0.1 + random() * 0.4)) }, cash_flow: { operating_cf: Math.round(revenue * (0.1 + random() * 0.25)), investing_cf: Math.round(revenue * (-0.15 - random() * 0.15)), financing_cf: Math.round(revenue * (-0.05 + random() * 0.15)), free_cash_flow: Math.round(revenue * (0.05 + random() * 0.20)), fcf_yield: Math.round((3 + random() * 8) * 10) / 10 } }, estimates: { eps: { current_quarter: Math.round((basePrice * 0.01 + random() * basePrice * 0.02) * 100) / 100, next_quarter: Math.round((basePrice * 0.01 + random() * basePrice * 0.025) * 100) / 100, current_year: Math.round((basePrice * 0.04 + random() * basePrice * 0.06) * 100) / 100, next_year: Math.round((basePrice * 0.05 + random() * basePrice * 0.08) * 100) / 100, consensus_growth: Math.round((5 + random() * 20) * 10) / 10, surprise_history: Array.from({length: 4}, () => Math.round((random() - 0.5) * 20 * 10) / 10) }, revenue: { current_quarter: Math.round(revenue * 0.25 * (1 + (random() - 0.3) * 0.1)), next_quarter: Math.round(revenue * 0.26 * (1 + (random() - 0.2) * 0.1)), current_year: Math.round(revenue * (1 + (random() - 0.2) * 0.15)), next_year: Math.round(revenue * (1.05 + random() * 0.15)), consensus_growth: Math.round((3 + random() * 15) * 10) / 10 }, price_targets: { high: Math.round(basePrice * (1.3 + random() * 0.5) * 100) / 100, low: Math.round(basePrice * (0.7 - random() * 0.2) * 100) / 100, mean: Math.round(basePrice * (1 + (random() - 0.5) * 0.3) * 100) / 100, median: Math.round(basePrice * (1 + (random() - 0.5) * 0.25) * 100) / 100, num_analysts: Math.floor(8 + random() * 35) } }, ownership: { institutional: { percentage: Math.round((40 + random() * 50) * 10) / 10, holders: Math.floor(100 + random() * 900), topHolders: Array.from({length: 10}, (_, idx) => ({ name: `${generateName(random).split(' ')[1]} ${institutionalTypes[Math.floor(random() * institutionalTypes.length)]}`, shares: Math.floor(1000000 + random() * 50000000), percentage: Math.round((1 + random() * 8) * 100) / 100, value: Math.round(basePrice * (1000000 + random() * 50000000) / 1000000), changeQoQ: Math.round((random() - 0.5) * 20 * 100) / 100, rank: idx + 1 })) }, insider: { percentage: Math.round((1 + random() * 15) * 10) / 10, recentTransactions: Array.from({length: Math.floor(5 + random() * 10)}, () => ({ date: new Date(Date.now() - random() * 180 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], insider: generateName(random), title: ['CEO', 'CFO', 'COO', 'Director', 'EVP', 'SVP'][Math.floor(random() * 6)], transaction: ['Buy', 'Sell'][Math.floor(random() * 2)], shares: Math.floor(1000 + random() * 100000), price: Math.round(basePrice * (1 + (random() - 0.5) * 0.1) * 100) / 100, value: Math.round(basePrice * (1000 + random() * 100000) / 1000) })) }, buybacks: { active_program: random() > 0.3, authorization: Math.round(revenue * (0.05 + random() * 0.15)), remaining: Math.round(revenue * (0.02 + random() * 0.10)), shares_repurchased_ltm: Math.floor(random() * 10000000) } }, supplyChain: { majorCustomers: Array.from({length: Math.floor(3 + random() * 7)}, () => ({ name: `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Group'][Math.floor(random() * 3)]}`, revenueContribution: Math.round((2 + random() * 15) * 10) / 10, relationship: ['Strategic Partner', 'Key Customer', 'Major Account'][Math.floor(random() * 3)], yearsOfBusiness: Math.floor(1 + random() * 15) })), majorSuppliers: Array.from({length: Math.floor(3 + random() * 7)}, () => ({ name: `${generateName(random).split(' ')[1]} ${['Corp', 'Systems', 'Technologies'][Math.floor(random() * 3)]}`, category: ['Components', 'Raw Materials', 'Services', 'Software'][Math.floor(random() * 4)], dependencyLevel: ['Critical', 'High', 'Medium', 'Low'][Math.floor(random() * 4)], geographicRisk: ['Low', 'Medium', 'High'][Math.floor(random() * 3)] })), geographicExposure: { north_america: Math.round((20 + random() * 60) * 10) / 10, europe: Math.round((10 + random() * 40) * 10) / 10, asia_pacific: Math.round((10 + random() * 50) * 10) / 10, rest_of_world: Math.round((5 + random() * 20) * 10) / 10 } }, analystCoverage: Array.from({length: Math.floor(5 + random() * 20)}, () => ({ firm: analystFirms[Math.floor(random() * analystFirms.length)], analyst: generateName(random), rating: ['Strong Buy', 'Buy', 'Hold', 'Sell', 'Strong Sell'][Math.floor(random() * 5)], priceTarget: Math.round(basePrice * (0.8 + random() * 0.6) * 100) / 100, lastUpdate: new Date(Date.now() - random() * 90 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], confidence: ['High', 'Medium', 'Low'][Math.floor(random() * 3)] })), scrapedAt: new Date().toISOString() }); } return results; } async function generateLSEGData(count, seed) { log.info('Generating LSEG/Refinitiv-style workspace data...'); const random = createSeededRandom(seed); const results = []; const newsSources = ['Reuters', 'Dow Jones', 'PR Newswire', 'Business Wire', 'Bloomberg', 'Financial Times', 'WSJ']; const newsCategories = ['Earnings', 'M&A', 'Regulatory', 'Corporate', 'Market', 'Economic', 'Political', 'ESG']; const dealTypes = ['M&A', 'IPO', 'Secondary Offering', 'Bond Issuance', 'Loan', 'Private Placement', 'Buyout', 'Joint Venture']; const esgCategories = ['Environmental', 'Social', 'Governance']; const controversyTypes = ['Legal', 'Environmental', 'Labor', 'Ethical', 'Regulatory', 'Product']; const regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America', 'Middle East', 'Africa']; for (let i = 0; i < count; i++) { const companyName = `${generateName(random).split(' ')[1]} ${['Corporation', 'Group', 'Holdings', 'International', 'Industries'][Math.floor(random() * 5)]}`; const ticker = `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}`; const basePrice = 10 + random() * 500; results.push({ workspaceId: `LSEG${Date.now()}${i}`, company: { name: companyName, ticker: ticker, ric: `${ticker}.${['N', 'O', 'L', 'T', 'HK'][Math.floor(random() * 5)]}`, permId: `${Math.floor(1000000000 + random() * 9000000000)}`, lei: `${Math.floor(100000000000000000000 + random() * 900000000000000000000)}`, sector: ['Technology', 'Healthcare', 'Financials', 'Energy', 'Industrials'][Math.floor(random() * 5)], region: regions[Math.floor(random() * regions.length)] }, news: { stories: Array.from({length: Math.floor(3 + random() * 12)}, () => ({ headline: `${companyName} ${['announces', 'reports', 'unveils', 'confirms', 'explores'][Math.floor(random() * 5)]} ${newsCategories[Math.floor(random() * newsCategories.length)].toLowerCase()} ${['update', 'initiative', 'strategy', 'partnership', 'results'][Math.floor(random() * 5)]}`, source: newsSources[Math.floor(random() * newsSources.length)], timestamp: new Date(Date.now() - random() * 168 * 60 * 60 * 1000).toISOString(), category: newsCategories[Math.floor(random() * newsCategories.length)], sentiment: { score: Math.round((random() - 0.5) * 2 * 100) / 100, label: ['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative'][Math.floor(random() * 5)], confidence: Math.round((70 + random() * 30) * 10) / 10 }, topics: Array.from({length: Math.floor(2 + random() * 5)}, () => ['Revenue', 'Expansion', 'Innovation', 'Partnership', 'Regulation', 'Sustainability'][Math.floor(random() * 6)] ), entities: { people: Array.from({length: Math.floor(1 + random() * 3)}, () => generateName(random)), organizations: Array.from({length: Math.floor(1 + random() * 4)}, () => `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Group'][Math.floor(random() * 3)]}` ), locations: Array.from({length: Math.floor(1 + random() * 3)}, () => ['New York', 'London', 'Tokyo', 'Singapore', 'Hong Kong', 'Dubai'][Math.floor(random() * 6)] ) }, relevance: Math.round((60 + random() * 40) * 10) / 10, language: ['en', 'en-US', 'en-GB'][Math.floor(random() * 3)], wordCount: Math.floor(200 + random() * 1500) })), realTimeAlerts: Array.from({length: Math.floor(1 + random() * 5)}, () => ({ type: ['Price', 'Volume', 'News', 'Rating', 'Insider'][Math.floor(random() * 5)], severity: ['Critical', 'High', 'Medium', 'Low'][Math.floor(random() * 4)], message: `Alert triggered for ${companyName}`, timestamp: new Date(Date.now() - random() * 24 * 60 * 60 * 1000).toISOString() })) }, deals: { announced: Array.from({length: Math.floor(1 + random() * 8)}, () => ({ dealId: `D${Math.floor(100000000 + random() * 900000000)}`, type: dealTypes[Math.floor(random() * dealTypes.length)], status: ['Announced', 'Pending', 'Completed', 'Withdrawn'][Math.floor(random() * 4)], value: Math.round((50 + random() * 10000) * 10) / 10, currency: 'USD', announceDate: new Date(Date.now() - random() * 730 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], expectedClose: new Date(Date.now() + random() * 365 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], parties: { acquirer: companyName, target: `${generateName(random).split(' ')[1]} ${['Corp', 'Inc', 'Group'][Math.floor(random() * 3)]}`, advisors: { financial: Array.from({length: Math.floor(1 + random() * 3)}, () => ['Goldman Sachs', 'Morgan Stanley', 'JP Morgan', 'Bank of America'][Math.floor(random() * 4)] ), legal: Array.from({length: Math.floor(1 + random() * 2)}, () => ['Wachtell', 'Skadden', 'Sullivan & Cromwell', 'Cleary Gottlieb'][Math.floor(random() * 4)] ) } }, rationale: ['Strategic Expansion', 'Market Entry', 'Technology Acquisition', 'Vertical Integration'][Math.floor(random() * 4)], synergies: Math.round((10 + random() * 500) * 10) / 10, premium: Math.round((10 + random() * 50) * 10) / 10 })), issuances: Array.from({length: Math.floor(1 + random() * 5)}, () => ({ type: ['Investment Grade Bond', 'High Yield Bond', 'Convertible', 'Green Bond'][Math.floor(random() * 4)], amount: Math.round((100 + random() * 5000) * 10) / 10, maturity: Math.floor(3 + random() * 27) + ' years', coupon: Math.round((1 + random() * 8) * 100) / 100, rating: ['AAA', 'AA', 'A', 'BBB', 'BB', 'B'][Math.floor(random() * 6)], issueDate: new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], underwriters: Array.from({length: Math.floor(2 + random() * 5)}, () => ['JP Morgan', 'Bank of America', 'Citi', 'Goldman Sachs', 'Morgan Stanley'][Math.floor(random() * 5)] ) })) }, esg: { scores: { overall: Math.round((30 + random() * 70) * 10) / 10, environmental: Math.round((30 + random() * 70) * 10) / 10, social: Math.round((30 + random() * 70) * 10) / 10, governance: Math.round((30 + random() * 70) * 10) / 10, controversy: Math.round((0 + random() * 100) * 10) / 10 }, percentileRank: { industry: Math.floor(1 + random() * 100), global: Math.floor(1 + random() * 100) }, grade: ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'D'][Math.floor(random() * 9)], categories: esgCategories.map(cat => ({ category: cat, score: Math.round((30 + random() * 70) * 10) / 10, trend: ['Improving', 'Stable', 'Declining'][Math.floor(random() * 3)], keyIssues: Array.from({length: Math.floor(2 + random() * 4)}, () => ['Carbon Emissions', 'Water Usage', 'Diversity', 'Labor Practices', 'Board Independence', 'Executive Pay'][Math.floor(random() * 6)] ) })), controversies: Array.from({length: Math.floor(random() * 4)}, () => ({ type: controversyTypes[Math.floor(random() * controversyTypes.length)], severity: ['Critical', 'High', 'Medium', 'Low'][Math.floor(random() * 4)], date: new Date(Date.now() - random() * 1825 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], description: `${controversyTypes[Math.floor(random() * controversyTypes.length)]} controversy involving ${companyName}`, status: ['Ongoing', 'Resolved', 'Under Investigation'][Math.floor(random() * 3)], impact: Math.round((1 + random() * 10) * 10) / 10 })), sdgAlignment: Array.from({length: Math.floor(3 + random() * 8)}, () => ({ goal: Math.floor(1 + random() * 17), score: Math.round((30 + random() * 70) * 10) / 10 })) }, research: { analystReports: Array.from({length: Math.floor(5 + random() * 15)}, () => ({ firm: ['Goldman Sachs Research', 'Morgan Stanley Research', 'JP Morgan Research'][Math.floor(random() * 3)], analyst: generateName(random), title: `${companyName} - ${['Initiating Coverage', 'Q4 Update', 'Sector Outlook', 'Deep Dive'][Math.floor(random() * 4)]}`, date: new Date(Date.now() - random() * 180 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], rating: ['Overweight', 'Equal-weight', 'Underweight', 'Buy', 'Hold', 'Sell'][Math.floor(random() * 6)], priceTarget: Math.round(basePrice * (0.8 + random() * 0.6) * 100) / 100, pages: Math.floor(15 + random() * 100), keyTakeaways: Array.from({length: 3}, () => ['Strong fundamentals', 'Market expansion opportunity', 'Valuation attractive', 'Execution risk'][Math.floor(random() * 4)] ) })), earnings: { nextDate: new Date(Date.now() + random() * 90 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], consensus: { eps: Math.round((basePrice * 0.02) * 100) / 100, revenue: Math.round((1000 + random() * 50000) * 10) / 10, numEstimates: Math.floor(8 + random() * 30) }, whisperNumber: Math.round((basePrice * 0.021) * 100) / 100 } }, marketData: { price: Math.round(basePrice * 100) / 100, change: Math.round((random() - 0.5) * 10 * 100) / 100, changePercent: Math.round((random() - 0.5) * 5 * 100) / 100, volume: Math.floor(random() * 20000000), marketCap: Math.round(basePrice * (10 + random() * 990) * 100) / 100 + 'B', beta: Math.round((0.5 + random() * 1.5) * 100) / 100, shortInterest: Math.round((1 + random() * 15) * 10) / 10 }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate fMRI (Functional Magnetic Resonance Imaging) brain activity data * Simulates BOLD signal time series and brain voxel coordinates */ async function generateFMRIData(count, seed) { log.info('Generating fMRI brain activity data...'); const random = createSeededRandom(seed); const results = []; const brainRegions = [ { name: 'Dorsolateral Prefrontal Cortex', abbr: 'DLPFC', type: 'cortical', x: [30, 50], y: [20, 40], z: [20, 35] }, { name: 'Anterior Cingulate Cortex', abbr: 'ACC', type: 'cortical', x: [0, 10], y: [30, 45], z: [15, 30] }, { name: 'Amygdala', abbr: 'AMY', type: 'subcortical', x: [20, 30], y: [-10, 5], z: [-15, -5] }, { name: 'Hippocampus', abbr: 'HIP', type: 'subcortical', x: [25, 35], y: [-20, -10], z: [-10, 0] }, { name: 'Primary Motor Cortex', abbr: 'M1', type: 'cortical', x: [35, 45], y: [-15, 0], z: [45, 60] }, { name: 'Primary Visual Cortex', abbr: 'V1', type: 'cortical', x: [10, 25], y: [-90, -75], z: [0, 15] }, { name: 'Thalamus', abbr: 'THA', type: 'subcortical', x: [10, 15], y: [-15, -5], z: [5, 15] }, { name: 'Caudate Nucleus', abbr: 'CAU', type: 'subcortical', x: [12, 18], y: [10, 20], z: [10, 20] } ]; const conditions = ['rest', 'task', 'visual_stim', 'motor_task', 'cognitive_load', 'emotional_stim']; const TR = 2.0; // Repetition time in seconds (standard fMRI) for (let i = 0; i < count; i++) { const region = brainRegions[Math.floor(random() * brainRegions.length)]; const condition = conditions[Math.floor(random() * conditions.length)]; const numTimePoints = 100 + Math.floor(random() * 200); // 100-300 time points // Generate voxel coordinates within brain region const voxelX = Math.floor(region.x[0] + random() * (region.x[1] - region.x[0])); const voxelY = Math.floor(region.y[0] + random() * (region.y[1] - region.y[0])); const voxelZ = Math.floor(region.z[0] + random() * (region.z[1] - region.z[0])); // Generate BOLD signal time series with realistic noise and activation const baseline = 100 + random() * 20; const activationMagnitude = condition === 'rest' ? 0 : (2 + random() * 4); // 2-6% signal change const boldSignal = Array.from({ length: numTimePoints }, (_, t) => { const noise = (random() - 0.5) * 1.5; // Physiological noise const drift = Math.sin(t / numTimePoints * Math.PI) * 0.5; // Scanner drift const activation = condition !== 'rest' ? Math.sin(t / 20) * activationMagnitude : 0; return Math.round((baseline + activation + noise + drift) * 100) / 100; }); // Generate connectivity matrix (correlation with other voxels) const connectivityMatrix = Array.from({ length: 8 }, () => Array.from({ length: 8 }, () => Math.round((random() * 2 - 1) * 100) / 100) ); results.push({ scanId: `fMRI_${Date.now()}_${i}`, subject: { id: `SUB${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`, age: Math.floor(18 + random() * 50), gender: random() > 0.5 ? 'M' : 'F', handedness: random() > 0.1 ? 'right' : 'left' }, acquisition: { scanner: ['Siemens Prisma 3T', 'GE Discovery MR750 3T', 'Philips Ingenia 3T'][Math.floor(random() * 3)], fieldStrength: '3T', TR: TR, TE: Math.round((25 + random() * 10) * 10) / 10, // Echo time (ms) flipAngle: 75 + Math.floor(random() * 15), // degrees voxelSize: [3, 3, 3], // mm slices: 32 + Math.floor(random() * 16) }, voxel: { coordinates: { x: voxelX, y: voxelY, z: voxelZ }, mniCoordinates: { x: voxelX - 45, y: voxelY - 60, z: voxelZ - 35 }, // MNI space region: region.name, regionAbbr: region.abbr, regionType: region.type, hemisphere: voxelX > 45 ? 'right' : 'left' }, timeSeries: { condition, numTimePoints, TR: TR, duration: numTimePoints * TR, boldSignal: boldSignal.slice(0, 50), // Store first 50 points for space fullSeriesStats: { mean: Math.round(boldSignal.reduce((a, b) => a + b, 0) / boldSignal.length * 100) / 100, stdDev: Math.round(Math.sqrt(boldSignal.reduce((sum, val) => sum + Math.pow(val - baseline, 2), 0) / boldSignal.length) * 100) / 100, min: Math.min(...boldSignal), max: Math.max(...boldSignal) } }, activation: { isActive: activationMagnitude > 0, percentSignalChange: Math.round(activationMagnitude * 100) / 100, tStatistic: activationMagnitude > 0 ? Math.round((2 + random() * 4) * 100) / 100 : 0, pValue: activationMagnitude > 0 ? Math.round(random() * 0.05 * 10000) / 10000 : 1, clusterSize: activationMagnitude > 0 ? Math.floor(10 + random() * 200) : 0 }, connectivity: { matrix: connectivityMatrix, meanCorrelation: Math.round(connectivityMatrix[0].reduce((a, b) => a + b, 0) / 8 * 100) / 100, strongestConnection: { region: brainRegions[Math.floor(random() * brainRegions.length)].abbr, correlation: Math.round((0.5 + random() * 0.5) * 100) / 100 } }, quality: { snr: Math.round((20 + random() * 30) * 10) / 10, // Signal-to-noise ratio motion: Math.round(random() * 2 * 100) / 100, // mm displacement artifacts: random() > 0.8 ? ['susceptibility', 'motion'][Math.floor(random() * 2)] : null, qualityRating: ['excellent', 'good', 'fair', 'poor'][Math.floor(random() * 4)] }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate Protein Data Bank (PDB) molecular structure data * Simulates protein atomic coordinates and structural information */ async function generateProteinPDBData(count, seed) { log.info('Generating Protein PDB molecular structure data...'); const random = createSeededRandom(seed); const results = []; const aminoAcids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']; const secondaryStructures = ['helix', 'sheet', 'coil', 'turn']; const chains = ['A', 'B', 'C', 'D', 'E', 'F']; const atomTypes = ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', 'OG']; for (let i = 0; i < count; i++) { const pdbId = `${Math.floor(1000 + random() * 8999)}`; const numResidues = 50 + Math.floor(random() * 450); // 50-500 residues const numChains = 1 + Math.floor(random() * 3); const numAtoms = numResidues * 8; // ~8 atoms per residue average // Generate atom records (sample) const atoms = Array.from({ length: Math.min(50, numAtoms) }, (_, atomIdx) => { const residueIdx = Math.floor(atomIdx / 8) + 1; return { serial: atomIdx + 1, atomName: atomTypes[atomIdx % atomTypes.length], altLoc: '', residueName: aminoAcids[Math.floor(random() * aminoAcids.length)], chainId: chains[Math.floor(random() * numChains)], residueSeq: residueIdx, iCode: '', coordinates: { x: Math.round((random() * 100 - 50) * 1000) / 1000, y: Math.round((random() * 100 - 50) * 1000) / 1000, z: Math.round((random() * 100 - 50) * 1000) / 1000 }, occupancy: Math.round((0.8 + random() * 0.2) * 100) / 100, tempFactor: Math.round((10 + random() * 40) * 100) / 100, // B-factor element: atomTypes[atomIdx % atomTypes.length][0], charge: '' }; }); // Generate secondary structure assignment const secondaryStructureMap = Array.from({ length: numResidues }, () => secondaryStructures[Math.floor(random() * secondaryStructures.length)] ); // Calculate secondary structure percentages const helixCount = secondaryStructureMap.filter(s => s === 'helix').length; const sheetCount = secondaryStructureMap.filter(s => s === 'sheet').length; const coilCount = secondaryStructureMap.filter(s => s === 'coil').length; results.push({ pdbId: pdbId, header: { classification: ['HYDROLASE', 'TRANSFERASE', 'OXIDOREDUCTASE', 'LYASE', 'ISOMERASE', 'LIGASE', 'MEMBRANE PROTEIN', 'SIGNALING PROTEIN'][Math.floor(random() * 8)], depositionDate: new Date(Date.now() - random() * 365 * 10 * 24 * 60 * 60 * 1000).toISOString().split('T')[0], title: `Crystal structure of ${aminoAcids[Math.floor(random() * aminoAcids.length)]} rich domain at ${Math.round((1.5 + random() * 1.5) * 10) / 10}A resolution`, organism: ['Homo sapiens', 'Escherichia coli', 'Saccharomyces cerevisiae', 'Mus musculus'][Math.floor(random() * 4)], expression: ['Escherichia coli', 'Insect cells', 'Mammalian cells', 'Yeast'][Math.floor(random() * 4)] }, structure: { numChains, numResidues, numAtoms, resolution: Math.round((1.5 + random() * 1.5) * 100) / 100, // Angstroms rValue: Math.round((0.15 + random() * 0.15) * 1000) / 1000, rFree: Math.round((0.18 + random() * 0.15) * 1000) / 1000, spaceGroup: ['P 21 21 21', 'P 1 21 1', 'C 2 2 21', 'P 43 21 2'][Math.floor(random() * 4)], unitCell: { a: Math.round((40 + random() * 60) * 100) / 100, b: Math.round((40 + random() * 60) * 100) / 100, c: Math.round((40 + random() * 60) * 100) / 100, alpha: 90, beta: 90 + Math.round(random() * 20), gamma: 90 } }, sequence: { chains: Array.from({ length: numChains }, (_, chainIdx) => ({ chainId: chains[chainIdx], length: Math.floor(numResidues / numChains), sequence: Array.from({ length: 30 }, () => aminoAcids[Math.floor(random() * aminoAcids.length)]).join('-') })) }, secondaryStructure: { helixPercent: Math.round((helixCount / numResidues) * 100), sheetPercent: Math.round((sheetCount / numResidues) * 100), coilPercent: Math.round((coilCount / numResidues) * 100), assignments: secondaryStructureMap.slice(0, 30) // Sample }, atoms: atoms, ligands: random() > 0.3 ? [{ hetId: ['ATP', 'NAD', 'FAD', 'HEM', 'MG', 'ZN', 'CA'][Math.floor(random() * 7)], chainId: chains[Math.floor(random() * numChains)], residueSeq: numResidues + 1, numAtoms: Math.floor(10 + random() * 40), bindingSite: { residues: Array.from({ length: 5 }, () => Math.floor(1 + random() * numResidues)), bindingEnergy: Math.round((-5 - random() * 10) * 100) / 100 // kcal/mol } }] : [], quality: { clashScore: Math.round(random() * 20 * 10) / 10, ramachandranFavored: Math.round((85 + random() * 12) * 10) / 10, ramachandranOutliers: Math.round(random() * 3 * 10) / 10, rotamerOutliers: Math.round(random() * 5 * 10) / 10, cbetaDeviations: Math.floor(random() * 5) }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate Power Grid electrical telemetry data * Simulates 3-phase power, voltage, current, and grid events */ async function generatePowerGridData(count, seed) { log.info('Generating Power Grid telemetry data...'); const random = createSeededRandom(seed); const results = []; const substations = ['North', 'South', 'East', 'West', 'Central', 'Industrial', 'Residential', 'Commercial']; const voltageClasses = [ { nominal: 765000, tolerance: 0.05, name: 'Extra High Voltage' }, { nominal: 345000, tolerance: 0.05, name: 'Extra High Voltage' }, { nominal: 138000, tolerance: 0.06, name: 'High Voltage' }, { nominal: 69000, tolerance: 0.06, name: 'High Voltage' }, { nominal: 13800, tolerance: 0.08, name: 'Medium Voltage' }, { nominal: 480, tolerance: 0.1, name: 'Low Voltage' } ]; const eventTypes = ['normal', 'fault', 'switching', 'load_change', 'voltage_sag', 'voltage_swell', 'harmonic_distortion']; for (let i = 0; i < count; i++) { const voltageClass = voltageClasses[Math.floor(random() * voltageClasses.length)]; const eventType = random() > 0.8 ? eventTypes[1 + Math.floor(random() * (eventTypes.length - 1))] : 'normal'; const baseVoltage = voltageClass.nominal; // 3-phase voltage with realistic variation const phaseA_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100; const phaseB_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100; const phaseC_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100; // Current based on power and voltage const apparentPower = Math.floor(100000 + random() * 50000000); // VA const avgVoltage = (phaseA_V + phaseB_V + phaseC_V) / 3; const baseCurrent = apparentPower / (Math.sqrt(3) * avgVoltage); const phaseA_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100; const phaseB_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100; const phaseC_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100; // Power factor and power calculations const powerFactor = Math.round((0.85 + random() * 0.14) * 1000) / 1000; const activePower = Math.round(apparentPower * powerFactor); const reactivePower = Math.round(Math.sqrt(Math.pow(apparentPower, 2) - Math.pow(activePower, 2))); // Frequency (nominal 60 Hz in US, 50 Hz in Europe) const nominalFreq = random() > 0.5 ? 60 : 50; const frequency = Math.round((nominalFreq + (random() - 0.5) * 0.1) * 1000) / 1000; // Harmonics (Total Harmonic Distortion) const thd_v = Math.round((eventType === 'harmonic_distortion' ? 3 + random() * 5 : random() * 2) * 100) / 100; const thd_i = Math.round((eventType === 'harmonic_distortion' ? 5 + random() * 10 : random() * 3) * 100) / 100; results.push({ recordId: `PMU_${Date.now()}_${i}`, location: { substation: `${substations[Math.floor(random() * substations.length)]} Substation`, pmuId: `PMU${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`, busNumber: Math.floor(1 + random() * 100), voltageClass: voltageClass.name, nominalVoltage: voltageClass.nominal, latitude: Math.round((30 + random() * 20) * 1000000) / 1000000, longitude: Math.round((-100 + random() * 30) * 1000000) / 1000000 }, timestamp: new Date(Date.now() - random() * 3600000).toISOString(), voltage: { phaseA: phaseA_V, phaseB: phaseB_V, phaseC: phaseC_V, neutral: Math.round(Math.abs(phaseA_V + phaseB_V + phaseC_V) / 10 * 100) / 100, lineToLine: { AB: Math.round(Math.sqrt(3) * ((phaseA_V + phaseB_V) / 2) * 100) / 100, BC: Math.round(Math.sqrt(3) * ((phaseB_V + phaseC_V) / 2) * 100) / 100, CA: Math.round(Math.sqrt(3) * ((phaseC_V + phaseA_V) / 2) * 100) / 100 }, unbalance: Math.round(random() * 2 * 100) / 100 // percent }, current: { phaseA: phaseA_I, phaseB: phaseB_I, phaseC: phaseC_I, neutral: Math.round(Math.sqrt(Math.pow(phaseA_I, 2) + Math.pow(phaseB_I, 2) + Math.pow(phaseC_I, 2)) * 100) / 100, unbalance: Math.round(random() * 3 * 100) / 100 }, power: { active: activePower, reactive: reactivePower, apparent: apparentPower, powerFactor: powerFactor, phaseAngle: Math.round((random() * 60 - 30) * 100) / 100 // degrees }, frequency: { value: frequency, rateOfChange: Math.round((random() - 0.5) * 0.1 * 1000) / 1000, // Hz/s deviation: Math.round((frequency - nominalFreq) * 1000) / 1000 }, harmonics: { THD_voltage: thd_v, THD_current: thd_i, dominantHarmonic: Math.floor(3 + random() * 12) * 2 + 1, // Odd harmonics individual: { H3: Math.round(random() * 2 * 100) / 100, H5: Math.round(random() * 3 * 100) / 100, H7: Math.round(random() * 2 * 100) / 100, H11: Math.round(random() * 1 * 100) / 100 } }, phasor: { voltage: { magnitude: Math.round(avgVoltage * 100) / 100, angle: Math.round(random() * 360 * 100) / 100 }, current: { magnitude: Math.round(baseCurrent * 100) / 100, angle: Math.round(random() * 360 * 100) / 100 } }, event: { type: eventType, severity: eventType === 'normal' ? 'none' : ['low', 'medium', 'high', 'critical'][Math.floor(random() * 4)], duration: eventType === 'normal' ? 0 : Math.round(random() * 5000), // ms faultLocation: eventType === 'fault' ? { distance: Math.round(random() * 50 * 100) / 100, // km impedance: Math.round((random() * 10) * 100) / 100 // ohms } : null, switchingOperation: eventType === 'switching' ? { breaker: `CB${Math.floor(1 + random() * 50)}`, status: random() > 0.5 ? 'opened' : 'closed' } : null }, quality: { timeError: Math.round(random() * 1000), // microseconds dataValidity: random() > 0.95 ? 'invalid' : 'valid', synchronizationSource: ['GPS', 'IRIG-B', 'NTP'][Math.floor(random() * 3)], uncertaintyEstimate: Math.round(random() * 0.5 * 1000) / 1000 }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate AIS (Automatic Identification System) maritime ship tracking data * Simulates vessel positions, navigation status, and maritime traffic */ async function generateAISData(count, seed) { log.info('Generating AIS maritime tracking data...'); const random = createSeededRandom(seed); const results = []; const vesselTypes = [ { code: 30, name: 'Fishing' }, { code: 60, name: 'Passenger' }, { code: 70, name: 'Cargo' }, { code: 80, name: 'Tanker' }, { code: 36, name: 'Sailing' }, { code: 37, name: 'Pleasure Craft' }, { code: 52, name: 'Tug' }, { code: 31, name: 'Towing' } ]; const navStatuses = [ 'Under way using engine', 'At anchor', 'Not under command', 'Restricted manoeuvrability', 'Constrained by draught', 'Moored', 'Aground', 'Engaged in fishing', 'Under way sailing' ]; const messageTypes = [1, 2, 3, 5, 18, 19, 21, 24, 27]; const destinations = ['NEW YORK', 'ROTTERDAM', 'SINGAPORE', 'HONG KONG', 'SHANGHAI', 'LOS ANGELES', 'HAMBURG', 'DUBAI', 'TOKYO', 'SOUTHAMPTON', 'PANAMA CANAL', 'SUEZ CANAL']; // Generate realistic shipping lanes const shippingLanes = [ { name: 'North Atlantic', lat: [40, 50], lon: [-60, -10] }, { name: 'Mediterranean', lat: [30, 45], lon: [0, 35] }, { name: 'Panama Approach', lat: [5, 15], lon: [-85, -75] }, { name: 'Malacca Strait', lat: [0, 6], lon: [98, 105] }, { name: 'English Channel', lat: [49, 51], lon: [-5, 2] } ]; for (let i = 0; i < count; i++) { const vesselType = vesselTypes[Math.floor(random() * vesselTypes.length)]; const messageType = messageTypes[Math.floor(random() * messageTypes.length)]; const lane = shippingLanes[Math.floor(random() * shippingLanes.length)]; // Position within shipping lane const latitude = Math.round((lane.lat[0] + random() * (lane.lat[1] - lane.lat[0])) * 1000000) / 1000000; const longitude = Math.round((lane.lon[0] + random() * (lane.lon[1] - lane.lon[0])) * 1000000) / 1000000; // Speed and course const speed = Math.round((random() * 25) * 10) / 10; // knots const course = Math.round(random() * 360 * 10) / 10; // degrees const heading = Math.round((course + (random() - 0.5) * 10) * 10) / 10; results.push({ recordId: `AIS_${Date.now()}_${i}`, vessel: { mmsi: String(200000000 + Math.floor(random() * 799999999)), // Valid MMSI range imo: messageType === 5 ? String(1000000 + Math.floor(random() * 8999999)) : null, // IMO number name: `${['OCEAN', 'PACIFIC', 'ATLANTIC', 'MARINE', 'SEA', 'WAVE'][Math.floor(random() * 6)]} ${['STAR', 'VOYAGER', 'PIONEER', 'SPIRIT', 'VENTURE'][Math.floor(random() * 5)]}`, callSign: `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${Math.floor(1000 + random() * 8999)}`, type: vesselType.name, typeCode: vesselType.code, flag: ['USA', 'UK', 'PANAMA', 'LIBERIA', 'MARSHALL IS', 'SINGAPORE', 'MALTA'][Math.floor(random() * 7)] }, dimensions: { length: Math.floor(50 + random() * 350), // meters beam: Math.floor(10 + random() * 50), // meters draught: Math.round((2 + random() * 15) * 10) / 10, // meters toBow: Math.floor(20 + random() * 150), toStern: Math.floor(20 + random() * 150), toPort: Math.floor(5 + random() * 20), toStarboard: Math.floor(5 + random() * 20) }, position: { latitude, longitude, accuracy: random() > 0.9 ? 'low' : 'high', timestamp: new Date(Date.now() - random() * 300000).toISOString(), // Within last 5 min positioningDevice: ['GPS', 'DGPS', 'Loran-C'][Math.floor(random() * 3)] }, navigation: { status: navStatuses[Math.floor(random() * navStatuses.length)], speed: speed, course: course, heading: heading, rateOfTurn: Math.round((random() - 0.5) * 10 * 100) / 100, // degrees/min destination: messageType === 5 ? destinations[Math.floor(random() * destinations.length)] : null, eta: messageType === 5 ? new Date(Date.now() + (1 + random() * 10) * 24 * 60 * 60 * 1000).toISOString() : null }, message: { type: messageType, repeatIndicator: Math.floor(random() * 4), class: messageType <= 3 ? 'A' : 'B', channel: random() > 0.5 ? 'A' : 'B', timeSlot: Math.floor(random() * 2250) }, safety: { collisionRisk: speed > 0 ? (random() > 0.85 ? 'high' : random() > 0.6 ? 'medium' : 'low') : 'none', closestApproach: speed > 0 ? { distance: Math.round((0.1 + random() * 10) * 100) / 100, // nautical miles time: Math.round((5 + random() * 55)), // minutes vesselMMSI: String(200000000 + Math.floor(random() * 799999999)) } : null, inShippingLane: random() > 0.2, weatherConditions: { seaState: Math.floor(random() * 9), // Douglas scale 0-9 visibility: Math.round((1 + random() * 9) * 10) / 10, // nautical miles windSpeed: Math.round(random() * 40) // knots } }, routing: { shippingLane: lane.name, nextWaypoint: { latitude: Math.round((latitude + (random() - 0.5) * 2) * 1000000) / 1000000, longitude: Math.round((longitude + (random() - 0.5) * 2) * 1000000) / 1000000, distance: Math.round((10 + random() * 200) * 10) / 10, // nautical miles eta: new Date(Date.now() + random() * 86400000).toISOString() }, routeDeviation: Math.round(random() * 5 * 100) / 100, // nautical miles trafficDensity: ['low', 'medium', 'high', 'very high'][Math.floor(random() * 4)] }, scrapedAt: new Date().toISOString() }); } return results; } /** * Generate Radar data (weather and vehicle detection) * Simulates reflectivity, velocity, and Doppler measurements */ async function generateRadarData(count, seed) { log.info('Generating Radar detection data...'); const random = createSeededRandom(seed); const results = []; const radarTypes = ['weather', 'vehicle', 'marine', 'air_traffic']; const weatherTypes = ['clear', 'rain', 'snow', 'hail', 'storm', 'tornado']; const vehicleTypes = ['car', 'truck', 'motorcycle', 'bicycle', 'pedestrian']; const precipTypes = ['none', 'drizzle', 'rain', 'heavy_rain', 'snow', 'sleet', 'hail', 'mixed']; for (let i = 0; i < count; i++) { const radarType = radarTypes[Math.floor(random() * radarTypes.length)]; const isWeather = radarType === 'weather'; // Range gate parameters const range = Math.round((0.1 + random() * 50) * 100) / 100; // km const azimuth = Math.round(random() * 360 * 10) / 10; // degrees const elevation = Math.round((random() * 20 - 5) * 10) / 10; // degrees // Reflectivity (dBZ) - weather radar const reflectivity = isWeather ? Math.round((-20 + random() * 80) * 10) / 10 // -20 to 60 dBZ : Math.round((10 + random() * 30) * 10) / 10; // Vehicle radar // Doppler velocity const velocity = Math.round((random() * 60 - 30) * 10) / 10; // m/s // Weather-specific data const weatherData = isWeather ? { precipitationType: precipTypes[Math.floor(random() * precipTypes.length)], precipitationRate: Math.round(random() * 100 * 10) / 10, // mm/hr stormCell: reflectivity > 45 ? { id: `CELL${Math.floor(100 + random() * 899)}`, top: Math.round((5 + random() * 15) * 100) / 100, // km vil: Math.round(random() * 80), // kg/m² severity: reflectivity > 55 ? 'severe' : 'moderate', movement: { direction: Math.round(random() * 360), speed: Math.round((10 + random() * 40) * 10) / 10 // km/h } } : null, echoTop: Math.round((2 + random() * 18) * 100) / 100, // km verticalIntegratedLiquid: Math.round(random() * 50), // kg/m² hydrometeorClassification: ['biological', 'anomalous_prop', 'ice_crystals', 'dry_snow', 'wet_snow', 'light_rain', 'moderate_rain', 'heavy_rain', 'hail', 'big_drops'][Math.floor(random() * 10)] } : null; // Vehicle detection data const vehicleData = !isWeather ? { detections: Array.from({ length: Math.floor(1 + random() * 5) }, () => ({ type: vehicleTypes[Math.floor(random() * vehicleTypes.length)], range: Math.round((2 + random() * 200) * 10) / 10, // meters azimuth: Math.round(random() * 180 * 10) / 10, // degrees velocity: Math.round((random() * 50) * 10) / 10, // m/s rcs: Math.round((random() * 40 - 10) * 10) / 10, // dBsm (radar cross section) confidence: Math.round((0.5 + random() * 0.5) * 100) / 100, trackId: Math.floor(1000 + random() * 8999) })), trackingQuality: ['excellent', 'good', 'fair', 'poor'][Math.floor(random() * 4)], multipath: random() > 0.8, clutter: random() > 0.7 } : null; results.push({ recordId: `RADAR_${Date.now()}_${i}`, radar: { id: `RADAR${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`, type: radarType, location: { latitude: Math.round((25 + random() * 25) * 1000000) / 1000000, longitude: Math.round((-125 + random() * 50) * 1000000) / 1000000, altitude: Math.round(random() * 2000), // meters name: `${['North', 'South', 'East', 'West', 'Central'][Math.floor(random() * 5)]} Site` }, specifications: { frequency: radarType === 'weather' ? '2.7-3.0 GHz (S-band)' : '76-81 GHz (W-band)', wavelength: radarType === 'weather' ? '10 cm' : '4 mm', beamWidth: Math.round((0.5 + random() * 2) * 10) / 10, // degrees pulseWidth: Math.round((0.5 + random() * 2) * 100) / 100, // microseconds prf: Math.round((300 + random() * 1700)), // Hz (pulse repetition frequency) maxRange: radarType === 'weather' ? 250 : 150, // km rangeResolution: Math.round((50 + random() * 200)), // meters mode: ['surveillance', 'tracking', 'doppler'][Math.floor(random() * 3)] } }, measurement: { timestamp: new Date(Date.now() - random() * 300000).toISOString(), scanNumber: Math.floor(1 + random() * 1000), elevationAngle: elevation, azimuthAngle: azimuth, range: range, gateSpacing: Math.round((100 + random() * 150)), // meters reflectivity: reflectivity, velocity: velocity, spectrumWidth: Math.round((1 + random() * 10) * 10) / 10, // m/s correlation: Math.round((0.7 + random() * 0.3) * 1000) / 1000, snr: Math.round((5 + random() * 35) * 10) / 10, // dB zdr: isWeather ? Math.round((random() * 6 - 1) * 10) / 10 : null, // Differential reflectivity (dB) kdp: isWeather ? Math.round((random() * 5) * 100) / 100 : null, // Specific differential phase (deg/km) rhohv: isWeather ? Math.round((0.7 + random() * 0.3) * 1000) / 1000 : null // Correlation coefficient }, weather: weatherData, vehicle: vehicleData, doppler: { velocitySpectrum: Array.from({ length: 16 }, () => Math.round(random() * 100)), nyquistVelocity: Math.round((10 + random() * 20) * 10) / 10, // m/s aliasing: Math.abs(velocity) > 25, spectralWidth: Math.round((1 + random() * 8) * 10) / 10 }, quality: { clutter: random() > 0.7 ? 'high' : random() > 0.4 ? 'medium' : 'low', groundClutterSuppression: random() > 0.5, anomalousPropagation: random() > 0.9, blockage: random() > 0.85, calibrationStatus: random() > 0.95 ? 'needs_cal' : 'ok', dataQualityIndex: Math.round((0.6 + random() * 0.4) * 100) / 100 }, scrapedAt: new Date().toISOString() }); } return results; } // ============================================ // PRIORITY 2: EXOTIC DATA GENERATORS // ============================================ async function generateSCADAData(count, seed) { log.info('Generating SCADA/Industrial control data...'); const random = createSeededRandom(seed); const results = []; const equipment = { pump: { type: 'PUMP', maxPressure: 150, maxFlow: 500, units: { pressure: 'PSI', flow: 'GPM' } }, valve: { type: 'VALVE', positions: ['OPEN', 'CLOSED', 'THROTTLING'], units: { position: '%' } }, motor: { type: 'MOTOR', maxSpeed: 1800, maxCurrent: 50, units: { speed: 'RPM', current: 'A' } }, tank: { type: 'TANK', maxLevel: 100, maxVolume: 10000, units: { level: '%', volume: 'GAL' } }, heater: { type: 'HEATER', maxTemp: 300, maxPower: 100, units: { temp: 'F', power: 'kW' } } }; const equipmentTypes = Object.keys(equipment); const alarmTypes = ['HIGH_LIMIT', 'LOW_LIMIT', 'RATE_OF_CHANGE', 'DEVIATION', 'COMM_FAILURE']; const qualityCodes = ['GOOD', 'BAD', 'UNCERTAIN', 'FORCED']; for (let i = 0; i < count; i++) { const eqType = equipmentTypes[Math.floor(random() * equipmentTypes.length)]; const eqConfig = equipment[eqType]; const timestamp = new Date(Date.now() - random() * 24 * 60 * 60 * 1000); const processVars = {}; if (eqType === 'pump') { processVars.pressure = Math.round((random() * eqConfig.maxPressure) * 10) / 10; processVars.flow = Math.round((random() * eqConfig.maxFlow) * 10) / 10; processVars.vibration = Math.round((random() * 10) * 100) / 100; } else if (eqType === 'valve') { processVars.position = Math.round((random() * 100) * 10) / 10; processVars.command = Math.round((random() * 100) * 10) / 10; processVars.feedback = processVars.command + (random() - 0.5) * 2; } else if (eqType === 'motor') { processVars.speed = Math.round((random() * eqConfig.maxSpeed) * 10) / 10; processVars.current = Math.round((random() * eqConfig.maxCurrent) * 10) / 10; processVars.torque = Math.round((random() * 100) * 10) / 10; } else if (eqType === 'tank') { processVars.level = Math.round((random() * eqConfig.maxLevel) * 10) / 10; processVars.volume = Math.round((processVars.level / 100 * eqConfig.maxVolume) * 10) / 10; processVars.temperature = Math.round((60 + random() * 100) * 10) / 10; } else if (eqType === 'heater') { processVars.temperature = Math.round((60 + random() * eqConfig.maxTemp) * 10) / 10; processVars.setpoint = Math.round((100 + random() * 200) * 10) / 10; processVars.power = Math.round((random() * eqConfig.maxPower) * 10) / 10; } const activeAlarms = []; if (random() > 0.85) { const alarmType = alarmTypes[Math.floor(random() * alarmTypes.length)]; activeAlarms.push({ type: alarmType, priority: Math.floor(1 + random() * 4), message: eqType.toUpperCase() + '_' + (i + 1) + ': ' + alarmType, acknowledgedAt: random() > 0.5 ? new Date(timestamp.getTime() + random() * 60000).toISOString() : null }); } results.push({ tagId: eqType.toUpperCase() + '_' + String(i + 1).padStart(4, '0'), equipmentType: eqConfig.type, location: 'AREA_' + (Math.floor(random() * 5) + 1), timestamp: timestamp.toISOString(), processVariables: processVars, plcRegisters: { holding: Array.from({ length: 8 }, () => Math.floor(random() * 65536)), input: Array.from({ length: 4 }, () => Math.floor(random() * 65536)), coil: Array.from({ length: 4 }, () => random() > 0.5) }, controlOutputs: { analogOut: Math.round((random() * 100) * 10) / 10, digitalOut: random() > 0.5, mode: ['AUTO', 'MANUAL', 'CASCADE'][Math.floor(random() * 3)] }, setpoints: Object.keys(processVars).reduce((acc, key) => { if (typeof processVars[key] === 'number') { acc[key] = Math.round((processVars[key] * (0.9 + random() * 0.2)) * 10) / 10; } return acc; }, {}), alarms: activeAlarms, qualityCode: qualityCodes[Math.floor(random() * qualityCodes.length)], opcua: { nodeId: 'ns=2;s=' + eqType.toUpperCase() + '.' + (i + 1), browseName: eqType.toUpperCase() + '_' + (i + 1), statusCode: random() > 0.95 ? 'Bad' : 'Good' }, scrapedAt: new Date().toISOString() }); } return results; } async function generateLiDARData(count, seed) { log.info('Generating LiDAR point cloud data...'); const random = createSeededRandom(seed); const results = []; const scanPatterns = ['ROTATING_360', 'SOLID_STATE', 'FLASH', 'MEMS_MIRROR']; const classifications = [ { code: 0, name: 'NEVER_CLASSIFIED' }, { code: 1, name: 'UNCLASSIFIED' }, { code: 2, name: 'GROUND' }, { code: 3, name: 'LOW_VEGETATION' }, { code: 4, name: 'MEDIUM_VEGETATION' }, { code: 5, name: 'HIGH_VEGETATION' }, { code: 6, name: 'BUILDING' }, { code: 7, name: 'LOW_POINT' }, { code: 9, name: 'WATER' }, { code: 13, name: 'VEHICLE' }, { code: 14, name: 'PEDESTRIAN' } ]; const objectTypes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST', 'OBSTACLE', 'TRAFFIC_SIGN']; for (let i = 0; i < count; i++) { const timestamp = new Date(Date.now() - random() * 3600 * 1000); const numPoints = Math.floor(10000 + random() * 90000); const scanPattern = scanPatterns[Math.floor(random() * scanPatterns.length)]; const points = Array.from({ length: Math.min(numPoints, 1000) }, (_, idx) => { const angle = (idx / 1000) * 2 * Math.PI; const distance = 2 + random() * 100; const classification = classifications[Math.floor(random() * classifications.length)]; return { x: Math.round((distance * Math.cos(angle)) * 1000) / 1000, y: Math.round((distance * Math.sin(angle)) * 1000) / 1000, z: Math.round(((random() - 0.5) * 10) * 1000) / 1000, intensity: Math.floor(random() * 255), returnNumber: Math.floor(1 + random() * 4), numberOfReturns: Math.floor(1 + random() * 5), classification: classification.code, classificationName: classification.name, scanAngle: Math.round((random() - 0.5) * 60 * 10) / 10, rgb: random() > 0.5 ? { r: Math.floor(random() * 255), g: Math.floor(random() * 255), b: Math.floor(random() * 255) } : null }; }); const detections = Array.from({ length: Math.floor(random() * 10) }, () => { const objType = objectTypes[Math.floor(random() * objectTypes.length)]; const centerX = (random() - 0.5) * 100; const centerY = (random() - 0.5) * 100; const centerZ = random() * 2; return { objectType: objType, confidence: Math.round((0.5 + random() * 0.5) * 1000) / 1000, boundingBox: { center: { x: centerX, y: centerY, z: centerZ }, dimensions: { length: Math.round((2 + random() * 8) * 100) / 100, width: Math.round((1.5 + random() * 3) * 100) / 100, height: Math.round((1 + random() * 3) * 100) / 100 }, rotation: Math.round((random() * 360) * 10) / 10 }, velocity: objType !== 'TRAFFIC_SIGN' && objType !== 'OBSTACLE' ? { x: Math.round(((random() - 0.5) * 30) * 100) / 100, y: Math.round(((random() - 0.5) * 30) * 100) / 100, z: Math.round(((random() - 0.5) * 2) * 100) / 100 } : null, trackId: 'TRK_' + Math.floor(random() * 1000) }; }); results.push({ scanId: 'SCAN_' + timestamp.getTime() + '_' + i, timestamp: timestamp.toISOString(), sensorId: 'LIDAR_' + (Math.floor(random() * 10) + 1), scanPattern, pointCloud: { numPoints, samplePoints: points.slice(0, 100), format: 'LAS_1.4', coordinateSystem: 'WGS84_UTM', bounds: { minX: Math.min(...points.map(p => p.x)), maxX: Math.max(...points.map(p => p.x)), minY: Math.min(...points.map(p => p.y)), maxY: Math.max(...points.map(p => p.y)), minZ: Math.min(...points.map(p => p.z)), maxZ: Math.max(...points.map(p => p.z)) } }, detections, metadata: { horizontalFov: Math.round((scanPattern === 'ROTATING_360' ? 360 : 120) * 10) / 10, verticalFov: Math.round((30 + random() * 40) * 10) / 10, range: Math.round((50 + random() * 200) * 10) / 10, accuracy: Math.round((0.01 + random() * 0.05) * 1000) / 1000, scanRate: Math.round((5 + random() * 15) * 10) / 10 }, scrapedAt: new Date().toISOString() }); } return results; } async function generateCANBusData(count, seed) { log.info('Generating CAN bus vehicle data...'); const random = createSeededRandom(seed); const results = []; const ecuTypes = { engine: { arbitrationId: 0x0C0, signals: ['rpm', 'throttle', 'coolant_temp', 'oil_pressure'] }, transmission: { arbitrationId: 0x0D0, signals: ['gear', 'clutch', 'shift_position'] }, chassis: { arbitrationId: 0x1A0, signals: ['speed', 'brake_pressure', 'steering_angle', 'abs_active'] }, body: { arbitrationId: 0x2C0, signals: ['door_driver', 'door_passenger', 'lights', 'windows'] }, battery: { arbitrationId: 0x3E0, signals: ['voltage', 'current', 'soc', 'temperature'] } }; const ecuNames = Object.keys(ecuTypes); for (let i = 0; i < count; i++) { const timestamp = new Date(Date.now() - random() * 3600 * 1000); const ecuName = ecuNames[Math.floor(random() * ecuNames.length)]; const ecu = ecuTypes[ecuName]; const signals = {}; if (ecuName === 'engine') { signals.rpm = Math.floor(800 + random() * 6000); signals.throttle = Math.round((random() * 100) * 10) / 10; signals.coolant_temp = Math.round((70 + random() * 50) * 10) / 10; signals.oil_pressure = Math.round((20 + random() * 80) * 10) / 10; signals.intake_temp = Math.round((20 + random() * 80) * 10) / 10; signals.maf = Math.round((10 + random() * 200) * 10) / 10; } else if (ecuName === 'transmission') { signals.gear = Math.floor(random() * 6) + 1; signals.clutch = Math.round((random() * 100) * 10) / 10; signals.shift_position = ['P', 'R', 'N', 'D', 'S'][Math.floor(random() * 5)]; signals.torque_converter = Math.round((random() * 100) * 10) / 10; } else if (ecuName === 'chassis') { signals.speed = Math.round((random() * 120) * 10) / 10; signals.brake_pressure = Math.round((random() * 2000) * 10) / 10; signals.steering_angle = Math.round(((random() - 0.5) * 900) * 10) / 10; signals.abs_active = random() > 0.9; signals.traction_control = random() > 0.85; signals.wheel_speed_fl = Math.round((signals.speed * (0.95 + random() * 0.1)) * 10) / 10; signals.wheel_speed_fr = Math.round((signals.speed * (0.95 + random() * 0.1)) * 10) / 10; signals.wheel_speed_rl = Math.round((signals.speed * (0.95 + random() * 0.1)) * 10) / 10; signals.wheel_speed_rr = Math.round((signals.speed * (0.95 + random() * 0.1)) * 10) / 10; } else if (ecuName === 'body') { signals.door_driver = random() > 0.9; signals.door_passenger = random() > 0.9; signals.door_rear_left = random() > 0.95; signals.door_rear_right = random() > 0.95; signals.trunk = random() > 0.98; signals.lights = ['OFF', 'PARKING', 'LOW_BEAM', 'HIGH_BEAM'][Math.floor(random() * 4)]; signals.windows = { driver: Math.floor(random() * 100), passenger: Math.floor(random() * 100), rear_left: Math.floor(random() * 100), rear_right: Math.floor(random() * 100) }; } else if (ecuName === 'battery') { signals.voltage = Math.round((12 + random() * 3) * 100) / 100; signals.current = Math.round(((random() - 0.5) * 200) * 10) / 10; signals.soc = Math.round((20 + random() * 80) * 10) / 10; signals.temperature = Math.round((15 + random() * 40) * 10) / 10; } const dataBytes = Array.from({ length: 8 }, () => Math.floor(random() * 256)); results.push({ messageId: 'CAN_' + timestamp.getTime() + '_' + i, timestamp: timestamp.toISOString(), arbitrationId: '0x' + ecu.arbitrationId.toString(16).toUpperCase().padStart(3, '0'), ecuName: ecuName.toUpperCase(), dlc: 8, data: dataBytes.map(b => '0x' + b.toString(16).toUpperCase().padStart(2, '0')).join(' '), signals, dbcDecoded: { messageName: ecuName.toUpperCase() + '_STATUS', cycletime: Math.floor(10 + random() * 90), signalCount: Object.keys(signals).length }, busLoad: Math.round((random() * 100) * 10) / 10, errorFrames: Math.floor(random() * 5), scrapedAt: new Date().toISOString() }); } return results; } async function generateGenomicVCFData(count, seed) { log.info('Generating genomic VCF variant data...'); const random = createSeededRandom(seed); const results = []; const chromosomes = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT']; const bases = ['A', 'C', 'G', 'T']; const consequences = ['MISSENSE', 'SYNONYMOUS', 'NONSENSE', 'FRAMESHIFT', 'SPLICE_SITE', 'INTRONIC', 'UTR_5', 'UTR_3', 'INTERGENIC']; const impacts = ['HIGH', 'MODERATE', 'LOW', 'MODIFIER']; const filters = ['PASS', 'LOW_QUAL', 'STRAND_BIAS', 'LOW_DEPTH']; const genotypes = ['0/0', '0/1', '1/1', '0/2', '1/2']; for (let i = 0; i < count; i++) { const chrom = chromosomes[Math.floor(random() * chromosomes.length)]; const pos = Math.floor(1000000 + random() * 200000000); const ref = bases[Math.floor(random() * bases.length)]; const alt = bases.filter(b => b !== ref)[Math.floor(random() * 3)]; const qual = Math.round((random() * 1000) * 10) / 10; const filter = qual > 30 ? 'PASS' : filters[Math.floor(random() * filters.length)]; const genotype = genotypes[Math.floor(random() * genotypes.length)]; const geneNames = ['BRCA1', 'TP53', 'EGFR', 'KRAS', 'PTEN', 'MYC', 'NOTCH1', 'APC', 'RB1', 'VHL', 'CDKN2A', 'PIK3CA']; const gene = geneNames[Math.floor(random() * geneNames.length)]; const consequence = consequences[Math.floor(random() * consequences.length)]; const impact = impacts[Math.floor(random() * impacts.length)]; results.push({ variantId: 'VAR_' + chrom + '_' + pos + '_' + i, vcfRecord: { chrom, pos, id: random() > 0.7 ? ('rs' + Math.floor(1000000 + random() * 99000000)) : '.', ref, alt, qual, filter, info: { DP: Math.floor(10 + random() * 200), AF: Math.round((random()) * 1000) / 1000, AC: Math.floor(1 + random() * 10), AN: Math.floor(10 + random() * 100), BaseQRankSum: Math.round(((random() - 0.5) * 10) * 100) / 100, MQ: Math.round((40 + random() * 20) * 10) / 10, MQRankSum: Math.round(((random() - 0.5) * 5) * 100) / 100, ReadPosRankSum: Math.round(((random() - 0.5) * 5) * 100) / 100 }, format: ['GT', 'DP', 'GQ', 'AD'], samples: [{ GT: genotype, DP: Math.floor(10 + random() * 100), GQ: Math.floor(random() * 99), AD: genotype === '0/1' ? (Math.floor(random() * 50) + ',' + Math.floor(random() * 50)) : genotype === '1/1' ? ('0,' + Math.floor(random() * 100)) : (Math.floor(random() * 100) + ',0') }] }, annotation: { gene, transcript: gene + '-001', consequence, impact, proteinChange: consequence === 'MISSENSE' ? ('p.' + ['Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu'][Math.floor(random() * 7)] + Math.floor(1 + random() * 500) + ['Val', 'Leu', 'Ile', 'Met'][Math.floor(random() * 4)]) : null, cdnaChange: 'c.' + Math.floor(1 + random() * 3000) + ref + '>' + alt, exon: consequence !== 'INTRONIC' ? (Math.floor(1 + random() * 20) + '/20') : null }, populationFrequencies: { gnomAD_AF: Math.round((random() * 0.1) * 100000) / 100000, gnomAD_AF_afr: Math.round((random() * 0.1) * 100000) / 100000, gnomAD_AF_eas: Math.round((random() * 0.1) * 100000) / 100000, gnomAD_AF_nfe: Math.round((random() * 0.1) * 100000) / 100000, ExAC_AF: Math.round((random() * 0.1) * 100000) / 100000, '1000g_AF': Math.round((random() * 0.1) * 100000) / 100000 }, predictions: { SIFT: random() > 0.5 ? 'TOLERATED' : 'DELETERIOUS', SIFT_score: Math.round((random()) * 1000) / 1000, PolyPhen: random() > 0.5 ? 'BENIGN' : 'PROBABLY_DAMAGING', PolyPhen_score: Math.round((random()) * 1000) / 1000, CADD_phred: Math.round((random() * 40) * 10) / 10, GERP_RS: Math.round(((random() - 0.5) * 12) * 100) / 100 }, clinicalSignificance: { clinvar: ['BENIGN', 'LIKELY_BENIGN', 'UNCERTAIN', 'LIKELY_PATHOGENIC', 'PATHOGENIC'][Math.floor(random() * 5)], reviewStatus: ['NO_ASSERTION', 'SINGLE_SUBMITTER', 'MULTIPLE_SUBMITTERS', 'EXPERT_PANEL'][Math.floor(random() * 4)], conditions: random() > 0.7 ? ['Hereditary cancer syndrome', 'Familial adenomatous polyposis'][Math.floor(random() * 2)] : [] }, scrapedAt: new Date().toISOString() }); } return results; } async function generateSatelliteData(count, seed) { log.info('Generating satellite multi-spectral imagery data...'); const random = createSeededRandom(seed); const results = []; const satellites = ['Landsat-8', 'Landsat-9', 'Sentinel-2A', 'Sentinel-2B', 'MODIS', 'WorldView-3', 'Planet']; const bands = { 'Landsat-8': ['Coastal', 'Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'Cirrus', 'TIR1', 'TIR2'], 'Sentinel-2A': ['Coastal', 'Blue', 'Green', 'Red', 'RedEdge1', 'RedEdge2', 'RedEdge3', 'NIR', 'SWIR1', 'SWIR2'], 'MODIS': ['Red', 'NIR', 'Blue', 'Green', 'SWIR', 'TIR'], 'WorldView-3': ['Coastal', 'Blue', 'Green', 'Yellow', 'Red', 'RedEdge', 'NIR1', 'NIR2'], 'Planet': ['Blue', 'Green', 'Red', 'NIR'] }; const processingLevels = ['L1C', 'L1T', 'L2A', 'L2SP']; for (let i = 0; i < count; i++) { const satellite = satellites[Math.floor(random() * satellites.length)]; const satelliteBands = bands[satellite] || bands['Landsat-8']; const timestamp = new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000); const lat = (random() - 0.5) * 180; const lon = (random() - 0.5) * 360; const cloudCover = Math.round((random() * 100) * 10) / 10; const pixelValues = {}; satelliteBands.forEach(band => { let maxValue = 65535; if (band.includes('TIR')) { maxValue = 40000; } pixelValues[band] = Math.floor(random() * maxValue); }); const red = pixelValues['Red'] || 0; const nir = pixelValues['NIR'] || pixelValues['NIR1'] || 0; const ndvi = nir + red !== 0 ? Math.round(((nir - red) / (nir + red)) * 1000) / 1000 : 0; const evi = nir + red !== 0 ? Math.round((2.5 * (nir - red) / (nir + 6 * red - 7.5 * (pixelValues['Blue'] || 0) + 1)) * 1000) / 1000 : 0; results.push({ sceneId: satellite.replace('-', '') + '_' + timestamp.getTime() + '_' + i, satellite, sensor: satellite.includes('Landsat') ? 'OLI/TIRS' : satellite.includes('Sentinel') ? 'MSI' : 'Unknown', timestamp: timestamp.toISOString(), acquisitionDate: timestamp.toISOString().split('T')[0], processingLevel: processingLevels[Math.floor(random() * processingLevels.length)], location: { centerLat: Math.round(lat * 100000) / 100000, centerLon: Math.round(lon * 100000) / 100000, path: Math.floor(1 + random() * 233), row: Math.floor(1 + random() * 248), wrs: Math.floor(1 + random() * 233) + '/' + Math.floor(1 + random() * 248) }, geometry: { type: 'Polygon', coordinates: [[ [lon, lat], [lon + 0.1, lat], [lon + 0.1, lat + 0.1], [lon, lat + 0.1], [lon, lat] ]] }, bands: satelliteBands.map(bandName => ({ name: bandName, wavelength: bandName === 'Blue' ? '0.45-0.51' : bandName === 'Green' ? '0.53-0.59' : bandName === 'Red' ? '0.64-0.67' : bandName === 'NIR' || bandName === 'NIR1' ? '0.85-0.88' : bandName === 'SWIR1' ? '1.57-1.65' : bandName === 'SWIR2' ? '2.11-2.29' : bandName.includes('TIR') ? '10.6-12.5' : '0.43-0.45', resolution: satellite.includes('Landsat') ? 30 : satellite.includes('Sentinel') ? 10 : 250, pixelValue: pixelValues[bandName], units: bandName.includes('TIR') ? 'Kelvin' : 'DN' })), cloudCover: { percentage: cloudCover, level: cloudCover < 10 ? 'CLEAR' : cloudCover < 30 ? 'PARTLY_CLOUDY' : cloudCover < 70 ? 'MOSTLY_CLOUDY' : 'OVERCAST', cloudMask: Array.from({ length: 100 }, () => random() < cloudCover / 100) }, indices: { NDVI: ndvi, EVI: evi, NDWI: pixelValues['Green'] && pixelValues['NIR'] ? Math.round(((pixelValues['Green'] - pixelValues['NIR']) / (pixelValues['Green'] + pixelValues['NIR'])) * 1000) / 1000 : 0, SAVI: nir + red !== 0 ? Math.round((1.5 * (nir - red) / (nir + red + 0.5)) * 1000) / 1000 : 0 }, metadata: { sunElevation: Math.round((30 + random() * 60) * 100) / 100, sunAzimuth: Math.round((random() * 360) * 100) / 100, viewAngle: Math.round((random() * 30) * 100) / 100, resolution: satellite.includes('WorldView') ? 1.24 : satellite.includes('Planet') ? 3 : satellite.includes('Sentinel') ? 10 : 30, format: 'GeoTIFF', projection: 'EPSG:4326', tileId: 'T' + Math.floor(10 + random() * 50) + ['A', 'B', 'C', 'D'][Math.floor(random() * 4)] }, qualityAssessment: { overallQuality: ['EXCELLENT', 'GOOD', 'FAIR', 'POOR'][Math.floor(random() * 4)], radiometricQuality: Math.round((random() * 10) * 10) / 10, geometricQuality: Math.round((random() * 10) * 10) / 10, artifacts: random() > 0.8, stripingDetected: random() > 0.95 }, scrapedAt: new Date().toISOString() }); } return results; }