// Listing header left over from the file viewer: 3897 lines, 167 KiB, Plaintext.
import { Actor, log } from 'apify';
|
|
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
import { createRequire } from 'module';
|
|
import { integrateActorData, SUPPORTED_ACTORS, USE_CASE_TEMPLATES, getTemplate, listSupportedActors, listTemplates } from './integrations.js';
|
|
import { addEmbeddingsToRecords, generateRandomEmbedding, EMBEDDING_MODELS } from './embeddings.js';
|
|
import { MemorySession, saveToMemorySession, loadFromMemorySession } from '../../../shared/memory-persistence.js';
|
|
|
|
// CJS import workaround for the RuvLLM native extension: ES modules have no
// ambient `require`, so one is created relative to this module's URL.
const require = createRequire(import.meta.url);

// Optional RuvLLM handles. All three start as null; `ruvllm` is assigned when
// the package loads, and the SONA objects are created later only if RuvLLM is
// present and SONA is enabled in the input.
let ruvllm = null;
let sonaCoordinator = null;
let trajectoryBuilder = null;
|
|
|
|
/**
 * Best-effort wrapper around `Actor.charge`.
 *
 * Charging failures never propagate: monetization may not be configured for
 * the current run, so a failed charge is only reported at debug level.
 *
 * @param {string} eventName - Name of the pay-per-event charge.
 * @param {number} [count=1] - Number of events to charge.
 * @returns {Promise<void>} Always resolves, even when charging fails.
 */
async function safeCharge(eventName, count = 1) {
    try {
        await Actor.charge({ eventName, count });
    } catch (e) {
        // A thrown value is not guaranteed to be an Error; reading `.message`
        // off `undefined`/`null` would throw from inside this handler.
        const reason = e?.message ?? String(e);
        log.debug?.(`Charge skipped for ${eventName}: ${reason}`);
    }
}
|
|
|
|
// Try to load the optional RuvLLM package. When it cannot be loaded the
// script continues with `ruvllm` left as null and only logs a warning.
try {
    ruvllm = require('@ruvector/ruvllm');
    log.info('RuvLLM loaded successfully - TRM/SONA self-learning enabled');
} catch (e) {
    // `e` may not be an Error instance; fall back to the raw value.
    log.warning(`RuvLLM not available: ${e?.message ?? e}. Using standard generation.`);
}

// Initialize Actor
await Actor.init();
|
|
|
|
try {
|
|
// Get input. A missing (falsy) input is replaced by an empty object so that
// every option below falls back to its default.
const input = (await Actor.getInput()) || {};

// NOTE(review): `outputFormat` and `websiteType` are destructured but not
// read anywhere else inside this try block — confirm whether they are still
// needed or are kept only for input-schema compatibility.
const {
    // Mode selection
    mode = 'generate',

    // Integration parameters
    integrateActorId,
    integrateRunId = 'latest',
    integrateDatasetId,
    memorizeFields = [],
    useTemplate,

    // Output options
    webhookUrl,
    generateEmbeddings = false,

    // Core parameters
    dataType = 'ecommerce',
    count = 100,
    schema = {},
    timeSeriesConfig = {},
    eventTypes = ['page_view', 'click', 'scroll', 'form_submit'],
    embeddingDimensions = 384,
    provider = 'openrouter',
    apiKey,
    openrouterApiKey,
    geminiApiKey,
    anthropicApiKey,
    model = 'deepseek/deepseek-chat',
    outputFormat = 'json',
    seed,
    quality = 0.8,

    // Web scraping specific options
    websiteType = 'ecommerce',
    apiEndpoint = '/api/products',
    simulationMode = false,
    batchSize = 100,
    delayBetweenBatches = 0,

    // SONA/TRM parameters
    sonaEnabled = true,
    ewcLambda = 2000,
    patternThreshold = 0.7,
    sonaLearningTiers = ['instant', 'background'],

    // ONNX Embedding parameters
    useOnnxEmbeddings = true,
    embeddingModel = 'all-MiniLM-L6-v2',

    // Crunchbase/Grounding parameters
    crunchbaseCompanies = [],
    crunchbaseIndustry = null,

    // Memory Session parameters
    memorySessionId = null,
    memorySessionEnabled = false,
    appendToSession = true,
} = input;

log.info('AI Synthetic Data Generator v2.5 with ONNX Embeddings & TRM/SONA', {
    mode,
    dataType,
    count,
    provider,
    model,
    sonaEnabled,
    useOnnxEmbeddings,
    embeddingModel,
});
|
|
|
|
// Initialize SONA if available and enabled. Each RuvLLM class is probed
// before use, so a build that lacks one of them still initializes the other.
// Any failure here is downgraded to a warning; generation continues without
// SONA.
if (ruvllm && sonaEnabled) {
    try {
        if (ruvllm.SonaCoordinator) {
            sonaCoordinator = new ruvllm.SonaCoordinator({
                tiers: sonaLearningTiers,
                ewcLambda,
                patternThreshold,
            });
            log.info('SONA Coordinator initialized', { tiers: sonaLearningTiers, ewcLambda });
        }

        if (ruvllm.TrajectoryBuilder) {
            trajectoryBuilder = new ruvllm.TrajectoryBuilder({
                maxSteps: 100,
            });
            log.info('Trajectory Builder initialized');
        }

        // Charge for SONA learning session
        await safeCharge('sona-learning-session', 1);
    } catch (e) {
        log.warning(`SONA initialization failed: ${e?.message ?? e}`);
    }
}
|
|
|
|
// Resolve the API key for the selected provider. Each key is looked up in
// order: provider-specific input field, legacy `apiKey` field, environment
// variable. Keys for providers that are not selected stay null.
// The Gemini key is also resolved for dataType 'crunchbase' regardless of
// provider, because Crunchbase grounding needs it.
const geminiKey = (provider === 'gemini' || dataType === 'crunchbase')
    ? (geminiApiKey || apiKey || process.env.GEMINI_API_KEY)
    : null;
const openRouterKey = provider === 'openrouter'
    ? (openrouterApiKey || apiKey || process.env.OPENROUTER_API_KEY)
    : null;
const anthropicKey = provider === 'anthropic'
    ? (anthropicApiKey || apiKey || process.env.ANTHROPIC_API_KEY)
    : null;

// A missing key is not fatal: it is only reported as a warning.
if (provider === 'gemini' && !geminiKey) {
    log.warning('No Gemini API key provided. Using algorithmic generation (still produces great data!)');
}
if (provider === 'openrouter' && !openRouterKey) {
    log.warning('No OpenRouter API key provided. Using algorithmic generation.');
}
if (provider === 'anthropic' && !anthropicKey) {
    log.warning('No Anthropic API key provided. Using algorithmic generation.');
}

// Records produced by whichever mode runs below.
let generatedData = [];
// Start of the timed section; `generationTime` is derived from this later.
const startTime = Date.now();
|
|
|
|
// ============================================
// MODE HANDLING: generate, integrate, template
// ============================================

if (mode === 'integrate' || mode === 'template') {
    // Integration mode - transform data from other Apify actors
    log.info(`Running in ${mode} mode`, { integrateActorId, useTemplate });

    // Get template config if using template mode
    let templateConfig = null;
    let effectiveActorId = integrateActorId;
    let effectiveMemorizeFields = memorizeFields;

    if (mode === 'template' && useTemplate) {
        templateConfig = getTemplate(useTemplate);
        if (!templateConfig) {
            // Fail with a clear message instead of a TypeError on `.name` below.
            throw new Error(`Unknown template: ${useTemplate}`);
        }
        log.info(`Using template: ${templateConfig.name}`, { suggestedActors: templateConfig.suggestedActors });

        // Use template defaults if not overridden
        const suggestedActors = templateConfig.suggestedActors ?? [];
        if (!effectiveActorId && suggestedActors.length > 0) {
            effectiveActorId = suggestedActors[0];
            log.info(`Using template's suggested actor: ${effectiveActorId}`);
        }
        if (!effectiveMemorizeFields || effectiveMemorizeFields.length === 0) {
            effectiveMemorizeFields = templateConfig.memorizeFields || [];
        }

        // Charge for template execution
        await safeCharge('template-execution', 1);
    }

    // Fetch data from the actor's dataset
    let sourceData = [];

    if (integrateDatasetId) {
        // Direct dataset access
        log.info(`Fetching from dataset: ${integrateDatasetId}`);
        const dataset = await Actor.openDataset(integrateDatasetId, { forceCloud: true });
        const { items } = await dataset.getData({ limit: count });
        sourceData = items;
    } else if (effectiveActorId) {
        // Fetch from actor run
        log.info(`Fetching from actor: ${effectiveActorId}, run: ${integrateRunId}`);

        try {
            const client = Actor.newClient();

            let runInfo;
            if (integrateRunId === 'latest') {
                // `desc: true` is required to get the newest run: the run list
                // is sorted oldest-first by default, so `limit: 1` alone would
                // return the actor's first-ever run.
                const runs = await client.actor(effectiveActorId).runs().list({ limit: 1, desc: true });
                if (runs.items.length === 0) {
                    throw new Error(`No runs found for actor ${effectiveActorId}`);
                }
                runInfo = runs.items[0];
            } else {
                runInfo = await client.run(integrateRunId).get();
            }

            if (runInfo?.defaultDatasetId) {
                const dataset = await client.dataset(runInfo.defaultDatasetId).listItems({ limit: count });
                sourceData = dataset.items;
                log.info(`Fetched ${sourceData.length} items from ${effectiveActorId}`);
            } else {
                // Surface the reason instead of silently continuing with no data.
                log.warning(`Run ${integrateRunId} of ${effectiveActorId} was not found or has no default dataset`);
            }
        } catch (e) {
            log.error(`Failed to fetch data from ${effectiveActorId}: ${e?.message ?? e}`);
            log.info('Generating synthetic data as fallback...');
            // Fall back to synthetic data generation
            sourceData = [];
        }
    }

    if (sourceData.length > 0) {
        // Transform the data
        const result = await integrateActorData({
            actorId: effectiveActorId,
            data: sourceData,
            memorizeFields: effectiveMemorizeFields,
            template: useTemplate,
            maxItems: count,
        });

        generatedData = result.data;

        // Charge for integration
        await safeCharge('actor-integration', 1);
        await safeCharge('integrated-record', generatedData.length);

        log.info(`Transformed ${generatedData.length} records from ${effectiveActorId}`);
    } else if (mode === 'template' && templateConfig) {
        // Generate synthetic data based on template output format
        log.info('No source data available, generating synthetic data based on template schema...');

        const random = createSeededRandom(seed);
        generatedData = [];

        for (let i = 0; i < count; i++) {
            const record = generateFromTemplateSchema(templateConfig.outputFormat, random, i);
            generatedData.push(record);
        }
    } else {
        throw new Error('No data source specified. Provide integrateActorId or integrateDatasetId.');
    }
} else {
    // Generate mode - create synthetic data.
    // Each entry is a thunk so a generator is only referenced when its data
    // type is actually selected (same laziness as a switch statement). A Map
    // is used so that a `dataType` such as "constructor" cannot resolve to an
    // inherited object property.
    const generators = new Map([
        ['demo', () => generateDemoData(count, geminiKey, model)],
        // Web scraping use cases
        ['ecommerce', () => generateEcommerceData(count, seed)],
        ['social', () => generateSocialMediaData(count, seed)],
        ['api_response', () => generateApiResponseData(count, apiEndpoint, seed)],
        ['search_results', () => generateSearchResultsData(count, seed)],
        ['real_estate', () => generateRealEstateData(count, seed)],
        ['jobs', () => generateJobListingsData(count, seed)],
        ['news', () => generateNewsData(count, seed)],
        ['structured', () => generateStructuredData(count, schema, geminiKey || openRouterKey || anthropicKey, model, seed, provider)],
        ['timeseries', () => generateTimeSeriesData(count, timeSeriesConfig, seed)],
        ['events', () => generateEventData(count, eventTypes, seed)],
        ['embeddings', () => generateEmbeddingData(count, embeddingDimensions, seed)],
        // Enterprise/Company Simulators
        ['stock_trading', () => generateStockTradingData(count, seed)],
        ['medical', () => generateMedicalData(count, seed)],
        ['company', () => generateCompanyData(count, seed)],
        ['supply_chain', () => generateSupplyChainData(count, seed)],
        ['financial', () => generateFinancialData(count, seed)],
        ['bloomberg', () => generateBloombergData(count, seed)],
        ['zoominfo', () => generateZoomInfoData(count, seed)],
        ['factset', () => generateFactSetData(count, seed)],
        ['lseg', () => generateLSEGData(count, seed)],
        ['crunchbase', () => generateCrunchbaseData(count, geminiKey, crunchbaseCompanies, crunchbaseIndustry)],
        // PRIORITY 1: High-Value Exotic Data Types
        ['eeg', () => generateEEGData(count, seed)],
        ['cgm', () => generateCGMData(count, seed)],
        ['siem', () => generateSIEMData(count, seed)],
        ['threat_intel', () => generateThreatIntelData(count, seed)],
        ['netflow', () => generateNetFlowData(count, seed)],
        // PRIORITY 2: Industrial & Scientific Data Types
        ['scada', () => generateSCADAData(count, seed)],
        ['lidar', () => generateLiDARData(count, seed)],
        ['canbus', () => generateCANBusData(count, seed)],
        ['genomic_vcf', () => generateGenomicVCFData(count, seed)],
        ['satellite', () => generateSatelliteData(count, seed)],
        // PRIORITY 3: Exotic/Niche Data Types
        ['fmri', () => generateFMRIData(count, seed)],
        ['protein_pdb', () => generateProteinPDBData(count, seed)],
        ['power_grid', () => generatePowerGridData(count, seed)],
        ['ais', () => generateAISData(count, seed)],
        ['radar', () => generateRadarData(count, seed)],
    ]);

    const runGenerator = generators.get(dataType);
    if (!runGenerator) {
        throw new Error(`Unknown data type: ${dataType}. Available: ecommerce, social, api_response, search_results, real_estate, jobs, news, structured, timeseries, events, embeddings, stock_trading, medical, company, supply_chain, financial, bloomberg, zoominfo, factset, lseg, crunchbase, eeg, cgm, siem, threat_intel, netflow, scada, lidar, canbus, genomic_vcf, satellite, fmri, protein_pdb, power_grid, ais, radar, demo`);
    }
    generatedData = await runGenerator();
} // End of generate mode else block
|
|
|
|
// Time spent producing `generatedData`; measured before the optional
// embedding step below.
const generationTime = Date.now() - startTime;

// ============================================
// EMBEDDING GENERATION (optional)
// ============================================
if (generateEmbeddings && generatedData.length > 0) {
    // Unknown model names fall back to the all-MiniLM-L6-v2 configuration.
    const modelConfig = EMBEDDING_MODELS[embeddingModel] || EMBEDDING_MODELS['all-MiniLM-L6-v2'];
    const effectiveDimensions = useOnnxEmbeddings ? modelConfig.dimensions : embeddingDimensions;

    log.info(`Generating embeddings with ${effectiveDimensions} dimensions...`, {
        useOnnx: useOnnxEmbeddings,
        model: useOnnxEmbeddings ? embeddingModel : 'random',
    });

    // Shared by the non-ONNX path and the ONNX failure fallback: returns
    // copies of the records with a seeded random embedding attached.
    const withRandomEmbeddings = (records) => {
        const random = createSeededRandom(seed);
        return records.map((item) => ({
            ...item,
            embedding: generateRandomEmbedding(effectiveDimensions, random),
            embeddingModel: 'random',
            embeddingDimensions: effectiveDimensions,
        }));
    };

    if (useOnnxEmbeddings) {
        // Use ONNX-powered semantic embeddings
        try {
            generatedData = await addEmbeddingsToRecords(generatedData, { modelName: embeddingModel });
            log.info(`Added ONNX embeddings using ${embeddingModel} model`);
            await safeCharge('onnx-embedding-generation', generatedData.length);
        } catch (e) {
            log.warning(`ONNX embedding failed: ${e?.message ?? e}. Falling back to random embeddings.`);
            generatedData = withRandomEmbeddings(generatedData);
        }
    } else {
        // Use random embeddings (faster, for testing)
        generatedData = withRandomEmbeddings(generatedData);
    }

    // Charge for embedding generation.
    // NOTE(review): on a successful ONNX run this is charged in addition to
    // 'onnx-embedding-generation' above — confirm that both charges are intended.
    await safeCharge('embedding-generation', generatedData.length);
    log.info(`Added embeddings to ${generatedData.length} records`);
}
|
|
|
|
// Track generation trajectory for SONA learning.
// Only `startStep` and `endStep` are called here; no `complete` call is made.
if (trajectoryBuilder && sonaEnabled) {
    try {
        const stepId = trajectoryBuilder.startStep('generate', {
            dataType,
            count: generatedData.length,
            quality,
            seed: seed || 'random',
        });
        trajectoryBuilder.endStep(stepId, {
            duration: generationTime,
            success: true,
            recordsGenerated: generatedData.length,
        });
        log.info('Generation trajectory tracked for SONA learning');
    } catch (e) {
        log.warning(`Trajectory tracking failed: ${e?.message ?? e}`);
    }
}

// SONA pattern learning from generated data with data-type specific training.
// Failures are logged as warnings and never abort the run.
if (sonaCoordinator && sonaEnabled && generatedData.length > 0) {
    try {
        // Learn from at most the first 10 records.
        const sampleSize = Math.min(10, generatedData.length);
        const sample = generatedData.slice(0, sampleSize);

        // Record data-type specific patterns for neural training.
        // `extractDataTypePatterns` is a function declaration further down in
        // this block; it is available here through hoisting.
        const dataTypePatterns = extractDataTypePatterns(dataType, sample);

        // recordSignal feeds the instant learning tier
        sonaCoordinator.recordSignal({
            type: 'generation_complete',
            dataType,
            samples: sample,
            quality,
            generationTime,
            count: generatedData.length,
            patterns: dataTypePatterns,
        });

        // Process instant learning tier with data-type optimization
        if (sonaLearningTiers.includes('instant')) {
            await sonaCoordinator.processInstantLearning();
        }

        // Train neural patterns for this data type (use safe method detection)
        if (trajectoryBuilder && sonaLearningTiers.includes('background')) {
            const trainingData = {
                action: `generate_${dataType}`,
                observation: { quality, count: generatedData.length, time: generationTime },
                // Full reward when generation took under 100 ms, 80% otherwise.
                reward: quality * (generationTime < 100 ? 1.0 : 0.8),
                patterns: dataTypePatterns,
            };

            // Use whichever of these methods the builder exposes, if any.
            const method = trajectoryBuilder.track || trajectoryBuilder.recordTrajectory || trajectoryBuilder.add;
            if (typeof method === 'function') {
                method.call(trajectoryBuilder, trainingData);
            }
        }

        log.info(`SONA recorded signal from ${sampleSize} samples`, {
            stats: sonaCoordinator.stats(),
            patterns: Object.keys(dataTypePatterns).length,
        });
    } catch (e) {
        log.warning(`SONA pattern learning failed: ${e?.message ?? e}`);
    }
}
|
|
|
|
/**
 * Extract data-type specific summary patterns from sample records.
 *
 * Kept as a function declaration on purpose: it is called earlier in the
 * enclosing block than where it is declared, which relies on hoisting.
 *
 * @param {string} type - Data type name; 'ecommerce', 'bloomberg', 'medical'
 *   and 'supply_chain' get dedicated summaries, anything else only a count.
 * @param {Array<object>} samples - Sample records; null/undefined entries are
 *   tolerated and treated as records with no fields.
 * @returns {object} Pattern summary; an empty object when `samples` is
 *   missing or empty.
 */
function extractDataTypePatterns(type, samples) {
    const patterns = {};
    if (!samples || samples.length === 0) return patterns;

    // Frequency table keyed by the stringified result of `keyOf`.
    // Counting in a Map avoids reading inherited properties: with a plain
    // object accumulator, a key such as "constructor" would start from
    // Object.prototype.constructor instead of 0 and yield a garbage count.
    const tally = (keyOf) => {
        const counts = new Map();
        for (const sample of samples) {
            const key = String(keyOf(sample));
            counts.set(key, (counts.get(key) ?? 0) + 1);
        }
        return Object.fromEntries(counts);
    };

    // Mean of a numeric field across samples; missing values count as 0.
    const average = (valueOf) =>
        samples.reduce((sum, sample) => sum + (valueOf(sample) || 0), 0) / samples.length;

    switch (type) {
        case 'ecommerce': {
            const prices = samples.map((s) => s?.price || 0);
            patterns.priceRange = { min: Math.min(...prices), max: Math.max(...prices) };
            patterns.ratingDistribution = tally((s) => Math.floor(s?.rating || 0));
            patterns.categoryFreq = tally((s) => s?.category);
            break;
        }
        case 'bloomberg':
            patterns.sectorDistribution = tally((s) => s?.security?.sector);
            patterns.recommendationFreq = tally((s) => s?.consensus?.recommendation);
            patterns.avgVolume = average((s) => s?.pricing?.volume);
            break;
        case 'medical':
            patterns.severityDistribution = tally((s) => s?.diagnosis?.severity);
            patterns.avgAge = average((s) => s?.patient?.age);
            break;
        case 'supply_chain':
            patterns.statusDistribution = tally((s) => s?.order?.status);
            patterns.avgLeadTime = average((s) => s?.supplier?.leadTime);
            break;
        default:
            patterns.recordCount = samples.length;
    }

    return patterns;
}
|
|
|
|
log.info(`Generated ${generatedData.length} records in ${generationTime}ms`);

// Charge custom events based on data type. Data types without an entry here
// have no per-record event.
const eventMap = {
    'ecommerce': 'ecommerce-product',
    'social': 'social-media-post',
    'jobs': 'job-listing',
    'real_estate': 'real-estate-listing',
    'search_results': 'search-result',
    'api_response': 'api-mock-response',
    'news': 'news-article',
    // Enterprise data types
    'stock_trading': 'stock-trading-record',
    'medical': 'medical-record',
    'company': 'company-record',
    'supply_chain': 'supply-chain-record',
    'financial': 'financial-record',
    'bloomberg': 'bloomberg-terminal-record',
};

// Record label: the data type in generate mode, otherwise the mode itself.
// Used by both push paths so integrated/template data is never labelled with
// the default `dataType`.
const recordType = mode === 'generate' ? dataType : mode;

// Simulation mode - push in batches with delays
if (simulationMode && delayBetweenBatches > 0) {
    // Normalize the batch size: a zero, negative or non-numeric value would
    // never advance the loop below, and a string value would be concatenated
    // by `+=`. Invalid values fall back to the input default of 100.
    const requestedBatchSize = Math.floor(Number(batchSize));
    const effectiveBatchSize = Number.isFinite(requestedBatchSize) && requestedBatchSize > 0
        ? requestedBatchSize
        : 100;
    if (effectiveBatchSize !== Number(batchSize)) {
        log.warning(`Invalid batchSize ${batchSize}; using ${effectiveBatchSize}`);
    }

    log.info(`Simulation mode: pushing ${effectiveBatchSize} records every ${delayBetweenBatches}ms`);

    // Charge for simulation session
    await safeCharge('simulation-session', 1);

    const totalBatches = Math.ceil(generatedData.length / effectiveBatchSize);

    for (let i = 0; i < generatedData.length; i += effectiveBatchSize) {
        const batch = generatedData.slice(i, i + effectiveBatchSize);
        const batchNum = Math.floor(i / effectiveBatchSize) + 1;

        await Actor.pushData(batch.map((item, idx) => ({
            id: i + idx + 1,
            type: recordType,
            data: item,
            metadata: {
                generatedAt: new Date().toISOString(),
                provider,
                model,
                quality,
                seed: seed || 'random',
                batch: batchNum,
                totalBatches,
                simulationMode: true,
            },
        })));

        // Charge for simulation batch
        await safeCharge('simulation-batch', 1);

        log.info(`Pushed batch ${batchNum}/${totalBatches}`);

        // Wait between batches, but not after the last one.
        if (i + effectiveBatchSize < generatedData.length) {
            await new Promise((resolve) => setTimeout(resolve, delayBetweenBatches));
        }
    }
    // NOTE(review): this path charges only the simulation events, not the
    // per-data-type or AI-enhanced events below — confirm that is intended.
} else {
    // Push all results at once
    await Actor.pushData(generatedData.map((item, index) => ({
        id: index + 1,
        type: recordType,
        data: item,
        metadata: {
            generatedAt: new Date().toISOString(),
            mode,
            dataType: mode === 'generate' ? dataType : null,
            actorId: integrateActorId || null,
            template: useTemplate || null,
            provider,
            model,
            quality,
            seed: seed || 'random',
            hasEmbedding: generateEmbeddings,
        },
    })));

    // Charge for data type specific events
    const eventName = eventMap[dataType];
    if (eventName && mode === 'generate') {
        await safeCharge(eventName, generatedData.length);
        log.info(`Charged ${generatedData.length} ${eventName} events`);
    }

    // Charge for AI-enhanced records if using AI. Restricted to generate
    // mode: in integrate/template mode the structured generator never runs,
    // even if `dataType` happens to be 'structured'.
    if (mode === 'generate' && dataType === 'structured' && (geminiKey || openRouterKey || anthropicKey)) {
        await safeCharge('ai-enhanced-record', generatedData.length);
        log.info(`Charged ${generatedData.length} AI-enhanced events`);
    }
}

log.info(`Pushed ${generatedData.length} records to dataset`);
|
|
|
|
// ============================================
// MEMORY SESSION PERSISTENCE (optional)
// ============================================
// Outcome of the save: stays null when persistence is not attempted, holds a
// summary on success and `{ error }` on failure. It is included in the
// webhook payload.
let memorySessionResult = null;

if (memorySessionEnabled && memorySessionId) {
    try {
        log.info(`Saving to memory session: ${memorySessionId}`);

        const session = new MemorySession(memorySessionId, { actorName: 'agentic-synth' });
        await session.init();

        // Load existing memories if appending
        if (appendToSession) {
            await session.load();
            log.info(`Loaded ${session.getMemories().length} existing memories`);
        }

        // Add generated data to session
        const memoryRecords = generatedData.map((item, index) => ({
            id: `synth_${Date.now()}_${index}`,
            // Strings are stored as-is; other values are stored as JSON
            // truncated to 500 characters. JSON.stringify returns undefined
            // for values it cannot serialize, hence the '' fallback.
            text: typeof item === 'string' ? item : (JSON.stringify(item) ?? '').substring(0, 500),
            data: item,
            type: mode === 'generate' ? dataType : mode,
            // Optional chaining keeps a null/undefined record from throwing.
            embedding: item?.embedding || null,
            metadata: {
                generatedAt: new Date().toISOString(),
                mode,
                dataType: mode === 'generate' ? dataType : null,
                actorId: integrateActorId || null,
                template: useTemplate || null,
                provider,
                model,
            },
        }));

        await session.addBatch(memoryRecords);
        await session.save();

        memorySessionResult = {
            sessionId: memorySessionId,
            totalMemories: session.getMemories().length,
            addedMemories: memoryRecords.length,
            metadata: session.getMetadata(),
        };

        log.info(`Saved ${memoryRecords.length} records to memory session ${memorySessionId}`);
        log.info(`Total memories in session: ${session.getMemories().length}`);
    } catch (e) {
        // Persistence is optional: report the failure and carry on.
        const reason = e?.message ?? String(e);
        log.warning(`Memory session save failed: ${reason}`);
        memorySessionResult = { error: reason };
    }
} else if (memorySessionEnabled) {
    // Previously skipped without any message; tell the user why nothing was saved.
    log.warning('Memory session is enabled but no memorySessionId was provided; skipping persistence.');
}
|
|
|
|
// ============================================
// WEBHOOK NOTIFICATION (optional)
// ============================================
// Posts a JSON run summary to `webhookUrl`. Delivery is best-effort: a
// non-2xx response, a network error or a timeout only produces a warning.
if (webhookUrl) {
    log.info(`Sending webhook to: ${webhookUrl}`);

    // Upper bound on how long an unresponsive endpoint may delay shutdown.
    const webhookTimeoutMs = 30000;

    try {
        const webhookPayload = {
            actorId: 'ruv/ai-synthetic-data-generator',
            runId: process.env.APIFY_ACTOR_RUN_ID,
            status: 'success',
            mode,
            dataType: mode === 'generate' ? dataType : null,
            template: useTemplate || null,
            integrateActorId: integrateActorId || null,
            totalRecords: generatedData.length,
            generationTime,
            hasEmbeddings: generateEmbeddings,
            datasetId: process.env.APIFY_DEFAULT_DATASET_ID,
            memorySession: memorySessionResult,
            timestamp: new Date().toISOString(),
        };

        const response = await fetch(webhookUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'User-Agent': 'Apify-AI-Synthetic-Data-Generator/2.5',
            },
            body: JSON.stringify(webhookPayload),
            // Abort the request instead of waiting indefinitely.
            signal: AbortSignal.timeout(webhookTimeoutMs),
        });

        if (response.ok) {
            log.info('Webhook notification sent successfully');
            // Only successful deliveries are charged.
            await safeCharge('webhook-notification', 1);
        } else {
            log.warning(`Webhook failed with status: ${response.status}`);
        }
    } catch (e) {
        log.warning(`Webhook notification failed: ${e?.message ?? e}`);
    }
}
|
|
|
|
} catch (error) {
    const failureMessage = error?.message ?? String(error);
    log.error('Actor failed', { error: failureMessage });
    // Exit through Actor.fail() so the process ends with a non-zero exit
    // code. The earlier `finally { await Actor.exit(); }` exited with the
    // default success code before the rethrown error could surface, so a
    // failed run was reported as succeeded.
    await Actor.fail(`Actor failed: ${failureMessage}`);
}

// Normal completion: exit the Actor with the default (success) exit code.
await Actor.exit();
|
|
|
|
// ============================================
|
|
// WEB SCRAPING FOCUSED GENERATORS
|
|
// ============================================
|
|
|
|
/**
 * Generate synthetic e-commerce product listings.
 *
 * All random choices come from a single generator created by
 * `createSeededRandom(seed)`, drawn in a fixed order per record.
 * `scrapedAt` is the wall-clock time at which each record is built.
 *
 * @param {number} count - Number of product records to generate.
 * @param {*} seed - Seed passed through to `createSeededRandom`.
 * @returns {Promise<Array<object>>} The generated product records.
 */
async function generateEcommerceData(count, seed) {
    log.info('Generating e-commerce product data...');
    const random = createSeededRandom(seed);

    // Small helpers; each `pick` consumes exactly one random draw.
    const pick = (list) => list[Math.floor(random() * list.length)];
    const toCents = (amount) => Math.round(amount * 100) / 100;
    const toTenths = (value) => Math.round(value * 10) / 10;

    // Category-matched brands for realistic data
    const categoryBrands = {
        'Electronics': ['Samsung', 'Sony', 'Apple', 'LG', 'Bose', 'JBL', 'Anker', 'Logitech'],
        'Clothing': ['Nike', 'Adidas', 'Zara', 'H&M', 'Levi\'s', 'Gap', 'Uniqlo', 'Calvin Klein'],
        'Home & Garden': ['IKEA', 'Pottery Barn', 'West Elm', 'Crate & Barrel', 'HomeGoods', 'Wayfair'],
        'Sports': ['Nike', 'Under Armour', 'Adidas', 'Puma', 'Wilson', 'Spalding', 'Callaway'],
        'Books': ['Penguin', 'HarperCollins', 'Simon & Schuster', 'Random House', 'Scholastic'],
        'Toys': ['LEGO', 'Hasbro', 'Mattel', 'Fisher-Price', 'Melissa & Doug', 'Nerf'],
        'Beauty': ['L\'Oreal', 'Maybelline', 'Neutrogena', 'Olay', 'Revlon', 'CeraVe', 'The Ordinary'],
        'Automotive': ['Bosch', 'Michelin', 'Goodyear', 'Mobil', 'Castrol', 'WeatherTech', 'AutoZone'],
    };
    const categories = Object.keys(categoryBrands);
    const conditions = ['New', 'Used - Like New', 'Used - Good', 'Refurbished'];

    const results = [];
    for (let i = 0; i < count; i++) {
        const category = pick(categories);
        const brand = pick(categoryBrands[category]);
        const basePrice = 10 + random() * 990;
        const hasDiscount = random() > 0.6;

        // Consistent stock logic: a product with zero stock is never in
        // stock, and roughly 10% of stocked products are marked unavailable.
        const stockCount = Math.floor(random() * 500);
        const inStock = stockCount > 0 && random() > 0.1;

        // Consistent shipping logic: free shipping means price is 0
        const isFreeShipping = random() > 0.4;
        const shippingPrice = isFreeShipping ? 0 : toCents(5 + random() * 10);

        results.push({
            url: `https://example-store.com/products/${generateSlug(random)}-${i}`,
            title: `${brand} ${generateProductName(category, random)}`,
            price: toCents(basePrice),
            // Discounted items get an original price 10-50% above the base price.
            originalPrice: hasDiscount ? toCents(basePrice * (1.1 + random() * 0.4)) : null,
            currency: 'USD',
            category,
            brand,
            rating: toTenths(3 + random() * 2),
            reviewCount: Math.floor(random() * 5000),
            inStock,
            stockCount: inStock ? stockCount : 0,
            condition: pick(conditions),
            seller: {
                name: `Seller${Math.floor(random() * 1000)}`,
                rating: toTenths(3.5 + random() * 1.5),
                totalSales: Math.floor(random() * 50000),
            },
            shipping: {
                free: isFreeShipping,
                estimatedDays: Math.floor(2 + random() * 8),
                price: shippingPrice,
            },
            images: Array.from(
                { length: Math.floor(1 + random() * 5) },
                (_, j) => `https://example-store.com/images/product-${i}-${j}.jpg`,
            ),
            specifications: generateSpecs(category, random),
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic social media posts.
 *
 * All random choices come from a single generator created by
 * `createSeededRandom(seed)`. Post timestamps fall within the 30 days
 * before the moment of generation.
 *
 * @param {number} count - Number of posts to generate.
 * @param {*} seed - Seed passed through to `createSeededRandom`.
 * @returns {Promise<Array<object>>} The generated post records.
 */
async function generateSocialMediaData(count, seed) {
    log.info('Generating social media data...');
    const random = createSeededRandom(seed);
    const pick = (list) => list[Math.floor(random() * list.length)];

    const platforms = ['twitter', 'instagram', 'facebook', 'linkedin', 'tiktok'];
    const postTypes = ['text', 'image', 'video', 'link', 'poll'];
    const thirtyDaysMs = 30 * 24 * 60 * 60 * 1000;

    const results = [];
    for (let i = 0; i < count; i++) {
        const platform = pick(platforms);
        const postType = pick(postTypes);
        const timestamp = new Date(Date.now() - random() * thirtyDaysMs);

        const postUrl = `https://${platform}.com/post/${generateId(random)}`;
        // The profile URL is derived from this username so the two always
        // refer to the same account (previously each used its own random id).
        const username = `user_${generateId(random)}`;

        results.push({
            url: postUrl,
            platform,
            postType,
            author: {
                username,
                displayName: generateName(random),
                verified: random() > 0.85,
                followers: Math.floor(random() * 1000000),
                following: Math.floor(random() * 5000),
                profileUrl: `https://${platform}.com/${username}`,
            },
            content: {
                text: generateSocialText(random),
                hashtags: Array.from({ length: Math.floor(random() * 6) }, () => `#${generateHashtag(random)}`),
                mentions: Array.from({ length: Math.floor(random() * 3) }, () => `@user_${generateId(random)}`),
                // Only non-text posts carry a media URL.
                mediaUrls: postType !== 'text' ? [`https://${platform}.com/media/${generateId(random)}.jpg`] : [],
            },
            engagement: {
                likes: Math.floor(random() * 100000),
                comments: Math.floor(random() * 5000),
                shares: Math.floor(random() * 10000),
                views: Math.floor(random() * 1000000),
            },
            timestamp: timestamp.toISOString(),
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic API responses for a mock endpoint.
 *
 * Each record describes one GET request to `${endpoint}/${i}`. Status codes
 * below 400 get a data body with pagination; 4xx/5xx codes get an error body.
 *
 * @param {number} count - Number of responses to generate.
 * @param {string} endpoint - Base endpoint path; the record index is appended.
 * @param {*} seed - Seed passed through to `createSeededRandom`.
 * @returns {Promise<Array<object>>} The generated response records.
 */
async function generateApiResponseData(count, endpoint, seed) {
    log.info('Generating API response data...', { endpoint });
    const random = createSeededRandom(seed);

    // Weighted pool: 200 appears four times, so successes dominate.
    // Hoisted out of the loop because it never changes between records.
    const statusCodes = [200, 200, 200, 200, 201, 400, 401, 404, 500];

    const results = [];
    for (let i = 0; i < count; i++) {
        const statusCode = statusCodes[Math.floor(random() * statusCodes.length)];

        const headers = {
            'content-type': 'application/json',
            'x-request-id': generateId(random),
            'x-rate-limit-remaining': Math.floor(random() * 1000),
            'cache-control': random() > 0.5 ? 'max-age=3600' : 'no-cache',
        };
        const responseTime = Math.floor(50 + random() * 500);

        let body;
        if (statusCode < 400) {
            body = {
                id: generateId(random),
                data: generateRandomObject(random),
                pagination: {
                    page: 1,
                    perPage: 20,
                    total: Math.floor(random() * 10000),
                    hasMore: random() > 0.3,
                },
            };
        } else {
            body = {
                error: {
                    code: `ERR_${statusCode}`,
                    message: getErrorMessage(statusCode),
                },
            };
        }

        results.push({
            endpoint: `${endpoint}/${i}`,
            method: 'GET',
            statusCode,
            headers,
            responseTime,
            body,
            timestamp: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic search engine results.
 *
 * Results are numbered from position 1 in generation order. Sitelinks and
 * rich snippets are optional and are `null` when not generated.
 *
 * @param {number} count - Number of results to generate.
 * @param {*} seed - Seed passed through to `createSeededRandom`.
 * @returns {Promise<Array<object>>} The generated search result records.
 */
async function generateSearchResultsData(count, seed) {
    log.info('Generating search results data...');
    const random = createSeededRandom(seed);

    const domains = ['example.com', 'blog.example.org', 'news.example.net', 'shop.example.io', 'docs.example.dev'];

    // One sitelink consumes draws in title-then-slug order.
    const buildSitelink = (domain) => ({
        title: generateSearchTitle(random),
        url: `https://${domain}/${generateSlug(random)}`,
    });

    // Rating, review count and an optional whole-dollar price label.
    const buildRichSnippet = () => ({
        rating: Math.round((3 + random() * 2) * 10) / 10,
        reviewCount: Math.floor(random() * 10000),
        price: random() > 0.5 ? `$${Math.floor(10 + random() * 500)}` : null,
    });

    const results = [];
    for (let i = 0; i < count; i++) {
        const domain = domains[Math.floor(random() * domains.length)];

        results.push({
            position: i + 1,
            url: `https://${domain}/${generateSlug(random)}`,
            title: generateSearchTitle(random),
            snippet: generateSnippet(random),
            domain,
            displayUrl: `${domain} > ${generateBreadcrumb(random)}`,
            type: random() > 0.8 ? 'featured' : 'organic',
            sitelinks: random() > 0.7
                ? Array.from({ length: Math.floor(2 + random() * 4) }, () => buildSitelink(domain))
                : null,
            rich_snippet: random() > 0.6 ? buildRichSnippet() : null,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Build `count` synthetic real-estate listings.
 *
 * Each listing has a URL, id, title, price, listing/property type, address,
 * physical details, features, agent contact, image URLs and days on market.
 * The listing id is generated once and reused in the URL so that a record's
 * `url` always points at its own `listingId`.
 *
 * @param {number} count - How many listings to produce.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} The generated listings.
 */
async function generateRealEstateData(count, seed) {
    log.info('Generating real estate listing data...');
    const random = createSeededRandom(seed);
    const results = [];

    const propertyTypes = ['House', 'Apartment', 'Condo', 'Townhouse', 'Land', 'Commercial'];
    const cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix', 'San Diego', 'Dallas', 'Austin'];
    const listingTypes = ['For Sale', 'For Rent', 'Auction'];

    const pick = (options) => options[Math.floor(random() * options.length)];

    for (let i = 0; i < count; i++) {
        const propertyType = pick(propertyTypes);
        const city = pick(cities);
        const listingType = pick(listingTypes);
        const bedrooms = Math.floor(1 + random() * 6);
        const sqft = Math.floor(500 + random() * 4500);
        // One id per listing, shared by `url` and `listingId`.
        const listingId = generateId(random);

        results.push({
            url: `https://realestate-example.com/listing/${listingId}`,
            listingId,
            title: `${bedrooms} Bed ${propertyType} in ${city}`,
            price: Math.floor(100000 + random() * 2000000),
            listingType,
            propertyType,
            address: {
                street: `${Math.floor(100 + random() * 9900)} ${generateStreetName(random)}`,
                city,
                state: getState(city),
                zipCode: String(Math.floor(10000 + random() * 90000)),
                country: 'USA'
            },
            details: {
                bedrooms,
                bathrooms: Math.floor(1 + random() * 4),
                sqft,
                // Lot is 1.5x to 4.5x the living area.
                lotSize: Math.floor(sqft * (1.5 + random() * 3)),
                yearBuilt: Math.floor(1950 + random() * 74),
                parking: Math.floor(random() * 4),
                stories: Math.floor(1 + random() * 3)
            },
            features: generateRealEstateFeatures(random),
            agent: {
                name: generateName(random),
                phone: generatePhone(random),
                email: `agent${Math.floor(random() * 1000)}@realestate.com`,
                company: `${generateName(random)} Realty`
            },
            images: Array.from({ length: Math.floor(5 + random() * 20) }, (_, j) =>
                `https://realestate-example.com/images/listing-${i}-${j}.jpg`
            ),
            daysOnMarket: Math.floor(random() * 180),
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Build `count` synthetic job postings.
 *
 * Each posting has a URL, id, title, company profile, location, remote flag,
 * employment type, salary band, description, requirements, benefits, posting
 * date (within the last 30 days) and applicant count. The job id is generated
 * once and reused in the URL so that `url` always points at the record's own
 * `jobId`.
 *
 * @param {number} count - How many postings to produce.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} The generated postings.
 */
async function generateJobListingsData(count, seed) {
    log.info('Generating job listings data...');
    const random = createSeededRandom(seed);
    const results = [];

    const titles = ['Software Engineer', 'Product Manager', 'Data Scientist', 'UX Designer', 'DevOps Engineer', 'Marketing Manager', 'Sales Representative', 'Customer Success Manager'];
    const companies = ['TechCorp', 'InnovateLabs', 'DataDriven Inc', 'CloudScale', 'StartupXYZ', 'Enterprise Solutions', 'Digital Agency', 'Growth Partners'];
    const locations = ['Remote', 'New York, NY', 'San Francisco, CA', 'Austin, TX', 'Seattle, WA', 'Boston, MA', 'Chicago, IL', 'Los Angeles, CA'];
    const types = ['Full-time', 'Part-time', 'Contract', 'Internship'];
    const companySizes = ['1-50', '51-200', '201-500', '501-1000', '1000+'];

    const pick = (options) => options[Math.floor(random() * options.length)];

    for (let i = 0; i < count; i++) {
        const title = pick(titles);
        const company = pick(companies);
        const location = pick(locations);
        const salaryMin = Math.floor(50000 + random() * 100000);
        // One id per posting, shared by `url` and `jobId`.
        const jobId = generateId(random);

        results.push({
            url: `https://jobs-example.com/job/${jobId}`,
            jobId,
            title,
            company: {
                name: company,
                logo: `https://jobs-example.com/logos/${company.toLowerCase().replace(/\s/g, '-')}.png`,
                rating: Math.round((3 + random() * 2) * 10) / 10,
                reviewCount: Math.floor(random() * 5000),
                size: pick(companySizes)
            },
            location,
            // A 'Remote' location is always remote; no random draw is made then.
            remote: location === 'Remote' || random() > 0.7,
            type: pick(types),
            salary: {
                min: salaryMin,
                max: salaryMin + Math.floor(random() * 50000),
                currency: 'USD',
                period: 'yearly'
            },
            description: generateJobDescription(random),
            requirements: Array.from({ length: Math.floor(3 + random() * 5) }, () => generateRequirement(random)),
            benefits: generateBenefits(random),
            postedDate: new Date(Date.now() - random() * 30 * 24 * 60 * 60 * 1000).toISOString(),
            applicants: Math.floor(random() * 500),
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Build `count` synthetic news articles.
 *
 * Each article has a source-derived URL, headline, subtitle, category,
 * author, publication time within the last seven days, an optional update
 * time, content statistics, one image, tags and engagement counters.
 *
 * @param {number} count - How many articles to produce.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} The generated articles.
 */
async function generateNewsData(count, seed) {
    log.info('Generating news article data...');

    const random = createSeededRandom(seed);
    const outlets = ['TechNews', 'BusinessDaily', 'WorldReport', 'ScienceToday', 'HealthWatch', 'SportsCentral'];
    const sections = ['Technology', 'Business', 'Politics', 'Science', 'Health', 'Sports', 'Entertainment'];
    const writers = ['John Smith', 'Sarah Johnson', 'Mike Williams', 'Emily Brown', 'David Lee', 'Lisa Chen'];
    const msPerDay = 24 * 60 * 60 * 1000;
    const articles = [];

    for (let n = 0; n < count; n += 1) {
        const source = outlets[Math.floor(random() * outlets.length)];
        const category = sections[Math.floor(random() * sections.length)];
        const publishDate = new Date(Date.now() - random() * 7 * msPerDay);
        const siteRoot = `https://${source.toLowerCase()}.com`;

        // From here on, statements follow the field order of the record so
        // that random draws are consumed in a fixed sequence.
        const url = `${siteRoot}/article/${generateSlug(random)}`;
        const title = generateNewsTitle(category, random);
        const subtitle = generateSubtitle(random);
        const author = {
            name: writers[Math.floor(random() * writers.length)],
            url: `${siteRoot}/author/${generateSlug(random)}`
        };

        let updatedAt = null;
        if (random() > 0.7) {
            updatedAt = new Date(publishDate.getTime() + random() * msPerDay).toISOString();
        }

        const content = {
            text: generateArticleContent(random),
            wordCount: Math.floor(300 + random() * 1500),
            readingTime: Math.floor(2 + random() * 10)
        };
        const images = [{
            url: `${siteRoot}/images/article-${n}.jpg`,
            caption: generateCaption(random)
        }];
        const tagCount = Math.floor(2 + random() * 5);
        const tags = Array.from({ length: tagCount }, () => generateTag(random));
        const engagement = {
            views: Math.floor(random() * 100000),
            comments: Math.floor(random() * 500),
            shares: Math.floor(random() * 2000)
        };

        articles.push({
            url,
            title,
            subtitle,
            source,
            category,
            author,
            publishedAt: publishDate.toISOString(),
            updatedAt,
            content,
            images,
            tags,
            engagement,
            scrapedAt: new Date().toISOString()
        });
    }

    return articles;
}
|
|
|
|
// ============================================
|
|
// ORIGINAL GENERATORS (kept for compatibility)
|
|
// ============================================
|
|
|
|
/**
 * Assemble a mixed demo dataset from five generators.
 *
 * Each generator is asked for ceil(count / 5) records, every record is tagged
 * with a `_type` naming its origin, and the combined list is trimmed to
 * `count` entries. `apiKey` and `model` are accepted but not used here.
 *
 * @param {number} count - Total number of records wanted.
 * @param {string} [apiKey] - Unused by this function.
 * @param {string} [model] - Unused by this function.
 * @returns {Promise<object[]>} At most `count` tagged records.
 */
async function generateDemoData(count, apiKey, model) {
    log.info('Generating demo data with web scraping examples...');

    const quota = Math.ceil(count / 5);

    // [tag written to `_type`, generator]; the order fixes the output order.
    const sections = [
        ['ecommerce', generateEcommerceData],
        ['social', generateSocialMediaData],
        ['search_results', generateSearchResultsData],
        ['jobs', generateJobListingsData],
        ['news', generateNewsData]
    ];

    const combined = [];
    for (const [tag, produce] of sections) {
        const batch = await produce(quota);
        for (const item of batch) {
            combined.push({ ...item, _type: tag });
        }
    }

    return combined.slice(0, count);
}
|
|
|
|
/**
 * Generate `count` records that follow a user-supplied schema.
 *
 * When an API key and a non-empty schema are given, one LLM request asks for
 * up to 20 records; the first JSON array found in the reply is parsed and its
 * object entries are used. Whatever is still missing (or everything, if the
 * request fails or no key is given) is filled in by
 * generateFallbackStructured. AI failures are logged as warnings and never
 * propagate to the caller.
 *
 * @param {number} count - Number of records to return.
 * @param {object} schema - Map of field name to type description.
 * @param {string} [apiKey] - Provider API key; falsy disables the AI path.
 * @param {string} [model] - Provider model id; each provider has a default.
 * @param {*} [seed] - Passed to createSeededRandom for the fallback records.
 * @param {string} [provider='gemini'] - 'openrouter', 'anthropic', or anything
 *   else for Gemini.
 * @returns {Promise<object[]>} At most `count` records.
 */
async function generateStructuredData(count, schema, apiKey, model, seed, provider = 'gemini') {
    log.info('Generating structured data...', { count, schema, provider, model });

    // A null/undefined schema is treated as empty instead of crashing.
    const fieldSchema = schema ?? {};
    const random = createSeededRandom(seed);
    const results = [];

    if (apiKey && Object.keys(fieldSchema).length > 0) {
        try {
            const prompt = `Generate ${Math.min(count, 20)} unique records matching this schema:
${JSON.stringify(fieldSchema, null, 2)}

Return ONLY a valid JSON array with no additional text. Each record should be realistic and diverse.`;

            const text = await requestStructuredDataText(prompt, apiKey, model, provider);

            // Models often wrap the array in prose or code fences, so grab
            // the outermost [...] span rather than parsing the whole reply.
            const jsonMatch = typeof text === 'string' ? text.match(/\[[\s\S]*\]/) : null;
            if (jsonMatch) {
                const parsed = JSON.parse(jsonMatch[0]);
                // Keep only plain objects; primitives and nested arrays are
                // not usable as records.
                const records = Array.isArray(parsed)
                    ? parsed.filter((item) => item !== null && typeof item === 'object' && !Array.isArray(item))
                    : [];
                results.push(...records);
                log.info(`AI generated ${records.length} records`);
            }
        } catch (e) {
            log.warning(`AI generation failed: ${e.message}. Using fallback.`);
        }
    }

    // Top up with locally generated records until the requested size is met.
    while (results.length < count) {
        results.push(generateFallbackStructured(fieldSchema, random));
    }

    return results.slice(0, count);
}

/**
 * Send `prompt` to the selected provider and return the reply text.
 * Throws when an HTTP provider answers with a non-2xx status.
 */
async function requestStructuredDataText(prompt, apiKey, model, provider) {
    if (provider === 'openrouter') {
        // Use OpenRouter API (supports DeepSeek, GPT, Claude, Llama, etc.)
        const data = await postStructuredDataRequest(
            'OpenRouter',
            'https://openrouter.ai/api/v1/chat/completions',
            {
                'Authorization': `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
                'HTTP-Referer': 'https://apify.com',
                'X-Title': 'AI Synthetic Data Generator'
            },
            {
                model: model || 'deepseek/deepseek-chat',
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.7
            }
        );
        log.info('OpenRouter response received', { model });
        return data.choices?.[0]?.message?.content || '';
    }

    if (provider === 'anthropic') {
        // Use Anthropic Claude API directly
        const data = await postStructuredDataRequest(
            'Anthropic',
            'https://api.anthropic.com/v1/messages',
            {
                'x-api-key': apiKey,
                'Content-Type': 'application/json',
                'anthropic-version': '2023-06-01'
            },
            {
                model: model || 'claude-3-5-haiku-20241022',
                max_tokens: 4096,
                messages: [{ role: 'user', content: prompt }]
            }
        );
        log.info('Anthropic response received', { model });
        return data.content?.[0]?.text || '';
    }

    // Use Gemini
    const genAI = new GoogleGenerativeAI(apiKey);
    const gemini = genAI.getGenerativeModel({ model: model || 'gemini-2.0-flash-exp' });
    const result = await gemini.generateContent(prompt);
    const text = result.response.text();
    log.info('Gemini response received', { model });
    return text;
}

/**
 * POST a JSON payload and return the parsed JSON reply.
 * Throws an Error naming the provider and HTTP status on a non-2xx response.
 */
async function postStructuredDataRequest(label, url, headers, payload) {
    const response = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload)
    });

    if (!response.ok) {
        // Include a short excerpt of the error body to make the warning useful.
        const detail = await response.text().catch(() => '');
        const suffix = detail ? `: ${detail.slice(0, 200)}` : '';
        throw new Error(`${label} request failed with HTTP ${response.status}${suffix}`);
    }

    return response.json();
}
|
|
|
|
/**
 * Build one record from a schema without calling any AI provider.
 *
 * Each schema value is a type description string, matched by substring in
 * this order: 'url', 'email', 'fullName'/'name', 'number' (optionally with a
 * '(min-max)' range), 'boolean', then an '(a, b, c)' option list; anything
 * else yields a generic 'value_N' string. A nested plain object is treated as
 * a sub-schema and generated recursively. Other non-string descriptors
 * (numbers, arrays, null) are skipped, so the key is absent from the record.
 *
 * @param {object} schema - Map of field name to type description.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {object} The generated record.
 */
function generateFallbackStructured(schema, random) {
    const record = {};

    for (const [key, type] of Object.entries(schema ?? {})) {
        if (type !== null && typeof type === 'object' && !Array.isArray(type)) {
            record[key] = generateFallbackStructured(type, random);
            continue;
        }
        if (typeof type !== 'string') {
            continue;
        }

        if (type.includes('url')) {
            record[key] = `https://example.com/${generateSlug(random)}`;
        } else if (type.includes('email')) {
            record[key] = `user${Math.floor(random() * 10000)}@example.com`;
        } else if (type.includes('fullName') || type.includes('name')) {
            record[key] = generateName(random);
        } else if (type.includes('number')) {
            const range = type.match(/\((\d+)-(\d+)\)/);
            if (range) {
                const first = Number.parseInt(range[1], 10);
                const second = Number.parseInt(range[2], 10);
                // Accept the bounds in either order so '(9-3)' still covers 3..9.
                const min = Math.min(first, second);
                const max = Math.max(first, second);
                record[key] = min + Math.floor(random() * (max - min + 1));
            } else {
                record[key] = Math.floor(random() * 100);
            }
        } else if (type.includes('boolean')) {
            record[key] = random() > 0.5;
        } else if (type.includes('(') && type.includes(',')) {
            const options = type.match(/\(([^)]+)\)/)?.[1].split(',').map(s => s.trim()) || ['Option1', 'Option2'];
            record[key] = options[Math.floor(random() * options.length)];
        } else {
            record[key] = `value_${Math.floor(random() * 1000)}`;
        }
    }

    return record;
}
|
|
|
|
/**
 * Build `count` evenly spaced time-series points with OHLC-style fields.
 *
 * The base level starts at 100 and is compounded by +/-1% per step for an
 * 'upward'/'downward' trend. With `seasonality` on, a 24-hour sine cycle
 * (+/-20%) keyed on the UTC hour is applied. Multiplicative noise is then
 * added and open/high/low/close/volume are derived from the noisy value.
 * `high` and `low` are clamped so they always bracket value, open and close.
 *
 * @param {number} count - Number of points.
 * @param {object} [config] - Options: interval ('1h'), trend ('flat'),
 *   seasonality (false), noise (0.1), startDate ('2024-01-01').
 * @param {*} [seed] - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} Points in chronological order.
 */
async function generateTimeSeriesData(count, config, seed) {
    log.info('Generating time-series data...', { count, config });

    const defaultStartDate = '2024-01-01';
    const {
        interval = '1h',
        trend = 'flat',
        seasonality = false,
        noise = 0.1,
        startDate = defaultStartDate
    } = config ?? {};

    const random = createSeededRandom(seed);
    const results = [];

    // An unparseable start date would make toISOString() throw on every
    // point, so fall back to the default instead.
    let start = new Date(startDate);
    if (Number.isNaN(start.getTime())) {
        log.warning(`Invalid startDate "${startDate}". Using ${defaultStartDate}.`);
        start = new Date(defaultStartDate);
    }
    const intervalMs = parseInterval(interval);

    const round2 = (x) => Math.round(x * 100) / 100;
    const trendFactor = trend === 'upward' ? 0.01 : trend === 'downward' ? -0.01 : 0;
    let level = 100;

    for (let i = 0; i < count; i++) {
        const timestamp = new Date(start.getTime() + i * intervalMs);

        level *= (1 + trendFactor);

        let seasonalValue = level;
        if (seasonality) {
            // UTC hour keeps the output identical regardless of the host's
            // time zone and consistent with the ISO (UTC) timestamp emitted.
            const hour = timestamp.getUTCHours();
            const seasonalFactor = 1 + 0.2 * Math.sin((hour / 24) * 2 * Math.PI);
            seasonalValue = level * seasonalFactor;
        }

        const noiseValue = seasonalValue * (1 + (random() - 0.5) * 2 * noise);

        // Draw order (open, high, low, close, volume) is fixed for seeded runs.
        const value = round2(noiseValue);
        const open = round2(noiseValue * (1 - random() * 0.02));
        const rawHigh = round2(noiseValue * (1 + random() * 0.03));
        const rawLow = round2(noiseValue * (1 - random() * 0.03));
        const close = round2(noiseValue * (1 + (random() - 0.5) * 0.02));
        const volume = Math.floor(random() * 1000000);

        results.push({
            timestamp: timestamp.toISOString(),
            value,
            open,
            high: Math.max(rawHigh, open, close, value),
            low: Math.min(rawLow, open, close, value),
            close,
            volume
        });
    }

    return results;
}
|
|
|
|
/**
 * Build `count` synthetic web analytics events spread over the last 30 days.
 *
 * Each event has an id, a type drawn from `eventTypes`, a timestamp, user and
 * session ids, page and device details, and type-specific properties from
 * generateEventProperties. The returned list is sorted by timestamp,
 * oldest first.
 *
 * @param {number} count - How many events to produce.
 * @param {string[]} eventTypes - Event type names to draw from.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} Events in chronological order.
 */
async function generateEventData(count, eventTypes, seed) {
    log.info('Generating web event data...', { count, eventTypes });

    const random = createSeededRandom(seed);
    const windowEnd = Date.now();
    const msPerDay = 24 * 60 * 60 * 1000;
    const browsers = ['Chrome', 'Firefox', 'Safari', 'Edge'];
    const systems = ['Windows', 'macOS', 'iOS', 'Android', 'Linux'];
    const events = [];

    for (let n = 0; n < count; n += 1) {
        // Statement order mirrors the order in which fields draw from `random`.
        const type = eventTypes[Math.floor(random() * eventTypes.length)];
        const occurredAt = new Date(windowEnd - random() * 30 * msPerDay);
        const eventId = `evt_${Date.now()}_${n}`;
        const userId = `user_${Math.floor(random() * 1000)}`;
        const sessionId = `sess_${Math.floor(random() * 10000)}`;
        const page = {
            url: `https://example.com/${generateSlug(random)}`,
            title: generateSearchTitle(random),
            referrer: random() > 0.3 ? 'https://google.com' : 'direct'
        };
        const device = {
            type: random() > 0.6 ? 'mobile' : 'desktop',
            browser: browsers[Math.floor(random() * 4)],
            os: systems[Math.floor(random() * 5)]
        };
        const properties = generateEventProperties(type, random);

        events.push({
            eventId,
            type,
            timestamp: occurredAt.toISOString(),
            userId,
            sessionId,
            page,
            device,
            properties
        });
    }

    events.sort((earlier, later) => Date.parse(earlier.timestamp) - Date.parse(later.timestamp));

    return events;
}
|
|
|
|
/**
 * Produce the type-specific `properties` object for one web event.
 *
 * Known types are 'page_view', 'click', 'scroll', 'form_submit' and
 * 'api_call'; any other type gets a single generic `value` field. Only the
 * builder for the requested type runs, so only its random draws are consumed.
 *
 * @param {string} eventType - The event's type name.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {object} Properties for that event type.
 */
function generateEventProperties(eventType, random) {
    const below = (limit) => Math.floor(random() * limit);
    const oneOf = (choices) => choices[Math.floor(random() * 4)];

    const builders = new Map([
        ['page_view', () => ({
            loadTime: Math.floor(100 + random() * 3000),
            scrollDepth: below(100)
        })],
        ['click', () => ({
            element: oneOf(['button', 'link', 'image', 'card']),
            elementId: `el_${below(1000)}`,
            x: below(1920),
            y: below(1080)
        })],
        ['scroll', () => ({
            direction: random() > 0.8 ? 'up' : 'down',
            depth: below(100),
            velocity: below(500)
        })],
        ['form_submit', () => ({
            formId: `form_${below(100)}`,
            formName: oneOf(['contact', 'signup', 'checkout', 'search']),
            success: random() > 0.1,
            fieldCount: Math.floor(2 + random() * 10)
        })],
        ['api_call', () => {
            const endpoint = `/api/${oneOf(['users', 'products', 'orders', 'search'])}`;
            const method = oneOf(['GET', 'POST', 'PUT', 'DELETE']);
            // First draw decides 500; only otherwise is a second draw made
            // to choose between 200 and 400.
            let statusCode = 500;
            if (!(random() > 0.9)) {
                statusCode = random() > 0.1 ? 200 : 400;
            }
            const responseTime = Math.floor(50 + random() * 500);
            return { endpoint, method, statusCode, responseTime };
        }]
    ]);

    const build = builders.get(eventType);
    if (build) {
        return build();
    }
    return { value: below(100) };
}
|
|
|
|
/**
 * Build `count` synthetic embedding records.
 *
 * Each vector has `dimensions` components drawn uniformly from [-1, 1), is
 * scaled to unit length, and is rounded to six decimals. The accompanying
 * text cycles through a fixed topic list with a variant counter.
 *
 * @param {number} count - How many records to produce.
 * @param {number} dimensions - Length of each embedding vector.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} The generated records.
 */
async function generateEmbeddingData(count, dimensions, seed) {
    log.info('Generating embedding data...', { count, dimensions });

    const random = createSeededRandom(seed);
    const topics = [
        'Product search optimization',
        'Customer sentiment analysis',
        'Price comparison algorithms',
        'Inventory management',
        'User behavior tracking',
        'Market trend analysis',
        'Competitor monitoring',
        'Review aggregation',
        'Category classification',
        'Recommendation engines'
    ];
    const records = [];

    for (let row = 0; row < count; row += 1) {
        const raw = [];
        for (let col = 0; col < dimensions; col += 1) {
            raw.push(random() * 2 - 1);
        }

        // Euclidean length, accumulated left to right.
        const length = Math.sqrt(raw.reduce((sum, component) => sum + component * component, 0));
        const embedding = raw.map((component) => Math.round((component / length) * 1000000) / 1000000);

        const topic = topics[row % topics.length];
        const variant = Math.floor(row / topics.length);

        records.push({
            id: `emb_${row}`,
            text: `${topic} - variant ${variant}`,
            embedding,
            dimensions,
            model: 'synthetic'
        });
    }

    return records;
}
|
|
|
|
// ============================================
|
|
// UTILITY FUNCTIONS
|
|
// ============================================
|
|
|
|
/**
 * Return a random-number function for the given seed.
 *
 * Without a seed (undefined, null, '', false or NaN) the global Math.random
 * is returned. Otherwise the seed is stringified, hashed with hashCode, and
 * used to start a deterministic sine-based generator whose values lie in
 * [0, 1). The numeric seed 0 is a valid seed and yields a deterministic
 * generator.
 *
 * @param {*} seed - Any value; see above for which values count as "no seed".
 * @returns {function(): number} Generator of numbers in [0, 1).
 */
function createSeededRandom(seed) {
    // `!seed` alone would also reject 0, which is a legitimate seed value.
    if (!seed && seed !== 0) return Math.random;

    let s = hashCode(String(seed));
    return function() {
        s = Math.sin(s) * 10000;
        return s - Math.floor(s);
    };
}
|
|
|
|
/**
 * Compute a non-negative 32-bit string hash.
 *
 * Folds the UTF-16 code units with the classic `h = 31 * h + unit` step in
 * wrapping 32-bit arithmetic, then returns the absolute value.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Non-negative integer hash; 0 for the empty string.
 */
function hashCode(str) {
    let acc = 0;
    let pos = 0;
    while (pos < str.length) {
        // Math.imul and `| 0` both wrap to signed 32 bits, matching
        // ((h << 5) - h) + unit followed by a 32-bit truncation.
        acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
        pos += 1;
    }
    return Math.abs(acc);
}
|
|
|
|
/**
 * Generate synthetic data based on a template output schema.
 *
 * Each entry of `outputFormat` maps a field name to a type descriptor:
 * 'string', 'number' or 'number(min-max)', 'object', or 'array'/'array<item>'
 * (item being string, object, or anything else for numbers). Unrecognised
 * descriptors, including non-string ones, produce a 'value_<index>_<n>'
 * placeholder string. Two bookkeeping fields, `_templateId` and
 * `_generatedAt`, are always added.
 *
 * @param {object} outputFormat - Field name to type descriptor map.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @param {number} index - Record index, used in placeholders and `_templateId`.
 * @returns {object} The generated record.
 */
function generateFromTemplateSchema(outputFormat, random, index) {
    const record = {};

    // A template without an output schema yields only the bookkeeping fields.
    for (const [key, type] of Object.entries(outputFormat ?? {})) {
        if (typeof type !== 'string') {
            // Non-string descriptors cannot be pattern-matched; use the same
            // placeholder as any other unrecognised type instead of throwing.
            record[key] = `value_${index}_${Math.floor(random() * 1000)}`;
        } else if (type === 'string') {
            record[key] = generateTemplateString(key, random);
        } else if (type.startsWith('number')) {
            const match = type.match(/\((\d+)-(\d+)\)/);
            if (match) {
                const min = Number.parseInt(match[1], 10);
                const max = Number.parseInt(match[2], 10);
                record[key] = min + Math.floor(random() * (max - min + 1));
            } else {
                record[key] = Math.floor(random() * 100);
            }
        } else if (type === 'object') {
            record[key] = { id: generateId(random), value: Math.floor(random() * 1000) };
        } else if (type.startsWith('array')) {
            const itemType = type.match(/<(\w+)>/)?.[1] || 'string';
            const count = Math.floor(2 + random() * 4);
            record[key] = Array.from({ length: count }, () =>
                itemType === 'string' ? generateTemplateString(key, random) :
                itemType === 'object' ? { id: generateId(random), value: Math.floor(random() * 100) } :
                Math.floor(random() * 1000)
            );
        } else {
            record[key] = `value_${index}_${Math.floor(random() * 1000)}`;
        }
    }

    record._templateId = `tpl_${index}`;
    record._generatedAt = new Date().toISOString();

    return record;
}
|
|
|
|
/**
 * Generate context-aware string values based on field name.
 *
 * The field name is lowercased and checked for keyword substrings in a fixed
 * order; the first match decides the kind of value. Because matching is by
 * substring, 'id' also matches names that merely contain those letters
 * (e.g. 'video'). Names with no keyword get a generic 'value_N' string.
 *
 * @param {string} fieldName - Name of the field being filled.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} A generated value suited to the field name.
 */
function generateTemplateString(fieldName, random) {
    const lowerField = fieldName.toLowerCase();
    const mentions = (...keywords) => keywords.some((keyword) => lowerField.includes(keyword));
    const pick = (options) => options[Math.floor(random() * options.length)];

    // Matching runs on the lowercased name, so a separate check for 'Id'
    // could never succeed and has been dropped; camelCase names such as
    // 'userId' are already covered by 'id'.
    if (mentions('id')) {
        return `id_${Math.floor(random() * 100000)}`;
    }
    if (mentions('name', 'title')) {
        return generateName(random);
    }
    if (mentions('email')) {
        return `user${Math.floor(random() * 10000)}@example.com`;
    }
    if (mentions('phone')) {
        return generatePhone(random);
    }
    if (mentions('url', 'website')) {
        return `https://example.com/${generateSlug(random)}`;
    }
    if (mentions('description', 'content', 'summary')) {
        return generateSnippet(random);
    }
    if (mentions('approach', 'strategy')) {
        return pick(['Direct outreach', 'Email campaign', 'Social engagement', 'Referral network', 'Content marketing']);
    }
    if (mentions('insight', 'finding')) {
        return pick(['High growth potential', 'Active buyer signals', 'Recent funding round', 'Expanding market', 'Technology adoption']);
    }
    if (mentions('style', 'type')) {
        return pick(['Professional', 'Casual', 'Educational', 'Entertaining', 'Promotional']);
    }

    return `value_${Math.floor(random() * 1000)}`;
}
|
|
|
|
/**
 * Convert an interval such as '15m', '2h' or '1d' to milliseconds.
 *
 * The first "<digits><unit>" pair found in the input is used, where unit is
 * m (minutes), h (hours) or d (days). Anything that does not contain such a
 * pair, including null, undefined and plain numbers, yields the one-hour
 * default.
 *
 * @param {*} interval - Interval text; non-strings are stringified first.
 * @returns {number} Interval length in milliseconds.
 */
function parseInterval(interval) {
    const oneHourMs = 3600000;

    // Inputs come from user config, so tolerate null/undefined/numbers
    // instead of throwing on a missing .match method.
    const match = String(interval ?? '').match(/(\d+)([mhd])/);
    if (!match) return oneHourMs;

    const value = Number.parseInt(match[1], 10);
    const unit = match[2];

    switch (unit) {
        case 'm': return value * 60 * 1000;
        case 'h': return value * 60 * 60 * 1000;
        case 'd': return value * 24 * 60 * 60 * 1000;
        default: return oneHourMs;
    }
}
|
|
|
|
/**
 * Produce a short base-36 identifier.
 *
 * The id is the fractional part of one random draw written in base 36 and
 * cut to at most 13 characters. The draw comes from the supplied `random`
 * function, so ids are reproducible when a seeded generator is passed in;
 * Math.random is used only when no function is given.
 *
 * @param {function(): number} [random] - Source of numbers in [0, 1).
 * @returns {string} Lowercase alphanumeric id (may be short for "round" draws).
 */
function generateId(random) {
    const draw = typeof random === 'function' ? random : Math.random;
    return draw().toString(36).substring(2, 15);
}
|
|
|
|
/**
 * Build a URL slug of the form "<adjective>-<noun>-<number>".
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} Slug such as 'ultra-guide-5000'.
 */
function generateSlug(random) {
    const adjectives = ['best', 'top', 'new', 'amazing', 'premium', 'ultra', 'pro', 'max', 'elite', 'smart'];
    const subjects = ['product', 'item', 'deal', 'offer', 'guide', 'review', 'article', 'post'];

    // Three draws, in this order: adjective, noun, numeric suffix.
    const adjective = adjectives[Math.floor(random() * adjectives.length)];
    const subject = subjects[Math.floor(random() * subjects.length)];
    const suffix = Math.floor(random() * 10000);

    return [adjective, subject, suffix].join('-');
}
|
|
|
|
/**
 * Build a "First Last" personal name from fixed name lists.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} A full name such as 'Emma Jones'.
 */
function generateName(random) {
    const given = ['John', 'Jane', 'Alex', 'Sarah', 'Mike', 'Emma', 'Chris', 'Lisa', 'David', 'Amy'];
    const family = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson'];

    // The first name is drawn before the surname.
    const first = given[Math.floor(random() * given.length)];
    const last = family[Math.floor(random() * family.length)];

    return `${first} ${last}`;
}
|
|
|
|
/**
 * Build a product name of the form "<Adjective> <Item>" for a category.
 *
 * Categories without their own item list use the Electronics list.
 *
 * @param {string} category - Product category name.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} A product name such as 'Smart Cable'.
 */
function generateProductName(category, random) {
    const qualifiers = ['Premium', 'Ultra', 'Pro', 'Classic', 'Smart', 'Portable', 'Wireless', 'Advanced'];
    const catalogue = {
        'Electronics': ['Headphones', 'Speaker', 'Charger', 'Cable', 'Adapter', 'Mouse', 'Keyboard'],
        'Clothing': ['T-Shirt', 'Jacket', 'Jeans', 'Sneakers', 'Hat', 'Sweater', 'Dress'],
        'Home & Garden': ['Lamp', 'Planter', 'Organizer', 'Tool Set', 'Decoration', 'Rug'],
        'Sports': ['Ball', 'Gloves', 'Bag', 'Mat', 'Weights', 'Bottle', 'Band'],
        'Books': ['Guide', 'Novel', 'Textbook', 'Cookbook', 'Biography', 'Manual'],
        'Toys': ['Figure', 'Game', 'Puzzle', 'Set', 'Doll', 'Car'],
        'Beauty': ['Cream', 'Serum', 'Mask', 'Oil', 'Brush', 'Palette'],
        'Automotive': ['Cover', 'Mat', 'Charger', 'Holder', 'Cleaner', 'Light']
    };

    let candidates = catalogue[category];
    if (!candidates) {
        candidates = catalogue['Electronics'];
    }

    // The adjective is drawn before the item.
    const qualifier = qualifiers[Math.floor(random() * qualifiers.length)];
    const item = candidates[Math.floor(random() * candidates.length)];

    return `${qualifier} ${item}`;
}
|
|
|
|
/**
 * Return a small specification object for a product category.
 *
 * Electronics, Clothing and Home & Garden have dedicated specs; every other
 * category gets a generic placeholder. All six random draws are made on every
 * call, whichever category is requested.
 *
 * @param {string} category - Product category name.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {object} Specification fields for the category.
 */
function generateSpecs(category, random) {
    // Draw order: battery, material, size, width, height, weight.
    const batteryMah = Math.floor(1000 + random() * 4000);
    const material = random() > 0.5 ? 'Cotton' : 'Polyester';
    const size = ['S', 'M', 'L', 'XL'][Math.floor(random() * 4)];
    const widthCm = Math.floor(10 + random() * 50);
    const heightCm = Math.floor(10 + random() * 50);
    const weightKg = Math.floor(random() * 10);

    const byCategory = {
        'Electronics': { battery: `${batteryMah}mAh`, connectivity: 'Bluetooth 5.0', warranty: '1 year' },
        'Clothing': { material, size },
        'Home & Garden': { dimensions: `${widthCm}x${heightCm}cm`, weight: `${weightKg}kg` }
    };

    const match = byCategory[category];
    if (match) {
        return match;
    }
    return { general: 'Standard specifications' };
}
|
|
|
|
/**
 * Pick one canned social-media post text.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of seven fixed post texts.
 */
function generateSocialText(random) {
    const posts = [
        'Just discovered this amazing product! Highly recommend',
        'Working on something exciting today',
        'Can\'t believe how good this turned out',
        'Who else is enjoying this beautiful day?',
        'Sharing my latest project with you all',
        'This is a game changer for productivity',
        'Thoughts on the latest industry trends?'
    ];
    const chosen = Math.floor(random() * posts.length);
    return posts[chosen];
}
|
|
|
|
/**
 * Pick one hashtag word (without the leading '#') from a fixed list.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of ten fixed tag words.
 */
function generateHashtag(random) {
    const vocabulary = ['tech', 'innovation', 'business', 'startup', 'coding', 'design', 'marketing', 'growth', 'success', 'tips'];
    const chosen = Math.floor(random() * vocabulary.length);
    return vocabulary[chosen];
}
|
|
|
|
/**
 * Build a small generic payload object with a name, a numeric value, an
 * active flag and between one and three placeholder tags.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {{name: string, value: number, active: boolean, tags: string[]}}
 */
function generateRandomObject(random) {
    // Draw order: name, value, active, tag count.
    const name = generateName(random);
    const value = Math.floor(random() * 1000);
    const active = random() > 0.3;
    const tagCount = Math.floor(1 + random() * 3);
    const tags = ['tag1', 'tag2', 'tag3'].slice(0, tagCount);

    return { name, value, active, tags };
}
|
|
|
|
/**
 * Translate an HTTP status code into a short error description.
 *
 * @param {number|string} code - HTTP status code.
 * @returns {string} The description for 400, 401, 403, 404 or 500, or
 *   'Unknown error' for any other code.
 */
function getErrorMessage(code) {
    const descriptions = {
        500: 'Internal Server Error',
        404: 'Not Found - Resource does not exist',
        403: 'Forbidden - Access denied',
        401: 'Unauthorized - Invalid API key',
        400: 'Bad Request - Invalid parameters'
    };

    const description = descriptions[code];
    if (description) {
        return description;
    }
    return 'Unknown error';
}
|
|
|
|
/**
 * Build an article-style title by inserting a random topic into a random
 * title pattern.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} A title such as 'The Complete Guide to Automation'.
 */
function generateSearchTitle(random) {
    const patterns = [
        'How to Get Started with {topic}',
        'The Complete Guide to {topic}',
        'Top 10 {topic} Tips for Beginners',
        'Best {topic} Practices in 2024',
        '{topic}: Everything You Need to Know'
    ];
    const subjects = ['Web Scraping', 'Data Analysis', 'API Integration', 'Automation', 'Machine Learning'];

    // The pattern is drawn before the topic.
    const pattern = patterns[Math.floor(random() * patterns.length)];
    const subject = subjects[Math.floor(random() * subjects.length)];

    return pattern.replace('{topic}', subject);
}
|
|
|
|
/**
 * Pick one canned descriptive snippet of marketing-style text.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of five fixed snippets.
 */
function generateSnippet(random) {
    const blurbs = [
        'Learn how to effectively implement solutions with our comprehensive guide. Discover best practices and expert tips.',
        'This detailed tutorial walks you through step-by-step instructions for achieving optimal results.',
        'Get started quickly with our beginner-friendly approach. No prior experience required.',
        'Explore advanced techniques used by industry professionals to maximize efficiency.',
        'Find out why thousands of users trust our methods for reliable, consistent outcomes.'
    ];
    const chosen = Math.floor(random() * blurbs.length);
    return blurbs[chosen];
}
|
|
|
|
/**
 * Pick one site-section name to show as a breadcrumb.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of five fixed section names.
 */
function generateBreadcrumb(random) {
    const sections = ['guides', 'tutorials', 'blog', 'docs', 'resources'];
    const chosen = Math.floor(random() * sections.length);
    return sections[chosen];
}
|
|
|
|
/**
 * Build a street name of the form "<Name> <Suffix>", e.g. 'Elm Dr'.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} The street name (without a house number).
 */
function generateStreetName(random) {
    const suffixes = ['St', 'Ave', 'Blvd', 'Dr', 'Ln', 'Way', 'Ct'];
    const bases = ['Oak', 'Main', 'Park', 'Cedar', 'Elm', 'Washington', 'Lake', 'Hill'];

    // The name is drawn before the suffix.
    const base = bases[Math.floor(random() * bases.length)];
    const suffix = suffixes[Math.floor(random() * suffixes.length)];

    return `${base} ${suffix}`;
}
|
|
|
|
/**
 * Look up the two-letter US state code for one of the known cities.
 *
 * @param {string} city - City name.
 * @returns {string} The state code, or 'CA' when the city is not listed.
 */
function getState(city) {
    const stateByCity = {
        'Austin': 'TX',
        'Chicago': 'IL',
        'Dallas': 'TX',
        'Houston': 'TX',
        'Los Angeles': 'CA',
        'New York': 'NY',
        'Phoenix': 'AZ',
        'San Diego': 'CA'
    };

    const code = stateByCity[city];
    if (code) {
        return code;
    }
    return 'CA';
}
|
|
|
|
/**
 * Pick between two and six distinct property features.
 *
 * The feature list is shuffled with a Fisher–Yates pass driven by `random`,
 * so the selection is unbiased and, for a given generator, does not depend
 * on the JavaScript engine's sort implementation. One draw decides how many
 * features to keep, then nine draws perform the shuffle.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string[]} The selected features, without duplicates.
 */
function generateRealEstateFeatures(random) {
    const allFeatures = ['Pool', 'Garage', 'Garden', 'Fireplace', 'Central AC', 'Hardwood Floors', 'Updated Kitchen', 'Smart Home', 'Solar Panels', 'Home Office'];
    const count = Math.floor(2 + random() * 5);

    // A random comparator passed to sort() is inconsistent between calls,
    // which makes the resulting order biased and engine-specific.
    const pool = [...allFeatures];
    for (let i = pool.length - 1; i > 0; i--) {
        const j = Math.floor(random() * (i + 1));
        [pool[i], pool[j]] = [pool[j], pool[i]];
    }

    return pool.slice(0, count);
}
|
|
|
|
/**
 * Build a US-style phone number "(AAA) EEE-NNNN".
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} Phone number with area code 200-999, exchange 100-999
 *   and a four-digit line number 1000-9999.
 */
function generatePhone(random) {
    // Draw order: area code, exchange, line number.
    const areaCode = Math.floor(200 + random() * 800);
    const exchange = Math.floor(100 + random() * 900);
    const line = Math.floor(1000 + random() * 9000);

    return `(${areaCode}) ${exchange}-${line}`;
}
|
|
|
|
/**
 * Return the fixed job-description paragraph used for every posting.
 *
 * @param {function(): number} random - Accepted for signature parity with the
 *   other generators; not used.
 * @returns {string} The description text.
 */
function generateJobDescription(random) {
    const description = 'We are looking for a talented professional to join our growing team. You will work on challenging projects and collaborate with cross-functional teams to deliver exceptional results.';
    return description;
}
|
|
|
|
/**
 * Pick one canned job requirement line.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of seven fixed requirement texts.
 */
function generateRequirement(random) {
    const requirements = [
        '3+ years of relevant experience',
        'Strong communication skills',
        'Bachelor\'s degree or equivalent',
        'Experience with modern tools',
        'Ability to work independently',
        'Team collaboration experience',
        'Problem-solving mindset'
    ];
    const chosen = Math.floor(random() * requirements.length);
    return requirements[chosen];
}
|
|
|
|
/**
 * Pick between three and six distinct job benefits.
 *
 * The benefit list is shuffled with a Fisher–Yates pass driven by `random`,
 * so the selection is unbiased and, for a given generator, does not depend
 * on the JavaScript engine's sort implementation. Seven draws perform the
 * shuffle, then one draw decides how many benefits to keep.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string[]} The selected benefits, without duplicates.
 */
function generateBenefits(random) {
    const allBenefits = ['Health Insurance', '401k Match', 'Remote Work', 'Unlimited PTO', 'Stock Options', 'Learning Budget', 'Gym Membership', 'Free Lunch'];

    // A random comparator passed to sort() is inconsistent between calls,
    // which makes the resulting order biased and engine-specific.
    const pool = [...allBenefits];
    for (let i = pool.length - 1; i > 0; i--) {
        const j = Math.floor(random() * (i + 1));
        [pool[i], pool[j]] = [pool[j], pool[i]];
    }

    return pool.slice(0, Math.floor(3 + random() * 4));
}
|
|
|
|
/**
 * Build a news headline for a category by inserting a random emphasis word
 * into one of that category's headline patterns.
 *
 * Categories without their own patterns use the Technology patterns.
 *
 * @param {string} category - News category name.
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} A headline such as 'Scientists Discover Historic'.
 */
function generateNewsTitle(category, random) {
    const patternsByCategory = {
        'Technology': ['New AI Breakthrough Transforms {x}', 'Tech Giants Announce {x} Initiative', 'The Future of {x} is Here'],
        'Business': ['Market Sees Record {x}', 'Company Reports {x} Growth', 'Industry Leaders Discuss {x}'],
        'Politics': ['Government Announces {x} Policy', 'Leaders Meet to Discuss {x}', 'New {x} Legislation Proposed'],
        'Science': ['Scientists Discover {x}', 'New Research Reveals {x}', 'Breakthrough in {x} Studies'],
        'Health': ['Health Experts Recommend {x}', 'New Study Links {x} to Wellness', 'Medical Advances in {x}'],
        'Sports': ['Team Wins {x} Championship', 'Athletes Break {x} Record', 'Sports World Reacts to {x}'],
        'Entertainment': ['Celebrity Announces {x}', 'New {x} Series Premieres', 'Entertainment Industry Embraces {x}']
    };
    const emphasisWords = ['Major', 'Surprising', 'Historic', 'Unprecedented', 'Exciting'];

    let patterns = patternsByCategory[category];
    if (!patterns) {
        patterns = patternsByCategory['Technology'];
    }

    // The pattern is drawn before the emphasis word.
    const pattern = patterns[Math.floor(random() * patterns.length)];
    const emphasis = emphasisWords[Math.floor(random() * emphasisWords.length)];

    return pattern.replace('{x}', emphasis);
}
|
|
|
|
/**
 * Return the fixed subtitle line used for every article.
 *
 * @param {function(): number} random - Accepted for signature parity with the
 *   other generators; not used.
 * @returns {string} The subtitle text.
 */
function generateSubtitle(random) {
    const subtitle = 'Industry experts weigh in on the implications and what it means for the future.';
    return subtitle;
}
|
|
|
|
/**
 * Return the fixed placeholder paragraph used as article body text.
 *
 * @param {function(): number} random - Accepted for signature parity with the
 *   other generators; not used.
 * @returns {string} The placeholder text.
 */
function generateArticleContent(random) {
    const placeholder = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.';
    return placeholder;
}
|
|
|
|
/**
 * Return the fixed image caption used for every article image.
 *
 * @param {function(): number} random - Accepted for signature parity with the
 *   other generators; not used.
 * @returns {string} The caption text.
 */
function generateCaption(random) {
    const caption = 'Image: Illustration of the main topic covered in this article.';
    return caption;
}
|
|
|
|
/**
 * Pick one article tag from a fixed list.
 *
 * @param {function(): number} random - Source of numbers in [0, 1).
 * @returns {string} One of seven fixed tag words.
 */
function generateTag(random) {
    const labels = ['trending', 'breaking', 'exclusive', 'analysis', 'opinion', 'featured', 'popular'];
    const chosen = Math.floor(random() * labels.length);
    return labels[chosen];
}
|
|
|
|
// ============================================
|
|
// ENTERPRISE/COMPANY SIMULATORS
|
|
// ============================================
|
|
|
|
/**
 * Build `count` synthetic stock trade records.
 *
 * Each record has a trade id, symbol, exchange, a timestamp within the last
 * 24 hours, an OHLCV bar, a bid/ask quote, an order, headline market data and
 * a few technical indicators, all derived from one random base price.
 *
 * Internal consistency is enforced: `low` never exceeds `open`, `vwap` lies
 * within [low, high], and `filledQuantity` agrees with the order status
 * ('filled' = full quantity, 'partial' = between 1 and quantity - 1,
 * 'pending' = 0).
 *
 * @param {number} count - How many trades to produce.
 * @param {*} seed - Passed straight to createSeededRandom.
 * @returns {Promise<object[]>} The generated trades.
 */
async function generateStockTradingData(count, seed) {
    log.info('Generating stock trading data (Bloomberg-style)...');
    const random = createSeededRandom(seed);
    const results = [];

    const symbols = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'META', 'NVDA', 'TSLA', 'JPM', 'V', 'WMT', 'UNH', 'JNJ', 'PG', 'HD', 'BAC'];
    const exchanges = ['NYSE', 'NASDAQ', 'LSE', 'TSE', 'HKEX'];
    const orderTypes = ['market', 'limit', 'stop', 'stop_limit', 'trailing_stop'];
    const sides = ['buy', 'sell'];

    const pick = (options) => options[Math.floor(random() * options.length)];
    const toCents = (amount) => Math.round(amount * 100) / 100;

    for (let i = 0; i < count; i++) {
        // The sequence of random draws below is fixed; keep the order when
        // editing so seeded runs stay comparable.
        const symbol = pick(symbols);
        const basePrice = 50 + random() * 500;
        const timestamp = new Date(Date.now() - random() * 24 * 60 * 60 * 1000);
        const volume = Math.floor(100 + random() * 100000);
        const exchange = pick(exchanges);

        const open = toCents(basePrice * (1 - random() * 0.02));
        const high = toCents(basePrice * (1 + random() * 0.03));
        const rawLow = toCents(basePrice * (1 - random() * 0.03));
        const close = toCents(basePrice);
        const rawVwap = toCents(basePrice * (1 + (random() - 0.5) * 0.01));
        // open and rawLow are drawn independently, so clamp to keep a valid bar.
        const low = Math.min(rawLow, open);
        const vwap = Math.min(high, Math.max(low, rawVwap));

        const bidSize = Math.floor(100 + random() * 10000);
        const askSize = Math.floor(100 + random() * 10000);

        const orderType = pick(orderTypes);
        const side = pick(sides);
        const quantity = Math.floor(10 + random() * 1000);
        const fillDraw = random();
        const status = random() > 0.1 ? 'filled' : random() > 0.5 ? 'partial' : 'pending';
        let filledQuantity = 0;
        if (status === 'filled') {
            filledQuantity = quantity;
        } else if (status === 'partial') {
            // quantity is at least 10, so this stays within 1..quantity - 1.
            filledQuantity = Math.max(1, Math.floor(fillDraw * quantity));
        }

        results.push({
            tradeId: `TRD${Date.now()}${i}`,
            symbol,
            exchange,
            timestamp: timestamp.toISOString(),
            ohlcv: {
                open,
                high,
                low,
                close,
                volume,
                vwap
            },
            quote: {
                bid: toCents(basePrice * 0.999),
                ask: toCents(basePrice * 1.001),
                bidSize,
                askSize,
                spread: toCents(basePrice * 0.002)
            },
            order: {
                type: orderType,
                side,
                quantity,
                filledQuantity,
                status
            },
            marketData: {
                marketCap: `${Math.floor(random() * 3000)}B`,
                peRatio: Math.round((10 + random() * 40) * 10) / 10,
                dividendYield: Math.round(random() * 5 * 100) / 100,
                beta: Math.round((0.5 + random() * 1.5) * 100) / 100,
                fiftyTwoWeekHigh: toCents(basePrice * 1.3),
                fiftyTwoWeekLow: toCents(basePrice * 0.7)
            },
            analytics: {
                rsi: Math.round((20 + random() * 60) * 10) / 10,
                macd: Math.round((random() - 0.5) * 10 * 100) / 100,
                movingAvg50: toCents(basePrice * (1 + (random() - 0.5) * 0.1)),
                movingAvg200: toCents(basePrice * (1 + (random() - 0.5) * 0.15))
            },
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic patient-encounter records (patient, encounter, diagnosis,
 * procedures, vitals, billing and provider sections).
 *
 * Internal consistency rules applied here:
 *  - `billing.covered` never exceeds `billing.totalCharges`, and
 *    `billing.patientResponsibility` is the remainder.
 *  - `encounter.lengthOfStay` is computed from admit/discharge dates when a
 *    discharge date exists; otherwise it is a random 1-14 day value.
 *  - `diagnosis.secondary`, when present, differs from `diagnosis.primary`.
 *
 * Note: `recordId`, the admit date and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated medical records.
 */
async function generateMedicalData(count, seed) {
    log.info('Generating medical/healthcare data...');
    const random = createSeededRandom(seed);
    const results = [];

    const departments = ['Cardiology', 'Neurology', 'Orthopedics', 'Oncology', 'Pediatrics', 'Emergency', 'Radiology', 'Surgery'];
    const diagnoses = ['Hypertension', 'Type 2 Diabetes', 'Chronic Pain', 'Respiratory Infection', 'Anxiety Disorder', 'Cardiac Arrhythmia', 'Migraine', 'Osteoarthritis'];
    const procedures = ['Blood Test', 'MRI Scan', 'X-Ray', 'CT Scan', 'Ultrasound', 'ECG', 'Endoscopy', 'Biopsy'];
    const insurers = ['Blue Cross', 'Aetna', 'UnitedHealth', 'Cigna', 'Humana', 'Kaiser', 'Medicare', 'Medicaid'];
    const statuses = ['admitted', 'discharged', 'outpatient', 'emergency', 'scheduled'];

    const DAY_MS = 24 * 60 * 60 * 1000;
    const pick = (items) => items[Math.floor(random() * items.length)];

    for (let i = 0; i < count; i++) {
        const admitDate = new Date(Date.now() - random() * 365 * DAY_MS);
        const age = Math.floor(18 + random() * 70);

        const patient = {
            id: `PAT${Math.floor(random() * 1000000)}`,
            age,
            gender: random() > 0.5 ? 'M' : 'F',
            bloodType: pick(['A+', 'A-', 'B+', 'B-', 'O+', 'O-', 'AB+', 'AB-']),
            allergies: random() > 0.7 ? pick(['Penicillin', 'Sulfa', 'Latex']) : null,
        };

        const encounterType = pick(statuses);
        const department = pick(departments);
        const dischargedAt = random() > 0.3 ? new Date(admitDate.getTime() + random() * 7 * DAY_MS) : null;
        const openStayDays = Math.floor(1 + random() * 14);
        // A known discharge date determines the stay; count any started day, minimum one.
        const lengthOfStay = dischargedAt === null
            ? openStayDays
            : Math.max(1, Math.ceil((dischargedAt.getTime() - admitDate.getTime()) / DAY_MS));

        const primary = pick(diagnoses);
        // Exclude the primary so the secondary diagnosis adds information.
        const otherDiagnoses = diagnoses.filter((name) => name !== primary);
        const diagnosis = {
            primary,
            secondary: random() > 0.5 ? pick(otherDiagnoses) : null,
            icdCode: `I${Math.floor(10 + random() * 90)}.${Math.floor(random() * 10)}`,
            severity: pick(['mild', 'moderate', 'severe', 'critical']),
        };

        const performed = Array.from({ length: Math.floor(1 + random() * 3) }, () => ({
            name: pick(procedures),
            cptCode: `${Math.floor(10000 + random() * 90000)}`,
            date: new Date(admitDate.getTime() + random() * 3 * DAY_MS).toISOString(),
            result: random() > 0.1 ? 'normal' : 'abnormal',
        }));

        const vitals = {
            bloodPressure: `${Math.floor(100 + random() * 60)}/${Math.floor(60 + random() * 40)}`,
            heartRate: Math.floor(60 + random() * 40),
            temperature: Math.round((97 + random() * 4) * 10) / 10,
            oxygenSaturation: Math.floor(94 + random() * 6),
            weight: Math.floor(120 + random() * 150),
            height: Math.floor(60 + random() * 20),
        };

        const insurer = pick(insurers);
        const policyNumber = `POL${Math.floor(random() * 10000000)}`;
        const totalCharges = Math.floor(1000 + random() * 50000);
        // Insurance cannot pay more than was billed; the patient owes the rest.
        const covered = Math.min(totalCharges, Math.floor(800 + random() * 40000));
        const patientResponsibility = totalCharges - covered;
        let claimStatus;
        if (random() > 0.2) {
            claimStatus = 'approved';
        } else {
            claimStatus = random() > 0.5 ? 'pending' : 'denied';
        }

        const provider = {
            physician: generateName(random),
            npi: `${Math.floor(1000000000 + random() * 9000000000)}`,
            facility: `${pick(['Metro', 'Central', 'Regional', 'University'])} Medical Center`,
        };

        results.push({
            recordId: `MED${Date.now()}${i}`,
            patient,
            encounter: {
                type: encounterType,
                department,
                admitDate: admitDate.toISOString(),
                dischargeDate: dischargedAt === null ? null : dischargedAt.toISOString(),
                lengthOfStay,
            },
            diagnosis,
            procedures: performed,
            vitals,
            billing: { insurer, policyNumber, totalCharges, covered, patientResponsibility, claimStatus },
            provider,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate Crunchbase-style company data using the Gemini API with the
 * Google Search tool enabled on the model.
 *
 * One request is issued per company. Every company that is attempted yields
 * exactly one record: a grounded record when the response contains parseable
 * JSON, otherwise a fallback record carrying the failure reason in
 * `data.error`. Requests are spaced 4.1 seconds apart (never after the last
 * one) to respect the 15 requests-per-minute free-tier limit.
 *
 * @param {number} count - Maximum number of companies to research.
 * @param {string} apiKey - Gemini API key; when falsy, synthetic data from
 *   generateCompanyData is returned instead.
 * @param {string[]} [companyNames=[]] - Companies to research; when empty (or
 *   not an array) a list is requested from the model via generateCompanyList.
 * @param {?string} [industry=null] - Optional industry filter for the generated list.
 * @returns {Promise<Array<Object>>} The company records.
 */
async function generateCrunchbaseData(count, apiKey, companyNames = [], industry = null) {
    log.info('Generating Crunchbase-style data with Gemini Grounding...', { count, industry });
    const results = [];

    if (!apiKey) {
        log.warning('No Gemini API key - falling back to synthetic company data');
        return generateCompanyData(count, 'crunchbase-fallback');
    }

    // GoogleGenerativeAI is already imported statically at the top of the file,
    // so no dynamic import is needed here.
    const genAI = new GoogleGenerativeAI(apiKey);

    // Use Gemini 2.0 Flash with Google Search grounding
    const modelName = 'gemini-2.0-flash-exp';
    const model = genAI.getGenerativeModel({
        model: modelName,
        tools: [{ google_search: {} }],
    });

    // Generate company names if not provided (a null/non-array argument counts as "not provided")
    const providedNames = Array.isArray(companyNames) ? companyNames : [];
    const targetCompanies = providedNames.length > 0
        ? providedNames
        : await generateCompanyList(model, count, industry);
    const total = Math.min(count, targetCompanies.length);

    // Record emitted when grounded data could not be obtained for a company.
    const buildFallbackRecord = (companyName, index, reason) => ({
        id: `crunchbase_${Date.now()}_${index}`,
        type: 'crunchbase',
        data: {
            name: companyName,
            description: 'Company information not available',
            dataSource: 'fallback',
            groundingUsed: false,
            error: reason,
        },
        metadata: {
            query: companyName,
            generatedAt: new Date().toISOString(),
            grounded: false,
        },
    });

    for (let i = 0; i < total; i++) {
        const companyName = targetCompanies[i];

        try {
            const prompt = `Research "${companyName}" company and provide current information in this exact JSON format:
{
"name": "Official company name",
"description": "Brief company description (1-2 sentences)",
"founded": 2010,
"founders": ["Founder Name 1", "Founder Name 2"],
"headquarters": {"city": "City", "state": "State", "country": "Country"},
"industry": "Primary industry",
"subIndustry": "Sub-industry or sector",
"employeeCount": "Range like 1001-5000 or exact number",
"fundingTotal": "$X million/billion or 'Private/Not disclosed'",
"lastFundingRound": {"type": "Series X or IPO", "amount": "$X", "date": "YYYY-MM"},
"valuation": "$X billion or 'Private'",
"revenue": "$X million/billion or 'Not disclosed'",
"website": "https://company.com",
"linkedIn": "linkedin.com/company/name",
"ceo": "CEO Name",
"publicStatus": "Public (NASDAQ:TICK)" or "Private",
"competitors": ["Competitor 1", "Competitor 2"],
"keyProducts": ["Product 1", "Product 2"],
"recentNews": "Brief recent news (1 sentence)"
}
Only return valid JSON, no markdown or explanation.`;

            const result = await model.generateContent(prompt);
            const text = result.response.text();

            // Extract JSON from response (greedy: first "{" through last "}")
            const jsonMatch = text.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                const companyData = JSON.parse(jsonMatch[0]);
                results.push({
                    id: `crunchbase_${Date.now()}_${i}`,
                    type: 'crunchbase',
                    data: {
                        ...companyData,
                        dataSource: 'gemini-grounding',
                        groundingUsed: true,
                        lastUpdated: new Date().toISOString(),
                    },
                    metadata: {
                        query: companyName,
                        generatedAt: new Date().toISOString(),
                        provider: 'gemini',
                        model: modelName,
                        grounded: true,
                    },
                });
                log.info(`Grounded data for: ${companyName}`);
            } else {
                // Previously such a company vanished from the output without a trace.
                const reason = 'Response did not contain a JSON object';
                log.warning(`Failed to get grounded data for ${companyName}: ${reason}`);
                results.push(buildFallbackRecord(companyName, i, reason));
            }
        } catch (e) {
            log.warning(`Failed to get grounded data for ${companyName}: ${e.message}`);
            // Add fallback synthetic data
            results.push(buildFallbackRecord(companyName, i, e.message));
        }

        // Rate limiting - 15 RPM for Gemini free tier. Compare against the real
        // loop bound so no delay is spent after the final request.
        if (i < total - 1) {
            await new Promise((resolve) => setTimeout(resolve, 4100));
        }
    }

    return results;
}
|
|
|
|
/**
 * Ask the model for a list of companies to research.
 *
 * The reply is split into lines; each line is stripped of surrounding
 * whitespace and of a leading bullet ("-", "*", "•") or numbering ("1.",
 * "2)") that models often add despite the instruction, so the names can be
 * embedded cleanly into follow-up prompts. Names that merely start with a
 * digit (e.g. "1Password") are left intact.
 *
 * @param {Object} model - Object exposing `generateContent(prompt)` that resolves
 *   to `{ response: { text() } }`.
 * @param {number} count - Maximum number of names to return (the prompt asks for at most 20).
 * @param {?string} [industry=null] - Optional industry to restrict the list to.
 * @returns {Promise<string[]>} Company names; a built-in list of well-known
 *   companies when the request fails.
 */
async function generateCompanyList(model, count, industry = null) {
    const industryFilter = industry ? ` in the ${industry} industry` : '';
    const prompt = `List ${Math.min(count, 20)} notable startup and tech companies${industryFilter} that are frequently covered on Crunchbase.
Include a mix of:
- Unicorns (valued over $1B)
- Recently funded startups
- Established tech companies
Return only company names, one per line, no numbering or bullets.`;

    // Leading list decoration: bullets, or digits followed by "." / ")", then whitespace.
    const listMarker = /^\s*(?:[-*\u2022]+|\d+[.)])\s+/;

    try {
        const result = await model.generateContent(prompt);
        const text = result.response.text();
        return text
            .split('\n')
            .map((line) => line.replace(listMarker, '').trim())
            .filter((name) => name.length > 0)
            .slice(0, count);
    } catch (e) {
        log.warning(`Failed to generate company list: ${e.message}`);
        // Fallback to well-known companies
        return ['OpenAI', 'Anthropic', 'Stripe', 'SpaceX', 'Databricks', 'Figma', 'Notion', 'Discord', 'Canva', 'Airtable'].slice(0, count);
    }
}
|
|
|
|
/**
 * Generate synthetic corporate profiles (profile, headquarters, financials,
 * workforce, leadership and metrics sections).
 *
 * Internal consistency rules applied here:
 *  - Headquarters city, state and timezone are drawn together as one location.
 *  - Leadership titles are drawn without replacement, so no title repeats
 *    within a company.
 *  - A leader's `since` year is never earlier than the company's `founded` year.
 *
 * Note: `companyId` and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of companies to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated company records.
 */
async function generateCompanyData(count, seed) {
    log.info('Generating company/corporate data...');
    const random = createSeededRandom(seed);
    const results = [];

    const industries = ['Technology', 'Healthcare', 'Finance', 'Manufacturing', 'Retail', 'Energy', 'Telecommunications', 'Transportation'];
    const companyTypes = ['Corporation', 'LLC', 'Partnership', 'Sole Proprietorship', 'S-Corp', 'Non-Profit'];
    const departments = ['Engineering', 'Sales', 'Marketing', 'Finance', 'HR', 'Operations', 'Legal', 'R&D'];
    const nameSuffixes = ['Industries', 'Corp', 'Inc', 'Holdings', 'Group', 'Technologies', 'Solutions'];
    const executiveTitles = ['CEO', 'CFO', 'CTO', 'COO', 'CMO', 'CHRO', 'CLO', 'CIO'];
    const locations = [
        { city: 'New York', state: 'NY', timezone: 'America/New_York' },
        { city: 'San Francisco', state: 'CA', timezone: 'America/Los_Angeles' },
        { city: 'Chicago', state: 'IL', timezone: 'America/Chicago' },
        { city: 'Boston', state: 'MA', timezone: 'America/New_York' },
        { city: 'Austin', state: 'TX', timezone: 'America/Chicago' },
        { city: 'Seattle', state: 'WA', timezone: 'America/Los_Angeles' },
    ];

    const pick = (items) => items[Math.floor(random() * items.length)];
    const toCents = (value) => Math.round(value * 100) / 100;
    const toTenths = (value) => Math.round(value * 10) / 10;
    const randomLetter = () => String.fromCharCode(65 + Math.floor(random() * 26));

    for (let i = 0; i < count; i++) {
        const founded = Math.floor(1950 + random() * 74);
        const employees = Math.floor(10 + random() * 100000);
        const revenue = Math.floor(100000 + random() * 50000000000);

        const profile = {
            // Assumes generateName returns "First Last"; the surname becomes the brand.
            name: `${generateName(random).split(' ')[1]} ${pick(nameSuffixes)}`,
            ticker: random() > 0.5 ? `${randomLetter()}${randomLetter()}${randomLetter()}${randomLetter()}` : null,
            type: pick(companyTypes),
            industry: pick(industries),
            founded,
            website: `https://example-company-${i}.com`,
            description: 'Leading provider of innovative solutions for modern enterprises.',
        };

        const address = `${Math.floor(100 + random() * 9900)} Corporate Blvd`;
        const location = pick(locations);

        const financials = {
            revenue,
            revenueGrowth: toTenths(random() * 40 - 10),
            netIncome: Math.floor(revenue * (0.05 + random() * 0.15)),
            grossMargin: toTenths(30 + random() * 40),
            operatingMargin: toTenths(10 + random() * 25),
            debtToEquity: toCents(random() * 2),
            currentRatio: toCents(1 + random() * 2),
            fiscalYearEnd: pick(['December', 'March', 'June', 'September']),
        };

        const workforce = {
            totalEmployees: employees,
            fullTime: Math.floor(employees * 0.85),
            partTime: Math.floor(employees * 0.1),
            contractors: Math.floor(employees * 0.05),
            departments: departments.slice(0, Math.floor(3 + random() * 5)).map((dept) => ({
                name: dept,
                headcount: Math.floor(employees * (0.05 + random() * 0.2)),
                budget: Math.floor(revenue * (0.01 + random() * 0.1)),
            })),
            avgTenure: toTenths(2 + random() * 8),
            turnoverRate: toTenths(5 + random() * 20),
        };

        // 3-7 leaders; removing each chosen title from the pool prevents duplicates.
        const openTitles = [...executiveTitles];
        const leadership = Array.from({ length: Math.floor(3 + random() * 5) }, () => {
            const name = generateName(random);
            const [title] = openTitles.splice(Math.floor(random() * openTitles.length), 1);
            // Nobody can hold office before the company existed.
            const since = Math.max(founded, Math.floor(2010 + random() * 14));
            const compensation = Math.floor(500000 + random() * 10000000);
            return { name, title, since, compensation };
        });

        results.push({
            companyId: `COM${Date.now()}${i}`,
            profile,
            headquarters: {
                address,
                city: location.city,
                state: location.state,
                country: 'USA',
                timezone: location.timezone,
            },
            financials,
            workforce,
            leadership,
            metrics: {
                customerCount: Math.floor(100 + random() * 1000000),
                nps: Math.floor(-20 + random() * 100),
                marketShare: toTenths(random() * 30),
                brandValue: `${Math.floor(random() * 50)}B`,
            },
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic supply-chain shipment records (order, product, supplier,
 * logistics, inventory, costs and compliance sections).
 *
 * Internal consistency rules applied here:
 *  - The category embedded in `product.name` is the same as `product.category`.
 *  - `costs.totalLandedCost` is the sum of product cost, shipping, tariffs
 *    and insurance instead of an unrelated random figure.
 *
 * Note: `shipmentId`, the order date and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of shipments to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated shipment records.
 */
async function generateSupplyChainData(count, seed) {
    log.info('Generating supply chain data...');
    const random = createSeededRandom(seed);
    const results = [];

    const productCategories = ['Electronics', 'Raw Materials', 'Components', 'Finished Goods', 'Packaging', 'Chemicals', 'Textiles', 'Machinery'];
    const statuses = ['in_transit', 'delivered', 'pending', 'delayed', 'customs_hold', 'processing', 'shipped', 'cancelled'];
    const transportModes = ['air', 'sea', 'rail', 'truck', 'multimodal'];
    const warehouses = ['WH-NYC-01', 'WH-LAX-02', 'WH-CHI-03', 'WH-HOU-04', 'WH-SEA-05', 'WH-MIA-06'];
    const countries = ['USA', 'China', 'Germany', 'Japan', 'Mexico', 'Vietnam', 'India', 'South Korea'];

    const DAY_MS = 24 * 60 * 60 * 1000;
    const pick = (items) => items[Math.floor(random() * items.length)];
    const toCents = (value) => Math.round(value * 100) / 100;
    const toTenths = (value) => Math.round(value * 10) / 10;

    for (let i = 0; i < count; i++) {
        const orderDate = new Date(Date.now() - random() * 90 * DAY_MS);
        const quantity = Math.floor(10 + random() * 10000);
        const unitPrice = toCents(1 + random() * 500);
        const goodsValue = toCents(quantity * unitPrice);

        const order = {
            orderId: `ORD${Math.floor(random() * 10000000)}`,
            orderDate: orderDate.toISOString(),
            priority: pick(['standard', 'express', 'critical']),
            status: pick(statuses),
        };

        const sku = `SKU-${Math.floor(100000 + random() * 900000)}`;
        // One category drives both the display name and the category field.
        const category = pick(productCategories);
        const product = {
            sku,
            name: `${category} Item ${Math.floor(random() * 1000)}`,
            category,
            quantity,
            unitPrice,
            totalValue: goodsValue,
            weight: toTenths(0.1 + random() * 100),
            dimensions: {
                length: Math.floor(10 + random() * 100),
                width: Math.floor(10 + random() * 100),
                height: Math.floor(10 + random() * 50),
            },
        };

        const supplier = {
            id: `SUP${Math.floor(random() * 10000)}`,
            // Assumes generateName returns "First Last"; the surname names the supplier.
            name: `${generateName(random).split(' ')[1]} Supply Co`,
            country: pick(countries),
            leadTime: Math.floor(7 + random() * 60),
            rating: toTenths(3 + random() * 2),
            onTimeDelivery: toTenths(70 + random() * 30),
        };

        const logistics = {
            carrier: pick(['FedEx', 'UPS', 'DHL', 'Maersk', 'Expeditors', 'DB Schenker']),
            mode: pick(transportModes),
            trackingNumber: `TRK${Math.floor(random() * 1000000000000)}`,
            origin: {
                facility: pick(warehouses),
                country: pick(countries),
                departureDate: orderDate.toISOString(),
            },
            destination: {
                facility: pick(warehouses),
                country: pick(countries),
                eta: new Date(orderDate.getTime() + (7 + random() * 30) * DAY_MS).toISOString(),
            },
            currentLocation: {
                lat: 25 + random() * 25,
                lng: -120 + random() * 60,
                lastUpdate: new Date(orderDate.getTime() + random() * 7 * DAY_MS).toISOString(),
            },
        };

        const inventory = {
            warehouse: pick(warehouses),
            stockLevel: Math.floor(random() * 5000),
            reorderPoint: Math.floor(100 + random() * 500),
            safetyStock: Math.floor(50 + random() * 200),
            daysOfSupply: Math.floor(10 + random() * 90),
        };

        const shippingCost = toCents(quantity * unitPrice * (0.05 + random() * 0.15));
        const tariffs = toCents(quantity * unitPrice * random() * 0.1);
        const insurance = toCents(quantity * unitPrice * 0.02);
        const costs = {
            productCost: goodsValue,
            shippingCost,
            tariffs,
            insurance,
            // Landed cost is by definition the goods plus everything paid to land them.
            totalLandedCost: toCents(goodsValue + shippingCost + tariffs + insurance),
        };

        const compliance = {
            hsCode: `${Math.floor(1000 + random() * 9000)}.${Math.floor(10 + random() * 90)}`,
            countryOfOrigin: pick(countries),
            certificates: random() > 0.5 ? pick(['ISO 9001', 'CE', 'RoHS']) : null,
            customsCleared: random() > 0.3,
        };

        results.push({
            shipmentId: `SHP${Date.now()}${i}`,
            order,
            product,
            supplier,
            logistics,
            inventory,
            costs,
            compliance,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic banking transaction records (account, transaction,
 * card, fraud and analytics sections).
 *
 * Internal consistency rules applied here:
 *  - One spending category and one merchant are drawn per transaction and
 *    reused for the description, `transaction.category` and `merchant.category`.
 *  - `analytics.dayOfWeek` / `analytics.hourOfDay` are taken in UTC so they
 *    agree with the UTC ISO string emitted in `transaction.date`.
 *
 * Note: `transactionId`, the transaction date and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of transactions to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated transaction records.
 */
async function generateFinancialData(count, seed) {
    log.info('Generating financial services data...');
    const random = createSeededRandom(seed);
    const results = [];

    const accountTypes = ['checking', 'savings', 'investment', 'retirement', 'credit', 'loan', 'mortgage'];
    const transactionTypes = ['debit', 'credit', 'transfer', 'payment', 'withdrawal', 'deposit', 'fee', 'interest'];
    const categories = ['groceries', 'utilities', 'entertainment', 'dining', 'travel', 'shopping', 'healthcare', 'insurance', 'investment'];
    const institutions = ['Chase', 'Bank of America', 'Wells Fargo', 'Citi', 'Capital One', 'Goldman Sachs', 'Morgan Stanley', 'Fidelity'];

    const pick = (items) => items[Math.floor(random() * items.length)];
    const toCents = (value) => Math.round(value * 100) / 100;

    for (let i = 0; i < count; i++) {
        const transactionDate = new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000);
        const amount = toCents(1 + random() * 10000);

        const account = {
            accountId: `ACC${Math.floor(random() * 100000000)}`,
            type: pick(accountTypes),
            institution: pick(institutions),
            balance: toCents(1000 + random() * 500000),
            availableCredit: random() > 0.5 ? toCents(5000 + random() * 50000) : null,
            interestRate: toCents(random() * 25),
        };

        const transactionType = pick(transactionTypes);
        // A single category/merchant pair describes the whole transaction.
        const category = pick(categories);
        // Assumes generateName returns "First Last"; the surname names the merchant.
        const merchantName = `${generateName(random).split(' ')[1]} ${pick(['Store', 'Shop', 'Market', 'Services'])}`;
        let status;
        if (random() > 0.05) {
            status = 'completed';
        } else {
            status = random() > 0.5 ? 'pending' : 'failed';
        }
        const transaction = {
            type: transactionType,
            amount,
            currency: 'USD',
            date: transactionDate.toISOString(),
            description: `${category.toUpperCase()} - ${merchantName}`,
            category,
            status,
            merchant: {
                name: merchantName,
                category,
                mcc: `${Math.floor(1000 + random() * 9000)}`,
            },
        };

        const card = random() > 0.3 ? {
            last4: `${Math.floor(1000 + random() * 9000)}`,
            brand: pick(['Visa', 'Mastercard', 'Amex', 'Discover']),
            expiryMonth: Math.floor(1 + random() * 12),
            expiryYear: Math.floor(2025 + random() * 5),
        } : null;

        const fraud = {
            score: Math.round(random() * 100),
            flagged: random() > 0.95,
            rules: random() > 0.9 ? pick(['unusual_location', 'high_amount', 'velocity_check']) : null,
        };

        const analytics = {
            // UTC accessors keep these in step with the ISO (UTC) `date` string above.
            dayOfWeek: transactionDate.getUTCDay(),
            hourOfDay: transactionDate.getUTCHours(),
            isRecurring: random() > 0.7,
            monthlyAverage: toCents(100 + random() * 2000),
        };

        results.push({
            transactionId: `TXN${Date.now()}${i}`,
            account,
            transaction,
            card,
            fraud,
            analytics,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic terminal-style security records (security, pricing,
 * fundamentals, credit, analytics, analyst consensus, news and events).
 *
 * Internal consistency rules applied here:
 *  - `pricing.high` / `pricing.low` always bracket `open`, `close` and `last`.
 *  - `pricing.changePercent` is derived from `pricing.change` relative to the
 *    implied previous price (`last - change`), so both always share a sign.
 *  - The news headline refers to the record's own security name.
 *  - Analyst buy/hold/sell counts sum to `numAnalysts`, and the
 *    recommendation is derived from the share of buy ratings.
 *
 * Note: `terminalId`, timestamps and event dates depend on the wall clock.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated security records.
 */
async function generateBloombergData(count, seed) {
    log.info('Generating Bloomberg terminal-style data...');
    const random = createSeededRandom(seed);
    const results = [];

    const assetClasses = ['equity', 'fixed_income', 'commodity', 'fx', 'derivative', 'crypto'];
    const sectors = ['Technology', 'Healthcare', 'Financials', 'Consumer', 'Energy', 'Industrials', 'Materials', 'Utilities'];
    const ratings = ['AAA', 'AA+', 'AA', 'AA-', 'A+', 'A', 'A-', 'BBB+', 'BBB', 'BBB-', 'BB+', 'BB', 'B', 'CCC'];
    const newsCategories = ['earnings', 'merger', 'regulatory', 'analyst_upgrade', 'analyst_downgrade', 'dividend', 'lawsuit', 'executive'];

    const DAY_MS = 24 * 60 * 60 * 1000;
    const pick = (items) => items[Math.floor(random() * items.length)];
    const toCents = (value) => Math.round(value * 100) / 100;
    const toTenths = (value) => Math.round(value * 10) / 10;
    const randomLetter = () => String.fromCharCode(65 + Math.floor(random() * 26));
    const futureDay = (maxDays) => new Date(Date.now() + random() * maxDays * DAY_MS).toISOString().split('T')[0];

    for (let i = 0; i < count; i++) {
        const timestamp = new Date(Date.now() - random() * DAY_MS);
        const basePrice = 10 + random() * 500;

        const ticker = `${randomLetter()}${randomLetter()}${randomLetter()}${randomLetter()}`;
        // Assumes generateName returns "First Last"; the surname names the issuer.
        const securityName = `${generateName(random).split(' ')[1]} ${pick(['Corp', 'Inc', 'Ltd', 'Holdings', 'Group'])}`;
        const security = {
            ticker,
            name: securityName,
            assetClass: pick(assetClasses),
            sector: pick(sectors),
            country: pick(['US', 'GB', 'JP', 'DE', 'CN', 'FR', 'CA', 'AU']),
            currency: pick(['USD', 'EUR', 'GBP', 'JPY', 'CNY']),
            isin: `US${Math.floor(1000000000 + random() * 9000000000)}`,
            cusip: `${Math.floor(100000000 + random() * 900000000)}`,
        };

        const last = toCents(basePrice);
        const open = toCents(basePrice * (1 - random() * 0.02));
        const drawnHigh = toCents(basePrice * (1 + random() * 0.03));
        const drawnLow = toCents(basePrice * (1 - random() * 0.03));
        const close = toCents(basePrice * (1 + (random() - 0.5) * 0.02));
        const change = toCents((random() - 0.5) * 10);
        // basePrice >= 10 and |change| <= 5, so the implied previous price is always positive.
        const changePercent = toCents((change / (last - change)) * 100);
        const pricing = {
            last,
            bid: toCents(basePrice * 0.999),
            ask: toCents(basePrice * 1.001),
            open,
            high: Math.max(drawnHigh, open, close, last),
            low: Math.min(drawnLow, open, close, last),
            close,
            change,
            changePercent,
            volume: Math.floor(random() * 50000000),
            avgVolume: Math.floor(random() * 30000000),
        };

        const fundamentals = {
            marketCap: `${Math.floor(random() * 3000)}B`,
            enterpriseValue: `${Math.floor(random() * 3500)}B`,
            peRatio: toTenths(5 + random() * 50),
            forwardPe: toTenths(5 + random() * 40),
            pbRatio: toTenths(0.5 + random() * 10),
            evEbitda: toTenths(5 + random() * 30),
            debtToEquity: toCents(random() * 3),
            roe: toTenths(5 + random() * 30),
            eps: toCents(random() * 20),
            dividend: toCents(random() * 5),
            payoutRatio: toTenths(20 + random() * 60),
        };

        const credit = {
            rating: pick(ratings),
            outlook: pick(['positive', 'stable', 'negative']),
            agency: pick(['S&P', "Moody's", 'Fitch']),
            spread: Math.round(50 + random() * 500),
            cds: Math.round(20 + random() * 300),
        };

        const analytics = {
            beta: toCents(0.5 + random() * 1.5),
            sharpeRatio: toCents(random() * 3),
            volatility: toTenths(10 + random() * 40),
            correlation: toCents(random() * 2 - 1),
            var95: toCents(random() * 10),
            maxDrawdown: toTenths(5 + random() * 30),
        };

        // Analyst split: buy and sell shares are drawn, hold absorbs the remainder
        // so the three counts always add up to numAnalysts (which is at least 5).
        const numAnalysts = Math.floor(5 + random() * 40);
        const buyPct = random();
        const sellPct = random() * (1 - buyPct);
        const buyRatings = Math.floor(numAnalysts * buyPct);
        const sellRatings = Math.floor(numAnalysts * sellPct);
        const holdRatings = numAnalysts - buyRatings - sellRatings;

        // Derive recommendation from actual ratings
        const buyScore = buyRatings / numAnalysts;
        let recommendation;
        if (buyScore > 0.7) {
            recommendation = 'strong_buy';
        } else if (buyScore > 0.5) {
            recommendation = 'buy';
        } else if (buyScore > 0.3) {
            recommendation = 'hold';
        } else if (buyScore > 0.15) {
            recommendation = 'sell';
        } else {
            recommendation = 'strong_sell';
        }
        const consensus = {
            recommendation,
            targetPrice: toCents(basePrice * (1 + (random() - 0.3) * 0.5)),
            numAnalysts,
            buyRatings,
            holdRatings,
            sellRatings,
        };

        const news = {
            headline: `${securityName} ${pick(newsCategories).replace('_', ' ')} update`,
            source: pick(['Reuters', 'Bloomberg', 'WSJ', 'FT', 'CNBC']),
            timestamp: timestamp.toISOString(),
            sentiment: pick(['positive', 'neutral', 'negative']),
            relevance: Math.round(random() * 100),
        };

        const events = {
            nextEarnings: futureDay(90),
            exDividendDate: random() > 0.5 ? futureDay(30) : null,
            annualMeeting: futureDay(180),
        };

        results.push({
            terminalId: `BBG${Date.now()}${i}`,
            security,
            pricing,
            fundamentals,
            credit,
            analytics,
            consensus,
            news,
            events,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic B2B enrichment records (company, contact,
 * technographics, intent signals, organization chart and data quality).
 *
 * Internal consistency rules applied here:
 *  - `company.employeeRange` buckets are inclusive at their upper bound, so
 *    exactly 50 / 200 / 1000 / 5000 employees fall into the bucket whose
 *    label contains that number.
 *  - Headquarters city and state are drawn together as one location.
 *  - `contact.yearsAtCompany` is never less than `contact.yearsInRole`.
 *  - `organizationChart.totalTeamSize` is never less than `directReports`.
 *  - `technographics.installedTechnologies` contains no duplicates.
 *
 * Note: `recordId`, past/future dates and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded to createSeededRandom.
 * @returns {Promise<Array<Object>>} The generated enrichment records.
 */
async function generateZoomInfoData(count, seed) {
    log.info('Generating ZoomInfo-style B2B enrichment data...');
    const random = createSeededRandom(seed);
    const results = [];

    const industries = ['Software', 'Healthcare', 'Financial Services', 'Manufacturing', 'Retail', 'Telecommunications', 'Professional Services', 'Real Estate'];
    const departments = ['Engineering', 'Sales', 'Marketing', 'Finance', 'Operations', 'Product', 'HR', 'Customer Success', 'Legal', 'IT'];
    const seniority = ['C-Level', 'VP', 'Director', 'Manager', 'Individual Contributor', 'Entry Level'];
    const technologies = ['Salesforce', 'AWS', 'Microsoft Azure', 'Google Cloud', 'HubSpot', 'SAP', 'Oracle', 'Workday', 'Tableau', 'Snowflake', 'MongoDB', 'PostgreSQL'];
    const fundingStages = ['Seed', 'Series A', 'Series B', 'Series C', 'Series D+', 'IPO', 'Acquired', 'Bootstrapped'];
    const intentSignals = ['product_research', 'competitor_analysis', 'pricing_page_visit', 'demo_request', 'content_download', 'job_posting', 'technology_install', 'budget_approval'];
    const hqLocations = [
        { city: 'San Francisco', state: 'CA' },
        { city: 'New York', state: 'NY' },
        { city: 'Boston', state: 'MA' },
        { city: 'Austin', state: 'TX' },
        { city: 'Seattle', state: 'WA' },
        { city: 'Chicago', state: 'IL' },
        { city: 'Denver', state: 'CO' },
    ];

    const DAY_MS = 24 * 60 * 60 * 1000;
    const pick = (items) => items[Math.floor(random() * items.length)];
    // Assumes generateName returns "First Last".
    const surname = () => generateName(random).split(' ')[1];
    const phoneNumber = () => `+1-${Math.floor(200 + random() * 800)}-${Math.floor(100 + random() * 900)}-${Math.floor(1000 + random() * 9000)}`;
    const daysAgo = (maxDays) => new Date(Date.now() - random() * maxDays * DAY_MS);

    // Upper bounds are inclusive so each label matches the numbers it names.
    const employeeRangeFor = (headcount) => {
        if (headcount <= 50) return '1-50';
        if (headcount <= 200) return '51-200';
        if (headcount <= 1000) return '201-1000';
        if (headcount <= 5000) return '1001-5000';
        return '5000+';
    };
    const revenueRangeFor = (millions) => {
        if (millions < 10) return '$1M-$10M';
        if (millions < 50) return '$10M-$50M';
        if (millions < 200) return '$50M-$200M';
        if (millions < 1000) return '$200M-$1B';
        return '$1B+';
    };
    const titleFor = (level, dept) => {
        switch (level) {
            case 'C-Level':
                return pick(['CEO', 'CTO', 'CFO', 'COO', 'CMO']);
            case 'VP':
                return `VP of ${dept}`;
            case 'Director':
                return `Director of ${dept}`;
            case 'Manager':
                return `${dept} Manager`;
            default:
                return `${dept} ${pick(['Specialist', 'Analyst', 'Associate'])}`;
        }
    };

    for (let i = 0; i < count; i++) {
        const companyName = `${surname()} ${pick(['Corp', 'Inc', 'Solutions', 'Technologies', 'Systems', 'Group'])}`;
        const domain = `${companyName.toLowerCase().replace(/[^a-z]/g, '')}.com`;
        const employees = Math.floor(10 + random() * 50000);
        const revenueM = Math.floor(1 + random() * 5000);

        const firstName = generateName(random).split(' ')[0];
        const lastName = surname();
        const dept = pick(departments);
        const level = pick(seniority);

        const industry = pick(industries);
        const subIndustry = `${pick(industries)} - ${pick(['Enterprise', 'Mid-Market', 'SMB'])}`;
        const founded = Math.floor(1970 + random() * 50);
        const street = `${Math.floor(100 + random() * 9900)} ${pick(['Main', 'Market', 'Broadway', 'Park', 'Tech'])} St`;
        const hq = pick(hqLocations);
        const company = {
            name: companyName,
            domain,
            industry,
            subIndustry,
            employees,
            employeeRange: employeeRangeFor(employees),
            revenue: `$${revenueM}M`,
            revenueRange: revenueRangeFor(revenueM),
            founded,
            headquarters: {
                street,
                city: hq.city,
                state: hq.state,
                country: 'USA',
                postalCode: String(Math.floor(10000 + random() * 90000)),
            },
            phone: phoneNumber(),
            website: `https://${domain}`,
            description: `Leading provider of ${pick(industries).toLowerCase()} solutions for enterprise customers`,
            fundingStage: pick(fundingStages),
            totalFunding: `$${Math.floor(1 + random() * 500)}M`,
            lastFundingDate: daysAgo(1095).toISOString().split('T')[0],
            investors: Array.from({ length: Math.floor(1 + random() * 5) }, () =>
                `${surname()} ${pick(['Ventures', 'Capital', 'Partners'])}`),
        };

        const directPhone = phoneNumber();
        const mobilePhone = random() > 0.5 ? phoneNumber() : null;
        const title = titleFor(level, dept);
        const linkedIn = `https://linkedin.com/in/${firstName.toLowerCase()}-${lastName.toLowerCase()}-${Math.floor(random() * 99999)}`;
        const twitter = random() > 0.6 ? `@${firstName.toLowerCase()}${lastName.toLowerCase()}` : null;
        const yearsInRole = Math.floor(random() * 8);
        // Tenure at the company includes the time spent in the current role.
        const yearsAtCompany = Math.max(yearsInRole, Math.floor(random() * 12));
        const contact = {
            firstName,
            lastName,
            fullName: `${firstName} ${lastName}`,
            email: `${firstName.toLowerCase()}.${lastName.toLowerCase()}@${domain}`,
            directPhone,
            mobilePhone,
            title,
            department: dept,
            seniority: level,
            linkedIn,
            twitter,
            yearsInRole,
            yearsAtCompany,
            previousCompanies: Array.from({ length: Math.floor(1 + random() * 3) }, () =>
                `${surname()} ${pick(['Corp', 'Inc', 'Technologies'])}`),
            education: {
                degree: pick(["Bachelor's", "Master's", 'MBA', 'PhD']),
                field: pick(['Computer Science', 'Business', 'Engineering', 'Marketing', 'Finance']),
                school: pick(['Stanford', 'MIT', 'Harvard', 'Berkeley', 'Carnegie Mellon', 'Northwestern']),
            },
        };

        // A Set keeps first-seen order while dropping repeated picks.
        const drawnTechnologies = Array.from({ length: Math.floor(3 + random() * 8) }, () => pick(technologies));
        const technographics = {
            installedTechnologies: [...new Set(drawnTechnologies)],
            technologySpend: `$${Math.floor(100 + random() * 10000)}K`,
            cloudProvider: pick(['AWS', 'Azure', 'Google Cloud', 'Multi-Cloud']),
            crmSystem: pick(['Salesforce', 'HubSpot', 'Microsoft Dynamics', 'Zoho']),
            marketingAutomation: pick(['HubSpot', 'Marketo', 'Pardot', 'Eloqua']),
            analyticsTools: pick(['Google Analytics', 'Adobe Analytics', 'Mixpanel', 'Amplitude']),
        };

        const intent = {
            recentActivity: Array.from({ length: Math.floor(1 + random() * 5) }, () => ({
                signal: pick(intentSignals),
                timestamp: daysAgo(30).toISOString(),
                score: Math.floor(1 + random() * 100),
                source: pick(['website', 'content', 'events', 'social', 'search']),
            })),
            buyingStage: pick(['awareness', 'consideration', 'decision', 'purchase']),
            engagementScore: Math.floor(1 + random() * 100),
            lastEngagement: daysAgo(60).toISOString(),
        };

        const reportsTo = random() > 0.3 ? `${generateName(random)}` : null;
        const directReports = Math.floor(random() * 15);
        // The whole team includes the direct reports.
        const totalTeamSize = Math.max(directReports, Math.floor(random() * 50));
        const organizationChart = {
            reportsTo,
            directReports,
            totalTeamSize,
            peers: Array.from({ length: Math.floor(2 + random() * 5) }, () => generateName(random)),
        };

        const dataQuality = {
            emailVerified: random() > 0.2,
            phoneVerified: random() > 0.3,
            lastVerified: daysAgo(90).toISOString(),
            confidenceScore: Math.floor(70 + random() * 30),
            dataFreshness: `${Math.floor(random() * 60)} days`,
        };

        results.push({
            recordId: `ZI${Date.now()}${i}`,
            company,
            contact,
            technographics,
            intentSignals: intent,
            organizationChart,
            dataQuality,
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic FactSet-style financial analytics records.
 *
 * Each record contains a company profile, fundamentals, consensus estimates,
 * ownership, supply-chain exposure and analyst coverage. All random values are
 * drawn from the generator returned by `createSeededRandom(seed)`, in a fixed
 * order, so the same seed produces the same random draws. Dates and the
 * `entityId` are derived from the wall clock (captured once per call), so
 * those fields differ between runs.
 *
 * Note: amounts and their related ratios (e.g. `ebitda` vs `ebitda_margin`,
 * assets vs liabilities + equity) are drawn independently and are not
 * reconciled with each other.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<Object[]>} Array of `count` generated records.
 */
async function generateFactSetData(count, seed) {
    log.info('Generating FactSet-style financial analytics data...');
    const random = createSeededRandom(seed);
    const results = [];

    const sectors = ['Technology', 'Healthcare', 'Financials', 'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Industrials', 'Materials', 'Real Estate', 'Utilities', 'Communication Services'];
    const exchanges = ['NYSE', 'NASDAQ', 'LSE', 'TSE', 'HKEX', 'Euronext', 'SSE'];
    const analystFirms = ['Goldman Sachs', 'Morgan Stanley', 'JP Morgan', 'Bank of America', 'Citi', 'Deutsche Bank', 'Barclays', 'UBS', 'Credit Suisse', 'Wells Fargo'];
    const institutionalTypes = ['Mutual Fund', 'Hedge Fund', 'Pension Fund', 'Sovereign Wealth', 'ETF', 'Private Equity', 'Insurance', 'Endowment'];

    // One clock reading per call keeps every timestamp in a batch relative to
    // the same instant.
    const now = Date.now();
    const scrapedAt = new Date(now).toISOString();

    // Small local helpers. Each one consumes exactly the random draws the
    // inlined expressions would, so the draw order is unchanged.
    const pick = (options) => options[Math.floor(random() * options.length)];
    const round1 = (value) => Math.round(value * 10) / 10;
    const round2 = (value) => Math.round(value * 100) / 100;
    const randomLetter = () => String.fromCharCode(65 + Math.floor(random() * 26));
    const pastDate = (maxDays) =>
        new Date(now - random() * maxDays * 24 * 60 * 60 * 1000).toISOString().split('T')[0];
    // Second token of a generated name; falls back to the first token so a
    // single-word name never renders as "undefined".
    const surname = () => {
        const parts = generateName(random).split(' ');
        return parts[1] ?? parts[0];
    };

    for (let i = 0; i < count; i++) {
        const companyName = `${surname()} ${pick(['Corporation', 'Inc', 'Holdings', 'Group', 'International'])}`;
        const ticker = `${randomLetter()}${randomLetter()}${randomLetter()}`;
        const basePrice = 10 + random() * 500;
        const revenue = Math.floor(100 + random() * 50000);
        const employees = Math.floor(100 + random() * 200000);

        results.push({
            entityId: `FS${now}${i}`,
            company: {
                name: companyName,
                ticker,
                exchange: pick(exchanges),
                sector: pick(sectors),
                industry: `${pick(sectors)} - Specialized`,
                country: pick(['USA', 'UK', 'Japan', 'Germany', 'China', 'France', 'Canada']),
                employees,
                fiscalYearEnd: pick(['December', 'March', 'June', 'September']),
                ipoDate: pastDate(7300),
                description: `Global leader in ${pick(sectors).toLowerCase()} with operations across multiple continents`,
            },
            fundamentals: {
                revenue: {
                    current: revenue,
                    yoy_growth: round1((random() - 0.3) * 30),
                    trailing_12m: revenue,
                    // Fixed 24/25/26/25 % split of the annual figure.
                    quarterly: [
                        round2(revenue * 0.24),
                        round2(revenue * 0.25),
                        round2(revenue * 0.26),
                        round2(revenue * 0.25),
                    ],
                },
                profitability: {
                    ebitda: Math.round(revenue * (0.1 + random() * 0.3)),
                    ebitda_margin: round1(10 + random() * 30),
                    operating_income: Math.round(revenue * (0.08 + random() * 0.25)),
                    operating_margin: round1(8 + random() * 25),
                    net_income: Math.round(revenue * (0.05 + random() * 0.20)),
                    net_margin: round1(5 + random() * 20),
                    roe: round1(5 + random() * 30),
                    roa: round1(3 + random() * 15),
                    roic: round1(5 + random() * 25),
                },
                growth_rates: {
                    revenue_1yr: round1((random() - 0.2) * 30),
                    revenue_3yr_cagr: round1((random() - 0.1) * 25),
                    revenue_5yr_cagr: round1((random() - 0.1) * 20),
                    earnings_1yr: round1((random() - 0.3) * 40),
                    earnings_3yr_cagr: round1((random() - 0.2) * 30),
                    earnings_5yr_cagr: round1((random() - 0.1) * 25),
                },
                balance_sheet: {
                    total_assets: Math.round(revenue * (1.5 + random() * 3)),
                    total_liabilities: Math.round(revenue * (0.8 + random() * 2)),
                    stockholders_equity: Math.round(revenue * (0.5 + random() * 1.5)),
                    cash: Math.round(revenue * (0.1 + random() * 0.5)),
                    debt: Math.round(revenue * (0.2 + random() * 1.2)),
                    working_capital: Math.round(revenue * (0.1 + random() * 0.4)),
                },
                cash_flow: {
                    operating_cf: Math.round(revenue * (0.1 + random() * 0.25)),
                    investing_cf: Math.round(revenue * (-0.15 - random() * 0.15)),
                    financing_cf: Math.round(revenue * (-0.05 + random() * 0.15)),
                    free_cash_flow: Math.round(revenue * (0.05 + random() * 0.20)),
                    fcf_yield: round1(3 + random() * 8),
                },
            },
            estimates: {
                eps: {
                    current_quarter: round2(basePrice * 0.01 + random() * basePrice * 0.02),
                    next_quarter: round2(basePrice * 0.01 + random() * basePrice * 0.025),
                    current_year: round2(basePrice * 0.04 + random() * basePrice * 0.06),
                    next_year: round2(basePrice * 0.05 + random() * basePrice * 0.08),
                    consensus_growth: round1(5 + random() * 20),
                    surprise_history: Array.from({ length: 4 }, () => round1((random() - 0.5) * 20)),
                },
                revenue: {
                    current_quarter: Math.round(revenue * 0.25 * (1 + (random() - 0.3) * 0.1)),
                    next_quarter: Math.round(revenue * 0.26 * (1 + (random() - 0.2) * 0.1)),
                    current_year: Math.round(revenue * (1 + (random() - 0.2) * 0.15)),
                    next_year: Math.round(revenue * (1.05 + random() * 0.15)),
                    consensus_growth: round1(3 + random() * 15),
                },
                price_targets: {
                    high: round2(basePrice * (1.3 + random() * 0.5)),
                    low: round2(basePrice * (0.7 - random() * 0.2)),
                    mean: round2(basePrice * (1 + (random() - 0.5) * 0.3)),
                    median: round2(basePrice * (1 + (random() - 0.5) * 0.25)),
                    num_analysts: Math.floor(8 + random() * 35),
                },
            },
            ownership: {
                institutional: {
                    percentage: round1(40 + random() * 50),
                    holders: Math.floor(100 + random() * 900),
                    topHolders: Array.from({ length: 10 }, (_, idx) => ({
                        name: `${surname()} ${pick(institutionalTypes)}`,
                        shares: Math.floor(1000000 + random() * 50000000),
                        percentage: round2(1 + random() * 8),
                        value: Math.round(basePrice * (1000000 + random() * 50000000) / 1000000),
                        changeQoQ: round2((random() - 0.5) * 20),
                        rank: idx + 1,
                    })),
                },
                insider: {
                    percentage: round1(1 + random() * 15),
                    recentTransactions: Array.from({ length: Math.floor(5 + random() * 10) }, () => ({
                        date: pastDate(180),
                        insider: generateName(random),
                        title: pick(['CEO', 'CFO', 'COO', 'Director', 'EVP', 'SVP']),
                        transaction: pick(['Buy', 'Sell']),
                        shares: Math.floor(1000 + random() * 100000),
                        price: round2(basePrice * (1 + (random() - 0.5) * 0.1)),
                        value: Math.round(basePrice * (1000 + random() * 100000) / 1000),
                    })),
                },
                buybacks: {
                    active_program: random() > 0.3,
                    authorization: Math.round(revenue * (0.05 + random() * 0.15)),
                    remaining: Math.round(revenue * (0.02 + random() * 0.10)),
                    shares_repurchased_ltm: Math.floor(random() * 10000000),
                },
            },
            supplyChain: {
                majorCustomers: Array.from({ length: Math.floor(3 + random() * 7) }, () => ({
                    name: `${surname()} ${pick(['Corp', 'Inc', 'Group'])}`,
                    revenueContribution: round1(2 + random() * 15),
                    relationship: pick(['Strategic Partner', 'Key Customer', 'Major Account']),
                    yearsOfBusiness: Math.floor(1 + random() * 15),
                })),
                majorSuppliers: Array.from({ length: Math.floor(3 + random() * 7) }, () => ({
                    name: `${surname()} ${pick(['Corp', 'Systems', 'Technologies'])}`,
                    category: pick(['Components', 'Raw Materials', 'Services', 'Software']),
                    dependencyLevel: pick(['Critical', 'High', 'Medium', 'Low']),
                    geographicRisk: pick(['Low', 'Medium', 'High']),
                })),
                // Regional shares are drawn independently and are not
                // normalised to sum to 100.
                geographicExposure: {
                    north_america: round1(20 + random() * 60),
                    europe: round1(10 + random() * 40),
                    asia_pacific: round1(10 + random() * 50),
                    rest_of_world: round1(5 + random() * 20),
                },
            },
            analystCoverage: Array.from({ length: Math.floor(5 + random() * 20) }, () => ({
                firm: pick(analystFirms),
                analyst: generateName(random),
                rating: pick(['Strong Buy', 'Buy', 'Hold', 'Sell', 'Strong Sell']),
                priceTarget: round2(basePrice * (0.8 + random() * 0.6)),
                lastUpdate: pastDate(90),
                confidence: pick(['High', 'Medium', 'Low']),
            })),
            scrapedAt,
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic LSEG/Refinitiv-style workspace records.
 *
 * Each record contains company identifiers, news stories with sentiment and
 * entities, real-time alerts, deals and issuances, ESG scores, research
 * reports and a market-data snapshot. All random values are drawn from the
 * generator returned by `createSeededRandom(seed)` in a fixed order.
 * Timestamps and the `workspaceId` are derived from the wall clock (captured
 * once per call), so those fields differ between runs.
 *
 * Note: a story's sentiment `score` and `label` are drawn independently and
 * may disagree.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<Object[]>} Array of `count` generated records.
 */
async function generateLSEGData(count, seed) {
    log.info('Generating LSEG/Refinitiv-style workspace data...');
    const random = createSeededRandom(seed);
    const results = [];

    const newsSources = ['Reuters', 'Dow Jones', 'PR Newswire', 'Business Wire', 'Bloomberg', 'Financial Times', 'WSJ'];
    const newsCategories = ['Earnings', 'M&A', 'Regulatory', 'Corporate', 'Market', 'Economic', 'Political', 'ESG'];
    const dealTypes = ['M&A', 'IPO', 'Secondary Offering', 'Bond Issuance', 'Loan', 'Private Placement', 'Buyout', 'Joint Venture'];
    const esgCategories = ['Environmental', 'Social', 'Governance'];
    const controversyTypes = ['Legal', 'Environmental', 'Labor', 'Ethical', 'Regulatory', 'Product'];
    const regions = ['North America', 'Europe', 'Asia Pacific', 'Latin America', 'Middle East', 'Africa'];

    // One clock reading per call keeps every timestamp in a batch relative to
    // the same instant.
    const now = Date.now();
    const scrapedAt = new Date(now).toISOString();

    // Small local helpers. Each one consumes exactly the random draws the
    // inlined expressions would, so the draw order is unchanged.
    const pick = (options) => options[Math.floor(random() * options.length)];
    const round1 = (value) => Math.round(value * 10) / 10;
    const round2 = (value) => Math.round(value * 100) / 100;
    const randomLetter = () => String.fromCharCode(65 + Math.floor(random() * 26));
    const dateOnly = (ms) => new Date(ms).toISOString().split('T')[0];
    const pastDate = (maxDays) => dateOnly(now - random() * maxDays * 24 * 60 * 60 * 1000);
    const futureDate = (maxDays) => dateOnly(now + random() * maxDays * 24 * 60 * 60 * 1000);
    const pastTimestamp = (maxHours) => new Date(now - random() * maxHours * 60 * 60 * 1000).toISOString();
    // Second token of a generated name; falls back to the first token so a
    // single-word name never renders as "undefined".
    const surname = () => {
        const parts = generateName(random).split(' ');
        return parts[1] ?? parts[0];
    };
    // 20-digit numeric identifier from a single draw. Integer (BigInt) math is
    // used because a double of this magnitude cannot hold 20 exact digits and
    // would stringify as 21 digits or in exponent form. This is only the LEI
    // length; no check-digit scheme is applied.
    const LEI_FLOOR = 10n ** 19n;
    const randomLei = () => (LEI_FLOOR + BigInt(Math.floor(random() * 9e15)) * 10000n).toString();

    for (let i = 0; i < count; i++) {
        const companyName = `${surname()} ${pick(['Corporation', 'Group', 'Holdings', 'International', 'Industries'])}`;
        const ticker = `${randomLetter()}${randomLetter()}${randomLetter()}`;
        const basePrice = 10 + random() * 500;

        results.push({
            workspaceId: `LSEG${now}${i}`,
            company: {
                name: companyName,
                ticker,
                ric: `${ticker}.${pick(['N', 'O', 'L', 'T', 'HK'])}`,
                permId: `${Math.floor(1000000000 + random() * 9000000000)}`,
                lei: randomLei(),
                sector: pick(['Technology', 'Healthcare', 'Financials', 'Energy', 'Industrials']),
                region: pick(regions),
            },
            news: {
                stories: Array.from({ length: Math.floor(3 + random() * 12) }, () => ({
                    headline: `${companyName} ${pick(['announces', 'reports', 'unveils', 'confirms', 'explores'])} ${pick(newsCategories).toLowerCase()} ${pick(['update', 'initiative', 'strategy', 'partnership', 'results'])}`,
                    source: pick(newsSources),
                    timestamp: pastTimestamp(168), // within the last 7 days
                    category: pick(newsCategories),
                    sentiment: {
                        score: round2((random() - 0.5) * 2),
                        label: pick(['Very Positive', 'Positive', 'Neutral', 'Negative', 'Very Negative']),
                        confidence: round1(70 + random() * 30),
                    },
                    // Topics are drawn with replacement, so duplicates can occur.
                    topics: Array.from({ length: Math.floor(2 + random() * 5) }, () =>
                        pick(['Revenue', 'Expansion', 'Innovation', 'Partnership', 'Regulation', 'Sustainability'])
                    ),
                    entities: {
                        people: Array.from({ length: Math.floor(1 + random() * 3) }, () => generateName(random)),
                        organizations: Array.from({ length: Math.floor(1 + random() * 4) }, () =>
                            `${surname()} ${pick(['Corp', 'Inc', 'Group'])}`
                        ),
                        locations: Array.from({ length: Math.floor(1 + random() * 3) }, () =>
                            pick(['New York', 'London', 'Tokyo', 'Singapore', 'Hong Kong', 'Dubai'])
                        ),
                    },
                    relevance: round1(60 + random() * 40),
                    language: pick(['en', 'en-US', 'en-GB']),
                    wordCount: Math.floor(200 + random() * 1500),
                })),
                realTimeAlerts: Array.from({ length: Math.floor(1 + random() * 5) }, () => ({
                    type: pick(['Price', 'Volume', 'News', 'Rating', 'Insider']),
                    severity: pick(['Critical', 'High', 'Medium', 'Low']),
                    message: `Alert triggered for ${companyName}`,
                    timestamp: pastTimestamp(24),
                })),
            },
            deals: {
                announced: Array.from({ length: Math.floor(1 + random() * 8) }, () => ({
                    dealId: `D${Math.floor(100000000 + random() * 900000000)}`,
                    type: pick(dealTypes),
                    status: pick(['Announced', 'Pending', 'Completed', 'Withdrawn']),
                    value: round1(50 + random() * 10000),
                    currency: 'USD',
                    announceDate: pastDate(730),
                    expectedClose: futureDate(365),
                    parties: {
                        acquirer: companyName,
                        target: `${surname()} ${pick(['Corp', 'Inc', 'Group'])}`,
                        advisors: {
                            financial: Array.from({ length: Math.floor(1 + random() * 3) }, () =>
                                pick(['Goldman Sachs', 'Morgan Stanley', 'JP Morgan', 'Bank of America'])
                            ),
                            legal: Array.from({ length: Math.floor(1 + random() * 2) }, () =>
                                pick(['Wachtell', 'Skadden', 'Sullivan & Cromwell', 'Cleary Gottlieb'])
                            ),
                        },
                    },
                    rationale: pick(['Strategic Expansion', 'Market Entry', 'Technology Acquisition', 'Vertical Integration']),
                    synergies: round1(10 + random() * 500),
                    premium: round1(10 + random() * 50),
                })),
                issuances: Array.from({ length: Math.floor(1 + random() * 5) }, () => ({
                    type: pick(['Investment Grade Bond', 'High Yield Bond', 'Convertible', 'Green Bond']),
                    amount: round1(100 + random() * 5000),
                    maturity: `${Math.floor(3 + random() * 27)} years`,
                    coupon: round2(1 + random() * 8),
                    rating: pick(['AAA', 'AA', 'A', 'BBB', 'BB', 'B']),
                    issueDate: pastDate(365),
                    underwriters: Array.from({ length: Math.floor(2 + random() * 5) }, () =>
                        pick(['JP Morgan', 'Bank of America', 'Citi', 'Goldman Sachs', 'Morgan Stanley'])
                    ),
                })),
            },
            esg: {
                scores: {
                    overall: round1(30 + random() * 70),
                    environmental: round1(30 + random() * 70),
                    social: round1(30 + random() * 70),
                    governance: round1(30 + random() * 70),
                    controversy: round1(random() * 100),
                },
                percentileRank: {
                    industry: Math.floor(1 + random() * 100),
                    global: Math.floor(1 + random() * 100),
                },
                grade: pick(['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'D']),
                categories: esgCategories.map((category) => ({
                    category,
                    score: round1(30 + random() * 70),
                    trend: pick(['Improving', 'Stable', 'Declining']),
                    keyIssues: Array.from({ length: Math.floor(2 + random() * 4) }, () =>
                        pick(['Carbon Emissions', 'Water Usage', 'Diversity', 'Labor Practices', 'Board Independence', 'Executive Pay'])
                    ),
                })),
                // May be empty: the length is drawn from 0-3.
                controversies: Array.from({ length: Math.floor(random() * 4) }, () => ({
                    type: pick(controversyTypes),
                    severity: pick(['Critical', 'High', 'Medium', 'Low']),
                    date: pastDate(1825),
                    // The description draws its own controversy type, independent of `type`.
                    description: `${pick(controversyTypes)} controversy involving ${companyName}`,
                    status: pick(['Ongoing', 'Resolved', 'Under Investigation']),
                    impact: round1(1 + random() * 10),
                })),
                sdgAlignment: Array.from({ length: Math.floor(3 + random() * 8) }, () => ({
                    goal: Math.floor(1 + random() * 17),
                    score: round1(30 + random() * 70),
                })),
            },
            research: {
                analystReports: Array.from({ length: Math.floor(5 + random() * 15) }, () => ({
                    firm: pick(['Goldman Sachs Research', 'Morgan Stanley Research', 'JP Morgan Research']),
                    analyst: generateName(random),
                    title: `${companyName} - ${pick(['Initiating Coverage', 'Q4 Update', 'Sector Outlook', 'Deep Dive'])}`,
                    date: pastDate(180),
                    rating: pick(['Overweight', 'Equal-weight', 'Underweight', 'Buy', 'Hold', 'Sell']),
                    priceTarget: round2(basePrice * (0.8 + random() * 0.6)),
                    pages: Math.floor(15 + random() * 100),
                    keyTakeaways: Array.from({ length: 3 }, () =>
                        pick(['Strong fundamentals', 'Market expansion opportunity', 'Valuation attractive', 'Execution risk'])
                    ),
                })),
                earnings: {
                    nextDate: futureDate(90),
                    consensus: {
                        eps: round2(basePrice * 0.02),
                        revenue: round1(1000 + random() * 50000),
                        numEstimates: Math.floor(8 + random() * 30),
                    },
                    whisperNumber: round2(basePrice * 0.021),
                },
            },
            marketData: {
                price: round2(basePrice),
                change: round2((random() - 0.5) * 10),
                changePercent: round2((random() - 0.5) * 5),
                volume: Math.floor(random() * 20000000),
                marketCap: `${round2(basePrice * (10 + random() * 990))}B`,
                beta: round2(0.5 + random() * 1.5),
                shortInterest: round1(1 + random() * 15),
            },
            scrapedAt,
        });
    }

    return results;
}
|
|
|
|
|
|
/**
 * Generate fMRI (Functional Magnetic Resonance Imaging) brain activity data.
 * Simulates BOLD signal time series and brain voxel coordinates.
 *
 * For each record a region and condition are picked, a voxel is placed inside
 * the region's coordinate box, and a BOLD series of 100-300 points is built
 * as baseline + sinusoidal activation (zero for the 'rest' condition) +
 * uniform noise + a half-sine drift. Only the first 50 points are stored;
 * `fullSeriesStats` summarises the whole series.
 *
 * All random values come from `createSeededRandom(seed)` in a fixed order;
 * `scanId` and `scrapedAt` depend on the wall clock.
 *
 * @param {number} count - Number of scan records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<Object[]>} Array of `count` generated records.
 */
async function generateFMRIData(count, seed) {
    log.info('Generating fMRI brain activity data...');
    const random = createSeededRandom(seed);
    const results = [];

    // Each region carries the [min, max] box its voxel is sampled from.
    const brainRegions = [
        { name: 'Dorsolateral Prefrontal Cortex', abbr: 'DLPFC', type: 'cortical', x: [30, 50], y: [20, 40], z: [20, 35] },
        { name: 'Anterior Cingulate Cortex', abbr: 'ACC', type: 'cortical', x: [0, 10], y: [30, 45], z: [15, 30] },
        { name: 'Amygdala', abbr: 'AMY', type: 'subcortical', x: [20, 30], y: [-10, 5], z: [-15, -5] },
        { name: 'Hippocampus', abbr: 'HIP', type: 'subcortical', x: [25, 35], y: [-20, -10], z: [-10, 0] },
        { name: 'Primary Motor Cortex', abbr: 'M1', type: 'cortical', x: [35, 45], y: [-15, 0], z: [45, 60] },
        { name: 'Primary Visual Cortex', abbr: 'V1', type: 'cortical', x: [10, 25], y: [-90, -75], z: [0, 15] },
        { name: 'Thalamus', abbr: 'THA', type: 'subcortical', x: [10, 15], y: [-15, -5], z: [5, 15] },
        { name: 'Caudate Nucleus', abbr: 'CAU', type: 'subcortical', x: [12, 18], y: [10, 20], z: [10, 20] },
    ];

    const conditions = ['rest', 'task', 'visual_stim', 'motor_task', 'cognitive_load', 'emotional_stim'];
    const TR = 2.0; // Repetition time in seconds (standard fMRI)

    // Small local helpers. Each one consumes exactly the random draws the
    // inlined expressions would, so the draw order is unchanged.
    const pick = (options) => options[Math.floor(random() * options.length)];
    const round1 = (value) => Math.round(value * 10) / 10;
    const round2 = (value) => Math.round(value * 100) / 100;
    const sampleWithin = ([low, high]) => Math.floor(low + random() * (high - low));

    for (let i = 0; i < count; i++) {
        const region = pick(brainRegions);
        const condition = pick(conditions);
        const numTimePoints = 100 + Math.floor(random() * 200); // 100-300 time points

        // Generate voxel coordinates within brain region
        const voxelX = sampleWithin(region.x);
        const voxelY = sampleWithin(region.y);
        const voxelZ = sampleWithin(region.z);

        // Generate BOLD signal time series with noise, drift and activation
        const baseline = 100 + random() * 20;
        const activationMagnitude = condition === 'rest' ? 0 : (2 + random() * 4);
        const boldSignal = Array.from({ length: numTimePoints }, (_, t) => {
            const noise = (random() - 0.5) * 1.5; // Physiological noise
            const drift = Math.sin(t / numTimePoints * Math.PI) * 0.5; // Scanner drift
            const activation = condition !== 'rest' ? Math.sin(t / 20) * activationMagnitude : 0;
            return round2(baseline + activation + noise + drift);
        });

        // Summary statistics over the full series. The spread is measured
        // around the series mean (population standard deviation), not around
        // the baseline, which would fold the mean offset introduced by
        // activation and drift into the result.
        const signalMean = boldSignal.reduce((sum, value) => sum + value, 0) / boldSignal.length;
        const signalVariance = boldSignal.reduce(
            (sum, value) => sum + (value - signalMean) * (value - signalMean),
            0
        ) / boldSignal.length;

        // Generate connectivity matrix (correlation with other voxels).
        // Entries are independent draws in [-1, 1]; the matrix is not made
        // symmetric and the diagonal is not forced to 1.
        const connectivityMatrix = Array.from({ length: 8 }, () =>
            Array.from({ length: 8 }, () => round2(random() * 2 - 1))
        );

        results.push({
            scanId: `fMRI_${Date.now()}_${i}`,
            subject: {
                id: `SUB${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`,
                age: Math.floor(18 + random() * 50),
                gender: random() > 0.5 ? 'M' : 'F',
                handedness: random() > 0.1 ? 'right' : 'left',
            },
            acquisition: {
                scanner: pick(['Siemens Prisma 3T', 'GE Discovery MR750 3T', 'Philips Ingenia 3T']),
                fieldStrength: '3T',
                TR,
                TE: round1(25 + random() * 10), // Echo time (ms)
                flipAngle: 75 + Math.floor(random() * 15), // degrees
                voxelSize: [3, 3, 3], // mm
                slices: 32 + Math.floor(random() * 16),
            },
            voxel: {
                coordinates: { x: voxelX, y: voxelY, z: voxelZ },
                mniCoordinates: { x: voxelX - 45, y: voxelY - 60, z: voxelZ - 35 }, // MNI space
                region: region.name,
                regionAbbr: region.abbr,
                regionType: region.type,
                hemisphere: voxelX > 45 ? 'right' : 'left',
            },
            timeSeries: {
                condition,
                numTimePoints,
                TR,
                duration: numTimePoints * TR,
                boldSignal: boldSignal.slice(0, 50), // Store first 50 points for space
                fullSeriesStats: {
                    mean: round2(signalMean),
                    stdDev: round2(Math.sqrt(signalVariance)),
                    min: Math.min(...boldSignal),
                    max: Math.max(...boldSignal),
                },
            },
            activation: {
                isActive: activationMagnitude > 0,
                percentSignalChange: round2(activationMagnitude),
                tStatistic: activationMagnitude > 0 ? round2(2 + random() * 4) : 0,
                pValue: activationMagnitude > 0 ? Math.round(random() * 0.05 * 10000) / 10000 : 1,
                clusterSize: activationMagnitude > 0 ? Math.floor(10 + random() * 200) : 0,
            },
            connectivity: {
                matrix: connectivityMatrix,
                // Mean of the first matrix row only.
                // NOTE(review): confirm whether a whole-matrix mean was intended.
                meanCorrelation: round2(connectivityMatrix[0].reduce((a, b) => a + b, 0) / 8),
                strongestConnection: {
                    region: pick(brainRegions).abbr,
                    correlation: round2(0.5 + random() * 0.5),
                },
            },
            quality: {
                snr: round1(20 + random() * 30), // Signal-to-noise ratio
                motion: round2(random() * 2), // mm displacement
                artifacts: random() > 0.8 ? pick(['susceptibility', 'motion']) : null,
                qualityRating: pick(['excellent', 'good', 'fair', 'poor']),
            },
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate Protein Data Bank (PDB) molecular structure data.
 * Simulates protein atomic coordinates and structural information.
 *
 * Each record contains header metadata, crystallographic parameters, a
 * truncated per-chain sequence, a secondary-structure summary, a sample of at
 * most 50 atom records, an optional ligand and quality metrics. All random
 * values come from `createSeededRandom(seed)` in a fixed order; date fields
 * depend on the wall clock.
 *
 * Note: residue name and chain are drawn per atom, so atoms sharing a
 * `residueSeq` may disagree on `residueName`/`chainId`; the resolution quoted
 * in the title is drawn separately from `structure.resolution`.
 *
 * @param {number} count - Number of structure records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<Object[]>} Array of `count` generated records.
 */
async function generateProteinPDBData(count, seed) {
    log.info('Generating Protein PDB molecular structure data...');
    const random = createSeededRandom(seed);
    const results = [];

    const aminoAcids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
        'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'];
    const secondaryStructures = ['helix', 'sheet', 'coil', 'turn'];
    const chains = ['A', 'B', 'C', 'D', 'E', 'F'];
    const atomTypes = ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', 'OG'];

    // Small local helpers. Each one consumes exactly the random draws the
    // inlined expressions would, so the draw order is unchanged.
    const pick = (options) => options[Math.floor(random() * options.length)];
    const round1 = (value) => Math.round(value * 10) / 10;
    const round2 = (value) => Math.round(value * 100) / 100;
    const round3 = (value) => Math.round(value * 1000) / 1000;

    for (let i = 0; i < count; i++) {
        const pdbId = `${Math.floor(1000 + random() * 8999)}`;
        const numResidues = 50 + Math.floor(random() * 450); // 50-500 residues
        const numChains = 1 + Math.floor(random() * 3);
        const numAtoms = numResidues * 8; // ~8 atoms per residue average

        // Generate atom records (sample of at most 50)
        const atoms = Array.from({ length: Math.min(50, numAtoms) }, (_, atomIdx) => {
            const atomName = atomTypes[atomIdx % atomTypes.length];
            return {
                serial: atomIdx + 1,
                atomName,
                altLoc: '',
                residueName: pick(aminoAcids),
                chainId: chains[Math.floor(random() * numChains)],
                residueSeq: Math.floor(atomIdx / 8) + 1,
                iCode: '',
                coordinates: {
                    x: round3(random() * 100 - 50),
                    y: round3(random() * 100 - 50),
                    z: round3(random() * 100 - 50),
                },
                occupancy: round2(0.8 + random() * 0.2),
                tempFactor: round2(10 + random() * 40), // B-factor
                element: atomName[0], // first letter of the atom name
                charge: '',
            };
        });

        // Generate secondary structure assignment, one class per residue
        const secondaryStructureMap = Array.from({ length: numResidues }, () => pick(secondaryStructures));

        // Tally every assigned class in one pass, including 'turn', so the
        // reported percentages cover all residues.
        const structureCounts = { helix: 0, sheet: 0, coil: 0, turn: 0 };
        for (const assignment of secondaryStructureMap) {
            structureCounts[assignment] += 1;
        }
        const percentOfResidues = (n) => Math.round((n / numResidues) * 100);

        results.push({
            pdbId,
            header: {
                classification: pick(['HYDROLASE', 'TRANSFERASE', 'OXIDOREDUCTASE', 'LYASE', 'ISOMERASE', 'LIGASE', 'MEMBRANE PROTEIN', 'SIGNALING PROTEIN']),
                depositionDate: new Date(Date.now() - random() * 365 * 10 * 24 * 60 * 60 * 1000).toISOString().split('T')[0],
                title: `Crystal structure of ${pick(aminoAcids)} rich domain at ${round1(1.5 + random() * 1.5)}A resolution`,
                organism: pick(['Homo sapiens', 'Escherichia coli', 'Saccharomyces cerevisiae', 'Mus musculus']),
                expression: pick(['Escherichia coli', 'Insect cells', 'Mammalian cells', 'Yeast']),
            },
            structure: {
                numChains,
                numResidues,
                numAtoms,
                resolution: round2(1.5 + random() * 1.5), // Angstroms
                rValue: round3(0.15 + random() * 0.15),
                rFree: round3(0.18 + random() * 0.15),
                spaceGroup: pick(['P 21 21 21', 'P 1 21 1', 'C 2 2 21', 'P 43 21 2']),
                unitCell: {
                    a: round2(40 + random() * 60),
                    b: round2(40 + random() * 60),
                    c: round2(40 + random() * 60),
                    alpha: 90,
                    beta: 90 + Math.round(random() * 20),
                    gamma: 90,
                },
            },
            sequence: {
                chains: Array.from({ length: numChains }, (_, chainIdx) => ({
                    chainId: chains[chainIdx],
                    length: Math.floor(numResidues / numChains),
                    // Only a 30-residue sample is emitted, regardless of `length`.
                    sequence: Array.from({ length: 30 }, () => pick(aminoAcids)).join('-'),
                })),
            },
            secondaryStructure: {
                helixPercent: percentOfResidues(structureCounts.helix),
                sheetPercent: percentOfResidues(structureCounts.sheet),
                coilPercent: percentOfResidues(structureCounts.coil),
                turnPercent: percentOfResidues(structureCounts.turn),
                assignments: secondaryStructureMap.slice(0, 30), // Sample
            },
            atoms,
            ligands: random() > 0.3 ? [{
                hetId: pick(['ATP', 'NAD', 'FAD', 'HEM', 'MG', 'ZN', 'CA']),
                chainId: chains[Math.floor(random() * numChains)],
                residueSeq: numResidues + 1,
                numAtoms: Math.floor(10 + random() * 40),
                bindingSite: {
                    residues: Array.from({ length: 5 }, () => Math.floor(1 + random() * numResidues)),
                    bindingEnergy: round2(-5 - random() * 10), // kcal/mol
                },
            }] : [],
            quality: {
                clashScore: round1(random() * 20),
                ramachandranFavored: round1(85 + random() * 12),
                ramachandranOutliers: round1(random() * 3),
                rotamerOutliers: round1(random() * 5),
                cbetaDeviations: Math.floor(random() * 5),
            },
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|
|
|
|
/**
|
|
* Generate Power Grid electrical telemetry data
|
|
* Simulates 3-phase power, voltage, current, and grid events
|
|
*/
|
|
async function generatePowerGridData(count, seed) {
|
|
log.info('Generating Power Grid telemetry data...');
|
|
const random = createSeededRandom(seed);
|
|
const results = [];
|
|
|
|
const substations = ['North', 'South', 'East', 'West', 'Central', 'Industrial', 'Residential', 'Commercial'];
|
|
const voltageClasses = [
|
|
{ nominal: 765000, tolerance: 0.05, name: 'Extra High Voltage' },
|
|
{ nominal: 345000, tolerance: 0.05, name: 'Extra High Voltage' },
|
|
{ nominal: 138000, tolerance: 0.06, name: 'High Voltage' },
|
|
{ nominal: 69000, tolerance: 0.06, name: 'High Voltage' },
|
|
{ nominal: 13800, tolerance: 0.08, name: 'Medium Voltage' },
|
|
{ nominal: 480, tolerance: 0.1, name: 'Low Voltage' }
|
|
];
|
|
const eventTypes = ['normal', 'fault', 'switching', 'load_change', 'voltage_sag', 'voltage_swell', 'harmonic_distortion'];
|
|
|
|
for (let i = 0; i < count; i++) {
|
|
const voltageClass = voltageClasses[Math.floor(random() * voltageClasses.length)];
|
|
const eventType = random() > 0.8 ? eventTypes[1 + Math.floor(random() * (eventTypes.length - 1))] : 'normal';
|
|
const baseVoltage = voltageClass.nominal;
|
|
|
|
// 3-phase voltage with realistic variation
|
|
const phaseA_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100;
|
|
const phaseB_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100;
|
|
const phaseC_V = Math.round((baseVoltage * (1 + (random() - 0.5) * voltageClass.tolerance)) * 100) / 100;
|
|
|
|
// Current based on power and voltage
|
|
const apparentPower = Math.floor(100000 + random() * 50000000); // VA
|
|
const avgVoltage = (phaseA_V + phaseB_V + phaseC_V) / 3;
|
|
const baseCurrent = apparentPower / (Math.sqrt(3) * avgVoltage);
|
|
|
|
const phaseA_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100;
|
|
const phaseB_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100;
|
|
const phaseC_I = Math.round((baseCurrent * (0.9 + random() * 0.2)) * 100) / 100;
|
|
|
|
// Power factor and power calculations
|
|
const powerFactor = Math.round((0.85 + random() * 0.14) * 1000) / 1000;
|
|
const activePower = Math.round(apparentPower * powerFactor);
|
|
const reactivePower = Math.round(Math.sqrt(Math.pow(apparentPower, 2) - Math.pow(activePower, 2)));
|
|
|
|
// Frequency (nominal 60 Hz in US, 50 Hz in Europe)
|
|
const nominalFreq = random() > 0.5 ? 60 : 50;
|
|
const frequency = Math.round((nominalFreq + (random() - 0.5) * 0.1) * 1000) / 1000;
|
|
|
|
// Harmonics (Total Harmonic Distortion)
|
|
const thd_v = Math.round((eventType === 'harmonic_distortion' ? 3 + random() * 5 : random() * 2) * 100) / 100;
|
|
const thd_i = Math.round((eventType === 'harmonic_distortion' ? 5 + random() * 10 : random() * 3) * 100) / 100;
|
|
|
|
results.push({
|
|
recordId: `PMU_${Date.now()}_${i}`,
|
|
location: {
|
|
substation: `${substations[Math.floor(random() * substations.length)]} Substation`,
|
|
pmuId: `PMU${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`,
|
|
busNumber: Math.floor(1 + random() * 100),
|
|
voltageClass: voltageClass.name,
|
|
nominalVoltage: voltageClass.nominal,
|
|
latitude: Math.round((30 + random() * 20) * 1000000) / 1000000,
|
|
longitude: Math.round((-100 + random() * 30) * 1000000) / 1000000
|
|
},
|
|
timestamp: new Date(Date.now() - random() * 3600000).toISOString(),
|
|
voltage: {
|
|
phaseA: phaseA_V,
|
|
phaseB: phaseB_V,
|
|
phaseC: phaseC_V,
|
|
neutral: Math.round(Math.abs(phaseA_V + phaseB_V + phaseC_V) / 10 * 100) / 100,
|
|
lineToLine: {
|
|
AB: Math.round(Math.sqrt(3) * ((phaseA_V + phaseB_V) / 2) * 100) / 100,
|
|
BC: Math.round(Math.sqrt(3) * ((phaseB_V + phaseC_V) / 2) * 100) / 100,
|
|
CA: Math.round(Math.sqrt(3) * ((phaseC_V + phaseA_V) / 2) * 100) / 100
|
|
},
|
|
unbalance: Math.round(random() * 2 * 100) / 100 // percent
|
|
},
|
|
current: {
|
|
phaseA: phaseA_I,
|
|
phaseB: phaseB_I,
|
|
phaseC: phaseC_I,
|
|
neutral: Math.round(Math.sqrt(Math.pow(phaseA_I, 2) + Math.pow(phaseB_I, 2) + Math.pow(phaseC_I, 2)) * 100) / 100,
|
|
unbalance: Math.round(random() * 3 * 100) / 100
|
|
},
|
|
power: {
|
|
active: activePower,
|
|
reactive: reactivePower,
|
|
apparent: apparentPower,
|
|
powerFactor: powerFactor,
|
|
phaseAngle: Math.round((random() * 60 - 30) * 100) / 100 // degrees
|
|
},
|
|
frequency: {
|
|
value: frequency,
|
|
rateOfChange: Math.round((random() - 0.5) * 0.1 * 1000) / 1000, // Hz/s
|
|
deviation: Math.round((frequency - nominalFreq) * 1000) / 1000
|
|
},
|
|
harmonics: {
|
|
THD_voltage: thd_v,
|
|
THD_current: thd_i,
|
|
dominantHarmonic: Math.floor(3 + random() * 12) * 2 + 1, // Odd harmonics
|
|
individual: {
|
|
H3: Math.round(random() * 2 * 100) / 100,
|
|
H5: Math.round(random() * 3 * 100) / 100,
|
|
H7: Math.round(random() * 2 * 100) / 100,
|
|
H11: Math.round(random() * 1 * 100) / 100
|
|
}
|
|
},
|
|
phasor: {
|
|
voltage: {
|
|
magnitude: Math.round(avgVoltage * 100) / 100,
|
|
angle: Math.round(random() * 360 * 100) / 100
|
|
},
|
|
current: {
|
|
magnitude: Math.round(baseCurrent * 100) / 100,
|
|
angle: Math.round(random() * 360 * 100) / 100
|
|
}
|
|
},
|
|
event: {
|
|
type: eventType,
|
|
severity: eventType === 'normal' ? 'none' : ['low', 'medium', 'high', 'critical'][Math.floor(random() * 4)],
|
|
duration: eventType === 'normal' ? 0 : Math.round(random() * 5000), // ms
|
|
faultLocation: eventType === 'fault' ? {
|
|
distance: Math.round(random() * 50 * 100) / 100, // km
|
|
impedance: Math.round((random() * 10) * 100) / 100 // ohms
|
|
} : null,
|
|
switchingOperation: eventType === 'switching' ? {
|
|
breaker: `CB${Math.floor(1 + random() * 50)}`,
|
|
status: random() > 0.5 ? 'opened' : 'closed'
|
|
} : null
|
|
},
|
|
quality: {
|
|
timeError: Math.round(random() * 1000), // microseconds
|
|
dataValidity: random() > 0.95 ? 'invalid' : 'valid',
|
|
synchronizationSource: ['GPS', 'IRIG-B', 'NTP'][Math.floor(random() * 3)],
|
|
uncertaintyEstimate: Math.round(random() * 0.5 * 1000) / 1000
|
|
},
|
|
scrapedAt: new Date().toISOString()
|
|
});
|
|
}
|
|
|
|
return results;
|
|
}
|
|
|
|
/**
 * Generate AIS (Automatic Identification System) maritime ship tracking data.
 * Simulates vessel positions, navigation status, and maritime traffic.
 *
 * Every random choice is drawn from the generator returned by
 * `createSeededRandom(seed)`. Record ids, timestamps, ETAs and `scrapedAt`
 * are additionally derived from the current wall-clock time.
 *
 * @param {number} count - Number of AIS records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated records, one per simulated AIS message.
 */
async function generateAISData(count, seed) {
    log.info('Generating AIS maritime tracking data...');
    const random = createSeededRandom(seed);
    const results = [];

    const vesselTypes = [
        { code: 30, name: 'Fishing' },
        { code: 60, name: 'Passenger' },
        { code: 70, name: 'Cargo' },
        { code: 80, name: 'Tanker' },
        { code: 36, name: 'Sailing' },
        { code: 37, name: 'Pleasure Craft' },
        { code: 52, name: 'Tug' },
        { code: 31, name: 'Towing' }
    ];

    const navStatuses = [
        'Under way using engine',
        'At anchor',
        'Not under command',
        'Restricted manoeuvrability',
        'Constrained by draught',
        'Moored',
        'Aground',
        'Engaged in fishing',
        'Under way sailing'
    ];

    const messageTypes = [1, 2, 3, 5, 18, 19, 21, 24, 27];
    // Type 5 carries the static/voyage fields (IMO, destination, ETA) that this
    // generator only fills for that type, so it is reported as Class A together
    // with the position reports 1-3. All other types keep the previous 'B' label.
    const classAMessageTypes = [1, 2, 3, 5];
    const destinations = ['NEW YORK', 'ROTTERDAM', 'SINGAPORE', 'HONG KONG', 'SHANGHAI', 'LOS ANGELES',
        'HAMBURG', 'DUBAI', 'TOKYO', 'SOUTHAMPTON', 'PANAMA CANAL', 'SUEZ CANAL'];

    // Generate realistic shipping lanes
    const shippingLanes = [
        { name: 'North Atlantic', lat: [40, 50], lon: [-60, -10] },
        { name: 'Mediterranean', lat: [30, 45], lon: [0, 35] },
        { name: 'Panama Approach', lat: [5, 15], lon: [-85, -75] },
        { name: 'Malacca Strait', lat: [0, 6], lon: [98, 105] },
        { name: 'English Channel', lat: [49, 51], lon: [-5, 2] }
    ];

    const namePrefixes = ['OCEAN', 'PACIFIC', 'ATLANTIC', 'MARINE', 'SEA', 'WAVE'];
    const nameSuffixes = ['STAR', 'VOYAGER', 'PIONEER', 'SPIRIT', 'VENTURE'];
    const flags = ['USA', 'UK', 'PANAMA', 'LIBERIA', 'MARSHALL IS', 'SINGAPORE', 'MALTA'];
    const positioningDevices = ['GPS', 'DGPS', 'Loran-C'];
    const trafficDensities = ['low', 'medium', 'high', 'very high'];

    // Uniform pick; consumes exactly one random draw.
    const pick = (items) => items[Math.floor(random() * items.length)];

    // Round to 0.1 degree and wrap into [0, 360). The wrap is done on integer
    // tenths so the result carries no floating-point residue from `%`.
    const toCompassDegrees = (degrees) => {
        const tenths = Math.round(degrees * 10);
        return (((tenths % 3600) + 3600) % 3600) / 10;
    };

    for (let i = 0; i < count; i++) {
        const vesselType = pick(vesselTypes);
        const messageType = pick(messageTypes);
        const lane = pick(shippingLanes);

        // Position within shipping lane
        const latitude = Math.round((lane.lat[0] + random() * (lane.lat[1] - lane.lat[0])) * 1000000) / 1000000;
        const longitude = Math.round((lane.lon[0] + random() * (lane.lon[1] - lane.lon[0])) * 1000000) / 1000000;

        // Speed and course
        const speed = Math.round((random() * 25) * 10) / 10; // knots
        const course = toCompassDegrees(random() * 360); // degrees, [0, 360)
        // Heading jitters +/-5 degrees around the course and wraps across north
        // instead of going negative or past 360.
        const heading = toCompassDegrees(course + (random() - 0.5) * 10);

        results.push({
            recordId: `AIS_${Date.now()}_${i}`,
            vessel: {
                mmsi: String(200000000 + Math.floor(random() * 799999999)), // Valid MMSI range
                imo: messageType === 5 ? String(1000000 + Math.floor(random() * 8999999)) : null, // IMO number
                name: `${pick(namePrefixes)} ${pick(nameSuffixes)}`,
                callSign: `${String.fromCharCode(65 + Math.floor(random() * 26))}${String.fromCharCode(65 + Math.floor(random() * 26))}${Math.floor(1000 + random() * 8999)}`,
                type: vesselType.name,
                typeCode: vesselType.code,
                flag: pick(flags)
            },
            dimensions: {
                length: Math.floor(50 + random() * 350), // meters
                beam: Math.floor(10 + random() * 50), // meters
                draught: Math.round((2 + random() * 15) * 10) / 10, // meters
                toBow: Math.floor(20 + random() * 150),
                toStern: Math.floor(20 + random() * 150),
                toPort: Math.floor(5 + random() * 20),
                toStarboard: Math.floor(5 + random() * 20)
            },
            position: {
                latitude,
                longitude,
                accuracy: random() > 0.9 ? 'low' : 'high',
                timestamp: new Date(Date.now() - random() * 300000).toISOString(), // Within last 5 min
                positioningDevice: pick(positioningDevices)
            },
            navigation: {
                status: pick(navStatuses),
                speed: speed,
                course: course,
                heading: heading,
                rateOfTurn: Math.round((random() - 0.5) * 10 * 100) / 100, // degrees/min
                destination: messageType === 5 ? pick(destinations) : null,
                eta: messageType === 5 ? new Date(Date.now() + (1 + random() * 10) * 24 * 60 * 60 * 1000).toISOString() : null
            },
            message: {
                type: messageType,
                repeatIndicator: Math.floor(random() * 4),
                class: classAMessageTypes.includes(messageType) ? 'A' : 'B',
                channel: random() > 0.5 ? 'A' : 'B',
                timeSlot: Math.floor(random() * 2250)
            },
            safety: {
                collisionRisk: speed > 0 ? (random() > 0.85 ? 'high' : random() > 0.6 ? 'medium' : 'low') : 'none',
                closestApproach: speed > 0 ? {
                    distance: Math.round((0.1 + random() * 10) * 100) / 100, // nautical miles
                    time: Math.round((5 + random() * 55)), // minutes
                    vesselMMSI: String(200000000 + Math.floor(random() * 799999999))
                } : null,
                inShippingLane: random() > 0.2,
                weatherConditions: {
                    seaState: Math.floor(random() * 10), // Douglas scale 0-9 (inclusive)
                    visibility: Math.round((1 + random() * 9) * 10) / 10, // nautical miles
                    windSpeed: Math.round(random() * 40) // knots
                }
            },
            routing: {
                shippingLane: lane.name,
                nextWaypoint: {
                    latitude: Math.round((latitude + (random() - 0.5) * 2) * 1000000) / 1000000,
                    longitude: Math.round((longitude + (random() - 0.5) * 2) * 1000000) / 1000000,
                    distance: Math.round((10 + random() * 200) * 10) / 10, // nautical miles
                    eta: new Date(Date.now() + random() * 86400000).toISOString()
                },
                routeDeviation: Math.round(random() * 5 * 100) / 100, // nautical miles
                trafficDensity: pick(trafficDensities)
            },
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate Radar data (weather and vehicle detection).
 * Simulates reflectivity, velocity, and Doppler measurements.
 *
 * Records whose radar type is 'weather' carry a `weather` section and
 * polarimetric fields (`zdr`, `kdp`, `rhohv`); every other radar type carries
 * a `vehicle` section instead. Random values come from
 * `createSeededRandom(seed)`; ids and timestamps also use the wall clock.
 *
 * @param {number} count - Number of radar records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated radar records.
 */
async function generateRadarData(count, seed) {
    log.info('Generating Radar detection data...');
    const random = createSeededRandom(seed);
    const results = [];

    const radarTypes = ['weather', 'vehicle', 'marine', 'air_traffic'];
    const vehicleTypes = ['car', 'truck', 'motorcycle', 'bicycle', 'pedestrian'];
    const precipTypes = ['none', 'drizzle', 'rain', 'heavy_rain', 'snow', 'sleet', 'hail', 'mixed'];
    const hydrometeorClasses = ['biological', 'anomalous_prop', 'ice_crystals', 'dry_snow', 'wet_snow',
        'light_rain', 'moderate_rain', 'heavy_rain', 'hail', 'big_drops'];
    const trackingQualities = ['excellent', 'good', 'fair', 'poor'];
    const siteNames = ['North', 'South', 'East', 'West', 'Central'];
    const radarModes = ['surveillance', 'tracking', 'doppler'];

    // Uniform pick; consumes exactly one random draw.
    const pick = (items) => items[Math.floor(random() * items.length)];

    for (let i = 0; i < count; i++) {
        const radarType = pick(radarTypes);
        const isWeather = radarType === 'weather';

        // Range gate parameters
        const range = Math.round((0.1 + random() * 50) * 100) / 100; // km
        const azimuth = Math.round(random() * 360 * 10) / 10; // degrees
        const elevation = Math.round((random() * 20 - 5) * 10) / 10; // degrees

        // Reflectivity (dBZ) - weather radar
        const reflectivity = isWeather
            ? Math.round((-20 + random() * 80) * 10) / 10 // -20 to 60 dBZ
            : Math.round((10 + random() * 30) * 10) / 10; // Vehicle radar

        // Doppler velocity
        const velocity = Math.round((random() * 60 - 30) * 10) / 10; // m/s

        // Weather-specific data
        const weatherData = isWeather ? {
            precipitationType: pick(precipTypes),
            precipitationRate: Math.round(random() * 100 * 10) / 10, // mm/hr
            // A storm cell is only attached to strong echoes.
            stormCell: reflectivity > 45 ? {
                id: `CELL${Math.floor(100 + random() * 899)}`,
                top: Math.round((5 + random() * 15) * 100) / 100, // km
                vil: Math.round(random() * 80), // kg/m²
                severity: reflectivity > 55 ? 'severe' : 'moderate',
                movement: {
                    direction: Math.round(random() * 360),
                    speed: Math.round((10 + random() * 40) * 10) / 10 // km/h
                }
            } : null,
            echoTop: Math.round((2 + random() * 18) * 100) / 100, // km
            verticalIntegratedLiquid: Math.round(random() * 50), // kg/m²
            hydrometeorClassification: pick(hydrometeorClasses)
        } : null;

        // Vehicle detection data
        const vehicleData = !isWeather ? {
            detections: Array.from({ length: Math.floor(1 + random() * 5) }, () => ({
                type: pick(vehicleTypes),
                range: Math.round((2 + random() * 200) * 10) / 10, // meters
                azimuth: Math.round(random() * 180 * 10) / 10, // degrees
                velocity: Math.round((random() * 50) * 10) / 10, // m/s
                rcs: Math.round((random() * 40 - 10) * 10) / 10, // dBsm (radar cross section)
                confidence: Math.round((0.5 + random() * 0.5) * 100) / 100,
                trackId: Math.floor(1000 + random() * 8999)
            })),
            trackingQuality: pick(trackingQualities),
            multipath: random() > 0.8,
            clutter: random() > 0.7
        } : null;

        // The sections below are built one after another, in output order, so
        // the sequence of random draws matches the record layout.
        const radar = {
            id: `RADAR${String(Math.floor(1 + random() * 999)).padStart(3, '0')}`,
            type: radarType,
            location: {
                latitude: Math.round((25 + random() * 25) * 1000000) / 1000000,
                longitude: Math.round((-125 + random() * 50) * 1000000) / 1000000,
                altitude: Math.round(random() * 2000), // meters
                name: `${pick(siteNames)} Site`
            },
            specifications: {
                frequency: radarType === 'weather' ? '2.7-3.0 GHz (S-band)' : '76-81 GHz (W-band)',
                wavelength: radarType === 'weather' ? '10 cm' : '4 mm',
                beamWidth: Math.round((0.5 + random() * 2) * 10) / 10, // degrees
                pulseWidth: Math.round((0.5 + random() * 2) * 100) / 100, // microseconds
                prf: Math.round((300 + random() * 1700)), // Hz (pulse repetition frequency)
                maxRange: radarType === 'weather' ? 250 : 150, // km
                rangeResolution: Math.round((50 + random() * 200)), // meters
                mode: pick(radarModes)
            }
        };

        const measurement = {
            timestamp: new Date(Date.now() - random() * 300000).toISOString(),
            scanNumber: Math.floor(1 + random() * 1000),
            elevationAngle: elevation,
            azimuthAngle: azimuth,
            range: range,
            gateSpacing: Math.round((100 + random() * 150)), // meters
            reflectivity: reflectivity,
            velocity: velocity,
            spectrumWidth: Math.round((1 + random() * 10) * 10) / 10, // m/s
            correlation: Math.round((0.7 + random() * 0.3) * 1000) / 1000,
            snr: Math.round((5 + random() * 35) * 10) / 10, // dB
            zdr: isWeather ? Math.round((random() * 6 - 1) * 10) / 10 : null, // Differential reflectivity (dB)
            kdp: isWeather ? Math.round((random() * 5) * 100) / 100 : null, // Specific differential phase (deg/km)
            rhohv: isWeather ? Math.round((0.7 + random() * 0.3) * 1000) / 1000 : null // Correlation coefficient
        };

        const velocitySpectrum = Array.from({ length: 16 }, () => Math.round(random() * 100));
        const nyquistVelocity = Math.round((10 + random() * 20) * 10) / 10; // m/s
        const doppler = {
            velocitySpectrum,
            nyquistVelocity,
            // Flag aliasing against this record's own Nyquist velocity so the
            // two fields cannot contradict each other.
            aliasing: Math.abs(velocity) > nyquistVelocity,
            spectralWidth: Math.round((1 + random() * 8) * 10) / 10
        };

        const quality = {
            clutter: random() > 0.7 ? 'high' : random() > 0.4 ? 'medium' : 'low',
            groundClutterSuppression: random() > 0.5,
            anomalousPropagation: random() > 0.9,
            blockage: random() > 0.85,
            calibrationStatus: random() > 0.95 ? 'needs_cal' : 'ok',
            dataQualityIndex: Math.round((0.6 + random() * 0.4) * 100) / 100
        };

        results.push({
            recordId: `RADAR_${Date.now()}_${i}`,
            radar,
            measurement,
            weather: weatherData,
            vehicle: vehicleData,
            doppler,
            quality,
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
// ============================================
|
|
// PRIORITY 2: EXOTIC DATA GENERATORS
|
|
// ============================================
|
|
|
|
/**
 * Generate synthetic SCADA / industrial-control tag snapshots.
 *
 * Each record describes one randomly chosen piece of equipment (pump, valve,
 * motor, tank or heater) with its process variables, PLC register contents,
 * control outputs, derived setpoints, at most one active alarm, and OPC UA
 * addressing fields. Random values come from `createSeededRandom(seed)`;
 * timestamps also depend on the wall clock.
 *
 * @param {number} count - Number of records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated SCADA records.
 */
async function generateSCADAData(count, seed) {
    log.info('Generating SCADA/Industrial control data...');
    const random = createSeededRandom(seed);
    const results = [];

    const equipment = {
        pump: { type: 'PUMP', maxPressure: 150, maxFlow: 500, units: { pressure: 'PSI', flow: 'GPM' } },
        valve: { type: 'VALVE', positions: ['OPEN', 'CLOSED', 'THROTTLING'], units: { position: '%' } },
        motor: { type: 'MOTOR', maxSpeed: 1800, maxCurrent: 50, units: { speed: 'RPM', current: 'A' } },
        tank: { type: 'TANK', maxLevel: 100, maxVolume: 10000, units: { level: '%', volume: 'GAL' } },
        heater: { type: 'HEATER', maxTemp: 300, maxPower: 100, units: { temp: 'F', power: 'kW' } }
    };

    const equipmentTypes = Object.keys(equipment);
    const alarmTypes = ['HIGH_LIMIT', 'LOW_LIMIT', 'RATE_OF_CHANGE', 'DEVIATION', 'COMM_FAILURE'];
    const qualityCodes = ['GOOD', 'BAD', 'UNCERTAIN', 'FORCED'];
    const controlModes = ['AUTO', 'MANUAL', 'CASCADE'];

    // Lower bound used for every generated temperature reading.
    const MIN_TEMPERATURE = 60;

    // Uniform pick; consumes exactly one random draw.
    const pick = (items) => items[Math.floor(random() * items.length)];
    const roundToTenth = (value) => Math.round(value * 10) / 10;
    const clamp = (value, min, max) => Math.min(max, Math.max(min, value));

    for (let i = 0; i < count; i++) {
        const eqType = pick(equipmentTypes);
        const eqConfig = equipment[eqType];
        const timestamp = new Date(Date.now() - random() * 24 * 60 * 60 * 1000);

        const processVars = {};
        if (eqType === 'pump') {
            processVars.pressure = roundToTenth(random() * eqConfig.maxPressure);
            processVars.flow = roundToTenth(random() * eqConfig.maxFlow);
            processVars.vibration = Math.round((random() * 10) * 100) / 100;
        } else if (eqType === 'valve') {
            processVars.position = roundToTenth(random() * 100);
            processVars.command = roundToTenth(random() * 100);
            // Feedback tracks the command within +/-1 %; it is kept inside the
            // 0-100 % travel and rounded like every other process variable.
            processVars.feedback = roundToTenth(clamp(processVars.command + (random() - 0.5) * 2, 0, 100));
        } else if (eqType === 'motor') {
            processVars.speed = roundToTenth(random() * eqConfig.maxSpeed);
            processVars.current = roundToTenth(random() * eqConfig.maxCurrent);
            processVars.torque = roundToTenth(random() * 100);
        } else if (eqType === 'tank') {
            processVars.level = roundToTenth(random() * eqConfig.maxLevel);
            processVars.volume = roundToTenth(processVars.level / 100 * eqConfig.maxVolume);
            processVars.temperature = roundToTenth(MIN_TEMPERATURE + random() * 100);
        } else if (eqType === 'heater') {
            // Scale into [MIN_TEMPERATURE, maxTemp) so the reading never
            // exceeds the configured heater maximum.
            processVars.temperature = roundToTenth(MIN_TEMPERATURE + random() * (eqConfig.maxTemp - MIN_TEMPERATURE));
            processVars.setpoint = roundToTenth(100 + random() * 200);
            processVars.power = roundToTenth(random() * eqConfig.maxPower);
        }

        // Roughly 15 % of records carry a single active alarm.
        const activeAlarms = [];
        if (random() > 0.85) {
            const alarmType = pick(alarmTypes);
            activeAlarms.push({
                type: alarmType,
                priority: Math.floor(1 + random() * 4),
                message: eqType.toUpperCase() + '_' + (i + 1) + ': ' + alarmType,
                acknowledgedAt: random() > 0.5 ? new Date(timestamp.getTime() + random() * 60000).toISOString() : null
            });
        }

        results.push({
            tagId: eqType.toUpperCase() + '_' + String(i + 1).padStart(4, '0'),
            equipmentType: eqConfig.type,
            location: 'AREA_' + (Math.floor(random() * 5) + 1),
            timestamp: timestamp.toISOString(),
            processVariables: processVars,
            plcRegisters: {
                holding: Array.from({ length: 8 }, () => Math.floor(random() * 65536)),
                input: Array.from({ length: 4 }, () => Math.floor(random() * 65536)),
                coil: Array.from({ length: 4 }, () => random() > 0.5)
            },
            controlOutputs: {
                analogOut: roundToTenth(random() * 100),
                digitalOut: random() > 0.5,
                mode: pick(controlModes)
            },
            // One setpoint per numeric process variable, within +/-10 % of it.
            setpoints: Object.keys(processVars).reduce((acc, key) => {
                if (typeof processVars[key] === 'number') {
                    acc[key] = roundToTenth(processVars[key] * (0.9 + random() * 0.2));
                }
                return acc;
            }, {}),
            alarms: activeAlarms,
            qualityCode: pick(qualityCodes),
            opcua: {
                nodeId: 'ns=2;s=' + eqType.toUpperCase() + '.' + (i + 1),
                browseName: eqType.toUpperCase() + '_' + (i + 1),
                statusCode: random() > 0.95 ? 'Bad' : 'Good'
            },
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic LiDAR scan records.
 *
 * For every scan the function materialises 1000 points on a circular sweep,
 * publishes the first 100 of them as `samplePoints`, and reports bounds that
 * are computed over all materialised points. `numPoints` is the nominal size
 * of the scan and is larger than the number of points actually built. Each
 * scan also carries up to nine object detections and sensor metadata.
 * Random values come from `createSeededRandom(seed)`; timestamps also depend
 * on the wall clock.
 *
 * @param {number} count - Number of scans to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated scan records.
 */
async function generateLiDARData(count, seed) {
    log.info('Generating LiDAR point cloud data...');
    const random = createSeededRandom(seed);
    const results = [];

    const scanPatterns = ['ROTATING_360', 'SOLID_STATE', 'FLASH', 'MEMS_MIRROR'];
    const classifications = [
        { code: 0, name: 'NEVER_CLASSIFIED' },
        { code: 1, name: 'UNCLASSIFIED' },
        { code: 2, name: 'GROUND' },
        { code: 3, name: 'LOW_VEGETATION' },
        { code: 4, name: 'MEDIUM_VEGETATION' },
        { code: 5, name: 'HIGH_VEGETATION' },
        { code: 6, name: 'BUILDING' },
        { code: 7, name: 'LOW_POINT' },
        { code: 9, name: 'WATER' },
        { code: 13, name: 'VEHICLE' },
        { code: 14, name: 'PEDESTRIAN' }
    ];
    const objectTypes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST', 'OBSTACLE', 'TRAFFIC_SIGN'];

    // Upper limit on the number of points actually built per scan.
    const MAX_MATERIALISED_POINTS = 1000;

    // Uniform pick; consumes exactly one random draw.
    const pick = (items) => items[Math.floor(random() * items.length)];
    // Uniform integer in the inclusive 8-bit range 0..255.
    const randomByte = () => Math.floor(random() * 256);

    for (let i = 0; i < count; i++) {
        const timestamp = new Date(Date.now() - random() * 3600 * 1000);
        const numPoints = Math.floor(10000 + random() * 90000);
        const scanPattern = pick(scanPatterns);

        const points = Array.from({ length: Math.min(numPoints, MAX_MATERIALISED_POINTS) }, (_, idx) => {
            const angle = (idx / MAX_MATERIALISED_POINTS) * 2 * Math.PI;
            const distance = 2 + random() * 100;
            const classification = pick(classifications);

            const z = Math.round(((random() - 0.5) * 10) * 1000) / 1000;
            const intensity = randomByte();
            // Two draws are taken in the same order as before, but the return
            // index is scaled by the pulse's return count so that
            // 1 <= returnNumber <= numberOfReturns always holds.
            const returnDraw = random();
            const numberOfReturns = Math.floor(1 + random() * 5);
            const returnNumber = 1 + Math.floor(returnDraw * numberOfReturns);

            return {
                x: Math.round((distance * Math.cos(angle)) * 1000) / 1000,
                y: Math.round((distance * Math.sin(angle)) * 1000) / 1000,
                z,
                intensity,
                returnNumber,
                numberOfReturns,
                classification: classification.code,
                classificationName: classification.name,
                scanAngle: Math.round((random() - 0.5) * 60 * 10) / 10,
                rgb: random() > 0.5 ? {
                    r: randomByte(),
                    g: randomByte(),
                    b: randomByte()
                } : null
            };
        });

        const detections = Array.from({ length: Math.floor(random() * 10) }, () => {
            const objType = pick(objectTypes);
            const centerX = (random() - 0.5) * 100;
            const centerY = (random() - 0.5) * 100;
            const centerZ = random() * 2;

            return {
                objectType: objType,
                confidence: Math.round((0.5 + random() * 0.5) * 1000) / 1000,
                boundingBox: {
                    center: { x: centerX, y: centerY, z: centerZ },
                    dimensions: {
                        length: Math.round((2 + random() * 8) * 100) / 100,
                        width: Math.round((1.5 + random() * 3) * 100) / 100,
                        height: Math.round((1 + random() * 3) * 100) / 100
                    },
                    rotation: Math.round((random() * 360) * 10) / 10
                },
                // Static object types carry no velocity vector.
                velocity: objType !== 'TRAFFIC_SIGN' && objType !== 'OBSTACLE' ? {
                    x: Math.round(((random() - 0.5) * 30) * 100) / 100,
                    y: Math.round(((random() - 0.5) * 30) * 100) / 100,
                    z: Math.round(((random() - 0.5) * 2) * 100) / 100
                } : null,
                trackId: 'TRK_' + Math.floor(random() * 1000)
            };
        });

        // Axis-aligned bounds over all materialised points, in a single pass.
        const bounds = {
            minX: Infinity,
            maxX: -Infinity,
            minY: Infinity,
            maxY: -Infinity,
            minZ: Infinity,
            maxZ: -Infinity
        };
        for (const point of points) {
            bounds.minX = Math.min(bounds.minX, point.x);
            bounds.maxX = Math.max(bounds.maxX, point.x);
            bounds.minY = Math.min(bounds.minY, point.y);
            bounds.maxY = Math.max(bounds.maxY, point.y);
            bounds.minZ = Math.min(bounds.minZ, point.z);
            bounds.maxZ = Math.max(bounds.maxZ, point.z);
        }

        results.push({
            scanId: 'SCAN_' + timestamp.getTime() + '_' + i,
            timestamp: timestamp.toISOString(),
            sensorId: 'LIDAR_' + (Math.floor(random() * 10) + 1),
            scanPattern,
            pointCloud: {
                numPoints,
                samplePoints: points.slice(0, 100),
                format: 'LAS_1.4',
                coordinateSystem: 'WGS84_UTM',
                bounds
            },
            detections,
            metadata: {
                horizontalFov: Math.round((scanPattern === 'ROTATING_360' ? 360 : 120) * 10) / 10,
                verticalFov: Math.round((30 + random() * 40) * 10) / 10,
                range: Math.round((50 + random() * 200) * 10) / 10,
                accuracy: Math.round((0.01 + random() * 0.05) * 1000) / 1000,
                scanRate: Math.round((5 + random() * 15) * 10) / 10
            },
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic CAN bus frames.
 *
 * Each record picks one ECU (engine, transmission, chassis, body or battery),
 * fills in that ECU's decoded signals, and attaches an independently drawn
 * 8-byte payload rendered as space-separated hex bytes. Random values come
 * from `createSeededRandom(seed)`; timestamps also depend on the wall clock.
 *
 * @param {number} count - Number of frames to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated CAN frames.
 */
async function generateCANBusData(count, seed) {
    log.info('Generating CAN bus vehicle data...');
    const random = createSeededRandom(seed);

    const oneDecimal = (value) => Math.round(value * 10) / 10;
    const choose = (options) => options[Math.floor(random() * options.length)];
    const hex = (value, width) => '0x' + value.toString(16).toUpperCase().padStart(width, '0');

    // One entry per ECU, in selection order: its arbitration id and a builder
    // that draws the decoded signals in their published key order.
    const ecus = [
        {
            name: 'engine',
            arbitrationId: 0x0C0,
            readSignals: () => ({
                rpm: Math.floor(800 + random() * 6000),
                throttle: oneDecimal(random() * 100),
                coolant_temp: oneDecimal(70 + random() * 50),
                oil_pressure: oneDecimal(20 + random() * 80),
                intake_temp: oneDecimal(20 + random() * 80),
                maf: oneDecimal(10 + random() * 200)
            })
        },
        {
            name: 'transmission',
            arbitrationId: 0x0D0,
            readSignals: () => ({
                gear: Math.floor(random() * 6) + 1,
                clutch: oneDecimal(random() * 100),
                shift_position: choose(['P', 'R', 'N', 'D', 'S']),
                torque_converter: oneDecimal(random() * 100)
            })
        },
        {
            name: 'chassis',
            arbitrationId: 0x1A0,
            readSignals: () => {
                const vehicleSpeed = oneDecimal(random() * 120);
                // Each wheel reads within roughly +/-5 % of the vehicle speed.
                const wheelSpeed = () => oneDecimal(vehicleSpeed * (0.95 + random() * 0.1));
                return {
                    speed: vehicleSpeed,
                    brake_pressure: oneDecimal(random() * 2000),
                    steering_angle: oneDecimal((random() - 0.5) * 900),
                    abs_active: random() > 0.9,
                    traction_control: random() > 0.85,
                    wheel_speed_fl: wheelSpeed(),
                    wheel_speed_fr: wheelSpeed(),
                    wheel_speed_rl: wheelSpeed(),
                    wheel_speed_rr: wheelSpeed()
                };
            }
        },
        {
            name: 'body',
            arbitrationId: 0x2C0,
            readSignals: () => ({
                door_driver: random() > 0.9,
                door_passenger: random() > 0.9,
                door_rear_left: random() > 0.95,
                door_rear_right: random() > 0.95,
                trunk: random() > 0.98,
                lights: choose(['OFF', 'PARKING', 'LOW_BEAM', 'HIGH_BEAM']),
                windows: {
                    driver: Math.floor(random() * 100),
                    passenger: Math.floor(random() * 100),
                    rear_left: Math.floor(random() * 100),
                    rear_right: Math.floor(random() * 100)
                }
            })
        },
        {
            name: 'battery',
            arbitrationId: 0x3E0,
            readSignals: () => ({
                voltage: Math.round((12 + random() * 3) * 100) / 100,
                current: oneDecimal((random() - 0.5) * 200),
                soc: oneDecimal(20 + random() * 80),
                temperature: oneDecimal(15 + random() * 40)
            })
        }
    ];

    const frames = [];
    let frameIndex = 0;
    while (frameIndex < count) {
        const capturedAt = new Date(Date.now() - random() * 3600 * 1000);
        const source = choose(ecus);
        const label = source.name.toUpperCase();
        const signals = source.readSignals();

        // The raw payload is drawn independently of the decoded signals.
        const payload = [];
        for (let byteIndex = 0; byteIndex < 8; byteIndex += 1) {
            payload.push(Math.floor(random() * 256));
        }

        frames.push({
            messageId: `CAN_${capturedAt.getTime()}_${frameIndex}`,
            timestamp: capturedAt.toISOString(),
            arbitrationId: hex(source.arbitrationId, 3),
            ecuName: label,
            dlc: 8,
            data: payload.map((byte) => hex(byte, 2)).join(' '),
            signals,
            dbcDecoded: {
                messageName: `${label}_STATUS`,
                cycletime: Math.floor(10 + random() * 90),
                signalCount: Object.keys(signals).length
            },
            busLoad: oneDecimal(random() * 100),
            errorFrames: Math.floor(random() * 5),
            scrapedAt: new Date().toISOString()
        });
        frameIndex += 1;
    }

    return frames;
}
|
|
|
|
/**
 * Generate synthetic genomic variant records shaped like VCF rows.
 *
 * Each record holds a `vcfRecord` (CHROM/POS/ID/REF/ALT/QUAL/FILTER, INFO
 * values and one sample), a gene-level annotation, population frequencies,
 * in-silico predictions and a clinical-significance section. Random values
 * come from `createSeededRandom(seed)`.
 *
 * Genotypes that reference allele index 2 ('0/2', '1/2') are emitted as
 * multi-allelic sites: `alt` then holds two comma-separated alleles and the
 * sample `AD` holds three depths (REF, ALT1, ALT2). The INFO `AF`/`AC` values
 * remain single numbers in every case.
 *
 * @param {number} count - Number of variant records to generate.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<object[]>} The generated variant records.
 */
async function generateGenomicVCFData(count, seed) {
    log.info('Generating genomic VCF variant data...');
    const random = createSeededRandom(seed);
    const results = [];

    const chromosomes = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y', 'MT'];
    const bases = ['A', 'C', 'G', 'T'];
    const consequences = ['MISSENSE', 'SYNONYMOUS', 'NONSENSE', 'FRAMESHIFT', 'SPLICE_SITE', 'INTRONIC', 'UTR_5', 'UTR_3', 'INTERGENIC'];
    const impacts = ['HIGH', 'MODERATE', 'LOW', 'MODIFIER'];
    const filters = ['PASS', 'LOW_QUAL', 'STRAND_BIAS', 'LOW_DEPTH'];
    const genotypes = ['0/0', '0/1', '1/1', '0/2', '1/2'];
    // Loop-invariant lookup tables, built once instead of per record.
    const geneNames = ['BRCA1', 'TP53', 'EGFR', 'KRAS', 'PTEN', 'MYC', 'NOTCH1', 'APC', 'RB1', 'VHL', 'CDKN2A', 'PIK3CA'];
    const referenceResidues = ['Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu'];
    const variantResidues = ['Val', 'Leu', 'Ile', 'Met'];
    const clinvarClasses = ['BENIGN', 'LIKELY_BENIGN', 'UNCERTAIN', 'LIKELY_PATHOGENIC', 'PATHOGENIC'];
    const reviewStatuses = ['NO_ASSERTION', 'SINGLE_SUBMITTER', 'MULTIPLE_SUBMITTERS', 'EXPERT_PANEL'];
    const conditionNames = ['Hereditary cancer syndrome', 'Familial adenomatous polyposis'];

    // Uniform pick; consumes exactly one random draw.
    const pick = (items) => items[Math.floor(random() * items.length)];
    const populationFrequency = () => Math.round((random() * 0.1) * 100000) / 100000;

    // Allelic depths as a comma-separated string with one entry per allele
    // (REF first). The multi-allelic cases split a single depth draw between
    // the two called alleles, so they consume one draw like the other
    // single-draw genotypes.
    const buildAllelicDepths = (genotype) => {
        switch (genotype) {
            case '0/1':
                return `${Math.floor(random() * 50)},${Math.floor(random() * 50)}`;
            case '1/1':
                return `0,${Math.floor(random() * 100)}`;
            case '0/2':
            case '1/2': {
                const total = Math.floor(random() * 100);
                const firstShare = Math.floor(total / 2);
                const secondShare = total - firstShare;
                return genotype === '0/2'
                    ? `${firstShare},0,${secondShare}`
                    : `0,${firstShare},${secondShare}`;
            }
            default:
                return `${Math.floor(random() * 100)},0`;
        }
    };

    for (let i = 0; i < count; i++) {
        const chrom = pick(chromosomes);
        const pos = Math.floor(1000000 + random() * 200000000);
        const ref = pick(bases);
        const altCandidates = bases.filter((base) => base !== ref);
        const altIndex = Math.floor(random() * altCandidates.length);
        const primaryAlt = altCandidates[altIndex];
        const qual = Math.round((random() * 1000) * 10) / 10;
        const filter = qual > 30 ? 'PASS' : pick(filters);
        const genotype = pick(genotypes);

        // A genotype that calls allele 2 needs a second ALT allele to point at;
        // take the next candidate base so no extra random draw is consumed.
        const alt = genotype.includes('2')
            ? `${primaryAlt},${altCandidates[(altIndex + 1) % altCandidates.length]}`
            : primaryAlt;

        const gene = pick(geneNames);
        const consequence = pick(consequences);
        const impact = pick(impacts);

        results.push({
            variantId: 'VAR_' + chrom + '_' + pos + '_' + i,
            vcfRecord: {
                chrom,
                pos,
                id: random() > 0.7 ? ('rs' + Math.floor(1000000 + random() * 99000000)) : '.',
                ref,
                alt,
                qual,
                filter,
                info: {
                    DP: Math.floor(10 + random() * 200),
                    AF: Math.round((random()) * 1000) / 1000,
                    AC: Math.floor(1 + random() * 10),
                    AN: Math.floor(10 + random() * 100),
                    BaseQRankSum: Math.round(((random() - 0.5) * 10) * 100) / 100,
                    MQ: Math.round((40 + random() * 20) * 10) / 10,
                    MQRankSum: Math.round(((random() - 0.5) * 5) * 100) / 100,
                    ReadPosRankSum: Math.round(((random() - 0.5) * 5) * 100) / 100
                },
                format: ['GT', 'DP', 'GQ', 'AD'],
                samples: [{
                    GT: genotype,
                    DP: Math.floor(10 + random() * 100),
                    GQ: Math.floor(random() * 99),
                    AD: buildAllelicDepths(genotype)
                }]
            },
            annotation: {
                gene,
                transcript: gene + '-001',
                consequence,
                impact,
                proteinChange: consequence === 'MISSENSE' ? ('p.' + pick(referenceResidues) + Math.floor(1 + random() * 500) + pick(variantResidues)) : null,
                // The cDNA change always describes the first ALT allele.
                cdnaChange: 'c.' + Math.floor(1 + random() * 3000) + ref + '>' + primaryAlt,
                exon: consequence !== 'INTRONIC' ? (Math.floor(1 + random() * 20) + '/20') : null
            },
            populationFrequencies: {
                gnomAD_AF: populationFrequency(),
                gnomAD_AF_afr: populationFrequency(),
                gnomAD_AF_eas: populationFrequency(),
                gnomAD_AF_nfe: populationFrequency(),
                ExAC_AF: populationFrequency(),
                '1000g_AF': populationFrequency()
            },
            predictions: {
                SIFT: random() > 0.5 ? 'TOLERATED' : 'DELETERIOUS',
                SIFT_score: Math.round((random()) * 1000) / 1000,
                PolyPhen: random() > 0.5 ? 'BENIGN' : 'PROBABLY_DAMAGING',
                PolyPhen_score: Math.round((random()) * 1000) / 1000,
                CADD_phred: Math.round((random() * 40) * 10) / 10,
                GERP_RS: Math.round(((random() - 0.5) * 12) * 100) / 100
            },
            clinicalSignificance: {
                clinvar: pick(clinvarClasses),
                reviewStatus: pick(reviewStatuses),
                // Always an array so consumers see one type for this field.
                conditions: random() > 0.7 ? [pick(conditionNames)] : []
            },
            scrapedAt: new Date().toISOString()
        });
    }

    return results;
}
|
|
|
|
/**
 * Generate synthetic multi-spectral satellite scene records.
 *
 * Each record describes one fictional scene: platform, acquisition time,
 * footprint, per-band pixel samples, cloud cover, spectral indices, acquisition
 * metadata and a quality assessment. All random draws come from the seeded
 * generator returned by `createSeededRandom(seed)`. Timestamps and `scrapedAt`
 * are derived from the wall clock, so they are NOT reproducible from the seed.
 *
 * Differences from the previous implementation (all bug fixes):
 *  - `location.wrs` is now built from the same `path`/`row` values that are
 *    reported next to it instead of from two unrelated random draws.
 *  - EVI is guarded by its own denominator, so it can no longer be NaN/Infinity.
 *  - NDWI uses the same NIR value as the other indices (falls back to `NIR1`)
 *    and no longer treats a legitimate Green/NIR value of 0 as "missing".
 *  - `Sentinel-2B` has its own band table entry instead of silently inheriting
 *    the Landsat band list (which includes thermal bands).
 *  - Per-band `resolution` and `metadata.resolution` come from one lookup, so
 *    they agree within a record.
 *
 * @param {number} count - Number of scene records to generate; values <= 0 yield an empty array.
 * @param {*} seed - Seed forwarded unchanged to `createSeededRandom`.
 * @returns {Promise<Array<Object>>} Resolves to the generated scene records.
 */
async function generateSatelliteData(count, seed) {
    log.info('Generating satellite multi-spectral imagery data...');
    const random = createSeededRandom(seed);
    const results = [];

    const satellites = ['Landsat-8', 'Landsat-9', 'Sentinel-2A', 'Sentinel-2B', 'MODIS', 'WorldView-3', 'Planet'];

    // Sister platforms share one band list; every entry in `satellites` is now listed explicitly.
    const landsatBands = ['Coastal', 'Blue', 'Green', 'Red', 'NIR', 'SWIR1', 'SWIR2', 'Cirrus', 'TIR1', 'TIR2'];
    const sentinel2Bands = ['Coastal', 'Blue', 'Green', 'Red', 'RedEdge1', 'RedEdge2', 'RedEdge3', 'NIR', 'SWIR1', 'SWIR2'];
    const bands = {
        'Landsat-8': landsatBands,
        'Landsat-9': landsatBands,
        'Sentinel-2A': sentinel2Bands,
        'Sentinel-2B': sentinel2Bands,
        'MODIS': ['Red', 'NIR', 'Blue', 'Green', 'SWIR', 'TIR'],
        'WorldView-3': ['Coastal', 'Blue', 'Green', 'Yellow', 'Red', 'RedEdge', 'NIR1', 'NIR2'],
        'Planet': ['Blue', 'Green', 'Red', 'NIR'],
    };

    const processingLevels = ['L1C', 'L1T', 'L2A', 'L2SP'];

    // Wavelength range strings for the bands that have a dedicated entry.
    const wavelengthByBand = new Map([
        ['Blue', '0.45-0.51'],
        ['Green', '0.53-0.59'],
        ['Red', '0.64-0.67'],
        ['NIR', '0.85-0.88'],
        ['NIR1', '0.85-0.88'],
        ['SWIR1', '1.57-1.65'],
        ['SWIR2', '2.11-2.29'],
    ]);

    // Thermal bands share one range; every other unlisted band gets the same default string as before.
    const wavelengthFor = (bandName) => wavelengthByBand.get(bandName)
        ?? (bandName.includes('TIR') ? '10.6-12.5' : '0.43-0.45');

    // Single source of truth for the nominal resolution, used for both bands and metadata.
    const resolutionFor = (satelliteName) => {
        if (satelliteName.includes('WorldView')) return 1.24;
        if (satelliteName.includes('Planet')) return 3;
        if (satelliteName.includes('Sentinel')) return 10;
        if (satelliteName.includes('Landsat')) return 30;
        return 250;
    };

    const sensorFor = (satelliteName) => {
        if (satelliteName.includes('Landsat')) return 'OLI/TIRS';
        if (satelliteName.includes('Sentinel')) return 'MSI';
        return 'Unknown';
    };

    const cloudLevelFor = (percentage) => {
        if (percentage < 10) return 'CLEAR';
        if (percentage < 30) return 'PARTLY_CLOUDY';
        if (percentage < 70) return 'MOSTLY_CLOUDY';
        return 'OVERCAST';
    };

    const roundTo = (value, factor) => Math.round(value * factor) / factor;

    // Rounded quotient (3 decimals) that degrades to 0 instead of NaN/Infinity on a zero denominator.
    const safeRatio = (numerator, denominator) => (
        denominator !== 0 ? roundTo(numerator / denominator, 1000) : 0
    );

    const pick = (items) => items[Math.floor(random() * items.length)];

    for (let i = 0; i < count; i++) {
        const satellite = pick(satellites);
        const satelliteBands = bands[satellite] || bands['Landsat-8'];
        const timestamp = new Date(Date.now() - random() * 365 * 24 * 60 * 60 * 1000);
        const lat = (random() - 0.5) * 180;
        const lon = (random() - 0.5) * 360;
        const cloudCover = roundTo(random() * 100, 10);

        // One sample per band; thermal bands use a lower ceiling than the 16-bit range.
        const pixelValues = {};
        for (const band of satelliteBands) {
            const maxValue = band.includes('TIR') ? 40000 : 65535;
            pixelValues[band] = Math.floor(random() * maxValue);
        }

        const blue = pixelValues['Blue'] ?? 0;
        const green = pixelValues['Green'] ?? 0;
        const red = pixelValues['Red'] ?? 0;
        // WorldView-3 names its near-infrared band NIR1.
        const nir = pixelValues['NIR'] ?? pixelValues['NIR1'] ?? 0;

        const ndvi = safeRatio(nir - red, nir + red);
        // Guard on EVI's own denominator: it can be zero even when nir + red is not.
        const evi = safeRatio(2.5 * (nir - red), nir + 6 * red - 7.5 * blue + 1);
        const ndwi = safeRatio(green - nir, green + nir);
        const savi = safeRatio(1.5 * (nir - red), nir + red + 0.5);

        const resolution = resolutionFor(satellite);
        const processingLevel = pick(processingLevels);

        // Draw path/row once so the combined WRS string matches the individual fields.
        const path = Math.floor(1 + random() * 233);
        const row = Math.floor(1 + random() * 248);

        const cloudMask = Array.from({ length: 100 }, () => random() < cloudCover / 100);

        results.push({
            sceneId: `${satellite.replace('-', '')}_${timestamp.getTime()}_${i}`,
            satellite,
            sensor: sensorFor(satellite),
            timestamp: timestamp.toISOString(),
            acquisitionDate: timestamp.toISOString().split('T')[0],
            processingLevel,
            location: {
                centerLat: roundTo(lat, 100000),
                centerLon: roundTo(lon, 100000),
                path,
                row,
                wrs: `${path}/${row}`,
            },
            geometry: {
                type: 'Polygon',
                // Closed ring: the first and last vertex are identical.
                coordinates: [[
                    [lon, lat],
                    [lon + 0.1, lat],
                    [lon + 0.1, lat + 0.1],
                    [lon, lat + 0.1],
                    [lon, lat],
                ]],
            },
            bands: satelliteBands.map((bandName) => ({
                name: bandName,
                wavelength: wavelengthFor(bandName),
                resolution,
                pixelValue: pixelValues[bandName],
                units: bandName.includes('TIR') ? 'Kelvin' : 'DN',
            })),
            cloudCover: {
                percentage: cloudCover,
                level: cloudLevelFor(cloudCover),
                cloudMask,
            },
            indices: {
                NDVI: ndvi,
                EVI: evi,
                NDWI: ndwi,
                SAVI: savi,
            },
            metadata: {
                sunElevation: roundTo(30 + random() * 60, 100),
                sunAzimuth: roundTo(random() * 360, 100),
                viewAngle: roundTo(random() * 30, 100),
                resolution,
                format: 'GeoTIFF',
                projection: 'EPSG:4326',
                tileId: `T${Math.floor(10 + random() * 50)}${pick(['A', 'B', 'C', 'D'])}`,
            },
            qualityAssessment: {
                overallQuality: pick(['EXCELLENT', 'GOOD', 'FAIR', 'POOR']),
                radiometricQuality: roundTo(random() * 10, 10),
                geometricQuality: roundTo(random() * 10, 10),
                artifacts: random() > 0.8,
                stripingDetected: random() > 0.95,
            },
            scrapedAt: new Date().toISOString(),
        });
    }

    return results;
}
|