// wifi-densepose/vendor/ruvector/examples/rvf/examples/life_candidate.rs
// (repository listing metadata: 530 lines, 18 KiB, Rust)

//! Life Candidate Scoring Pipeline (L0-L2) using RVF
//!
//! Demonstrates spectral disequilibrium analysis from ADR-040:
//! L0 Ingest: Synthetic JWST NIRSpec spectra with absorption features
//! L1 Feature Extraction: Molecule identification + co-occurrence edges
//! L2 Disequilibrium: Score imbalance, repeatability, penalties
//!
//! Output: Ranked life candidate list with uncertainty and follow-up
//!
//! RVF segments used: VEC_SEG, MANIFEST_SEG, WITNESS_SEG
//!
//! Run: cargo run --example life_candidate
use rvf_runtime::{
FilterExpr, MetadataEntry, MetadataValue, QueryOptions, RvfOptions, RvfStore,
};
use rvf_runtime::filter::FilterValue;
use rvf_runtime::options::DistanceMetric;
use rvf_crypto::{create_witness_chain, verify_witness_chain, shake256_256, WitnessEntry};
use tempfile::TempDir;
// ---------------------------------------------------------------------------
// LCG helpers
// ---------------------------------------------------------------------------
/// Advances the 64-bit linear congruential generator stored in `state` by one
/// step and returns the value that was just written back.
///
/// The update is `state * 6364136223846793005 + 1442695040888963407`, computed
/// with wrapping (mod 2^64) arithmetic so it never panics on overflow.
fn lcg_next(state: &mut u64) -> u64 {
    const MULTIPLIER: u64 = 6364136223846793005;
    const INCREMENT: u64 = 1442695040888963407;

    let advanced = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
    *state = advanced;
    advanced
}
/// Builds a deterministic pseudo-random vector with `dim` components.
///
/// The generator is a 64-bit LCG seeded with `seed + 1` (wrapping), so the same
/// `(dim, seed)` pair always yields the same vector. Each component is taken
/// from the high 32 bits of the state, scaled by `u32::MAX`, and shifted by
/// `-0.5`, which places it in `[-0.5, 0.5]`.
///
/// The shift is 32 rather than 33: shifting by 33 leaves only 31 significant
/// bits, so after dividing by `u32::MAX` every component fell in `[-0.5, 0)`
/// and the vectors were never centred on zero.
fn random_vector(dim: usize, seed: u64) -> Vec<f32> {
    let mut state = seed.wrapping_add(1);
    (0..dim)
        .map(|_| {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            // High bits of an LCG are the best-distributed ones; keep all 32.
            ((state >> 32) as f32) / (u32::MAX as f32) - 0.5
        })
        .collect()
}
/// Draws the next pseudo-random `f64` from the generator held in `state`.
///
/// The generator is advanced exactly once via `lcg_next`. The top 53 bits of
/// the new state are then divided by 2^53, so the result lies in `[0, 1)`.
fn lcg_f64(state: &mut u64) -> f64 {
    let raw = lcg_next(state);
    // 53 bits is the widest integer an f64 mantissa represents exactly.
    let mantissa = raw >> 11;
    mantissa as f64 / (1u64 << 53) as f64
}
// ---------------------------------------------------------------------------
// ADR-040 domain types
// ---------------------------------------------------------------------------
/// One absorber that the synthetic spectra may contain.
#[derive(Clone, Debug)]
struct Molecule {
    /// Label stored in `Spectrum::detected_molecules` and in store metadata.
    name: &'static str,
    /// Centre of the absorption feature, in micrometres.
    wavelength_um: f64,
    /// Cutoff distance from the centre, in micrometres; bins farther away are
    /// left untouched when the feature is injected.
    width_um: f64,
}
/// Catalogue of absorbers the pipeline can inject into a spectrum and report.
///
/// Entries are visited in this order by the spectrum generator and by the
/// per-band "dominant molecule" lookup, so reordering changes both the random
/// draws and which molecule wins when two share a band.
/// NOTE(review): centres/widths look like illustrative band positions rather
/// than calibrated line data — confirm against ADR-040 before reuse.
const MOLECULES: &[Molecule] = &[
Molecule { name: "H2O", wavelength_um: 1.4, width_um: 0.15 },
Molecule { name: "CO2", wavelength_um: 2.0, width_um: 0.2 },
Molecule { name: "CH4", wavelength_um: 2.3, width_um: 0.1 },
Molecule { name: "O3", wavelength_um: 0.6, width_um: 0.08 },
Molecule { name: "NH3", wavelength_um: 3.0, width_um: 0.12 },
];
/// A synthetic spectrum for one target, as produced by `generate_spectrum`.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct Spectrum {
    /// Numeric identifier of the target; also feeds the per-target RNG seeds.
    target_id: u64,
    /// Human-readable target name.
    target_name: String,
    /// Wavelength grid in micrometres, one entry per bin.
    wavelengths: Vec<f64>,
    /// Flux per bin (continuum plus noise, minus injected absorption);
    /// parallel to `wavelengths`.
    flux: Vec<f64>,
    /// Names of the molecules whose absorption features were injected.
    detected_molecules: Vec<String>,
}
/// An undirected link between two molecules found in the same spectrum.
#[derive(Clone, Debug)]
struct CoOccurrenceEdge {
    /// Name of the first molecule of the pair.
    molecule_a: String,
    /// Name of the second molecule of the pair.
    molecule_b: String,
    /// Confidence attached to the pairing; compared against the equilibrium
    /// expectation during scoring.
    confidence: f64,
}
/// Scoring result for one target, as produced by `score_life_candidate`.
#[allow(dead_code)]
#[derive(Clone, Debug)]
struct LifeScore {
    /// Identifier of the scored target.
    target_id: u64,
    /// Human-readable name of the scored target.
    target_name: String,
    /// Mean absolute gap between edge confidence and equilibrium expectation.
    disequilibrium: f64,
    /// Reliability term that grows with the number of observations.
    repeatability: f64,
    /// Penalty term for contamination risk.
    contamination_penalty: f64,
    /// Penalty term for stellar-activity confounding.
    stellar_confound_penalty: f64,
    /// Weighted combination of the terms above, limited to `[0, 1]`.
    total_score: f64,
    /// Uncertainty estimate; shrinks with more molecules and observations.
    uncertainty: f64,
    /// Number of molecules detected in the target's spectrum.
    num_molecules: usize,
    /// Recommended follow-up actions; never empty when built by the scorer.
    follow_up: Vec<&'static str>,
}
// ---------------------------------------------------------------------------
// L0: Synthetic spectrum generation
// ---------------------------------------------------------------------------
/// Generates a deterministic synthetic spectrum for one target.
///
/// The spectrum covers 0.5–5.0 um in 200 evenly spaced bins. Each bin starts
/// from a smooth continuum peaking at 2.0 um plus uniform noise in
/// `[-0.01, 0.01)`. Every entry of `MOLECULES` is then included with
/// probability 0.65; an included molecule carves a Gaussian-shaped dip
/// (depth in `[0.05, 0.35)`) into the bins closer than `width_um` to its
/// centre and is recorded in `detected_molecules`.
///
/// # Parameters
/// - `target_id`: selects the target name (cycling through the built-in list)
///   and perturbs the RNG seed.
/// - `seed`: base seed; the same `(target_id, seed)` always yields the same
///   spectrum.
///
/// # Returns
/// The populated [`Spectrum`].
fn generate_spectrum(target_id: u64, seed: u64) -> Spectrum {
    const TARGET_NAMES: [&str; 15] = [
        "TRAPPIST-1e", "TRAPPIST-1f", "K2-18b", "LHS-1140b",
        "Proxima-Cen-b", "TOI-700d", "Kepler-442b", "GJ-1002b",
        "Wolf-1061c", "Ross-128b", "GJ-273b", "Teegarden-b",
        "LP-890-9c", "TOI-1452b", "GJ-357d",
    ];
    // Wavelength grid: 0.5 to 5.0 um in 200 bins.
    const NUM_BINS: usize = 200;
    const WL_MIN: f64 = 0.5;
    const WL_MAX: f64 = 5.0;

    // Seed mixing must wrap: a plain `*` panics in debug builds (and silently
    // wraps in release) once `target_id` exceeds u64::MAX / 6271.
    let mut rng = seed.wrapping_add(target_id.wrapping_mul(6271));

    // Reduce in u64 first so the index is the same on 32-bit targets, where
    // `target_id as usize` would truncate before the modulo.
    let name_idx = (target_id % TARGET_NAMES.len() as u64) as usize;
    let target_name = TARGET_NAMES[name_idx].to_string();

    let mut wavelengths = Vec::with_capacity(NUM_BINS);
    let mut flux = Vec::with_capacity(NUM_BINS);
    for i in 0..NUM_BINS {
        let wl = WL_MIN + (WL_MAX - WL_MIN) * (i as f64 / (NUM_BINS - 1) as f64);
        // Blackbody-ish continuum (simplified).
        let continuum = 1.0 / (1.0 + ((wl - 2.0) / 2.0).powi(2));
        let noise = (lcg_f64(&mut rng) - 0.5) * 0.02;
        wavelengths.push(wl);
        flux.push(continuum + noise);
    }

    // Inject molecule absorption features. The RNG is consumed once per
    // molecule for the presence draw and once more for the depth when present,
    // so the draw order here determines the output.
    let mut detected_molecules = Vec::new();
    for mol in MOLECULES {
        let has_molecule = lcg_f64(&mut rng) > 0.35;
        if has_molecule {
            let depth = 0.05 + lcg_f64(&mut rng) * 0.3;
            let sigma = mol.width_um * 0.4;
            for (bin_flux, &wl) in flux.iter_mut().zip(&wavelengths) {
                let dist = (wl - mol.wavelength_um).abs();
                if dist < mol.width_um {
                    let gaussian = (-0.5 * (dist / sigma).powi(2)).exp();
                    *bin_flux -= depth * gaussian;
                }
            }
            detected_molecules.push(mol.name.to_string());
        }
    }

    Spectrum {
        target_id,
        target_name,
        wavelengths,
        flux,
        detected_molecules,
    }
}
// ---------------------------------------------------------------------------
// L1: Feature extraction + co-occurrence edges
// ---------------------------------------------------------------------------
/// Builds one co-occurrence edge for every unordered pair of molecules
/// detected in `spectrum`.
///
/// Pairs are emitted in the order `(0,1), (0,2), …, (1,2), …` over
/// `detected_molecules`, and each receives a pseudo-random confidence in
/// `[0.5, 1.0)` from a generator seeded with `0xBEEF + target_id`.
///
/// # Returns
/// `n * (n - 1) / 2` edges for `n` detected molecules (empty for `n < 2`).
fn extract_features(spectrum: &Spectrum) -> Vec<CoOccurrenceEdge> {
    let mols = &spectrum.detected_molecules;

    // Wrapping add: a plain `+` would panic in debug builds for target ids
    // near u64::MAX.
    let mut rng: u64 = 0xBEEF_u64.wrapping_add(spectrum.target_id);

    let pair_count = mols.len() * mols.len().saturating_sub(1) / 2;
    let mut edges = Vec::with_capacity(pair_count);
    for (i, first) in mols.iter().enumerate() {
        for second in &mols[i + 1..] {
            let confidence = 0.5 + lcg_f64(&mut rng) * 0.5;
            edges.push(CoOccurrenceEdge {
                molecule_a: first.clone(),
                molecule_b: second.clone(),
                confidence,
            });
        }
    }
    edges
}
// ---------------------------------------------------------------------------
// L2: Disequilibrium scoring
// ---------------------------------------------------------------------------
/// Returns how strongly molecules `a` and `b` are expected to appear together
/// under chemical equilibrium, as a value in `[0, 1]`.
///
/// The lookup is symmetric in its arguments and case-sensitive. Pairs that
/// are not listed (including a molecule paired with itself) get the neutral
/// value `0.5`.
fn equilibrium_expectation(a: &str, b: &str) -> f64 {
    // Put the pair in lexicographic order so each pairing is listed once.
    let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
    match (lo, hi) {
        ("CO2", "H2O") => 0.8,  // common together
        ("CH4", "O3") => 0.05,  // disequilibrium pair!
        ("H2O", "O3") => 0.3,
        ("CH4", "NH3") => 0.4,
        ("CH4", "CO2") => 0.2,
        _ => 0.5,
    }
}
/// Scores one target as a life candidate (pipeline stage L2).
///
/// # Parameters
/// - `spectrum`: the target's spectrum; supplies id, name and molecule count.
/// - `edges`: co-occurrence edges for that spectrum.
/// - `observations`: number of observations of the target.
///
/// # Returns
/// A [`LifeScore`] whose components are computed as follows:
/// - `disequilibrium`: mean of `|confidence - equilibrium_expectation|` over
///   `edges` (0.0 when there are none).
/// - `repeatability`: `1 - 1 / (1 + 0.3 * observations)`.
/// - `contamination_penalty` / `stellar_confound_penalty`: pseudo-random
///   values in `[0, 0.15)` and `[0, 0.1)`, seeded from the target id.
/// - `total_score`: `0.4 * disequilibrium + 0.3 * repeatability
///   - 0.15 * contamination - 0.15 * stellar`, limited to `[0, 1]`.
/// - `uncertainty`: `0.5 / (1 + 0.3 * molecules + 0.1 * observations)`.
/// - `follow_up`: recommendations derived from the values above; falls back
///   to `"routine_monitoring"` when nothing else applies.
fn score_life_candidate(
    spectrum: &Spectrum,
    edges: &[CoOccurrenceEdge],
    observations: usize,
) -> LifeScore {
    // Wrapping arithmetic: plain `+`/`*` would panic in debug builds (and wrap
    // silently in release) for very large target ids.
    let mut rng: u64 = 0xFACE_u64.wrapping_add(spectrum.target_id.wrapping_mul(31));
    let num_molecules = spectrum.detected_molecules.len();

    // Disequilibrium: how far are co-occurrences from equilibrium expectations?
    let disequilibrium = if edges.is_empty() {
        0.0
    } else {
        let total_gap = edges.iter().fold(0.0_f64, |acc, edge| {
            let expected = equilibrium_expectation(&edge.molecule_a, &edge.molecule_b);
            acc + (edge.confidence - expected).abs()
        });
        total_gap / edges.len() as f64
    };

    // Repeatability: more observations = more reliable.
    let repeatability = 1.0 - (1.0 / (1.0 + observations as f64 * 0.3));

    // Draw order matters for determinism: contamination first, then stellar.
    let contamination_penalty = lcg_f64(&mut rng) * 0.15;
    let stellar_confound_penalty = lcg_f64(&mut rng) * 0.1;

    // NOTE(review): both penalties are already scaled when drawn and are
    // weighted by 0.15 again here, so together they subtract at most ~0.04.
    // Confirm the double scaling is intended.
    let raw = disequilibrium * 0.4 + repeatability * 0.3
        - contamination_penalty * 0.15
        - stellar_confound_penalty * 0.15;
    // `max` then `min` (not `clamp`) so a NaN input collapses to 0.0 instead
    // of propagating into the ranking.
    let total_score = raw.max(0.0).min(1.0);

    // Uncertainty decreases with more molecules and observations.
    let uncertainty = 0.5 / (1.0 + num_molecules as f64 * 0.3 + observations as f64 * 0.1);

    // Follow-up recommendations.
    let mut follow_up: Vec<&'static str> = Vec::new();
    if num_molecules < 3 {
        follow_up.push("additional_nirspec_observations");
    }
    if disequilibrium > 0.3 {
        follow_up.push("high_resolution_spectroscopy");
    }
    if contamination_penalty > 0.1 {
        follow_up.push("contamination_check");
    }
    if follow_up.is_empty() {
        follow_up.push("routine_monitoring");
    }

    LifeScore {
        target_id: spectrum.target_id,
        target_name: spectrum.target_name.clone(),
        disequilibrium,
        repeatability,
        contamination_penalty,
        stellar_confound_penalty,
        total_score,
        uncertainty,
        num_molecules,
        follow_up,
    }
}
// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------
fn main() {
println!("=== Life Candidate Scoring Pipeline (L0-L2) ===\n");
let dim = 64;
let num_targets = 15;
let tmp_dir = TempDir::new().expect("failed to create temp dir");
let store_path = tmp_dir.path().join("life_candidate.rvf");
let options = RvfOptions {
dimension: dim as u16,
metric: DistanceMetric::L2,
..Default::default()
};
let mut store = RvfStore::create(&store_path, options).expect("failed to create store");
// ====================================================================
// L0: Ingest — generate synthetic JWST NIRSpec spectra
// ====================================================================
println!("--- L0. Ingest: Synthetic JWST NIRSpec Spectra ---");
let spectra: Vec<Spectrum> = (0..num_targets)
.map(|i| generate_spectrum(i, 42))
.collect();
// Store spectral windows as embeddings
let mut all_vectors: Vec<Vec<f32>> = Vec::new();
let mut all_ids: Vec<u64> = Vec::new();
let mut all_metadata: Vec<MetadataEntry> = Vec::new();
let mut global_id = 0u64;
// Each spectrum gets split into wavelength bands
let bands = ["vis", "nir-j", "nir-h", "nir-k", "mir"];
let band_ranges: &[(f64, f64)] = &[
(0.5, 0.9), (0.9, 1.4), (1.4, 1.8), (1.8, 2.5), (2.5, 5.0),
];
for spectrum in &spectra {
for (band_idx, band_name) in bands.iter().enumerate() {
let (wl_lo, wl_hi) = band_ranges[band_idx];
// Determine dominant molecule in this band
let mut dominant_mol = "none";
for mol in MOLECULES {
if mol.wavelength_um >= wl_lo && mol.wavelength_um < wl_hi {
if spectrum.detected_molecules.contains(&mol.name.to_string()) {
dominant_mol = mol.name;
break;
}
}
}
let vec = random_vector(dim, global_id * 13 + spectrum.target_id * 7);
all_vectors.push(vec);
all_ids.push(global_id);
// Metadata: instrument (0), target_id (1), wavelength_band (2), molecule (3)
all_metadata.push(MetadataEntry {
field_id: 0,
value: MetadataValue::String("jwst-nirspec".to_string()),
});
all_metadata.push(MetadataEntry {
field_id: 1,
value: MetadataValue::U64(spectrum.target_id),
});
all_metadata.push(MetadataEntry {
field_id: 2,
value: MetadataValue::String(band_name.to_string()),
});
all_metadata.push(MetadataEntry {
field_id: 3,
value: MetadataValue::String(dominant_mol.to_string()),
});
global_id += 1;
}
}
let vec_refs: Vec<&[f32]> = all_vectors.iter().map(|v| v.as_slice()).collect();
let ingest = store
.ingest_batch(&vec_refs, &all_ids, Some(&all_metadata))
.expect("ingest failed");
println!(" Targets: {}", num_targets);
println!(" Bands: {:?}", bands);
println!(" Windows: {} total", ingest.accepted);
println!(" Embedding: {} dims", dim);
println!(" Instrument: jwst-nirspec");
println!("\n Target spectra:");
for s in spectra.iter().take(5) {
println!(
" {} (id={}) molecules: [{}]",
s.target_name,
s.target_id,
s.detected_molecules.join(", ")
);
}
// ====================================================================
// L1: Feature extraction — molecule identification + co-occurrence
// ====================================================================
println!("\n--- L1. Feature Extraction: Molecule Co-Occurrence ---");
let all_edges: Vec<Vec<CoOccurrenceEdge>> = spectra.iter().map(|s| extract_features(s)).collect();
let total_edges: usize = all_edges.iter().map(|e| e.len()).sum();
println!(" Total co-occurrence edges: {}", total_edges);
println!("\n Molecule detection summary:");
for mol in MOLECULES {
let count = spectra
.iter()
.filter(|s| s.detected_molecules.contains(&mol.name.to_string()))
.count();
println!(
" {}: detected in {}/{} targets (lambda={:.1}um)",
mol.name, count, num_targets, mol.wavelength_um
);
}
println!("\n Sample co-occurrence edges:");
for (i, edges) in all_edges.iter().enumerate().take(3) {
if !edges.is_empty() {
println!(" {} (target {}):", spectra[i].target_name, i);
for e in edges.iter().take(3) {
println!(
" {} <-> {} conf={:.4}",
e.molecule_a, e.molecule_b, e.confidence
);
}
}
}
// ====================================================================
// L2: Disequilibrium scoring
// ====================================================================
println!("\n--- L2. Disequilibrium Scoring ---");
let mut scores: Vec<LifeScore> = Vec::new();
for (i, spectrum) in spectra.iter().enumerate() {
let num_obs = 3 + (i % 5); // simulate 3-7 observations
let score = score_life_candidate(spectrum, &all_edges[i], num_obs);
scores.push(score);
}
// Sort by total score descending
scores.sort_by(|a, b| b.total_score.partial_cmp(&a.total_score).unwrap());
println!(" Score components: disequilibrium(0.4), repeatability(0.3),");
println!(" -contamination(0.15), -stellar_confound(0.15)\n");
println!(
" {:>14} {:>5} {:>6} {:>5} {:>5} {:>7} {:>5}",
"Target", "Mols", "Diseq", "Rept", "Score", "Uncert", "Rank"
);
println!(
" {:->14} {:->5} {:->6} {:->5} {:->5} {:->7} {:->5}",
"", "", "", "", "", "", ""
);
for (rank, s) in scores.iter().enumerate() {
println!(
" {:>14} {:>5} {:>6.3} {:>5.3} {:>5.3} {:>7.4} {:>5}",
s.target_name,
s.num_molecules,
s.disequilibrium,
s.repeatability,
s.total_score,
s.uncertainty,
rank + 1
);
}
// ====================================================================
// Filtered query: O3-bearing targets
// ====================================================================
println!("\n--- Filtered Query: O3-Bearing Windows ---");
let query_vec = random_vector(dim, 101);
let filter_o3 = FilterExpr::Eq(3, FilterValue::String("O3".to_string()));
let opts_o3 = QueryOptions {
filter: Some(filter_o3),
..Default::default()
};
let results_o3 = store
.query(&query_vec, 10, &opts_o3)
.expect("filtered query failed");
println!(" O3-bearing windows: {}", results_o3.len());
// ====================================================================
// Witness chain: full provenance trace
// ====================================================================
println!("\n--- Witness Chain: Provenance Trace ---");
let chain_steps = [
("genesis", 0x01u8),
("l0_spectrum_ingest", 0x08),
("l0_band_window", 0x02),
("l0_continuum_normalize", 0x02),
("l1_absorption_detect", 0x02),
("l1_molecule_identify", 0x02),
("l1_cooccurrence_build", 0x02),
("l2_equilibrium_compare", 0x02),
("l2_disequilibrium_score", 0x02),
("l2_repeatability_check", 0x02),
("l2_contamination_penalty", 0x02),
("l2_stellar_confound", 0x02),
("l2_final_rank", 0x02),
("provenance_seal", 0x01),
];
let entries: Vec<WitnessEntry> = chain_steps
.iter()
.enumerate()
.map(|(i, (step, wtype))| {
let action_data = format!("life_candidate:{}:step_{}", step, i);
WitnessEntry {
prev_hash: [0u8; 32],
action_hash: shake256_256(action_data.as_bytes()),
timestamp_ns: 1_700_000_000_000_000_000 + i as u64 * 1_000_000_000,
witness_type: *wtype,
}
})
.collect();
let chain_bytes = create_witness_chain(&entries);
let verified = verify_witness_chain(&chain_bytes).expect("chain verification failed");
println!(" Chain entries: {}", verified.len());
println!(" Chain size: {} bytes", chain_bytes.len());
println!(" Integrity: VALID");
println!("\n Pipeline steps:");
for (i, (step, _)) in chain_steps.iter().enumerate() {
let wtype_name = match verified[i].witness_type {
0x01 => "PROV",
0x02 => "COMP",
0x05 => "ATTS",
0x08 => "DATA",
_ => "????",
};
println!(" [{:>4}] {:>2} -> {}", wtype_name, i, step);
}
// ====================================================================
// Summary
// ====================================================================
println!("\n=== Life Candidate Summary ===\n");
println!(" Targets analyzed: {}", num_targets);
println!(" Spectral windows: {}", ingest.accepted);
println!(" Co-occurrence edges: {}", total_edges);
println!(" Witness entries: {}", verified.len());
if let Some(best) = scores.first() {
println!("\n Top life candidate:");
println!(" Target: {}", best.target_name);
println!(" Molecules: {}", best.num_molecules);
println!(" Disequilib.: {:.4}", best.disequilibrium);
println!(" Total score: {:.4}", best.total_score);
println!(" Uncertainty: {:.4}", best.uncertainty);
println!(" Follow-up: {:?}", best.follow_up);
}
store.close().expect("failed to close store");
println!("\nDone.");
}