Merge pull request #356 from ruvnet/fix/large-dataset-training

fix: skip triplet JSON export for large datasets (>=100K)
This commit is contained in:
rUv 2026-04-03 09:37:30 -04:00 committed by GitHub
commit aae01a2be8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 7 additions and 3 deletions

View File

@@ -1257,9 +1257,13 @@ async function main() {
contrastiveResult.finalLoss = finalContrastiveLoss;
contrastiveResult.improvement = contrastiveImprovement;
// Export contrastive training data
const contrastiveOutDir = contrastiveTrainer.exportTrainingData();
console.log(` Training data exported to: ${contrastiveOutDir}`);
// Export contrastive training data (skip for large datasets to avoid JSON string limit)
if (contrastiveTrainer.getTripletCount() < 100000) {
const contrastiveOutDir = contrastiveTrainer.exportTrainingData();
console.log(` Training data exported to: ${contrastiveOutDir}`);
} else {
console.log(` Skipping triplet export (${contrastiveTrainer.getTripletCount()} triplets too large for JSON)`);
}
// -----------------------------------------------------------------------
// Phase 2: Task head training via TrainingPipeline