Merge pull request #558 from broadinstitute/rp_vqsr_nondeterminism_fix
Fix for non-determinism in the VQSR with very large data sets
This commit is contained in:
commit
7c7ff90266
|
|
@ -247,7 +247,7 @@ public class VariantDataManager {
|
|||
logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." );
|
||||
} else if( trainingData.size() > VRAC.MAX_NUM_TRAINING_DATA ) {
|
||||
logger.warn( "WARNING: Very large training set detected. Downsampling to " + VRAC.MAX_NUM_TRAINING_DATA + " training variants." );
|
||||
Collections.shuffle(trainingData);
|
||||
Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
return trainingData.subList(0, VRAC.MAX_NUM_TRAINING_DATA);
|
||||
}
|
||||
return trainingData;
|
||||
|
|
@ -295,13 +295,13 @@ public class VariantDataManager {
|
|||
|
||||
/**
 * Assembles a randomly ordered sample of variants for plotting.
 *
 * Each of the three input lists is shuffled in place, up to {@code numToAdd} leading elements of
 * each are appended to a fresh result list (first {@code trainingData}, then
 * {@code antiTrainingData}, then {@code evaluationData}), and the combined list is shuffled once
 * more before being returned.
 *
 * NOTE(review): this method is shown as a diff hunk whose +/- markers were lost. Going by the hunk
 * header (13 lines before, 13 lines after), the {@code Collections.shuffle(x)} calls without a
 * generator argument appear to be the removed lines and the calls that pass
 * {@code GenomeAnalysisEngine.getRandomGenerator()} their replacements -- confirm against the
 * actual file before relying on this reading.
 *
 * @param numToAdd         maximum number of elements taken from each of the three input lists
 * @param trainingData     first list sampled; reordered in place by the shuffle
 * @param antiTrainingData second list sampled; reordered in place by the shuffle
 * @param evaluationData   third list sampled; reordered in place by the shuffle
 * @return a new list holding at most {@code 3 * numToAdd} elements, in shuffled order
 */
public List<VariantDatum> getRandomDataForPlotting( final int numToAdd, final List<VariantDatum> trainingData, final List<VariantDatum> antiTrainingData, final List<VariantDatum> evaluationData ) {
|
||||
final List<VariantDatum> returnData = new ExpandingArrayList<>();
|
||||
Collections.shuffle(trainingData);
|
||||
Collections.shuffle(antiTrainingData);
|
||||
Collections.shuffle(evaluationData);
|
||||
Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(antiTrainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(evaluationData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
returnData.addAll(trainingData.subList(0, Math.min(numToAdd, trainingData.size())));
|
||||
returnData.addAll(antiTrainingData.subList(0, Math.min(numToAdd, antiTrainingData.size())));
|
||||
returnData.addAll(evaluationData.subList(0, Math.min(numToAdd, evaluationData.size())));
|
||||
Collections.shuffle(returnData);
|
||||
Collections.shuffle(returnData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
return returnData;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -75,9 +76,9 @@ public class RankSumUnitTest {
|
|||
makeDistribution(distribution20_40, 40, skew, observations/2);
|
||||
|
||||
// shuffle the observations
|
||||
Collections.shuffle(distribution20);
|
||||
Collections.shuffle(distribution30);
|
||||
Collections.shuffle(distribution20_40);
|
||||
Collections.shuffle(distribution20, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(distribution30, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(distribution20_40, GenomeAnalysisEngine.getRandomGenerator());
|
||||
}
|
||||
|
||||
private static void makeDistribution(final List<Integer> result, final int target, final int skew, final int numObservations) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue