Added more integration tests for the variant quality score recalibrator

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4181 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-09-01 15:31:24 +00:00
parent fc5caa98a5
commit 469bbaa240
3 changed files with 38 additions and 15 deletions

View File

@ -112,7 +112,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
private ExpandingArrayList<String> annotationKeys;
private Set<String> ignoreInputFilterSet = null;
private Set<String> inputNames = new HashSet<String>();
VariantGaussianMixtureModel theModel = new VariantGaussianMixtureModel();
//---------------------------------------------------------------------------------------------------------------
//
@ -174,7 +174,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
int iii = 0;
for( final String key : annotationKeys ) {
annotationValues[iii++] = VariantGaussianMixtureModel.decodeAnnotation( key, vc, true );
annotationValues[iii++] = theModel.decodeAnnotation( key, vc, true );
}
final VariantDatum variantDatum = new VariantDatum();
@ -234,7 +234,6 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
VariantGaussianMixtureModel theModel;
switch (OPTIMIZATION_MODEL) {
case GAUSSIAN_MIXTURE_MODEL:
theModel = new VariantGaussianMixtureModel( dataManager, MAX_GAUSSIANS, MAX_ITERATIONS, FORCE_INDEPENDENT,

View File

@ -60,7 +60,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
private final int maxGaussians;
private final int maxIterations;
private final static long RANDOM_SEED = 91801305;
private final static Random rand = new Random( RANDOM_SEED );
private final Random rand = new Random( RANDOM_SEED );
private final double MIN_PROB_CONVERGENCE = 1E-5;
private final double SHRINKAGE;
@ -85,6 +85,23 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*");
private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*");
public VariantGaussianMixtureModel() {
dataManager = null;
maxGaussians = 0;
maxIterations = 0;
SHRINKAGE = 0;
DIRICHLET_PARAMETER = 0;
FORCE_INDEPENDENT_ANNOTATIONS = false;
mu = null;
sigma = null;
sigmaInverse = null;
determinant = null;
stdThreshold = 0;
hyperParameter_a = null;
hyperParameter_b = null;
hyperParameter_lambda = null;
}
public VariantGaussianMixtureModel( final VariantDataManager _dataManager, final int _maxGaussians, final int _maxIterations,
final boolean _forceIndependent, final double _stdThreshold, final double _shrinkage, final double _dirichlet) {
dataManager = _dataManager;
@ -398,7 +415,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
}
}
public static double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
public double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
double value;
//if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) {
// value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance

View File

@ -14,7 +14,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testGenerateVariantClusters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "79727fb46bb60be30eae2666e4f792b6" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "cb3e8d9072d478243c3f9d0ee09fef3b" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "0d4fee916a886ca98c286d3b7fed0ff6" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -23,12 +24,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -weightDBSNP 1.0 -weightHapMap 1.0" +
" -T GenerateVariantClusters" +
" -B:input,VCF " + vcf +
" -L 1:1-100,000,000" +
" --ignore_filter GATK_STANDARD" +
" -an QD -an HRun -an SB" +
" -weightDBSNP 1.0" +
" -clusterFile %s",
1, // just one output file
Arrays.asList(md5));
@ -39,18 +41,22 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "f2790e2f4f3c23df800c18aaa39b3fde" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
HashMap<String, List<String>> e = new HashMap<String, List<String>>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
Arrays.asList("ad0868adddb2e837b4fc08f140e4d9c3", "937080353c7e03e11f8a70fc0004bf76","5b89fa5a4edf0080d64230d4103d2b8d")); // Each test checks the md5 of three output files
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf",
Arrays.asList("054d3228acfd6b02d24bfcf2fbd280a0", "f7c5c6cff9dd5280b25e24e0591e4cb0","1de1473db5720b882edf1381fa3dd039")); // Each test checks the md5 of three output files
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
String vcf = entry.getKey();
String md5 = entry.getValue();
List<String> md5s = entry.getValue();
String clusterFile = clusterFiles.get(vcf);
System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
if ( clusterFile != null ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -T VariantRecalibrator" +
" -B:input,VCF " + vcf +
" -L 1:40,000,000-100,000,000" +
@ -62,7 +68,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -tranchesFile %s" +
" -reportDatFile %s",
3, // two output file
Arrays.asList(md5, "d979864c46ae237f52f1002f0ec19b16","ab438d03e276a053d7534056f62c83f3"));
md5s);
List<File> result = executeTest("testVariantRecalibrator", spec).getFirst();
inputVCFFiles.put(vcf, result.get(0).getAbsolutePath());
tranchesFiles.put(vcf, result.get(1).getAbsolutePath());
@ -75,7 +81,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testApplyVariantCuts() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "d30eafb642a80a00bd82fb4140b5277e" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "841deca1cb30a5a081cfbaa6f663e22a" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "6234259e75fd7c1db31566dde1c55a82" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -95,7 +102,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -tranchesFile " + tranchesFile,
1, // just one output file
Arrays.asList(md5));
List<File> result = executeTest("testApplyVariantCuts", spec).getFirst();
executeTest("testApplyVariantCuts", spec);
}
}
}