Added more integration tests for the variant quality score recalibrator

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4181 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-09-01 15:31:24 +00:00
parent fc5caa98a5
commit 469bbaa240
3 changed files with 38 additions and 15 deletions

View File

@ -112,7 +112,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
private ExpandingArrayList<String> annotationKeys; private ExpandingArrayList<String> annotationKeys;
private Set<String> ignoreInputFilterSet = null; private Set<String> ignoreInputFilterSet = null;
private Set<String> inputNames = new HashSet<String>(); private Set<String> inputNames = new HashSet<String>();
VariantGaussianMixtureModel theModel = new VariantGaussianMixtureModel();
//--------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------
// //
@ -174,7 +174,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) { if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
int iii = 0; int iii = 0;
for( final String key : annotationKeys ) { for( final String key : annotationKeys ) {
annotationValues[iii++] = VariantGaussianMixtureModel.decodeAnnotation( key, vc, true ); annotationValues[iii++] = theModel.decodeAnnotation( key, vc, true );
} }
final VariantDatum variantDatum = new VariantDatum(); final VariantDatum variantDatum = new VariantDatum();
@ -234,7 +234,6 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ] dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it // Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
VariantGaussianMixtureModel theModel;
switch (OPTIMIZATION_MODEL) { switch (OPTIMIZATION_MODEL) {
case GAUSSIAN_MIXTURE_MODEL: case GAUSSIAN_MIXTURE_MODEL:
theModel = new VariantGaussianMixtureModel( dataManager, MAX_GAUSSIANS, MAX_ITERATIONS, FORCE_INDEPENDENT, theModel = new VariantGaussianMixtureModel( dataManager, MAX_GAUSSIANS, MAX_ITERATIONS, FORCE_INDEPENDENT,

View File

@ -60,7 +60,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
private final int maxGaussians; private final int maxGaussians;
private final int maxIterations; private final int maxIterations;
private final static long RANDOM_SEED = 91801305; private final static long RANDOM_SEED = 91801305;
private final static Random rand = new Random( RANDOM_SEED ); private final Random rand = new Random( RANDOM_SEED );
private final double MIN_PROB_CONVERGENCE = 1E-5; private final double MIN_PROB_CONVERGENCE = 1E-5;
private final double SHRINKAGE; private final double SHRINKAGE;
@ -85,6 +85,23 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*"); private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*");
private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*"); private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*");
public VariantGaussianMixtureModel() {
dataManager = null;
maxGaussians = 0;
maxIterations = 0;
SHRINKAGE = 0;
DIRICHLET_PARAMETER = 0;
FORCE_INDEPENDENT_ANNOTATIONS = false;
mu = null;
sigma = null;
sigmaInverse = null;
determinant = null;
stdThreshold = 0;
hyperParameter_a = null;
hyperParameter_b = null;
hyperParameter_lambda = null;
}
public VariantGaussianMixtureModel( final VariantDataManager _dataManager, final int _maxGaussians, final int _maxIterations, public VariantGaussianMixtureModel( final VariantDataManager _dataManager, final int _maxGaussians, final int _maxIterations,
final boolean _forceIndependent, final double _stdThreshold, final double _shrinkage, final double _dirichlet) { final boolean _forceIndependent, final double _stdThreshold, final double _shrinkage, final double _dirichlet) {
dataManager = _dataManager; dataManager = _dataManager;
@ -398,7 +415,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
} }
} }
public static double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) { public double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
double value; double value;
//if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) { //if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) {
// value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance // value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance

View File

@ -14,7 +14,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test @Test
public void testGenerateVariantClusters() { public void testGenerateVariantClusters() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "79727fb46bb60be30eae2666e4f792b6" ); e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "cb3e8d9072d478243c3f9d0ee09fef3b" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "0d4fee916a886ca98c286d3b7fed0ff6" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey(); String vcf = entry.getKey();
@ -23,12 +24,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -weightDBSNP 1.0 -weightHapMap 1.0" +
" -T GenerateVariantClusters" + " -T GenerateVariantClusters" +
" -B:input,VCF " + vcf + " -B:input,VCF " + vcf +
" -L 1:1-100,000,000" + " -L 1:1-100,000,000" +
" --ignore_filter GATK_STANDARD" + " --ignore_filter GATK_STANDARD" +
" -an QD -an HRun -an SB" + " -an QD -an HRun -an SB" +
" -weightDBSNP 1.0" +
" -clusterFile %s", " -clusterFile %s",
1, // just one output file 1, // just one output file
Arrays.asList(md5)); Arrays.asList(md5));
@ -39,18 +41,22 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test @Test
public void testVariantRecalibrator() { public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, List<String>> e = new HashMap<String, List<String>>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "f2790e2f4f3c23df800c18aaa39b3fde" ); e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
Arrays.asList("ad0868adddb2e837b4fc08f140e4d9c3", "937080353c7e03e11f8a70fc0004bf76","5b89fa5a4edf0080d64230d4103d2b8d")); // Each test checks the md5 of three output files
for ( Map.Entry<String, String> entry : e.entrySet() ) { e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf",
Arrays.asList("054d3228acfd6b02d24bfcf2fbd280a0", "f7c5c6cff9dd5280b25e24e0591e4cb0","1de1473db5720b882edf1381fa3dd039")); // Each test checks the md5 of three output files
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
String vcf = entry.getKey(); String vcf = entry.getKey();
String md5 = entry.getValue(); List<String> md5s = entry.getValue();
String clusterFile = clusterFiles.get(vcf); String clusterFile = clusterFiles.get(vcf);
System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
if ( clusterFile != null ) { if ( clusterFile != null ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -T VariantRecalibrator" + " -T VariantRecalibrator" +
" -B:input,VCF " + vcf + " -B:input,VCF " + vcf +
" -L 1:40,000,000-100,000,000" + " -L 1:40,000,000-100,000,000" +
@ -62,7 +68,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -tranchesFile %s" + " -tranchesFile %s" +
" -reportDatFile %s", " -reportDatFile %s",
3, // two output file 3, // two output file
Arrays.asList(md5, "d979864c46ae237f52f1002f0ec19b16","ab438d03e276a053d7534056f62c83f3")); md5s);
List<File> result = executeTest("testVariantRecalibrator", spec).getFirst(); List<File> result = executeTest("testVariantRecalibrator", spec).getFirst();
inputVCFFiles.put(vcf, result.get(0).getAbsolutePath()); inputVCFFiles.put(vcf, result.get(0).getAbsolutePath());
tranchesFiles.put(vcf, result.get(1).getAbsolutePath()); tranchesFiles.put(vcf, result.get(1).getAbsolutePath());
@ -75,7 +81,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test @Test
public void testApplyVariantCuts() { public void testApplyVariantCuts() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "d30eafb642a80a00bd82fb4140b5277e" ); e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "841deca1cb30a5a081cfbaa6f663e22a" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "6234259e75fd7c1db31566dde1c55a82" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey(); String vcf = entry.getKey();
@ -95,7 +102,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -tranchesFile " + tranchesFile, " -tranchesFile " + tranchesFile,
1, // just one output file 1, // just one output file
Arrays.asList(md5)); Arrays.asList(md5));
List<File> result = executeTest("testApplyVariantCuts", spec).getFirst(); executeTest("testApplyVariantCuts", spec);
} }
} }
} }