Added more integration tests for the variant quality score recalibrator
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4181 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
fc5caa98a5
commit
469bbaa240
|
|
@ -112,7 +112,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
|||
private ExpandingArrayList<String> annotationKeys;
|
||||
private Set<String> ignoreInputFilterSet = null;
|
||||
private Set<String> inputNames = new HashSet<String>();
|
||||
|
||||
VariantGaussianMixtureModel theModel = new VariantGaussianMixtureModel();
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -174,7 +174,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
|||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
int iii = 0;
|
||||
for( final String key : annotationKeys ) {
|
||||
annotationValues[iii++] = VariantGaussianMixtureModel.decodeAnnotation( key, vc, true );
|
||||
annotationValues[iii++] = theModel.decodeAnnotation( key, vc, true );
|
||||
}
|
||||
|
||||
final VariantDatum variantDatum = new VariantDatum();
|
||||
|
|
@ -234,7 +234,6 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
|||
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
|
||||
|
||||
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
|
||||
VariantGaussianMixtureModel theModel;
|
||||
switch (OPTIMIZATION_MODEL) {
|
||||
case GAUSSIAN_MIXTURE_MODEL:
|
||||
theModel = new VariantGaussianMixtureModel( dataManager, MAX_GAUSSIANS, MAX_ITERATIONS, FORCE_INDEPENDENT,
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
private final int maxGaussians;
|
||||
private final int maxIterations;
|
||||
private final static long RANDOM_SEED = 91801305;
|
||||
private final static Random rand = new Random( RANDOM_SEED );
|
||||
private final Random rand = new Random( RANDOM_SEED );
|
||||
private final double MIN_PROB_CONVERGENCE = 1E-5;
|
||||
|
||||
private final double SHRINKAGE;
|
||||
|
|
@ -85,6 +85,23 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*");
|
||||
private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*");
|
||||
|
||||
public VariantGaussianMixtureModel() {
|
||||
dataManager = null;
|
||||
maxGaussians = 0;
|
||||
maxIterations = 0;
|
||||
SHRINKAGE = 0;
|
||||
DIRICHLET_PARAMETER = 0;
|
||||
FORCE_INDEPENDENT_ANNOTATIONS = false;
|
||||
mu = null;
|
||||
sigma = null;
|
||||
sigmaInverse = null;
|
||||
determinant = null;
|
||||
stdThreshold = 0;
|
||||
hyperParameter_a = null;
|
||||
hyperParameter_b = null;
|
||||
hyperParameter_lambda = null;
|
||||
}
|
||||
|
||||
public VariantGaussianMixtureModel( final VariantDataManager _dataManager, final int _maxGaussians, final int _maxIterations,
|
||||
final boolean _forceIndependent, final double _stdThreshold, final double _shrinkage, final double _dirichlet) {
|
||||
dataManager = _dataManager;
|
||||
|
|
@ -398,7 +415,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
}
|
||||
}
|
||||
|
||||
public static double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
|
||||
public double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
|
||||
double value;
|
||||
//if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) {
|
||||
// value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance
|
||||
|
|
|
|||
|
|
@ -14,7 +14,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testGenerateVariantClusters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "79727fb46bb60be30eae2666e4f792b6" );
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "cb3e8d9072d478243c3f9d0ee09fef3b" );
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "0d4fee916a886ca98c286d3b7fed0ff6" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
|
|
@ -23,12 +24,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
|
||||
" -weightDBSNP 1.0 -weightHapMap 1.0" +
|
||||
" -T GenerateVariantClusters" +
|
||||
" -B:input,VCF " + vcf +
|
||||
" -L 1:1-100,000,000" +
|
||||
" --ignore_filter GATK_STANDARD" +
|
||||
" -an QD -an HRun -an SB" +
|
||||
" -weightDBSNP 1.0" +
|
||||
" -clusterFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
|
|
@ -39,18 +41,22 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testVariantRecalibrator() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "f2790e2f4f3c23df800c18aaa39b3fde" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
HashMap<String, List<String>> e = new HashMap<String, List<String>>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
|
||||
Arrays.asList("ad0868adddb2e837b4fc08f140e4d9c3", "937080353c7e03e11f8a70fc0004bf76","5b89fa5a4edf0080d64230d4103d2b8d")); // Each test checks the md5 of three output files
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf",
|
||||
Arrays.asList("054d3228acfd6b02d24bfcf2fbd280a0", "f7c5c6cff9dd5280b25e24e0591e4cb0","1de1473db5720b882edf1381fa3dd039")); // Each test checks the md5 of three output files
|
||||
|
||||
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
List<String> md5s = entry.getValue();
|
||||
String clusterFile = clusterFiles.get(vcf);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
||||
if ( clusterFile != null ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
|
||||
" -T VariantRecalibrator" +
|
||||
" -B:input,VCF " + vcf +
|
||||
" -L 1:40,000,000-100,000,000" +
|
||||
|
|
@ -62,7 +68,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -tranchesFile %s" +
|
||||
" -reportDatFile %s",
|
||||
3, // two output file
|
||||
Arrays.asList(md5, "d979864c46ae237f52f1002f0ec19b16","ab438d03e276a053d7534056f62c83f3"));
|
||||
md5s);
|
||||
List<File> result = executeTest("testVariantRecalibrator", spec).getFirst();
|
||||
inputVCFFiles.put(vcf, result.get(0).getAbsolutePath());
|
||||
tranchesFiles.put(vcf, result.get(1).getAbsolutePath());
|
||||
|
|
@ -75,7 +81,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testApplyVariantCuts() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "d30eafb642a80a00bd82fb4140b5277e" );
|
||||
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "841deca1cb30a5a081cfbaa6f663e22a" );
|
||||
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "6234259e75fd7c1db31566dde1c55a82" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String vcf = entry.getKey();
|
||||
|
|
@ -95,7 +102,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -tranchesFile " + tranchesFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("testApplyVariantCuts", spec).getFirst();
|
||||
executeTest("testApplyVariantCuts", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue