VR integration tests are longer but much more useful.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4210 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-09-03 15:50:19 +00:00
parent da11efa1a2
commit e3962c0d13
5 changed files with 23 additions and 20 deletions

View File

@ -134,7 +134,6 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
public void initialize() {
// todo -- ryan, it's always best to use a data structure, I need to read these in too.
// todo -- I would have updated your code but there's no integration test to protect me from unexpected effects
boolean firstLine = true;
try {
for( final String line : new XReadLines( TRANCHES_FILE ) ) {

View File

@ -139,10 +139,8 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
foundDBSNP = true;
} else if ( d.getName().equals("hapmap") ) {
logger.info("Found HapMap track for use in training with weight = " + WEIGHT_HAPMAP);
foundDBSNP = true;
} else if ( d.getName().equals("1kg") ) {
logger.info("Found 1KG track for use in training with weight = " + WEIGHT_1KG);
foundDBSNP = true;
} else {
logger.info("Not evaluating ROD binding " + d.getName());
}
@ -228,8 +226,8 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
final VariantDataManager dataManager = new VariantDataManager( reduceSum, annotationKeys );
reduceSum.clear(); // Don't need this ever again, clean up some memory
logger.info( "There are " + dataManager.numVariants + " variants with > 0 clustering weight and " + dataManager.numAnnotations + " annotations." );
logger.info( "The annotations are: " + annotationKeys );
logger.info( "There are " + dataManager.numVariants + " variants with > 0 clustering weight and qual > threshold (--qualThreshold = " + QUAL_THRESHOLD + ")" );
logger.info( "The annotations used for clustering are: " + annotationKeys );
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]

View File

@ -68,7 +68,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
private double[] pClusterLog10;
private final double[] determinant;
private final double stdThreshold;
private double singletonFPRate = -1; // Est. FP rate for singleton calls. Used to estimate FP rate as a function of AC
private double singletonFPRate = -1; // Estimated FP rate for singleton calls. Used to estimate FP rate as a function of AC
private double[] empiricalMu;
private Matrix empiricalSigma;
@ -181,16 +181,20 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
// Only cluster with a good set of knowns. Filter based on being too many std's away from the mean annotation value
// Filtering based on known status and qual threshold happens in GenerateVariantClusters
long numAboveSTD = 0L;
for( int iii = 0; iii < dataManager.data.length; iii++ ) {
final VariantDatum datum = dataManager.data[iii];
for( final double val : datum.annotations ) {
if( Math.abs(val) > stdThreshold ) {
datum.weight = 0.0;
numAboveSTD++;
break;
}
}
}
logger.info( numAboveSTD + " variants were rejected from the training set for having annotation values more than X standard deviations away from the mean. (--stdThreshold = " + stdThreshold + ")" );
generateEmpricalStats( dataManager.data );
logger.info("Initializing using k-means...");
@ -338,7 +342,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
if(rand.nextBoolean()) {
randSigma[ppp][jjj] *= -1.0;
}
if(jjj != ppp) { randSigma[jjj][ppp] = 0.0; } // Sigma is a symmetric, positive-definite matrix created by taking a lower diagonal matrix and multiplying by its transpose
if(jjj != ppp) { randSigma[jjj][ppp] = 0.0; } // Sigma is a symmetric, positive-definite matrix created by taking a lower diagonal matrix and multiplying it by its transpose
}
}
if( FORCE_INDEPENDENT_ANNOTATIONS ) {

View File

@ -48,6 +48,7 @@ public abstract class BaseTest {
protected static String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
protected static String validationDataLocation = GATKDataLocation + "Validation_Data/";
protected static String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
protected static String comparisonDataLocation = GATKDataLocation + "Comparisons/";
protected static String testDir = "testdata/";
protected static boolean alreadySetup = false;

View File

@ -14,8 +14,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testGenerateVariantClusters() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "cb3e8d9072d478243c3f9d0ee09fef3b" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "0d4fee916a886ca98c286d3b7fed0ff6" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "7c5431a560e9ca257523cf68b808b058" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "a438635bc96a80c5e6f090d82a394819" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -24,11 +24,12 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -weightDBSNP 1.0 -weightHapMap 1.0" +
" -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" +
" -weightDBSNP 0.2 -weightHapMap 1.0" +
" -T GenerateVariantClusters" +
" -B:input,VCF " + vcf +
" -L 1:1-100,000,000" +
" -L 1:50,000,000-200,000,000" +
" -qual 50.0" +
" --ignore_filter GATK_STANDARD" +
" -an QD -an HRun -an SB" +
" -clusterFile %s",
@ -43,9 +44,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
public void testVariantRecalibrator() {
HashMap<String, List<String>> e = new HashMap<String, List<String>>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf",
Arrays.asList("9c25a88c9fa48d7373029d2bfb40ad54", "937080353c7e03e11f8a70fc0004bf76","5b89fa5a4edf0080d64230d4103d2b8d")); // Each test checks the md5 of three output files
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf",
Arrays.asList("074462f829ff553584e9b5e330be6d4b", "f7c5c6cff9dd5280b25e24e0591e4cb0","1de1473db5720b882edf1381fa3dd039")); // Each test checks the md5 of three output files
Arrays.asList("dad6d1458a453a1b720bd79be922df73", "2a70516b26f6ccf9db638153661eee62","c1952240ff3105354e04363c18a8eab9")); // Each test checks the md5 of three output files
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf",
Arrays.asList("89d7e43d9f1086c859abb38fb1b51d40", "d2471d2dd956392de7e776463299e28d","eeafcb907f93ace2ac47a98497b52b0d")); // Each test checks the md5 of three output files
for ( Map.Entry<String, List<String>> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -56,10 +57,10 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -B:hapmap,VCF " + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" +
" -T VariantRecalibrator" +
" -B:input,VCF " + vcf +
" -L 1:40,000,000-100,000,000" +
" -L 1:20,000,000-100,000,000" +
" --ignore_filter GATK_STANDARD" +
" --ignore_filter HARD_TO_VALIDATE" +
" -clusterFile " + clusterFile +
@ -81,8 +82,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testApplyVariantCuts() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "5b00a80b2a8fd078d6c2aa16924920e4" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.0.vcf", "d785380792da88f8d64fe1cf1133eeb0" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "5511e26689ddd1486049db083dcd79c0" );
e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "4f7f13d1f190e882a466003ce0917222" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();
@ -96,7 +97,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
"-R " + b36KGReference +
" --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" +
" -T ApplyVariantCuts" +
" -L 1:40,000,000-100,000,000" +
" -L 1:20,000,000-100,000,000" +
" -B:input,VCF " + inputVCFFile +
" -o %s" +
" -tranchesFile " + tranchesFile,