diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 88595cae6..2f92eb837 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -104,6 +104,8 @@ public class CovariateCounterWalker extends LocusWalker { + + ///////////////////////////// + // Inputs + ///////////////////////////// + @Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true) + private File TRANCHES_FILE; + + ///////////////////////////// + // Outputs + ///////////////////////////// + @Output( doc="The output filtered VCF file", required=true) + private VCFWriter vcfWriter = null; + ///////////////////////////// // Command Line Arguments ///////////////////////////// - @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=true) - private String TRANCHE_FILENAME = "optimizer.dat.tranches"; - @Argument(fullName="outputVCFFile", shortName="outputVCF", doc="The output filtered VCF file", required=true) - private String OUTPUT_FILENAME = "optimizer.vcf"; @Argument(fullName="fdr_filter_level", shortName="fdr_filter_level", doc="The FDR level at which to start filtering.", required=false) private double FDR_FILTER_LEVEL = 0.0; - ///////////////////////////// // Private Member Variables ///////////////////////////// - private VCFWriter vcfWriter; final ExpandingArrayList qCuts = new ExpandingArrayList(); final ExpandingArrayList filterName = new ExpandingArrayList(); @@ -128,7 +137,7 @@ public class ApplyVariantCuts extends RodWalker { // todo -- I would have updated your code but there's no integration test to protect me from unexpected effects boolean firstLine = true; try { - for( final String line : new XReadLines(new File( TRANCHE_FILENAME )) ) { + for( final String line : new XReadLines( TRANCHES_FILE ) ) { if( !firstLine ) { final String[] vals = line.split(","); double FDR = Double.parseDouble(vals[0]); @@ -147,7 +156,7 @@ public class ApplyVariantCuts extends RodWalker { firstLine = false; } } catch( FileNotFoundException e ) { - throw new StingException("Can not find input file: " + TRANCHE_FILENAME); + throw new StingException("Can not find input file: " + TRANCHES_FILE); } // setup the header fields @@ -155,7 +164,6 @@ public class ApplyVariantCuts extends RodWalker { hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score")); hInfo.add(new VCFHeaderLine("source", "VariantOptimizer")); - vcfWriter = new StandardVCFWriter( new File(OUTPUT_FILENAME) ); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getSampleListWithVCFHeader(getToolkit(), null)); @@ -195,8 +203,9 @@ public class ApplyVariantCuts extends RodWalker { break; } } - if( filterString == null ) + if( filterString == null ) { filterString = filterName.get(0)+"+"; + } if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { Set filters = new HashSet(); @@ -227,7 +236,6 @@ public class ApplyVariantCuts extends RodWalker { } public void onTraversalDone( Integer reduceSum ) { - vcfWriter.close(); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index bc60bed65..575e9a20c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; @@ -41,7 +42,9 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.commandline.Argument; +import java.io.File; import java.io.IOException; +import java.io.PrintStream; import java.util.*; /** @@ -55,6 +58,13 @@ import java.util.*; public class GenerateVariantClustersWalker extends RodWalker, ExpandingArrayList> { + ///////////////////////////// + // Outputs + ///////////////////////////// + + @Output(fullName="cluster_file", shortName="clusterFile", doc="The output cluster file", required=true) + private PrintStream CLUSTER_FILE; + ///////////////////////////// // Command Line Arguments ///////////////////////////// @@ -64,17 +74,15 @@ public class GenerateVariantClustersWalker extends RodWalker(Arrays.asList(USE_ANNOTATIONS)); @@ -222,8 +229,8 @@ public class GenerateVariantClustersWalker extends RodWalker annotationLines = new ExpandingArrayList(); - final ExpandingArrayList alleleCountLines = new ExpandingArrayList(); final ExpandingArrayList clusterLines = new ExpandingArrayList(); try { - for ( final String line : new XReadLines(new File( clusterFileName )) ) { + for ( final String line : new XReadLines( clusterFile ) ) { if( ANNOTATION_PATTERN.matcher(line).matches() ) { annotationLines.add(line); - } else if( ALLELECOUNT_PATTERN.matcher(line).matches() ) { - alleleCountLines.add(line); } else if( CLUSTER_PATTERN.matcher(line).matches() ) { clusterLines.add(line); } else { - throw new StingException("Malformed input file: " + clusterFileName); + throw new StingException("Malformed input file: " + clusterFile); } } } catch ( FileNotFoundException e ) { - throw new StingException("Can not find input file: " + clusterFileName); + throw new StingException("Can not find input file: " + clusterFile); } dataManager = new VariantDataManager( annotationLines ); @@ -140,7 +131,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel DIRICHLET_PARAMETER = 0; maxIterations = 0; stdThreshold = 0.0; - qualThreshold = 0.0; FORCE_INDEPENDENT_ANNOTATIONS = false; hyperParameter_a = null; hyperParameter_b = null; @@ -155,14 +145,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel pClusterLog10 = new double[maxGaussians]; determinant = new double[maxGaussians]; - alleleCountFactorArray = new double[alleleCountLines.size() + 1]; - for( final String line : alleleCountLines ) { - final String[] vals = line.split(","); - int i = Integer.parseInt(vals[1]); - alleleCountFactorArray[i] = Double.parseDouble(vals[2]); - double oldACPrior = alleleCountFactorArray[i]; - } - int kkk = 0; for( final String line : clusterLines ) { final String[] vals = line.split(","); @@ -183,10 +165,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel logger.info("Found " + maxGaussians + " clusters using " + dataManager.numAnnotations + " annotations: " + dataManager.annotationKeys); } - public final void run( final String clusterFileName ) { - - // Initialize the Allele Count prior - generateAlleleCountPrior(); + public final void run( final PrintStream clusterFile ) { // int numValid = 0; // int numOutlier = 0; @@ -215,7 +194,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel logger.info("Initializing using k-means..."); initializeUsingKMeans( dataManager.data ); logger.info("... done!"); - createClusters( dataManager.data, 0, maxGaussians, clusterFileName ); + createClusters( dataManager.data, 0, maxGaussians, clusterFile ); // Simply cluster with all the variants. The knowns have been given more weight than the novels //logger.info("Clustering with " + dataManager.data.length + " variants."); @@ -260,43 +239,12 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel //empiricalSigma.timesEquals(1.0 / (Math.pow(maxGaussians, 2.0 / ((double) numAnnotations)))); } - - private void generateAlleleCountPrior() { - - final double[] acExpectation = new double[alleleCountFactorArray.length]; - final double[] acActual = new double[alleleCountFactorArray.length]; - final int[] alleleCount = new int[alleleCountFactorArray.length]; - - double sumExpectation = 0.0; - for( int iii = 1; iii < alleleCountFactorArray.length; iii++ ) { - acExpectation[iii] = 1.0 / ((double) iii); - sumExpectation += acExpectation[iii]; - } - for( int iii = 1; iii < alleleCountFactorArray.length; iii++ ) { - acExpectation[iii] /= sumExpectation; // Turn acExpectation into a probability distribution - alleleCount[iii] = 1; // Start off with one count to smooth the estimate - } - for( final VariantDatum datum : dataManager.data ) { - alleleCount[datum.alleleCount]++; - } - for( int iii = 1; iii < alleleCountFactorArray.length; iii++ ) { - acActual[iii] = ((double)alleleCount[iii]) / ((double) (dataManager.data.length+(alleleCountFactorArray.length-1))); // Turn acActual into a probability distribution - } - for( int iii = 1; iii < alleleCountFactorArray.length; iii++ ) { - alleleCountFactorArray[iii] = acExpectation[iii] / acActual[iii]; // Prior is (expected / observed) - } - } - public void setSingletonFPRate( final double rate ) { this.singletonFPRate = rate; } public final double getAlleleCountPrior( final int alleleCount ) { - if ( this.singletonFPRate == -1 ) - return alleleCountFactorArray[alleleCount]; - else { - return Math.min(0.95, 1.0 - Math.pow(singletonFPRate, alleleCount)); //TODO -- define the vals - } + return Math.min(0.95, 1.0 - Math.pow(singletonFPRate, alleleCount)); //TODO -- define the vals } private void initializeUsingKMeans( final VariantDatum[] data ) { @@ -365,7 +313,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } } - public final void createClusters( final VariantDatum[] data, final int startCluster, final int stopCluster, final String clusterFileName ) { + public final void createClusters( final VariantDatum[] data, final int startCluster, final int stopCluster, final PrintStream clusterFile ) { final int numVariants = data.length; final int numAnnotations = data[0].annotations.length; @@ -439,38 +387,28 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Output the final cluster parameters ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// - printClusterParameters( clusterFileName ); + printClusterParameters( clusterFile ); } - private void printClusterParameters( final String clusterFileName ) { - try { - final PrintStream outputFile = new PrintStream( clusterFileName ); - dataManager.printClusterFileHeader( outputFile ); - for( int iii = 1; iii < alleleCountFactorArray.length; iii++ ) { - outputFile.print("@!ALLELECOUNT,"); - outputFile.println(iii + "," + alleleCountFactorArray[iii]); - } + private void printClusterParameters( final PrintStream clusterFile ) { + dataManager.printClusterFileHeader( clusterFile ); - final int numAnnotations = mu[0].length; - for( int kkk = 0; kkk < maxGaussians; kkk++ ) { - if( Math.pow(10.0, pClusterLog10[kkk]) > 1E-4 ) { // BUGBUG: make this a command line argument - final double sigmaVals[][] = sigma[kkk].getArray(); - outputFile.print("@!CLUSTER"); - outputFile.print("," + Math.pow(10.0, pClusterLog10[kkk])); - for(int jjj = 0; jjj < numAnnotations; jjj++ ) { - outputFile.print("," + mu[kkk][jjj]); - } - for(int jjj = 0; jjj < numAnnotations; jjj++ ) { - for(int ppp = 0; ppp < numAnnotations; ppp++ ) { - outputFile.print("," + (sigmaVals[jjj][ppp] / hyperParameter_a[kkk]) ); - } - } - outputFile.println(); + final int numAnnotations = mu[0].length; + for( int kkk = 0; kkk < maxGaussians; kkk++ ) { + if( Math.pow(10.0, pClusterLog10[kkk]) > 1E-4 ) { // BUGBUG: make this a command line argument + final double sigmaVals[][] = sigma[kkk].getArray(); + clusterFile.print("@!CLUSTER"); + clusterFile.print("," + Math.pow(10.0, pClusterLog10[kkk])); + for(int jjj = 0; jjj < numAnnotations; jjj++ ) { + clusterFile.print("," + mu[kkk][jjj]); } + for(int jjj = 0; jjj < numAnnotations; jjj++ ) { + for(int ppp = 0; ppp < numAnnotations; ppp++ ) { + clusterFile.print("," + (sigmaVals[jjj][ppp] / hyperParameter_a[kkk]) ); + } + } + clusterFile.println(); } - outputFile.close(); - } catch (Exception e) { - throw new StingException( "Unable to create output file: " + clusterFileName ); } } @@ -563,13 +501,10 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } annIndex++; - outputFile.close(); } - - // BUGBUG: next output the actual cluster on top by integrating out every other annotation } - public final void outputOptimizationCurve( final VariantDatum[] data, final String outputPrefix, + public final void outputOptimizationCurve( final VariantDatum[] data, final PrintStream outputReportDatFile, final PrintStream outputTranchesFile, final int desiredNumVariants, final Double[] FDRtranches, final double QUAL_STEP ) { final int numVariants = data.length; @@ -595,16 +530,16 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel PrintStream outputFile; try { - outputFile = new PrintStream( outputPrefix + ".dat" ); + outputFile = new PrintStream( outputReportDatFile ); } catch (Exception e) { - throw new StingException( "Unable to create output file: " + outputPrefix + ".dat" ); + throw new StingException( "Unable to create output file: " + outputReportDatFile ); } PrintStream tranchesOutputFile; try { - tranchesOutputFile = new PrintStream( outputPrefix + ".dat.tranches" ); + tranchesOutputFile = new PrintStream( outputTranchesFile ); tranchesOutputFile.println("FDRtranche,novelTITV,pCut,numNovel,filterName"); } catch (Exception e) { - throw new StingException( "Unable to create output file: " + outputPrefix + ".dat.tranches" ); + throw new StingException( "Unable to create output file: " + outputTranchesFile ); } int numKnown = 0; @@ -706,12 +641,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel logger.info("\t" + String.format("--> with an implied novel FDR of %.2f percent", Math.abs(100.0 * (1.0-((novelTiTvAtCut[jjj] - 0.5) / (targetTITV - 0.5)))))); } } - - outputFile.close(); - tranchesOutputFile.close(); } - private double evaluateGaussians( final VariantDatum[] data, final double[][] pVarInCluster, final int startCluster, final int stopCluster ) { final int numAnnotations = data[0].annotations.length; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantOptimizationInterface.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantOptimizationInterface.java index 15ad69810..697840ceb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantOptimizationInterface.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantOptimizationInterface.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import java.io.PrintStream; + /* * Copyright (c) 2010 The Broad Institute * @@ -32,5 +34,5 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; */ public interface VariantOptimizationInterface { - public void run( String outputPrefix ); + public void run( PrintStream outputPrefix ); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 24abe68ec..6bfd3b984 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; @@ -42,7 +44,9 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.PrintStream; import java.util.*; /** @@ -56,12 +60,29 @@ import java.util.*; public class VariantRecalibrator extends RodWalker, ExpandingArrayList> { + ///////////////////////////// + // Inputs + ///////////////////////////// + @Input(fullName="cluster_file", shortName="clusterFile", doc="The input cluster file generated by GenerateVariantClusters", required=true) + private File CLUSTER_FILE; + + ///////////////////////////// + // Outputs + ///////////////////////////// + @Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyVariantCuts", required=true) + private PrintStream TRANCHES_FILE; + @Output(fullName="report_dat_file", shortName="reportDatFile", doc="The output report .dat file used with Rscript to create the optimization curve PDF file", required=true) + private File REPORT_DAT_FILE; + @Output(doc="File to which recalibrated variants should be written",required=true) + private VCFWriter vcfWriter = null; + + ///////////////////////////// // Command Line Arguments ///////////////////////////// - @Argument(fullName="target_titv", shortName="titv", doc="The expected Ti/Tv ratio to display on optimization curve output figures. (~~2.1 for whole genome experiments)", required=false) - private double TARGET_TITV = 2.1; - @Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by spreading out the Gaussians.", required=false) + @Argument(fullName="target_titv", shortName="titv", doc="The expected novel Ti/Tv ratio to use when calculating FDR tranches and for display on optimization curve output figures. (~~2.07 for whole genome experiments)", required=true) + private double TARGET_TITV = 2.07; + @Argument(fullName="backOff", shortName="backOff", doc="The Gaussian back off factor, used to prevent overfitting by enlarging out the Gaussians.", required=false) private double BACKOFF_FACTOR = 1.3; @Argument(fullName="desired_num_variants", shortName="dV", doc="The desired number of variants to keep in a theoretically filtered set", required=false) private int DESIRED_NUM_VARIANTS = 0; @@ -75,14 +96,8 @@ public class VariantRecalibrator extends RodWalker ignoreInputFilterSet = null; //--------------------------------------------------------------------------------------------------------------- @@ -116,9 +131,10 @@ public class VariantRecalibrator extends RodWalker reduceSum ) { - vcfWriter.close(); - final VariantDataManager dataManager = new VariantDataManager( reduceSum, theModel.dataManager.annotationKeys ); reduceSum.clear(); // Don't need this ever again, clean up some memory - theModel.outputOptimizationCurve( dataManager.data, OUTPUT_PREFIX, DESIRED_NUM_VARIANTS, FDR_TRANCHES, QUAL_STEP ); + try { + PrintStream reportDatFilePrintStream = new PrintStream(REPORT_DAT_FILE); + theModel.outputOptimizationCurve( dataManager.data, reportDatFilePrintStream, TRANCHES_FILE, DESIRED_NUM_VARIANTS, FDR_TRANCHES, QUAL_STEP ); + } catch ( FileNotFoundException e ) { + throw new StingException("Can't create report dat file: " + REPORT_DAT_FILE.getName()); + } // Execute Rscript command to plot the optimization curve // Print out the command line to make it clear to the user what is being executed and how one might modify it - final String rScriptOptimizationCurveCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + OUTPUT_PREFIX + ".dat" + " " + TARGET_TITV; - final String rScriptTranchesCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Tranches.R" + " " + OUTPUT_PREFIX + ".dat" + " " + TARGET_TITV; + final String rScriptOptimizationCurveCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + REPORT_DAT_FILE.getName() + " " + TARGET_TITV; + final String rScriptTranchesCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_Tranches.R" + " " + REPORT_DAT_FILE.getName() + " " + TARGET_TITV; logger.info( rScriptOptimizationCurveCommandLine ); logger.info( rScriptTranchesCommandLine ); // Execute the RScript command to plot the table of truth values try { - Runtime.getRuntime().exec( rScriptOptimizationCurveCommandLine ); - Runtime.getRuntime().exec( rScriptTranchesCommandLine ); - } catch ( IOException e ) { + Process p; + p = Runtime.getRuntime().exec( rScriptOptimizationCurveCommandLine ); + p.waitFor(); + p = Runtime.getRuntime().exec( rScriptTranchesCommandLine ); + p.waitFor(); + } catch ( Exception e ) { Utils.warnUser("Unable to execute the RScript command. While not critical to the calculations themselves, the script outputs a report that is extremely useful for confirming that the recalibration proceded as expected. We highly recommend trying to rerun the script manually if possible."); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 48c8bd2fb..ba2919ef2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -7,12 +7,14 @@ import java.util.*; import java.io.File; public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { - static HashMap paramsFiles = new HashMap(); - + static HashMap clusterFiles = new HashMap(); + static HashMap tranchesFiles = new HashMap(); + static HashMap inputVCFFiles = new HashMap(); + @Test public void testGenerateVariantClusters() { HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "005898194773d2f2be45a68f6e4c4c25" ); + e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "e6724946c3298b3d74bb1ba1396a9190" ); for ( Map.Entry entry : e.entrySet() ) { String vcf = entry.getKey(); @@ -20,7 +22,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " -T GenerateVariantClusters" + " -B input,VCF," + vcf + " -L 1:1-100,000,000" + @@ -30,37 +32,69 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { 1, // just one output file Arrays.asList(md5)); List result = executeTest("testGenerateVariantClusters", spec).getFirst(); - paramsFiles.put(vcf, result.get(0).getAbsolutePath()); + clusterFiles.put(vcf, result.get(0).getAbsolutePath()); } } @Test public void testVariantRecalibrator() { HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "acb89428115564cbf35ab7c006252108" ); + e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "b97ab64b86ce8c8698855058d32853ce" ); for ( Map.Entry entry : e.entrySet() ) { String vcf = entry.getKey(); String md5 = entry.getValue(); - String paramsFile = paramsFiles.get(vcf); - System.out.printf("PARAMS FOR %s is %s%n", vcf, paramsFile); - if ( paramsFile != null ) { - File file = createTempFile("cluster",".vcf"); - WalkerTestSpec spec = new WalkerTestSpec( + String clusterFile = clusterFiles.get(vcf); + System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + if ( clusterFile != null ) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " -T VariantRecalibrator" + " -B input,VCF," + vcf + " -L 1:40,000,000-100,000,000" + " --ignore_filter GATK_STANDARD" + - " -output " + file.getAbsolutePath().substring(0,file.getAbsolutePath().length()-4) + - " -clusterFile " + paramsFile, - 0, - new ArrayList(0)); + " --ignore_filter HARD_TO_VALIDATE" + + " -clusterFile " + clusterFile + + " -titv 2.07" + + " -o %s" + + " -tranchesFile %s" + + " -reportDatFile %s", + 3, // two output file + Arrays.asList(md5, "f603aa2052ae6e81a6bde63a8a3f9539","951a17f9c11de391763b9a8cb239205a")); + List result = executeTest("testVariantRecalibrator", spec).getFirst(); + inputVCFFiles.put(vcf, result.get(0).getAbsolutePath()); + tranchesFiles.put(vcf, result.get(1).getAbsolutePath()); + } + } - spec.addAuxFile(md5, file); - executeTest("testVariantRecalibrator", spec); + } + + @Test + public void testApplyVariantCuts() { + HashMap e = new HashMap(); + e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "e80f26d68be2b183fd7f062039cef28a" ); + + for ( Map.Entry entry : e.entrySet() ) { + String vcf = entry.getKey(); + String md5 = entry.getValue(); + String inputVCFFile = inputVCFFiles.get(vcf); + String tranchesFile = tranchesFiles.get(vcf); + System.out.printf("PARAMS FOR %s is %s%n", vcf, inputVCFFile); + System.out.printf("PARAMS FOR %s is %s%n", vcf, tranchesFile); + if ( inputVCFFile != null && tranchesFile != null ) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b36KGReference + + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + + " -T ApplyVariantCuts" + + " -L 1:40,000,000-100,000,000" + + " -B input,VCF," + inputVCFFile + + " -o %s" + + " -tranchesFile " + tranchesFile, + 1, // just one output file + Arrays.asList(md5)); + List result = executeTest("testApplyVariantCuts", spec).getFirst(); } } }