From 2c76f71a03c386091fce5d477a593426bce46337 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 6 Aug 2012 22:48:04 -0400 Subject: [PATCH 1/6] Update -maxAlleles argument in integration tests --- ...iedGenotyperGeneralPloidyIntegrationTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index 9b3103274..4d8c6808f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -46,33 +46,33 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d"); } @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3"); } @Test - public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); + public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b"); } @Test - public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); + public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9"); } @Test public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","6b27634214530d379db70391a9cfc2d7"); } @Test public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "e74d4c73ece45d7fb676b99364df4f1a"); } } From 15085bf03e2c29e921df15ef017d3abb14ea5921 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 7 Aug 2012 13:58:22 -0400 Subject: [PATCH 2/6] The UnifiedGenotyper now makes use of base insertion and base deletion quality scores if they exist in the reads. --- .../gatk/walkers/indels/PairHMMIndelErrorModel.java | 4 +++- .../genotyper/UnifiedGenotyperIntegrationTest.java | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6bfe5702d..65c5a2fbc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -351,7 +351,9 @@ public class PairHMMIndelErrorModel { previousHaplotypeSeen = haplotypeBases.clone(); readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, - contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities, + (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), + (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), + contextLogGapContinuationProbabilities, startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index d976e3e22..7b6e1ee96 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -355,6 +355,19 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec); } + @Test + public void testBaseIndelQualityScores() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommandIndelsb37 + + " -I " + privateTestDir + "NA12878.100kb.BQSRv2.example.bam" + + " -o %s" + + " -L 20:10,000,000-10,100,000", + 1, + Arrays.asList("b3c923ed9efa04b85fc18a9b45c8d2a6")); + + executeTest(String.format("test UG with base indel quality scores"), spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing SnpEff From 982c735c7665219331c9f0b29d317dfa531d93bd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 08:38:16 -0400 Subject: [PATCH 3/6] VisualizeAdaptiveTree now considers only leaf nodes when computing max/min penalty --- .../utils/recalibration/RecalDatumNode.java | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index e792a808d..a0b3f2b0a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -169,10 +169,10 @@ public class RecalDatumNode { * The maximum penalty among all nodes * @return */ - public double maxPenalty() { - double max = getPenalty(); + public double maxPenalty(final boolean leafOnly) { + double max = ! leafOnly || isLeaf() ? getPenalty() : Double.MIN_VALUE; for ( final RecalDatumNode sub : subnodes ) - max = Math.max(max, sub.maxPenalty()); + max = Math.max(max, sub.maxPenalty(leafOnly)); return max; } @@ -180,10 +180,10 @@ public class RecalDatumNode { * The minimum penalty among all nodes * @return */ - public double minPenalty() { - double min = getPenalty(); + public double minPenalty(final boolean leafOnly) { + double min = ! leafOnly || isLeaf() ? getPenalty() : Double.MAX_VALUE; for ( final RecalDatumNode sub : subnodes ) - min = Math.min(min, sub.minPenalty()); + min = Math.min(min, sub.minPenalty(leafOnly)); return min; } @@ -251,7 +251,7 @@ public class RecalDatumNode { * @return the chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { - if ( isLeaf() ) + if ( isLeaf() || freeToMerge() ) return 0.0; else if ( subnodes.size() == 1 ) // only one value, so its free to merge away @@ -277,6 +277,22 @@ public class RecalDatumNode { } } + /** + * Is this node free to merge because its rounded Q score is the same as all nodes below + * @return + */ + private boolean freeToMerge() { + if ( isLeaf() ) // leaves are free to merge + return true; + else { + final byte myQual = getRecalDatum().getEmpiricalQualityAsByte(); + for ( final RecalDatumNode sub : subnodes ) + if ( sub.getRecalDatum().getEmpiricalQualityAsByte() != myQual ) + return false; + return true; + } + } + /** * Calculate the penalty of this interval, given the overall error rate for the interval * @@ -346,8 +362,6 @@ public class RecalDatumNode { while ( root.size() > maxElements ) { // remove the lowest penalty element, and continue root = root.removeLowestPenaltyNode(); - if ( logger.isDebugEnabled() ) - logger.debug("pruneByPenalty root size is now " + root.size() + " of max " + maxElements); } // our size is below the target, so we are good, return @@ -363,7 +377,8 @@ public class RecalDatumNode { */ private RecalDatumNode removeLowestPenaltyNode() { final Pair, Double> nodeToRemove = getMinPenaltyAboveLeafNode(); - //logger.info("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); + if ( logger.isDebugEnabled() ) + logger.debug("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond()); final Pair, Boolean> result = removeNode(nodeToRemove.getFirst()); From 80b94a4f9a456e8bb8c60ac06e34e8427d3ee608 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 16:36:57 -0400 Subject: [PATCH 4/6] AdaptiveContexts implement pruning to a given chi2 p value -- Added bonferroni corrected p-value pruning, so you tell it how significant of a different you are willing to collapse in the tree, and it prunes the tree down to this maximum threshold -- Penalty is now a phred-scaled p-value not the raw chi2 value -- Split command line arguments in VisualizeContextTree into separate arguments for each type of pruning --- .../utils/recalibration/RecalDatumNode.java | 72 ++++++++++++++++--- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index a0b3f2b0a..55d3ca13f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.recalibration; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.apache.commons.math.MathException; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.Pair; @@ -19,6 +20,7 @@ import java.util.Set; * @since 07/27/12 */ public class RecalDatumNode { + private final static double SMALLEST_CHI2_PVALUE = 1e-300; protected static Logger logger = Logger.getLogger(RecalDatumNode.class); /** @@ -150,8 +152,6 @@ public class RecalDatumNode { * definition have 0 penalty unless they represent a pruned tree with underlying -- but now * pruned -- subtrees * - * TODO -- can we really just add together the chi2 values? - * * @return */ public double totalPenalty() { @@ -244,11 +244,12 @@ public class RecalDatumNode { } /** - * Calculate the chi^2 penalty among subnodes of this node. The chi^2 value - * indicates the degree of independence of the implied error rates among the + * Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node. + * + * The chi^2 value indicates the degree of independence of the implied error rates among the * immediate subnodes * - * @return the chi2 penalty, or 0.0 if it cannot be calculated + * @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated */ private double calcPenalty() { if ( isLeaf() || freeToMerge() ) @@ -267,13 +268,18 @@ public class RecalDatumNode { i++; } - final double chi2 = new ChiSquareTestImpl().chiSquare(counts); + try { + final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts); + final double penalty = -10 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE)); - // make sure things are reasonable and fail early if not - if (Double.isInfinite(chi2) || Double.isNaN(chi2)) - throw new ReviewedStingException("chi2 value is " + chi2 + " at " + getRecalDatum()); + // make sure things are reasonable and fail early if not + if (Double.isInfinite(penalty) || Double.isNaN(penalty)) + throw new ReviewedStingException("chi2 value is " + chi2PValue + " at " + getRecalDatum()); - return chi2; + return penalty; + } catch ( MathException e ) { + throw new ReviewedStingException("Failed in calculating chi2 value", e); + } } } @@ -368,6 +374,50 @@ public class RecalDatumNode { return root; } + /** + * Return a freshly allocated tree where all mergable nodes with < maxPenalty are merged + * + * Note that nodes must have fixed penalties to this algorithm will fail. + * + * @param maxPenaltyIn the maximum penalty we are allowed to incur for a merge + * @param applyBonferroniCorrection if true, we will adjust penalty by the phred-scaled bonferroni correction + * for the size of the initial tree. That is, if there are 10 nodes in the + * tree and maxPenalty is 20 we will actually enforce 10^-2 / 10 = 10^-3 = 30 + * penalty for multiple testing + * @return + */ + public RecalDatumNode pruneToNoMoreThanPenalty(final double maxPenaltyIn, final boolean applyBonferroniCorrection) { + RecalDatumNode root = this; + + final double bonferroniCorrection = 10 * Math.log10(this.size()); + final double maxPenalty = applyBonferroniCorrection ? maxPenaltyIn + bonferroniCorrection : maxPenaltyIn; + + if ( applyBonferroniCorrection ) + logger.info(String.format("Applying Bonferroni correction for %d nodes = %.2f to initial penalty %.2f for total " + + "corrected max penalty of %.2f", this.size(), bonferroniCorrection, maxPenaltyIn, maxPenalty)); + + while ( true ) { + final Pair, Double> minPenaltyNode = root.getMinPenaltyAboveLeafNode(); + + if ( minPenaltyNode == null || minPenaltyNode.getSecond() > maxPenalty ) { + // nothing to merge, or the best candidate is above our max allowed + if ( minPenaltyNode == null ) + if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found"); + else + if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty); + break; + } else { + // remove the lowest penalty element, and continue + if ( logger.isDebugEnabled() ) logger.debug("Removing node " + minPenaltyNode.getFirst() + " with penalty " + minPenaltyNode.getSecond()); + root = root.removeLowestPenaltyNode(); + } + } + + // no more candidates exist with penalty < maxPenalty + return root; + } + + /** * Find the lowest penalty above leaf node in the tree, and return a tree without it * @@ -394,7 +444,7 @@ public class RecalDatumNode { /** * Finds in the tree the node with the lowest penalty whose subnodes are all leaves * - * @return + * @return the node and its penalty, or null if no such node exists */ private Pair, Double> getMinPenaltyAboveLeafNode() { if ( isLeaf() ) From cda8d944b741f5d9ee0cabb9b5bc3acdd4ec83aa Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 7 Aug 2012 17:22:27 -0400 Subject: [PATCH 5/6] Bugfixes for BCF with VQSR -- Old version converted doubles directly from strings. New version uses VariantContext getAttributeAsDouble() that looks at the values directly to determine how to convert from Object to Double (via Double.valueOf, (Double), or (Double)(Integer)). -- getAttributeAsDouble() is now smart in converting integers to doubles as needed -- Removed unnecessary logging info in BCF2Codec -- Added integration tests to ensure that VQSR works end-to-end with BCF2 using sites version of the file khalid sent to me -- Added vqsr.bcf_test.snps.unfiltered.bcf file for this integration test --- .../VariantDataManager.java | 2 +- .../sting/utils/codecs/bcf2/BCF2Codec.java | 1 - .../utils/variantcontext/CommonInfo.java | 1 + ...ntRecalibrationWalkersIntegrationTest.java | 59 +++++++++++++++++-- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 45fdad4f8..e88505f99 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -235,7 +235,7 @@ public class VariantDataManager { double value; try { - value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); + value = vc.getAttributeAsDouble( annotationKey, Double.NaN ); if( Double.isInfinite(value) ) { value = Double.NaN; } if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 0776b3aa8..244af8517 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -167,7 +167,6 @@ public final class BCF2Codec implements FeatureCodec { // create the config offsets if ( ! header.getContigLines().isEmpty() ) { - logger.info("Found contig lines in BCF2 file, using those"); contigNames.clear(); for ( final VCFContigHeaderLine contig : header.getContigLines()) { if ( contig.getID() == null || contig.getID().equals("") ) diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java index fb0d7140d..127f91677 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/CommonInfo.java @@ -216,6 +216,7 @@ final class CommonInfo { Object x = getAttribute(key); if ( x == null ) return defaultValue; if ( x instanceof Double ) return (Double)x; + if ( x instanceof Integer ) return (Integer)x; return Double.valueOf((String)x); // throws an exception if this isn't a string } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 74d071a90..d1ecbb0bf 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -13,7 +13,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { String recalMD5; String cutVCFMD5; public VRTest(String inVCF, String tranchesMD5, String recalMD5, String cutVCFMD5) { - this.inVCF = validationDataLocation + inVCF; + this.inVCF = inVCF; this.tranchesMD5 = tranchesMD5; this.recalMD5 = recalMD5; this.cutVCFMD5 = cutVCFMD5; @@ -25,7 +25,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } } - VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", + VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", "f360ce3eb2b0b887301be917a9843e2b", // tranches "287fea5ea066bf3fdd71f5ce9b58eab3", // recal file "356b9570817b9389da71fbe991d8b2f5"); // cut VCF @@ -74,14 +74,65 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { executeTest("testApplyRecalibration-"+params.inVCF, spec); } + VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", + "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches + "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "62fda105e14b619a1c263855cf56af1d"); // cut VCF + + @DataProvider(name = "VRBCFTest") + public Object[][] createVRBCFTest() { + return new Object[][]{ {bcfTest} }; + //return new Object[][]{ {yriTrio}, {lowPass} }; // Add hg19 chr20 trio calls here + } + + @Test(dataProvider = "VRBCFTest") + public void testVariantRecalibratorWithBCF(VRTest params) { + //System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" + + " -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" + + " -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" + + " -T VariantRecalibrator" + + " -input " + params.inVCF + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -an AC " + // integer value + " -an QD -an ReadPosRankSum -an FS -an InbreedingCoeff " + // floats value + " -mG 2 "+ + " -recalFile %s" + + " -tranchesFile %s", + 2, + Arrays.asList("bcf", "txt"), + Arrays.asList(params.recalMD5, params.tranchesMD5)); + executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst(); + } + + @Test(dataProvider = "VRBCFTest", dependsOnMethods="testVariantRecalibratorWithBCF") + public void testApplyRecalibrationWithBCF(VRTest params) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -T ApplyRecalibration" + + " -L 20:10,000,000-20,000,000" + + " --no_cmdline_in_header" + + " -input " + params.inVCF + + " -U LENIENT_VCF_PROCESSING -o %s" + + " -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) + + " -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null), + Arrays.asList(params.cutVCFMD5)); + spec.disableShadowBCF(); + executeTest("testApplyRecalibration-"+params.inVCF, spec); + } + + VRTest indelUnfiltered = new VRTest( - "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . + validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "64f576881e21323dd4078262604717a2"); // cut VCF VRTest indelFiltered = new VRTest( - "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS + validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS "b7589cd098dc153ec64c02dcff2838e4", // tranches "a04a9001f62eff43d363f4d63769f3ee", // recal file "af22c55d91394c56a222fd40d6d54781"); // cut VCF From a7811d673f3abf1ef73ff7e6c4aadbd58d0221c9 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 8 Aug 2012 09:29:54 -0400 Subject: [PATCH 6/6] Update URL for phone home / GATK key documentation output by the GATK upon error --- .../sting/gatk/CommandLineExecutable.java | 4 ++-- .../gatk/arguments/GATKArgumentCollection.java | 4 ++-- .../sting/gatk/phonehome/GATKRunReport.java | 1 + .../sting/utils/exceptions/UserException.java | 14 +++++++------- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java index c6bb4a27c..0286cdc52 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineExecutable.java @@ -130,8 +130,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram { getArgumentCollection().phoneHomeType == GATKRunReport.PhoneHomeOption.STDOUT ) { if ( getArgumentCollection().gatkKeyFile == null ) { throw new UserException("Running with the -et NO_ET or -et STDOUT option requires a GATK Key file. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for more information and instructions on how to obtain a key."); + "Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + + " for more information and instructions on how to obtain a key."); } else { PublicKey gatkPublicKey = CryptUtils.loadGATKDistributedPublicKey(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 972116952..1e6920b82 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -66,10 +66,10 @@ public class GATKArgumentCollection { @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) public Integer readBufferSize = null; - @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? STANDARD is the default, can be NO_ET so nothing is posted to the run repository. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; - @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for details.", required = false) + @Argument(fullName = "gatk_key", shortName = "K", doc="GATK Key file. Required if running with -et NO_ET. Please see " + GATKRunReport.PHONE_HOME_DOCS_URL + " for details.", required = false) public File gatkKeyFile = null; @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java index a42c73212..4cf5046a2 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java @@ -88,6 +88,7 @@ public class GATKRunReport { // number of milliseconds before the S3 put operation is timed-out: private static final long S3PutTimeOut = 10 * 1000; + public static final String PHONE_HOME_DOCS_URL = "http://gatkforums.broadinstitute.org/discussion/1250/what-is-phone-home-and-how-does-it-affect-me#latest"; /** * our log diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 52da31362..bda03f675 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.exceptions; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceDictionary; +import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -351,8 +352,8 @@ public class UserException extends ReviewedStingException { public static class UnreadableKeyException extends UserException { public UnreadableKeyException ( File f, Exception e ) { super(String.format("Key file %s cannot be read (possibly the key file is corrupt?). Error was: %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - f.getAbsolutePath(), getMessage(e))); + "Please see %s for help.", + f.getAbsolutePath(), getMessage(e), GATKRunReport.PHONE_HOME_DOCS_URL)); } public UnreadableKeyException ( String message, Exception e ) { @@ -361,8 +362,8 @@ public class UserException extends ReviewedStingException { public UnreadableKeyException ( String message ) { super(String.format("Key file cannot be read (possibly the key file is corrupt?): %s. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home for help.", - message)); + "Please see %s for help.", + message, GATKRunReport.PHONE_HOME_DOCS_URL)); } } @@ -370,9 +371,8 @@ public class UserException extends ReviewedStingException { public KeySignatureVerificationException ( File f ) { super(String.format("The signature in key file %s failed cryptographic verification. " + "If this key was valid in the past, it's likely been revoked. " + - "Please see http://www.broadinstitute.org/gsa/wiki/index.php/Phone_home " + - "for help.", - f.getAbsolutePath())); + "Please see %s for help.", + f.getAbsolutePath(), GATKRunReport.PHONE_HOME_DOCS_URL)); } } }