diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index e224a9c57..59357e1c4 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -37,12 +37,6 @@ import java.util.*; public class AlleleBiasedDownsamplingUtils { - private static final ArrayList[] alleleStratifiedElements = new ArrayList[4]; - static { - for ( int i = 0; i < 4; i++ ) - alleleStratifiedElements[i] = new ArrayList(); - } - /** * Computes an allele biased version of the given pileup * @@ -51,15 +45,23 @@ public class AlleleBiasedDownsamplingUtils { * @param log logging output * @return allele biased pileup */ - public synchronized static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { + public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { // special case removal of all or no reads if ( downsamplingFraction <= 0.0 ) return pileup; if ( downsamplingFraction >= 1.0 ) return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList()); + final ArrayList[] alleleStratifiedElements = new ArrayList[4]; + for ( int i = 0; i < 4; i++ ) + alleleStratifiedElements[i] = new ArrayList(); + // start by stratifying the reads by the alleles they represent at this position for( final PileupElement pe : pileup ) { + // abort if we have a reduced read - we do not want to remove it! + if ( pe.getRead().isReducedRead() ) + return pileup; + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); if ( baseIndex != -1 ) alleleStratifiedElements[baseIndex].add(pe); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java index be8357679..d0bcd0eb3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/AdvancedRecalibrationEngine.java @@ -39,57 +39,12 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp // optimization: only allocate temp arrays once per thread private final ThreadLocal threadLocalTempQualArray = new ThreadLocalArray(EventType.values().length, byte.class); - private final ThreadLocal threadLocalTempErrorArray = new ThreadLocalArray(EventType.values().length, boolean.class); private final ThreadLocal threadLocalTempFractionalErrorArray = new ThreadLocalArray(EventType.values().length, double.class); public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) { super.initialize(covariates, recalibrationTables); } - /** - * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches for all three - * categories (mismatches, insertions and deletions). - * - * @param pileupElement The pileup element to update - * @param refBase The reference base at this locus - */ - @Override - public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { - final int offset = pileupElement.getOffset(); - final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); - - byte[] tempQualArray = threadLocalTempQualArray.get(); - boolean[] tempErrorArray = threadLocalTempErrorArray.get(); - - tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual(); - tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); - tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual(); - tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion(); - tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual(); - tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase(); - - for (final EventType eventType : EventType.values()) { - final int[] keys = readCovariates.getKeySet(offset, eventType); - final int eventIndex = eventType.index; - final byte qual = tempQualArray[eventIndex]; - final boolean isError = tempErrorArray[eventIndex]; - - // TODO: should this really be combine rather than increment? - combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex); - - incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); - - for (int i = 2; i < covariates.length; i++) { - if (keys[i] < 0) - continue; - - incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); - } - } - } - @Override public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { for( int offset = 0; offset < read.getReadBases().length; offset++ ) { diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java index d6cddcf84..77a7c3bd9 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java +++ b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java @@ -55,9 +55,12 @@ public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLik alleleReadMap.put(allele, new ArrayList()); for ( Map.Entry> entry : likelihoodReadMap.entrySet() ) { - final Allele bestAllele = getMostLikelyAllele(entry.getValue()); - if ( bestAllele != Allele.NO_CALL ) - alleleReadMap.get(bestAllele).add(entry.getKey()); + // do not remove reduced reads! + if ( !entry.getKey().isReducedRead() ) { + final Allele bestAllele = getMostLikelyAllele(entry.getValue()); + if ( bestAllele != Allele.NO_CALL ) + alleleReadMap.get(bestAllele).add(entry.getKey()); + } } // compute the reads to remove and actually remove them diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index a50219f48..b839382dc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -51,21 +51,21 @@ public class BQSRIntegrationTest extends WalkerTest { String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqInterval = "chr1:10,000,000-10,100,000"; return new Object[][]{ - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "55a46d8f5d2f9acfa2d7659e18b6df43")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "8e930f56a8905a5999af7d6ba8a92f91")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8e87bee4bd6531b405082c4da785f1f5")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "b309a5f57b861d7f31cb76cdac4ff8a7")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "4c75d47ed2cf93b499be8fbb29b24dfd")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "43b06e5568a89e4ce1dd9146ce580c89")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "25f4f48dba27475b0cd7c06ef0239aba")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "dfcba9acc32b4a1dfeceea135b48615a")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "e8077b721f2e6f51c1945b6f6236835c")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "fbdc8d0fd312e3a7f49063c580cf5d92")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "4f47415628201a4f3c33e48ec066677b")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1e89d2b88f4218363b9322b38e9536f2")}, - {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "a7beb0b16756257a274eecf73474ed90")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "dfcba9acc32b4a1dfeceea135b48615a")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2082c70e08f1c14290c3812021832f83")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "387b41dc2221a1a4a782958944662b25")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "b5e26902e76abbd59f94f65c70d18165")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "a8a9c3f83269911cb61c5fe8fb98dc4a")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f43a0473101c63ae93444c300d843e81")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "9e05e63339d4716584bfc717cab6bd0f")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "1cf9b9c9c64617dc0f3d2f203f918dbe")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "aa1949a77bc3066fee551a217c970c0d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "be2812cd3dae3c326cf35ae3f1c8ad9e")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")}, + {new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "f70d8b5358bc2f76696f14b7a807ede0")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5e657fd6a44dcdc7674b6e5a2de5dc83")}, }; } @@ -122,12 +122,12 @@ public class BQSRIntegrationTest extends WalkerTest { public Object[][] createPRTestData() { List tests = new ArrayList(); - tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")}); - tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")}); + tests.add(new Object[]{1, new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")}); tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")}); + tests.add(new Object[]{nct, new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")}); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index ae3befe66..73bc8fba6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSNP_ACS_Pools() { - PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8"); + PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705"); } @Test(enabled = true) public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 71f03ea00..86f3748ce 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "1d826f852ec1457efbfa7ee828c5783a"); + HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3"); + HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b"); } @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047")); executeTest("HCTestStructuralIndels: ", spec); } @@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f")); + Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9")); executeTest("HC calling on a ReducedRead BAM", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 54ade61f6..5d7eba16c 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -406,7 +406,7 @@ public abstract class ArgumentTypeDescriptor { else throw new UserException.CommandLineException( String.format("Failed to parse value %s for argument %s. Message: %s", - value.asString(), fieldName, e.getMessage())); + value, fieldName, e.getMessage())); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 67e5ad95b..b7000e0ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import java.io.File; import java.util.*; +import java.util.concurrent.TimeUnit; /** * A GenomeAnalysisEngine that runs a specified walker. @@ -73,6 +74,7 @@ public class GenomeAnalysisEngine { * our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class); + public static final long NO_RUNTIME_LIMIT = -1; /** * The GATK command-line argument parsing code. @@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine { public String createApproximateCommandLineArgumentString(Object... argumentProviders) { return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders); } - + /** + * Does the current runtime in unit exceed the runtime limit, if one has been provided? + * + * @param runtime the runtime of this GATK instance in minutes + * @param unit the time unit of runtime + * @return false if not limit was requested or if runtime <= the limit, true otherwise + */ + public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) { + if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime); + + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return false; + else { + final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit); + final long maxRuntimeNano = getRuntimeLimitInNanoseconds(); + return actualRuntimeNano > maxRuntimeNano; + } + } + + /** + * @return the runtime limit in nanoseconds, or -1 if no limit was specified + */ + public long getRuntimeLimitInNanoseconds() { + if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT ) + return -1; + else + return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 7875ced5a..e2b943582 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; @@ -44,6 +45,7 @@ import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; /** * @author aaron @@ -91,7 +93,7 @@ public class GATKArgumentCollection { // -------------------------------------------------------------------------------------------------------------- // - // XXX + // General features // // -------------------------------------------------------------------------------------------------------------- @@ -143,6 +145,12 @@ public class GATKArgumentCollection { @Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.") public boolean disableRandomization = false; + @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false) + public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT; + + @Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false) + public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES; + // -------------------------------------------------------------------------------------------------------------- // // Downsampling Arguments diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 2f6d9d16f..547f375bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -75,8 +75,9 @@ public class StandardCallerArgumentCollection { * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we * will try to remove (N * contamination fraction) bases for each alternate allele. */ - @Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) - public double CONTAMINATION_FRACTION = 0.0; + @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) + public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION; + public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05; @Hidden @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java index 31f2a469c..cc0161b2d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java @@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar final ReduceTree reduceTree = new ReduceTree(this); initializeWalker(walker); - while (isShardTraversePending() || isTreeReducePending()) { + while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) { // Check for errors during execution. errorTracker.throwErrorIfPending(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java index 5b94e0767..f3c1ae91c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java @@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler { final TraversalEngine traversalEngine = borrowTraversalEngine(this); for (Shard shard : shardStrategy ) { - if ( done || shard == null ) // we ran out of shards that aren't owned + if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned break; if(shard.getShardType() == Shard.ShardType.LOCUS) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java index df4ed9ef8..38170040a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.traversals.*; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.AutoFormattingTime; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -52,6 +53,7 @@ import javax.management.ObjectName; import java.io.File; import java.lang.management.ManagementFactory; import java.util.*; +import java.util.concurrent.TimeUnit; /** @@ -269,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean { this.threadEfficiencyMonitor = threadEfficiencyMonitor; } + /** + * Should we stop all execution work and exit gracefully? + * + * Returns true in the case where some external signal or time limit has been received, indicating + * that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown + * gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler + * examine this value as often as reasonable and, if it returns true, stop what they are doing + * at the next available opportunity, shutdown their resources, call notify done, and return. + * + * @return true if we should abort execution, or false otherwise + */ + protected boolean abortExecution() { + final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS); + if ( abort ) { + final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4); + logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft); + } + return abort; + } + /** * Walks a walker over the given list of intervals. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 2b46414a8..5c7da82d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -364,7 +364,7 @@ public class VariantContextAdaptors { long end = hapmap.getEnd(); if ( deletionLength > 0 ) - end += deletionLength; + end += (deletionLength - 1); VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make(); return vc; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 245c8e3e8..9506510a9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -25,35 +25,41 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; +import net.sf.picard.reference.IndexedFastaSequenceFile; +import net.sf.samtools.CigarElement; import net.sf.samtools.SAMFileHeader; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.Advanced; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; -import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter; +import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.recalibration.QuantizationInfo; -import org.broadinstitute.sting.utils.recalibration.RecalUtils; -import org.broadinstitute.sting.utils.recalibration.RecalibrationReport; -import org.broadinstitute.sting.utils.recalibration.RecalibrationTables; +import org.broadinstitute.sting.utils.recalibration.*; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.lang.reflect.Constructor; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; /** * First pass of the base quality score recalibration -- Generates recalibration table based on various user-specified covariates (such as read group, reported quality score, machine cycle, and nucleotide context). @@ -103,19 +109,20 @@ import java.util.ArrayList; * */ -@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) +@DocumentedGATKFeature(groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class}) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) -@By(DataSource.READS) -@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file -@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality -@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta -public class BaseRecalibrator extends LocusWalker { - +@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class}) +@PartitionBy(PartitionType.READ) +public class BaseRecalibrator extends ReadWalker implements NanoSchedulable { @ArgumentCollection private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates + @Advanced + @Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false) + public double BAQGOP = BAQ.DEFAULT_GOP; + private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization - + private RecalibrationTables recalibrationTables; private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental) @@ -124,12 +131,12 @@ public class BaseRecalibrator extends LocusWalker { private int minimumQToUse; - protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped. - protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed. protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\ private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation."; + private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector + private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation /** * Parse the -cov arguments and create a list of covariates to be used here @@ -137,6 +144,8 @@ public class BaseRecalibrator extends LocusWalker { */ public void initialize() { + baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty + // check for unsupported access if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals) throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument"); @@ -185,13 +194,21 @@ public class BaseRecalibrator extends LocusWalker { recalibrationEngine.initialize(requestedCovariates, recalibrationTables); minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN; + + try { + // fasta reference reader for use with BAQ calculation + referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); + } catch( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e); + } + } private RecalibrationEngine initializeRecalibrationEngine() { final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class); try { - Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null); + final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null); constructor.setAccessible(true); return (RecalibrationEngine)constructor.newInstance(); } @@ -200,60 +217,207 @@ public class BaseRecalibrator extends LocusWalker { } } - private boolean readHasBeenSkipped( final GATKSAMRecord read ) { - return read.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE); - } - - private boolean isLowQualityBase( final PileupElement p ) { - return p.getQual() < minimumQToUse; - } - - private boolean readNotSeen( final GATKSAMRecord read ) { - return !read.containsTemporaryAttribute(SEEN_ATTRIBUTE); + private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) { + return read.getBaseQualities()[offset] < minimumQToUse; } /** * For each read at this locus get the various covariate values and increment that location in the map based on * whether or not the base matches the reference at this particular location - * - * @param tracker the reference metadata tracker - * @param ref the reference context - * @param context the alignment context - * @return returns 1, but this value isn't used in the reduce step */ - public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - long countedSites = 0L; - // Only analyze sites not present in the provided known sites - if (tracker.getValues(RAC.knownSites).size() == 0) { - for (final PileupElement p : context.getBasePileup()) { - final GATKSAMRecord read = p.getRead(); - final int offset = p.getOffset(); + public Long map( final ReferenceContext ref, final GATKSAMRecord originalRead, final RefMetaDataTracker metaDataTracker ) { - // This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases) - if (readHasBeenSkipped(read) || p.isInsertionAtBeginningOfRead() || isLowQualityBase(p) ) - continue; + final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(originalRead); + if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it - if (readNotSeen(read)) { - read.setTemporaryAttribute(SEEN_ATTRIBUTE, true); - RecalUtils.parsePlatformForRead(read, RAC); - if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { - read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true); - continue; + RecalUtils.parsePlatformForRead(read, RAC); + if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls + return 0L; // skip this read completely + } + read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); + + final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases + final int[] isSNP = calculateIsSNP(read, ref, originalRead); + final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION); + final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION); + final byte[] baqArray = calculateBAQArray(read); + + if( baqArray != null ) { // some reads just can't be BAQ'ed + final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray); + final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray); + final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray); + recalibrationEngine.updateDataForRead(read, skip, snpErrors, insertionErrors, deletionErrors); + return 1L; + } else { + return 0L; + } + } + + protected boolean[] calculateSkipArray( final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) { + final byte[] bases = read.getReadBases(); + final boolean[] skip = new boolean[bases.length]; + final boolean[] knownSites = calculateKnownSites(read, metaDataTracker.getValues(RAC.knownSites)); + for( int iii = 0; iii < bases.length; iii++ ) { + skip[iii] = !BaseUtils.isRegularBase(bases[iii]) || isLowQualityBase(read, iii) || knownSites[iii] || badSolidOffset(read, iii); + } + return skip; + } + + protected boolean badSolidOffset( final GATKSAMRecord read, final int offset ) { + return ReadUtils.isSOLiDRead(read) && RAC.SOLID_RECAL_MODE != RecalUtils.SOLID_RECAL_MODE.DO_NOTHING && !RecalUtils.isColorSpaceConsistent(read, offset); + } + + protected boolean[] calculateKnownSites( final GATKSAMRecord read, final List features ) { + final int BUFFER_SIZE = 0; + final int readLength = read.getReadBases().length; + final boolean[] knownSites = new boolean[readLength]; + Arrays.fill(knownSites, false); + for( final Feature f : features ) { + int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here? + if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; } + int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true); + if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; } + Arrays.fill(knownSites, Math.max(0, featureStartOnRead - BUFFER_SIZE), Math.min(readLength, featureEndOnRead + 1 + BUFFER_SIZE), true); + } + return knownSites; + } + + // BUGBUG: can be merged with calculateIsIndel + protected static int[] calculateIsSNP( final GATKSAMRecord read, final ReferenceContext ref, final GATKSAMRecord originalRead ) { + final byte[] readBases = read.getReadBases(); + final byte[] refBases = Arrays.copyOfRange(ref.getBases(), read.getAlignmentStart() - originalRead.getAlignmentStart(), ref.getBases().length + read.getAlignmentEnd() - originalRead.getAlignmentEnd()); + final int[] snp = new int[readBases.length]; + int readPos = 0; + int refPos = 0; + for ( final CigarElement ce : read.getCigar().getCigarElements() ) { + final int elementLength = ce.getLength(); + switch (ce.getOperator()) { + case M: + case EQ: + case X: + for( int iii = 0; iii < elementLength; iii++ ) { + snp[readPos] = ( BaseUtils.basesAreEqual(readBases[readPos], refBases[refPos]) ? 0 : 1 ); + readPos++; + refPos++; } - read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates)); - } - - // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if (!ReadUtils.isSOLiDRead(read) || - RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING || - RecalUtils.isColorSpaceConsistent(read, offset)) - // This base finally passed all the checks for a good base, so add it to the big data hashmap - recalibrationEngine.updateDataForPileupElement(p, ref.getBase()); + break; + case D: + case N: + refPos += elementLength; + break; + case I: + case S: // ReferenceContext doesn't have the soft clipped bases! + readPos += elementLength; + break; + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); } - countedSites++; + } + return snp; + } + + protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) { + final byte[] readBases = read.getReadBases(); + final int[] indel = new int[readBases.length]; + Arrays.fill(indel, 0); + int readPos = 0; + for ( final CigarElement ce : read.getCigar().getCigarElements() ) { + final int elementLength = ce.getLength(); + switch (ce.getOperator()) { + case M: + case EQ: + case X: + case S: + { + readPos += elementLength; + break; + } + case D: + { + final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) ); + indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 ); + break; + } + case I: + { + final boolean forwardStrandRead = !read.getReadNegativeStrandFlag(); + if( forwardStrandRead ) { + indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 ); + } + for (int iii = 0; iii < elementLength; iii++) { + readPos++; + } + if( !forwardStrandRead ) { + indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 ); + } + break; + } + case N: + case H: + case P: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return indel; + } + + protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) { + if(errorArray.length != baqArray.length ) { + throw new ReviewedStingException("Array length mismatch detected. Malformed read?"); } - return countedSites; + final byte NO_BAQ_UNCERTAINTY = (byte)'@'; + final int BLOCK_START_UNSET = -1; + + final double[] fractionalErrors = new double[baqArray.length]; + Arrays.fill(fractionalErrors, 0.0); + boolean inBlock = false; + int blockStartIndex = BLOCK_START_UNSET; + int iii; + for( iii = 0; iii < fractionalErrors.length; iii++ ) { + if( baqArray[iii] == NO_BAQ_UNCERTAINTY ) { + if( !inBlock ) { + fractionalErrors[iii] = (double) errorArray[iii]; + } else { + calculateAndStoreErrorsInBlock(iii, blockStartIndex, errorArray, fractionalErrors); + inBlock = false; // reset state variables + blockStartIndex = BLOCK_START_UNSET; // reset state variables + } + } else { + inBlock = true; + if( blockStartIndex == BLOCK_START_UNSET ) { blockStartIndex = iii; } + } + } + if( inBlock ) { + calculateAndStoreErrorsInBlock(iii-1, blockStartIndex, errorArray, fractionalErrors); + } + if( fractionalErrors.length != errorArray.length ) { + throw new ReviewedStingException("Output array length mismatch detected. Malformed read?"); + } + return fractionalErrors; + } + + private static void calculateAndStoreErrorsInBlock( final int iii, + final int blockStartIndex, + final int[] errorArray, + final double[] fractionalErrors ) { + int totalErrors = 0; + for( int jjj = Math.max(0,blockStartIndex-1); jjj <= iii; jjj++ ) { + totalErrors += errorArray[jjj]; + } + for( int jjj = Math.max(0, blockStartIndex-1); jjj <= iii; jjj++ ) { + fractionalErrors[jjj] = ((double) totalErrors) / ((double)(iii - Math.max(0,blockStartIndex-1) + 1)); + } + } + + private byte[] calculateBAQArray( final GATKSAMRecord read ) { + baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG); + return BAQ.getBAQTag(read); } /** @@ -286,12 +450,12 @@ public class BaseRecalibrator extends LocusWalker { generateReport(); logger.info("...done!"); - if (RAC.RECAL_PDF_FILE != null) { + if ( RAC.RECAL_PDF_FILE != null ) { logger.info("Generating recalibration plots..."); generatePlots(); } - logger.info("Processed: " + result + " sites"); + logger.info("Processed: " + result + " reads"); } private void generatePlots() { @@ -304,7 +468,6 @@ public class BaseRecalibrator extends LocusWalker { RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates); } - /** * go through the quality score table and use the # observations and the empirical quality score * to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to @@ -317,5 +480,4 @@ public class BaseRecalibrator extends LocusWalker { private void generateReport() { RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates); } -} - +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java index ce60f5a3a..962d62d5e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationEngine.java @@ -33,7 +33,5 @@ public interface RecalibrationEngine { public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables); - public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase); - public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java index 1f7fbbf42..6031aa955 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/StandardRecalibrationEngine.java @@ -46,41 +46,30 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP this.recalibrationTables = recalibrationTables; } - /** - * Loop through the list of requested covariates and pick out the value from the read, offset, and reference - * Using the list of covariate values as a key, pick out the RecalDatum and increment, - * adding one to the number of observations and potentially one to the number of mismatches for mismatches only. - * - * @param pileupElement The pileup element to update - * @param refBase The reference base at this locus - */ - @Override - public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) { - final int offset = pileupElement.getOffset(); - final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead()); - - final byte qual = pileupElement.getQual(); - final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase); - - final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); - final int eventIndex = EventType.BASE_SUBSTITUTION.index; - - // TODO: should this really be combine rather than increment? - combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex); - - incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); - - for (int i = 2; i < covariates.length; i++) { - if (keys[i] < 0) - continue; - - incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); - } - } - @Override public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) { - throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version"); + for( int offset = 0; offset < read.getReadBases().length; offset++ ) { + if( !skip[offset] ) { + final ReadCovariates readCovariates = covariateKeySetFrom(read); + + final byte qual = read.getBaseQualities()[offset]; + final double isError = snpErrors[offset]; + + final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION); + final int eventIndex = EventType.BASE_SUBSTITUTION.index; + + combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex); + + incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex); + + for (int i = 2; i < covariates.length; i++) { + if (keys[i] < 0) + continue; + + incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex); + } + } + } } /** @@ -90,10 +79,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP * @param isError whether or not the observation is an error * @return a new RecalDatum object with the observation and the error */ - protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) { - return new RecalDatum(1, isError ? 1:0, reportedQual); - } - protected RecalDatum createDatumObject(final byte reportedQual, final double isError) { return new RecalDatum(1, isError, reportedQual); } @@ -108,46 +93,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE); } - /** - * Increments the RecalDatum at the specified position in the specified table, or put a new item there - * if there isn't already one. - * - * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() - * to return false if another thread inserts a new item at our position in the middle of our put operation. - * - * @param table the table that holds/will hold our item - * @param qual qual for this event - * @param isError error status for this event - * @param keys location in table of our item - */ - protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray table, final byte qual, final boolean isError, final int... keys ) { - final RecalDatum existingDatum = table.get(keys); - - // There are three cases here: - // - // 1. The original get succeeded (existingDatum != null) because there already was an item in this position. - // In this case we can just increment the existing item. - // - // 2. The original get failed (existingDatum == null), and we successfully put a new item in this position - // and are done. - // - // 3. The original get failed (existingDatum == null), AND the put fails because another thread comes along - // in the interim and puts an item in this position. In this case we need to do another get after the - // failed put to get the item the other thread put here and increment it. - if ( existingDatum == null ) { - // No existing item, try to put a new one - if ( ! table.put(createDatumObject(qual, isError), keys) ) { - // Failed to put a new item because another thread came along and put an item here first. - // Get the newly-put item and increment it (item is guaranteed to exist at this point) - table.get(keys).increment(isError); - } - } - else { - // Easy case: already an item here, so increment it - existingDatum.increment(isError); - } - } - /** * Increments the RecalDatum at the specified position in the specified table, or put a new item there * if there isn't already one. @@ -177,36 +122,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP } } - /** - * Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a - * new item there if there isn't already one. - * - * Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put() - * to return false if another thread inserts a new item at our position in the middle of our put operation. - * - * @param table the table that holds/will hold our item - * @param qual qual for this event - * @param isError error status for this event - * @param keys location in table of our item - */ - protected void combineDatumOrPutIfNecessary( final NestedIntegerArray table, final byte qual, final boolean isError, final int... keys ) { - final RecalDatum existingDatum = table.get(keys); - final RecalDatum newDatum = createDatumObject(qual, isError); - - if ( existingDatum == null ) { - // No existing item, try to put a new one - if ( ! table.put(newDatum, keys) ) { - // Failed to put a new item because another thread came along and put an item here first. - // Get the newly-put item and combine it with our item (item is guaranteed to exist at this point) - table.get(keys).combine(newDatum); - } - } - else { - // Easy case: already an item here, so combine it with our item - existingDatum.combine(newDatum); - } - } - /** * Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a * new item there if there isn't already one. diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e50f25fe8..5f6ddf0f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection /** * Note that calculating the SLOD increases the runtime by an appreciable amount. */ - @Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false) - public boolean NO_SLOD = false; + @Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false) + public boolean COMPUTE_SLOD = false; /** * Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping. @@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.GLmodel = uac.GLmodel; this.AFmodel = uac.AFmodel; this.PCR_error = uac.PCR_error; - this.NO_SLOD = uac.NO_SLOD; + this.COMPUTE_SLOD = uac.COMPUTE_SLOD; this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED; this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE; this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a17c2aa1a..36be2e7c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; @@ -238,7 +239,12 @@ public class UnifiedGenotyper extends LocusWalker, Unif } if ( UAC.CONTAMINATION_FRACTION > 0.0 ) { - throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) { + UAC.CONTAMINATION_FRACTION = 0.0; + logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite"); + } else { + throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + } } } @@ -290,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); // annotation (INFO) fields from UnifiedGenotyper - if ( !UAC.NO_SLOD ) + if ( UAC.COMPUTE_SLOD ) VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY); if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index d9b46ad36..879a46ab0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -190,8 +190,8 @@ public class UnifiedGenotyperEngine { final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); - else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) - results.add(generateEmptyContext(tracker, refContext, null, rawContext)); + // else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES) + // results.add(generateEmptyContext(tracker, refContext, null, rawContext)); } } @@ -454,7 +454,7 @@ public class UnifiedGenotyperEngine { attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } - if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { //final boolean DEBUG_SLOD = false; // the overall lod diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java index 7c73f59e9..0165c6cf3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java @@ -438,8 +438,6 @@ public class SomaticIndelDetector extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1); - normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0)); - try { // we already checked that bedOutput and output_file are not set simultaneously if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput); @@ -1152,8 +1150,9 @@ public class SomaticIndelDetector extends ReadWalker { GenotypesContext genotypes = GenotypesContext.create(); for ( String sample : normalSamples ) { - final GenotypeBuilder gb = new GenotypeBuilder(sample); - gb.attributes(call.makeStatsAttributes(null)); + GenotypeBuilder gb = new GenotypeBuilder(sample); + + gb=call.addStatsAttributes(gb); gb.alleles(! discard_event ? alleles // we made a call - put actual het genotype here: : homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all) @@ -1200,8 +1199,11 @@ public class SomaticIndelDetector extends ReadWalker { if ( start == 0 ) return; - Map attrsNormal = nCall.makeStatsAttributes(null); - Map attrsTumor = tCall.makeStatsAttributes(null); + GenotypeBuilder nGB = new GenotypeBuilder(); + GenotypeBuilder tGB = new GenotypeBuilder(); + + nCall.addStatsAttributes(nGB); + tCall.addStatsAttributes(tGB); Map attrs = new HashMap(); @@ -1242,11 +1244,11 @@ public class SomaticIndelDetector extends ReadWalker { GenotypesContext genotypes = GenotypesContext.create(); for ( String sample : normalSamples ) { - genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal)); + genotypes.add(nGB.name(sample).alleles(homRefN ? homRefAlleles : alleles).make()); } for ( String sample : tumorSamples ) { - genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor)); + genotypes.add(tGB.name(sample).alleles(homRefT ? homRefAlleles : alleles).make()); } Set filters = null; @@ -1254,14 +1256,6 @@ public class SomaticIndelDetector extends ReadWalker { filters = new HashSet(); filters.add("NoCall"); } - if ( nCall.getCoverage() < minNormalCoverage ) { - if ( filters == null ) filters = new HashSet(); - filters.add("NCov"); - } - if ( tCall.getCoverage() < minCoverage ) { - if ( filters == null ) filters = new HashSet(); - filters.add("TCov"); - } VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles) .genotypes(genotypes).filters(filters).attributes(attrs).make(); @@ -1844,6 +1838,38 @@ public class SomaticIndelDetector extends ReadWalker { VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr); return attr; } + + + /** + * Adds alignment statistics directly into the genotype builder object. + * + * @param gb + * @return + */ + public GenotypeBuilder addStatsAttributes(GenotypeBuilder gb) { + if ( gb == null ) gb = new GenotypeBuilder(); + + gb = VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),gb); + + gb = VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),gb); + + gb = VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),gb); + + gb = VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),gb); + + gb = VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),gb); + + gb = VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,gb); + + gb = VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,gb); + + PrimitivePair.Int strand_cons = getConsensusStrandCounts(); + PrimitivePair.Int strand_ref = getRefStrandCounts(); + + gb = VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,gb); + return gb; + } + } interface IndelListener { @@ -2170,18 +2196,18 @@ class VCFIndelAttributes { public static Set getAttributeHeaderLines() { Set lines = new HashSet(); - lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site")); + lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus reference/indel at the site")); lines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site")); - lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of ref-/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read")); + lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per ref-/consensus indel-supporting read")); - lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in ref/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads")); + lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases in ref-/consensus indel-supporting reads")); - lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads")); + lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned reference and indel-supporting reads (FwdRef,RevRef,FwdIndel,RevIndel)")); lines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads")); lines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads")); @@ -2194,39 +2220,72 @@ class VCFIndelAttributes { return attrs; } + public static GenotypeBuilder recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev, + GenotypeBuilder gb) { + return gb.attribute(STRAND_COUNT_KEY, new Integer[] {cnt_ref_fwd, cnt_ref_rev,cnt_cons_fwd, cnt_cons_rev } ); + } + public static Map recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map attrs) { - attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_cons, cnt_indel} ); + attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_total-cnt_indel, cnt_cons} ); attrs.put(DEPTH_TOTAL_KEY, cnt_total); return attrs; } + public static GenotypeBuilder recordDepth(int cnt_cons, int cnt_indel, int cnt_total, GenotypeBuilder gb) { + return gb.AD(new int[] {cnt_total-cnt_indel,cnt_cons} ).DP(cnt_total); + } + public static Map recordAvMapQ(double cons, double ref, Map attrs) { - attrs.put(MAPQ_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(MAPQ_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordAvMapQ(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(MAPQ_KEY,new float[] {(float)ref, (float)cons} ); + } + public static Map recordAvMM(double cons, double ref, Map attrs) { - attrs.put(MM_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(MM_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordAvMM(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(MM_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordNQSMMRate(double cons, double ref, Map attrs) { - attrs.put(NQS_MMRATE_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(NQS_MMRATE_KEY, new Float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordNQSMMRate(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(NQS_MMRATE_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordNQSAvQ(double cons, double ref, Map attrs) { - attrs.put(NQS_AVQ_KEY, new Float[] {(float)cons, (float)ref} ); + attrs.put(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} ); return attrs; } + public static GenotypeBuilder recordNQSAvQ(double cons, double ref, GenotypeBuilder gb) { + return gb.attribute(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} ); + } + public static Map recordOffsetFromStart(int median, int mad, Map attrs) { attrs.put(RSTART_OFFSET_KEY, new Integer[] {median, mad} ); return attrs; } + public static GenotypeBuilder recordOffsetFromStart(int median, int mad, GenotypeBuilder gb) { + return gb.attribute(RSTART_OFFSET_KEY, new int[] {median, mad} ); + } + public static Map recordOffsetFromEnd(int median, int mad, Map attrs) { attrs.put(REND_OFFSET_KEY, new Integer[] {median, mad} ); return attrs; } + + public static GenotypeBuilder recordOffsetFromEnd(int median, int mad, GenotypeBuilder gb) { + return gb.attribute(REND_OFFSET_KEY, new int[] {median, mad} ); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index e74684c53..78bcf1228 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -55,8 +55,8 @@ public class AssessReducedQuals extends LocusWalker implem @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) public int qual_epsilon = 0; - @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") - public int debugLevel = 0; + @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional + public int debugLevel = 0; // TODO -- best to make this an enum or boolean @Output protected PrintStream out; @@ -114,7 +114,11 @@ public class AssessReducedQuals extends LocusWalker implem return quals; } + // TODO -- arguments/variables should be final, not method declaration private final boolean isGoodRead(PileupElement p, List tags){ + // TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which + // TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only + // TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff). return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index c7b1d0fc7..d1b7cb96f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker implements TreeR UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - UAC.NO_SLOD = true; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index c92551a73..3e6ab050a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -46,16 +46,20 @@ import java.util.Set; /** - * Strictly validates a variants file. + * Validates a VCF file with an extra strict set of criteria. * *

* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it. - * Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs - * when a dbSNP file is provided, and that all alternate alleles are present in at least one sample. + * In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure + * the information contained within the file is correct. Checks include the correctness of the reference base(s), + * accuracy of AC & AN values, tests against rsIDs when a dbSNP file is provided, and that all alternate alleles + * are present in at least one sample. + * + * If you are looking simply to test the adherence to the VCF specification, use --validationType NONE. * *

Input

*

- * A variant set to filter. + * A variant set to validate. *

* *

Examples

@@ -79,10 +83,9 @@ public class ValidateVariants extends RodWalker { protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); public enum ValidationType { - ALL, REF, IDS, ALLELES, CHR_COUNTS + ALL, REF, IDS, ALLELES, CHR_COUNTS, NONE } - @Hidden @Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false) protected ValidationType type = ValidationType.ALL; @@ -172,7 +175,7 @@ public class ValidateVariants extends RodWalker { numErrors++; logger.warn("***** " + e.getMessage() + " *****"); } else { - throw new UserException.MalformedFile(file, e.getMessage()); + throw new UserException.FailsStrictValidation(file, e.getMessage()); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 5f80f77a4..059e9c5fb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -160,7 +160,7 @@ public class VariantsToVCF extends RodWalker { Map alleleMap = new HashMap(2); alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion())); - alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); + alleleMap.put(RawHapMapFeature.INSERTION, Allele.create((char)ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion())); hapmap.setActualAlleles(alleleMap); // also, use the correct positioning for insertions diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 368cb3d11..2226c6458 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getFreezeFields() { - return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n"); + return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java index 8964c16cb..4455666e8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java +++ b/public/java/src/org/broadinstitute/sting/utils/AutoFormattingTime.java @@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils; * Simple utility class that makes it convenient to print unit adjusted times */ public class AutoFormattingTime { - double timeInSeconds; // in Seconds - int precision; // for format + private final int width; // for format + private final int precision; // for format - public AutoFormattingTime(double timeInSeconds, int precision) { + double timeInSeconds; // in Seconds + private final String formatString; + + public AutoFormattingTime(double timeInSeconds, final int width, int precision) { + this.width = width; this.timeInSeconds = timeInSeconds; this.precision = precision; + this.formatString = "%" + width + "." + precision + "f %s"; + } + + public AutoFormattingTime(double timeInSeconds, int precision) { + this(timeInSeconds, 6, precision); } public AutoFormattingTime(double timeInSeconds) { @@ -20,6 +29,20 @@ public class AutoFormattingTime { return timeInSeconds; } + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getWidth() { + return width; + } + + /** + * @return the precision (a la format's %WIDTH.PERCISIONf) + */ + public int getPrecision() { + return precision; + } + /** * Instead of 10000 s, returns 2.8 hours * @return @@ -48,6 +71,6 @@ public class AutoFormattingTime { } } - return String.format("%6."+precision+"f %s", unitTime, unit); + return String.format(formatString, unitTime, unit); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index 2663e848f..44a3e9af3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -159,6 +159,7 @@ public class VCFHeader { */ public void addMetaDataLine(VCFHeaderLine headerLine) { mMetaData.add(headerLine); + loadMetaDataMaps(); } /** @@ -236,7 +237,6 @@ public class VCFHeader { + VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally" + " automatically adding a corresponding PL field to your VCF header"); addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification")); - loadMetaDataMaps(); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index c1f408bc7..a49a12292 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -283,6 +283,12 @@ public class UserException extends ReviewedStingException { } } + public static class FailsStrictValidation extends UserException { + public FailsStrictValidation(File f, String message) { + super(String.format("File %s fails strict validation: %s", f.getAbsolutePath(), message)); + } + } + public static class MalformedFile extends UserException { public MalformedFile(String message) { super(String.format("Unknown file is malformed: %s", message)); diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java index 69cf52fc2..a8715e242 100755 --- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java +++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.utils.progressmeter; +import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.*; @@ -200,6 +201,14 @@ public class ProgressMeter { "Location", processingUnitName, processingUnitName)); } + /** + * @return the current runtime in nanoseconds + */ + @Ensures("result >= 0") + public long getRuntimeInNanoseconds() { + return timer.getElapsedTimeNano(); + } + /** * Utility routine that prints out process information (including timing) every N records or * every M seconds, for N and M set in global variables. diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 2f9387acb..5d4020a07 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -61,17 +61,6 @@ public class BaseRecalibration { // qualityScoreByFullCovariateKey[i] = new NestedHashMap(); // } - /** - * Thread local cache to allow multi-threaded use of this class - */ - private ThreadLocal readCovariatesCache; - { - readCovariatesCache = new ThreadLocal () { - @Override protected ReadCovariates initialValue() { - return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - } - }; - } /** * Constructor using a GATK Report file @@ -113,16 +102,7 @@ public class BaseRecalibration { } } - // Compute all covariates for the read - // TODO -- the need to clear here suggests there's an error in the indexing / assumption code - // TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads? - // TODO -- the output varies depending on whether we clear or not - //final ReadCovariates readCovariates = readCovariatesCache.get().clear(); - - // the original code -- doesn't do any clearing - final ReadCovariates readCovariates = readCovariatesCache.get(); - - RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); + final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index e453e2f8a..b1aa4e759 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1073,13 +1073,13 @@ public class VariantContext implements Feature { // to enable tribble integratio } if ( reportedAlleles.size() != observedAlleles.size() ) - throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); + throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart())); int originalSize = reportedAlleles.size(); // take the intersection and see if things change observedAlleles.retainAll(reportedAlleles); if ( observedAlleles.size() != originalSize ) - throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart())); + throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart())); } public void validateChromosomeCounts() { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index 913615a84..abe85e383 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -193,6 +193,8 @@ class JEXLMap implements Map { infoMap.put("isHet", g.isHet() ? "1" : "0"); infoMap.put("isHomVar", g.isHomVar() ? "1" : "0"); infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ()); + if ( g.hasDP() ) + infoMap.put(VCFConstants.DEPTH_KEY, g.getDP()); for ( Map.Entry e : g.getExtendedAttributes().entrySet() ) { if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) ) infoMap.put(e.getKey(), e.getValue()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java new file mode 100644 index 000000000..6cfd7bf46 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/MaxRuntimeIntegrationTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.SimpleTimer; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +/** + * + */ +public class MaxRuntimeIntegrationTest extends WalkerTest { + private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS); + + private class MaxRuntimeTestProvider extends TestDataProvider { + final long maxRuntime; + final TimeUnit unit; + + public MaxRuntimeTestProvider(final long maxRuntime, final TimeUnit unit) { + super(MaxRuntimeTestProvider.class); + this.maxRuntime = maxRuntime; + this.unit = unit; + setName(String.format("Max runtime test : %d of %s", maxRuntime, unit)); + } + + public long expectedMaxRuntimeNano() { + return TimeUnit.NANOSECONDS.convert(maxRuntime, unit) + STARTUP_TIME; + } + } + + @DataProvider(name = "MaxRuntimeProvider") + public Object[][] makeMaxRuntimeProvider() { + for ( final TimeUnit requestedUnits : Arrays.asList(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES) ) + new MaxRuntimeTestProvider(requestedUnits.convert(30, TimeUnit.SECONDS), requestedUnits); + + return MaxRuntimeTestProvider.getTests(MaxRuntimeTestProvider.class); + } + + // + // Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type + // + @Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 60 * 1000) + public void testMaxRuntime(final MaxRuntimeTestProvider cfg) { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + hg18Reference + + " -I " + validationDataLocation + "NA12878.WEx.downsampled20x.bam -o /dev/null" + + " -maxRuntime " + cfg.maxRuntime + " -maxRuntimeUnits " + cfg.unit, 0, + Collections.emptyList()); + final SimpleTimer timer = new SimpleTimer().start(); + executeTest("Max runtime " + cfg, spec); + final long actualRuntimeNano = timer.getElapsedTimeNano(); + + Assert.assertTrue(actualRuntimeNano < cfg.expectedMaxRuntimeNano(), + "Actual runtime " + TimeUnit.SECONDS.convert(actualRuntimeNano, TimeUnit.NANOSECONDS) + + " exceeded max. tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS) + + " given requested runtime " + cfg.maxRuntime + " " + cfg.unit); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 97b985a29..27e5f3d46 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { Arrays.asList("8ed32a2272bab8043a255362335395ef")); executeTest("testUnfilteredBecomesFilteredAndPass", spec); } + + @Test + public void testFilteringDPfromINFO() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("a01f7cce53ea556c9741aa60b6124c41")); + executeTest("testFilteringDPfromINFO", spec); + } + + @Test + public void testFilteringDPfromFORMAT() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("e10485c7c33d9211d0c1294fd7858476")); + executeTest("testFilteringDPfromFORMAT", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8eb1c4657..9212d0e53 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -16,9 +16,9 @@ import java.util.List; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; // -------------------------------------------------------------------------------------------------------------- @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("847605f4efafef89529fe0e496315edd")); + Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a")); executeTest("test MultiSample Pilot1", spec); } @@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("bc15123620e1134f799005d71d1180fe")); + Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("1ba7afccc8552f20d72d0b62237558e3")); + Arrays.asList("24532eb381724cd74e99370da28d49ed")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -54,22 +54,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("afb8768f31ab57eb43f75c1115eadc99")); + Arrays.asList("062a946160eec1d0fc135d58ca654ff4")); executeTest("test SingleSample Pilot2", spec); } @Test public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("543f68e42034bf44cfb24da8c9204320")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, + Arrays.asList("94dc17d76d841f1d3a36160767ffa034")); executeTest("test Multiple SNP alleles", spec); } @Test public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, Arrays.asList("d915535c1458733f09f82670092fcab6")); executeTest("test bad read", spec); } @@ -77,16 +77,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + Arrays.asList("9106d01ca0d0a8fedd068e72d509f380")); executeTest("test reverse trim", spec); } @Test public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("d847acf841ba8ba653f996ce4869f439")); executeTest("test mismatched PLs", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10"; + private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad"; @Test public void testCompressedOutput() { @@ -120,21 +120,21 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { String md5 = "d408b4661b820ed86272415b8ea08780"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, Arrays.asList(md5)); executeTest("test parallelization (single thread)", spec1); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, Arrays.asList(md5)); executeTest("test parallelization (2 threads)", spec2); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, + baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, Arrays.asList(md5)); executeTest("test parallelization (4 threads)", spec3); } @@ -149,15 +149,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("839ecd30d354a36b5dfa2b5e99859765")); + Arrays.asList("56157d930da6ccd224bce1ca93f11e41")); executeTest("test min_base_quality_score 26", spec); } @Test public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9")); + "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603")); executeTest("test SLOD", spec); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2")); + Arrays.asList("480437dd6e2760f4ab3194431519f331")); executeTest("test NDA", spec); } @@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186")); + Arrays.asList("22c039412fd387dde6125b07c9a74a25")); executeTest("test using comp track", spec); } @@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311"); + testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841"); } private void testOutputParameters(final String args, final String md5) { @@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); + Arrays.asList("df524e98903d96ab9353bee7c16a69de")); executeTest("test confidence 1", spec1); } - @Test - public void testConfidence2() { - WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); - executeTest("test confidence 2", spec2); - } - // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" ); + testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" ); + testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" ); } private void testHeterozosity(final double arg, final String md5) { @@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("04a87b87ee4323eba853c78f25551d1a")); + Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835")); executeTest(String.format("test multiple technologies"), spec); } @@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("98058fc913b61c22d44875da1f5ea89c")); + Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52")); executeTest(String.format("test calling with BAQ"), spec); } @@ -297,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("650c53774afacfc07a595675e8cdde17")); + Arrays.asList("3593495aab5f6204c65de0b073a6ff65")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa")); + Arrays.asList("8b486a098029d5a106b0a37eff541c15")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("5f2721c3323de5390d2d47446139f32b")); + Arrays.asList("18efedc50cae2aacaba372265e38310b")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -335,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a")); + Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -345,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f")); + Arrays.asList("578c0540f4f2052a634a829bcb9cc27d")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("a4761d7f25e7a62f34494801c98a0da7")); + Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c526c234947482d1cd2ffc5102083a08")); + Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0")); + Arrays.asList("857b8e5df444463ac27f665c4f67fbe2")); executeTest("test minIndelFraction 0.0", spec); } @@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("4c57a88de275105156aaafc6f9041365")); + Arrays.asList("81d4c7d9010fd6733b2997bc378e7471")); executeTest("test minIndelFraction 0.25", spec); } @@ -444,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, - Arrays.asList("e8ebfaac0804b782f22ab8ea35152735")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, + Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f")); executeTest("test calling on reads with Ns in CIGAR", spec); } @@ -458,25 +450,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287"); + testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca"); + testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5"); } private void testReducedCalling(final String model, final String md5) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, Arrays.asList(md5)); executeTest("test calling on a ReducedRead BAM with " + model, spec); } @@ -490,7 +482,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testContaminationDownsampling() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_percentage_to_filter 0.20", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1, Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae")); executeTest("test contamination_percentage_to_filter 0.20", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java index 6a3d755d7..67d47997b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariantsIntegrationTest.java @@ -33,6 +33,8 @@ import java.util.Arrays; public class ValidateVariantsIntegrationTest extends WalkerTest { + protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e"; + public static String baseTestString(String file, String type) { return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type; } @@ -42,7 +44,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleGood.vcf", "ALL"), 0, - Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + Arrays.asList(emptyMd5) ); executeTest("test good file", spec); @@ -53,7 +55,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref base #1", spec); @@ -64,7 +66,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad2.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref base #2", spec); @@ -75,7 +77,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "CHR_COUNTS"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad chr counts #1", spec); @@ -86,7 +88,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad2.vcf", "CHR_COUNTS"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad chr counts #2", spec); @@ -97,7 +99,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129, 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad RS ID", spec); @@ -108,7 +110,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad.vcf", "ALLELES"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad alt allele", spec); @@ -119,18 +121,29 @@ public class ValidateVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("validationExampleBad3.vcf", "REF"), 0, - UserException.MalformedFile.class + UserException.FailsStrictValidation.class ); executeTest("test bad ref allele in deletion", spec); } + @Test + public void testNoValidation() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("validationExampleBad.vcf", "NONE"), + 0, + Arrays.asList(emptyMd5) + ); + + executeTest("test no validation", spec); + } + @Test public void testComplexEvents() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString("complexEvents.vcf", "ALL"), 0, - Arrays.asList("d41d8cd98f00b204e9800998ecf8427e") + Arrays.asList(emptyMd5) ); executeTest("test validating complex events", spec); diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java index 37614f15f..80b0b4ee2 100755 --- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -32,11 +32,12 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T UnifiedGenotyper -R " + b37KGReference, - "-nosl --no_cmdline_in_header -G", + "--no_cmdline_in_header -G", //"--dbsnp " + b37dbSNP132, "-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam", "-L 20:10,000,000-10,100,000", "-glm " + glm, + "--contamination_fraction_to_filter 0.0", "-nt " + nt, "-nct " + nct, "-o %s" diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 9e8815762..951e380c3 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -141,8 +141,8 @@ class GATKResourcesBundle extends QScript { // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT) // - addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf", - "CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false)) + addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/CEUTrio.HiSeq.WGS.b37.snps_and_indels.recalibrated.filtered.phased.CURRENT.vcf", + "CEUTrio.HiSeq.WGS.b37.bestPractices.phased",b37,true,false)) // // example call set for wiki tutorial diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 19f00ac62..944ef7977 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -60,7 +60,7 @@ class DataProcessingPipelineTest { " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "6e70efbe6bafc3fedd60bd406bd201db" + spec.fileMD5s += testOut -> "9fca827ecc8436465b831bb6f879357a" PipelineTest.executeTest(spec) } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 74e947377..3e9af3e68 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest { " -blasr ", " -test ", " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084" + spec.fileMD5s += testOut -> "b84f9c45e045685067ded681d5e6224c" PipelineTest.executeTest(spec) } }