diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index e224a9c57..830fd111b 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -60,6 +60,10 @@ public class AlleleBiasedDownsamplingUtils { // start by stratifying the reads by the alleles they represent at this position for( final PileupElement pe : pileup ) { + // abort if we have a reduced read - we do not want to remove it! + if ( pe.getRead().isReducedRead() ) + return pileup; + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); if ( baseIndex != -1 ) alleleStratifiedElements[baseIndex].add(pe); diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java index d6cddcf84..77a7c3bd9 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java +++ b/protected/java/src/org/broadinstitute/sting/utils/genotyper/AdvancedPerReadAlleleLikelihoodMap.java @@ -55,9 +55,12 @@ public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLik alleleReadMap.put(allele, new ArrayList()); for ( Map.Entry> entry : likelihoodReadMap.entrySet() ) { - final Allele bestAllele = getMostLikelyAllele(entry.getValue()); - if ( bestAllele != Allele.NO_CALL ) - alleleReadMap.get(bestAllele).add(entry.getKey()); + // do not remove reduced reads! + if ( !entry.getKey().isReducedRead() ) { + final Allele bestAllele = getMostLikelyAllele(entry.getValue()); + if ( bestAllele != Allele.NO_CALL ) + alleleReadMap.get(bestAllele).add(entry.getKey()); + } } // compute the reads to remove and actually remove them diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 90d67d393..a50219f48 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; /** * @author ebanks @@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest { @DataProvider(name = "PRTest") public Object[][] createPRTestData() { - return new Object[][]{ - {new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")}, - {new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")}, - {new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")}, - {new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")} - }; + List tests = new ArrayList(); + + tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")}); + tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}); + + for ( final int nct : Arrays.asList(1, 2, 4) ) { + tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")}); + } + + return tests.toArray(new Object[][]{}); } @Test(dataProvider = "PRTest") - public void testPR(PRTest params) { + public void testPR(final int nct, PRTest params) { WalkerTestSpec spec = new WalkerTestSpec( "-T PrintReads" + " -R " + hg18Reference + " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" + + " -nct " + nct + " -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" + params.args + " -o %s", diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java index ae3befe66..73bc8fba6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java @@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest { @Test(enabled = true) public void testSNP_ACS_Pools() { - PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8"); + PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705"); } @Test(enabled = true) public void testBOTH_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f"); + PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592"); + PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55"); } @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8"); + PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83"); + PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d"); + PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 71f03ea00..86f3748ce 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "1d826f852ec1457efbfa7ee828c5783a"); + HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3"); + HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b"); } @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047")); executeTest("HCTestStructuralIndels: ", spec); } @@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f")); + Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9")); executeTest("HC calling on a ReducedRead BAM", spec); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 2f6d9d16f..547f375bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -75,8 +75,9 @@ public class StandardCallerArgumentCollection { * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we * will try to remove (N * contamination fraction) bases for each alternate allele. */ - @Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) - public double CONTAMINATION_FRACTION = 0.0; + @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) + public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION; + public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05; @Hidden @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e50f25fe8..5f6ddf0f1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection /** * Note that calculating the SLOD increases the runtime by an appreciable amount. */ - @Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false) - public boolean NO_SLOD = false; + @Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false) + public boolean COMPUTE_SLOD = false; /** * Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping. @@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.GLmodel = uac.GLmodel; this.AFmodel = uac.AFmodel; this.PCR_error = uac.PCR_error; - this.NO_SLOD = uac.NO_SLOD; + this.COMPUTE_SLOD = uac.COMPUTE_SLOD; this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED; this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE; this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a17c2aa1a..36be2e7c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; @@ -238,7 +239,12 @@ public class UnifiedGenotyper extends LocusWalker, Unif } if ( UAC.CONTAMINATION_FRACTION > 0.0 ) { - throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) { + UAC.CONTAMINATION_FRACTION = 0.0; + logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite"); + } else { + throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling"); + } } } @@ -290,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); // annotation (INFO) fields from UnifiedGenotyper - if ( !UAC.NO_SLOD ) + if ( UAC.COMPUTE_SLOD ) VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY); if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3c4a97ec1..97254c478 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -451,7 +451,7 @@ public class UnifiedGenotyperEngine { attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); } - if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { //final boolean DEBUG_SLOD = false; // the overall lod diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java new file mode 100644 index 000000000..e74684c53 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -0,0 +1,169 @@ +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.io.PrintStream; +import java.util.List; + +/** + * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of + * the reduced bam are above sufficient threshold) + * + *

Input

+ *

+ * The original and reduced BAM files. + *

+ * + *

Output

+ *

+ * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -I:original original.bam \
+ *   -I:reduced reduced.bam \
+ *   -R ref.fasta \
+ *   -T AssessReducedQuals \
+ *   -o output.intervals
+ * 
+ * + * @author ami + */ + +public class AssessReducedQuals extends LocusWalker implements TreeReducible { + + private static final String reduced = "reduced"; + private static final String original = "original"; + private static final int originalQualsIndex = 0; + private static final int reducedQualsIndex = 1; + + @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false) + public int sufficientQualSum = 600; + + @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) + public int qual_epsilon = 0; + + @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") + public int debugLevel = 0; + + @Output + protected PrintStream out; + + public void initialize() { + if (debugLevel != 0) + out.println(" Debug mode" + + "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " + + "Debug:\tqual_epsilon: "+qual_epsilon); + } + + @Override + public boolean includeReadsWithDeletionAtLoci() { return true; } + + @Override + public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return null; + + boolean reportLocus; + final int[] quals = getPileupQuals(context.getBasePileup()); + if (debugLevel != 0) + out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]); + final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon); + final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum); + final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum); + final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals; + reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon; + if(debugLevel != 0 && reportLocus) + out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff); + + return reportLocus ? ref.getLocus() : null; + } + + private final int[] getPileupQuals(final ReadBackedPileup readPileup) { + + final int[] quals = new int[2]; + String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n", + "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"}; + + for( PileupElement p : readPileup ){ + final List tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags(); + if ( isGoodRead(p,tags) ){ + final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount(); + final int tagIndex = getTagIndex(tags); + quals[tagIndex] += tempQual; + if(debugLevel == 2) + printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n"; + } + } + if(debugLevel == 2){ + out.println(printPileup[originalQualsIndex]); + out.println(printPileup[reducedQualsIndex]); + } + return quals; + } + + private final boolean isGoodRead(PileupElement p, List tags){ + return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); + } + + private final int getTagIndex(List tags){ + return tags.contains(reduced) ? 1 : 0; + } + + @Override + public void onTraversalDone(GenomeLoc sum) { + if ( sum != null ) + out.println(sum); + } + + @Override + public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { + if ( lhs == null ) + return rhs; + + if ( rhs == null ) + return lhs; + + // if contiguous, just merge them + if ( lhs.contiguousP(rhs) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop()); + + // otherwise, print the lhs and start over with the rhs + out.println(lhs); + return rhs; + } + + @Override + public GenomeLoc reduceInit() { + return null; + } + + @Override + public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) { + if ( value == null ) + return sum; + + if ( sum == null ) + return value; + + // if contiguous, just merge them + if ( sum.contiguousP(value) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop()); + + // otherwise, print the sum and start over with the value + out.println(sum); + return value; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index c7b1d0fc7..d1b7cb96f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker implements TreeR UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - UAC.NO_SLOD = true; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); } diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index 368cb3d11..2226c6458 100644 --- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override protected String getFreezeFields() { - return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n"); + return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java index c83de8ec7..050ed52ac 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.utils.collections; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -38,39 +40,50 @@ import java.util.List; public class NestedIntegerArray { + private static Logger logger = Logger.getLogger(NestedIntegerArray.class); + protected final Object[] data; protected final int numDimensions; protected final int[] dimensions; + // Preallocate the first two dimensions to limit contention during tree traversals in put() + private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2; + public NestedIntegerArray(final int... dimensions) { numDimensions = dimensions.length; if ( numDimensions == 0 ) throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray"); this.dimensions = dimensions.clone(); + int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE); + + logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions))); + logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate)); + data = new Object[dimensions[0]]; - prepopulateArray(data, 0); + preallocateArray(data, 0, dimensionsToPreallocate); + + logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate)); } /** - * Recursively allocate the entire tree of arrays in all its dimensions. + * Recursively allocate the first dimensionsToPreallocate dimensions of the tree * - * Doing this upfront uses more memory initially, but saves time over the course of the run - * and (crucially) avoids having to make threads wait while traversing the tree to check - * whether branches exist or not. + * Pre-allocating the first few dimensions helps limit contention during tree traversals in put() * * @param subarray current node in the tree * @param dimension current level in the tree + * @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first) */ - private void prepopulateArray( Object[] subarray, int dimension ) { - if ( dimension >= numDimensions - 1 ) { + private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) { + if ( dimension >= dimensionsToPreallocate - 1 ) { return; } for ( int i = 0; i < subarray.length; i++ ) { subarray[i] = new Object[dimensions[dimension + 1]]; - prepopulateArray((Object[])subarray[i], dimension + 1); + preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate); } } @@ -82,8 +95,9 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) return null; - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + myData = (Object[])myData[keys[i]]; + if ( myData == null ) + return null; } return (T)myData[keys[numNestedDimensions]]; @@ -92,8 +106,8 @@ public class NestedIntegerArray { /** * Insert a value at the position specified by the given keys. * - * This method is THREAD-SAFE despite not being synchronized, however the caller MUST - * check the return value to see if the put succeeded. This method RETURNS FALSE if + * This method is thread-safe, however the caller MUST check the + * return value to see if the put succeeded. This method RETURNS FALSE if * the value could not be inserted because there already was a value present * at the specified location. In this case the caller should do a get() to get * the already-existing value and (potentially) update it. @@ -113,8 +127,17 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")"); - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + // If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized + // check to see if the next branch exists, and if it doesn't, create it + if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) { + synchronized ( myData ) { + if ( myData[keys[i]] == null ) { + myData[keys[i]] = new Object[dimensions[i + 1]]; + } + } + } + + myData = (Object[])myData[keys[i]]; } synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 7ad9302a8..5d4020a07 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -61,17 +61,6 @@ public class BaseRecalibration { // qualityScoreByFullCovariateKey[i] = new NestedHashMap(); // } - /** - * Thread local cache to allow multi-threaded use of this class - */ - private ThreadLocal readCovariatesCache; - { - readCovariatesCache = new ThreadLocal () { - @Override protected ReadCovariates initialValue() { - return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); - } - }; - } /** * Constructor using a GATK Report file @@ -113,13 +102,7 @@ public class BaseRecalibration { } } - // Compute all covariates for the read - // TODO -- the need to clear here suggests there's an error in the indexing / assumption code - // TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads? - // TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here - // TODO -- needs to be fixed. - final ReadCovariates readCovariates = readCovariatesCache.get().clear(); - RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); + final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java index 913615a84..abe85e383 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantJEXLContext.java @@ -193,6 +193,8 @@ class JEXLMap implements Map { infoMap.put("isHet", g.isHet() ? "1" : "0"); infoMap.put("isHomVar", g.isHomVar() ? "1" : "0"); infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ()); + if ( g.hasDP() ) + infoMap.put(VCFConstants.DEPTH_KEY, g.getDP()); for ( Map.Entry e : g.getExtendedAttributes().entrySet() ) { if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) ) infoMap.put(e.getKey(), e.getValue()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 97b985a29..27e5f3d46 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { Arrays.asList("8ed32a2272bab8043a255362335395ef")); executeTest("testUnfilteredBecomesFilteredAndPass", spec); } + + @Test + public void testFilteringDPfromINFO() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("a01f7cce53ea556c9741aa60b6124c41")); + executeTest("testFilteringDPfromINFO", spec); + } + + @Test + public void testFilteringDPfromFORMAT() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1, + Arrays.asList("e10485c7c33d9211d0c1294fd7858476")); + executeTest("testFilteringDPfromFORMAT", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8eb1c4657..1b2fbb848 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -16,9 +16,9 @@ import java.util.List; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; // -------------------------------------------------------------------------------------------------------------- @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("847605f4efafef89529fe0e496315edd")); + Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a")); executeTest("test MultiSample Pilot1", spec); } @@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("bc15123620e1134f799005d71d1180fe")); + Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("1ba7afccc8552f20d72d0b62237558e3")); + Arrays.asList("24532eb381724cd74e99370da28d49ed")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -54,22 +54,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("afb8768f31ab57eb43f75c1115eadc99")); + Arrays.asList("062a946160eec1d0fc135d58ca654ff4")); executeTest("test SingleSample Pilot2", spec); } @Test public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("543f68e42034bf44cfb24da8c9204320")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, + Arrays.asList("94dc17d76d841f1d3a36160767ffa034")); executeTest("test Multiple SNP alleles", spec); } @Test public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, Arrays.asList("d915535c1458733f09f82670092fcab6")); executeTest("test bad read", spec); } @@ -77,16 +77,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + Arrays.asList("9106d01ca0d0a8fedd068e72d509f380")); executeTest("test reverse trim", spec); } @Test public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("d847acf841ba8ba653f996ce4869f439")); executeTest("test mismatched PLs", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10"; + private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad"; @Test public void testCompressedOutput() { @@ -117,7 +117,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "d408b4661b820ed86272415b8ea08780"; + String md5 = "3d25ea660275a455ca443a786bff3d32"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -149,15 +149,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("839ecd30d354a36b5dfa2b5e99859765")); + Arrays.asList("56157d930da6ccd224bce1ca93f11e41")); executeTest("test min_base_quality_score 26", spec); } @Test public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9")); + "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + Arrays.asList("9f95cfe14d53a697c58247833bfd72a6")); executeTest("test SLOD", spec); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2")); + Arrays.asList("480437dd6e2760f4ab3194431519f331")); executeTest("test NDA", spec); } @@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186")); + Arrays.asList("22c039412fd387dde6125b07c9a74a25")); executeTest("test using comp track", spec); } @@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311"); + testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841"); } private void testOutputParameters(final String args, final String md5) { @@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); + Arrays.asList("df524e98903d96ab9353bee7c16a69de")); executeTest("test confidence 1", spec1); } - @Test - public void testConfidence2() { - WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("9addd225a985178339a0c49dc5fdc220")); - executeTest("test confidence 2", spec2); - } - // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" ); + testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" ); + testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" ); } private void testHeterozosity(final double arg, final String md5) { @@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("04a87b87ee4323eba853c78f25551d1a")); + Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835")); executeTest(String.format("test multiple technologies"), spec); } @@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("98058fc913b61c22d44875da1f5ea89c")); + Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52")); executeTest(String.format("test calling with BAQ"), spec); } @@ -297,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("650c53774afacfc07a595675e8cdde17")); + Arrays.asList("3593495aab5f6204c65de0b073a6ff65")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa")); + Arrays.asList("8b486a098029d5a106b0a37eff541c15")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("5f2721c3323de5390d2d47446139f32b")); + Arrays.asList("18efedc50cae2aacaba372265e38310b")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -335,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a")); + Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -345,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f")); + Arrays.asList("578c0540f4f2052a634a829bcb9cc27d")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("a4761d7f25e7a62f34494801c98a0da7")); + Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c526c234947482d1cd2ffc5102083a08")); + Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0")); + Arrays.asList("857b8e5df444463ac27f665c4f67fbe2")); executeTest("test minIndelFraction 0.0", spec); } @@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("4c57a88de275105156aaafc6f9041365")); + Arrays.asList("81d4c7d9010fd6733b2997bc378e7471")); executeTest("test minIndelFraction 0.25", spec); } @@ -444,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, - Arrays.asList("e8ebfaac0804b782f22ab8ea35152735")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, + Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f")); executeTest("test calling on reads with Ns in CIGAR", spec); } @@ -458,25 +450,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19")); + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + Arrays.asList("1b711c0966a2f76eb21801e73b76b758")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287"); + testReducedCalling("SNP", "9ba4867cadb366746ee63e7a4afdb95e"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca"); + testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5"); } private void testReducedCalling(final String model, final String md5) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, + "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1, Arrays.asList(md5)); executeTest("test calling on a ReducedRead BAM with " + model, spec); } @@ -490,7 +482,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testContaminationDownsampling() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_percentage_to_filter 0.20", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1, Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae")); executeTest("test contamination_percentage_to_filter 0.20", spec); } diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 5e66520ca..9e8815762 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", "Mills_and_1000G_gold_standard.indels", b37, true, false)) - + + // + // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT) + // + + addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf", + "CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false)) + // // example call set for wiki tutorial //