diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 90d67d393..a50219f48 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; /** * @author ebanks @@ -118,20 +120,26 @@ public class BQSRIntegrationTest extends WalkerTest { @DataProvider(name = "PRTest") public Object[][] createPRTestData() { - return new Object[][]{ - {new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")}, - {new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")}, - {new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")}, - {new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")} - }; + List tests = new ArrayList(); + + tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")}); + tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")}); + tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")}); + + for ( final int nct : Arrays.asList(1, 2, 4) ) { + tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")}); + } + + return tests.toArray(new Object[][]{}); } @Test(dataProvider = "PRTest") - public void testPR(PRTest params) { + public void testPR(final int nct, PRTest params) { WalkerTestSpec spec = new WalkerTestSpec( "-T PrintReads" + " -R " + hg18Reference + " -I " + privateTestDir + "HiSeq.1mb.1RG.bam" + + " -nct " + nct + " -BQSR " + privateTestDir + "HiSeq.20mb.1RG.table" + params.args + " -o %s", diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java new file mode 100644 index 000000000..e74684c53 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -0,0 +1,169 @@ +package org.broadinstitute.sting.gatk.walkers.qc; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.io.PrintStream; +import java.util.List; + +/** + * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of + * the reduced bam are above sufficient threshold) + * + *

Input

+ *

+ * The original and reduced BAM files. + *

+ * + *

Output

+ *

+ * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon. + *

+ * + *

Examples

+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -I:original original.bam \
+ *   -I:reduced reduced.bam \
+ *   -R ref.fasta \
+ *   -T AssessReducedQuals \
+ *   -o output.intervals
+ * 
+ * + * @author ami + */ + +public class AssessReducedQuals extends LocusWalker implements TreeReducible { + + private static final String reduced = "reduced"; + private static final String original = "original"; + private static final int originalQualsIndex = 0; + private static final int reducedQualsIndex = 1; + + @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false) + public int sufficientQualSum = 600; + + @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) + public int qual_epsilon = 0; + + @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") + public int debugLevel = 0; + + @Output + protected PrintStream out; + + public void initialize() { + if (debugLevel != 0) + out.println(" Debug mode" + + "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " + + "Debug:\tqual_epsilon: "+qual_epsilon); + } + + @Override + public boolean includeReadsWithDeletionAtLoci() { return true; } + + @Override + public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return null; + + boolean reportLocus; + final int[] quals = getPileupQuals(context.getBasePileup()); + if (debugLevel != 0) + out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]); + final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon); + final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum); + final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum); + final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals; + reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon; + if(debugLevel != 0 && reportLocus) + out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff); + + return reportLocus ? ref.getLocus() : null; + } + + private final int[] getPileupQuals(final ReadBackedPileup readPileup) { + + final int[] quals = new int[2]; + String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n", + "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"}; + + for( PileupElement p : readPileup ){ + final List tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags(); + if ( isGoodRead(p,tags) ){ + final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount(); + final int tagIndex = getTagIndex(tags); + quals[tagIndex] += tempQual; + if(debugLevel == 2) + printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n"; + } + } + if(debugLevel == 2){ + out.println(printPileup[originalQualsIndex]); + out.println(printPileup[reducedQualsIndex]); + } + return quals; + } + + private final boolean isGoodRead(PileupElement p, List tags){ + return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); + } + + private final int getTagIndex(List tags){ + return tags.contains(reduced) ? 1 : 0; + } + + @Override + public void onTraversalDone(GenomeLoc sum) { + if ( sum != null ) + out.println(sum); + } + + @Override + public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { + if ( lhs == null ) + return rhs; + + if ( rhs == null ) + return lhs; + + // if contiguous, just merge them + if ( lhs.contiguousP(rhs) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop()); + + // otherwise, print the lhs and start over with the rhs + out.println(lhs); + return rhs; + } + + @Override + public GenomeLoc reduceInit() { + return null; + } + + @Override + public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) { + if ( value == null ) + return sum; + + if ( sum == null ) + return value; + + // if contiguous, just merge them + if ( sum.contiguousP(value) ) + return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop()); + + // otherwise, print the sum and start over with the value + out.println(sum); + return value; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java index c83de8ec7..050ed52ac 100755 --- a/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/NestedIntegerArray.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.utils.collections; +import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -38,39 +40,50 @@ import java.util.List; public class NestedIntegerArray { + private static Logger logger = Logger.getLogger(NestedIntegerArray.class); + protected final Object[] data; protected final int numDimensions; protected final int[] dimensions; + // Preallocate the first two dimensions to limit contention during tree traversals in put() + private static final int NUM_DIMENSIONS_TO_PREALLOCATE = 2; + public NestedIntegerArray(final int... dimensions) { numDimensions = dimensions.length; if ( numDimensions == 0 ) throw new ReviewedStingException("There must be at least one dimension to an NestedIntegerArray"); this.dimensions = dimensions.clone(); + int dimensionsToPreallocate = Math.min(dimensions.length, NUM_DIMENSIONS_TO_PREALLOCATE); + + logger.info(String.format("Creating NestedIntegerArray with dimensions %s", Arrays.toString(dimensions))); + logger.info(String.format("Pre-allocating first %d dimensions", dimensionsToPreallocate)); + data = new Object[dimensions[0]]; - prepopulateArray(data, 0); + preallocateArray(data, 0, dimensionsToPreallocate); + + logger.info(String.format("Done pre-allocating first %d dimensions", dimensionsToPreallocate)); } /** - * Recursively allocate the entire tree of arrays in all its dimensions. + * Recursively allocate the first dimensionsToPreallocate dimensions of the tree * - * Doing this upfront uses more memory initially, but saves time over the course of the run - * and (crucially) avoids having to make threads wait while traversing the tree to check - * whether branches exist or not. + * Pre-allocating the first few dimensions helps limit contention during tree traversals in put() * * @param subarray current node in the tree * @param dimension current level in the tree + * @param dimensionsToPreallocate preallocate only this many dimensions (starting from the first) */ - private void prepopulateArray( Object[] subarray, int dimension ) { - if ( dimension >= numDimensions - 1 ) { + private void preallocateArray( Object[] subarray, int dimension, int dimensionsToPreallocate ) { + if ( dimension >= dimensionsToPreallocate - 1 ) { return; } for ( int i = 0; i < subarray.length; i++ ) { subarray[i] = new Object[dimensions[dimension + 1]]; - prepopulateArray((Object[])subarray[i], dimension + 1); + preallocateArray((Object[])subarray[i], dimension + 1, dimensionsToPreallocate); } } @@ -82,8 +95,9 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) return null; - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + myData = (Object[])myData[keys[i]]; + if ( myData == null ) + return null; } return (T)myData[keys[numNestedDimensions]]; @@ -92,8 +106,8 @@ public class NestedIntegerArray { /** * Insert a value at the position specified by the given keys. * - * This method is THREAD-SAFE despite not being synchronized, however the caller MUST - * check the return value to see if the put succeeded. This method RETURNS FALSE if + * This method is thread-safe, however the caller MUST check the + * return value to see if the put succeeded. This method RETURNS FALSE if * the value could not be inserted because there already was a value present * at the specified location. In this case the caller should do a get() to get * the already-existing value and (potentially) update it. @@ -113,8 +127,17 @@ public class NestedIntegerArray { if ( keys[i] >= dimensions[i] ) throw new ReviewedStingException("Key " + keys[i] + " is too large for dimension " + i + " (max is " + (dimensions[i]-1) + ")"); - myData = (Object[])myData[keys[i]]; // interior nodes in the tree will never be null, so we can safely traverse - // down to the leaves + // If we're at or beyond the last dimension that was pre-allocated, we need to do a synchronized + // check to see if the next branch exists, and if it doesn't, create it + if ( i >= NUM_DIMENSIONS_TO_PREALLOCATE - 1 ) { + synchronized ( myData ) { + if ( myData[keys[i]] == null ) { + myData[keys[i]] = new Object[dimensions[i + 1]]; + } + } + } + + myData = (Object[])myData[keys[i]]; } synchronized ( myData ) { // lock the bottom row while we examine and (potentially) update it diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 7ad9302a8..2f9387acb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -116,9 +116,12 @@ public class BaseRecalibration { // Compute all covariates for the read // TODO -- the need to clear here suggests there's an error in the indexing / assumption code // TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads? - // TODO -- the output varies with -nt 1 and -nt 2 if you don't call clear here - // TODO -- needs to be fixed. - final ReadCovariates readCovariates = readCovariatesCache.get().clear(); + // TODO -- the output varies depending on whether we clear or not + //final ReadCovariates readCovariates = readCovariatesCache.get().clear(); + + // the original code -- doesn't do any clearing + final ReadCovariates readCovariates = readCovariatesCache.get(); + RecalUtils.computeCovariates(read, requestedCovariates, readCovariates); for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 5e66520ca..9e8815762 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -136,7 +136,14 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", "Mills_and_1000G_gold_standard.indels", b37, true, false)) - + + // + // CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT) + // + + addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf", + "CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false)) + // // example call set for wiki tutorial //