From 379a9841ce8bad91313c974139de1ef00fe2d90b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 17 Apr 2013 12:45:09 -0400 Subject: [PATCH] Various bug fixes for recent Reduce Reads additions plus solution implemented for low MQ reads. 1. Using cumulative binomial probability was not working at high coverage sites (because p-values quickly got out of hand) so instead we use a hybrid system for determining significance: at low coverage sites use binomial prob and at high coverage sites revert to using the old base proportions. Then we get the best of both worlds. As a note, coverage refers to just the individual base counts and not the entire pileup. 2. Reads were getting lost because of the comparator being used in the SlidingWindow. When read pairs had the same alignment end position the 2nd one encountered would get dropped (but added to the header!). We now use a PriorityQueue instead of a TreeSet to allow for such cases. 3. Each consensus keeps track of its own number of softclipped bases. There was no reason that that number should be shared between them. 4. We output consensus filtered (i.e. low MQ) reads whenever they are present for now. Don't lose that information. Maybe we'll decide to change this in the future, but for now we are conservative. 5. Also implemented various small performance optimizations based on profiling. Added unit tests to cover these changes; systematic assessment now tests against low MQ reads too. 
--- .../reducereads/BaseAndQualsCounts.java | 36 +- .../compression/reducereads/BaseCounts.java | 78 +++- .../compression/reducereads/BaseIndex.java | 4 +- .../reducereads/HeaderElement.java | 78 ++-- .../reducereads/MultiSampleCompressor.java | 3 +- .../compression/reducereads/ReduceReads.java | 13 +- .../reducereads/SingleSampleCompressor.java | 7 +- .../reducereads/SlidingWindow.java | 397 ++++++++++-------- .../gatk/walkers/qc/AssessReducedQuals.java | 5 +- .../reducereads/BaseCountsUnitTest.java | 2 +- .../reducereads/HeaderElementUnitTest.java | 15 +- .../ReduceReadsIntegrationTest.java | 38 +- .../reducereads/SlidingWindowUnitTest.java | 152 +++++-- .../broadinstitute/sting/utils/MathUtils.java | 2 +- 14 files changed, 527 insertions(+), 303 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java index 416f66ec6..28a48c212 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java @@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts { * @param isLowQualBase true if the base is low quality */ public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false); + } + + /* + * Increments the count + * + * @param base the base + * @param baseQual the base quality + * @param insQual the insertion quality + * @param delQual the deletion quality + * @param baseMappingQual the mapping quality + * @param isLowQualBase true if the base is low quality + * @param isSoftClip true if is soft-clipped + */ + public void incr(final byte base, final 
byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) { // if we already have high quality bases, ignore low quality ones if ( isLowQualBase && !isLowQuality() ) return; @@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts { } final BaseIndex i = BaseIndex.byteToBase(base); - super.incr(i, baseQual, baseMappingQual); + super.incr(i, baseQual, baseMappingQual, isSoftClip); switch (i) { case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break; case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break; @@ -114,13 +129,28 @@ public class BaseAndQualsCounts extends BaseCounts { * @param baseMappingQual the mapping quality * @param isLowQualBase true if the base is low quality */ - public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false); + } + + /* + * Decrements the count + * + * @param base the base + * @param baseQual the base quality + * @param insQual the insertion quality + * @param delQual the deletion quality + * @param baseMappingQual the mapping quality + * @param isLowQualBase true if the base is low quality + * @param isSoftClip true if is soft-clipped + */ + public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) { // if this is not the right type of base, ignore it if ( isLowQualBase != isLowQuality() ) return; final BaseIndex i = BaseIndex.byteToBase(base); - super.decr(i, baseQual, baseMappingQual); + super.decr(i, baseQual, baseMappingQual, isSoftClip); switch (i) { case A: 
sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break; case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java index afcaf1510..e1329db3b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java @@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils; private int count_N = 0; private int sumQual_N = 0; private int totalCount = 0; // keeps track of total count since this is requested so often + private int nSoftClippedBases = 0; private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise @@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils; this.count_I += other.count_I; this.count_N += other.count_N; this.totalCount += other.totalCount; + this.nSoftClippedBases += other.nSoftClippedBases; this.mappingQualities.addAll(other.mappingQualities); } @@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils; this.count_I -= other.count_I; this.count_N -= other.count_N; this.totalCount -= other.totalCount; + this.nSoftClippedBases -= other.nSoftClippedBases; this.mappingQualities.removeAll(other.mappingQualities); } @@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils; } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1") - public void incr(final BaseIndex base, final byte qual, final int mappingQuality) { + public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) { 
switch (base) { case A: ++count_A; sumQual_A += qual; break; case C: ++count_C; sumQual_C += qual; break; @@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils; case N: ++count_N; sumQual_N += qual; break; } ++totalCount; + nSoftClippedBases += isSoftclip ? 1 : 0; mappingQualities.add(mappingQuality); } @@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils; } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1") - public void decr(final BaseIndex base, final byte qual, final int mappingQuality) { + public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) { switch (base) { case A: --count_A; sumQual_A -= qual; break; case C: --count_C; sumQual_C -= qual; break; @@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils; case N: --count_N; sumQual_N -= qual; break; } --totalCount; + nSoftClippedBases -= isSoftclip ? 1 : 0; mappingQualities.remove((Integer) mappingQuality); } @@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils; return (byte) (sumQualsOfBase(base) / countOfBase(base)); } + @Ensures("result >= 0") + public int nSoftclips() { + return nSoftClippedBases; + } @Ensures("result >= 0") public int totalCount() { @@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils; return baseIndexWithMostCounts().getByte(); } + /** + * @return the base index for which the count is highest, including indel indexes + */ @Ensures("result != null") public BaseIndex baseIndexWithMostCounts() { - BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (final BaseIndex i : BaseIndex.values()) { - if (countOfBase(i) > countOfBase(maxI)) - maxI = i; - } - return maxI; + return baseIndexWithMostCounts(true); } + /** + * @return the base index for which the count is highest, excluding indel indexes + */ @Ensures("result != null") public BaseIndex baseIndexWithMostCountsWithoutIndels() { + return baseIndexWithMostCounts(false); + } + + 
/** + * Finds the base index with the most counts + * + * @param allowIndels should we allow base indexes representing indels? + * @return non-null base index + */ + @Ensures("result != null") + protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) { BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + int maxCount = countOfBase(maxI); + for (final BaseIndex i : BaseIndex.values()) { - if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI)) + if ( !allowIndels && !i.isNucleotide() ) + continue; + + final int myCount = countOfBase(i); + if (myCount > maxCount) { maxI = i; + maxCount = myCount; + } } return maxI; } @@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils; @Ensures("result != null") public BaseIndex baseIndexWithMostProbability() { - BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (final BaseIndex i : BaseIndex.values()) { - if (getSumQuals(i) > getSumQuals(maxI)) - maxI = i; - } - return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts()); + return baseIndexWithMostProbability(true); } @Ensures("result != null") public BaseIndex baseIndexWithMostProbabilityWithoutIndels() { + return baseIndexWithMostProbability(false); + } + + /** + * Finds the base index with the most probability + * + * @param allowIndels should we allow base indexes representing indels? + * @return non-null base index + */ + @Ensures("result != null") + public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) { BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + long maxSum = getSumQuals(maxI); + for (final BaseIndex i : BaseIndex.values()) { - if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI)) + if ( !allowIndels && !i.isNucleotide() ) + continue; + + final long mySum = getSumQuals(i); + if (mySum > maxSum) { maxI = i; + maxSum = mySum; + } } - return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels()); + return (maxSum > 0L ? 
maxI : baseIndexWithMostCounts(allowIndels)); } @Ensures("result >=0") @@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils; count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0; sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0; totalCount = 0; + nSoftClippedBases = 0; mappingQualities.clear(); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java index e41878a0b..665e3e7ce 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java @@ -121,7 +121,7 @@ public enum BaseIndex { * * @return whether or not it is a nucleotide, given the definition above */ - public boolean isNucleotide() { + public final boolean isNucleotide() { return !isIndel(); } @@ -130,7 +130,7 @@ public enum BaseIndex { * * @return true for I or D, false otherwise */ - public boolean isIndel() { + public final boolean isIndel() { return this == D || this == I; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index dec323213..38b9e957b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -62,9 +62,10 @@ public class HeaderElement { private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site. private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site. 
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right - private int nSoftClippedBases; // How many bases in this site came from soft clipped bases private int location; // Genome location of this site (the sliding window knows which contig we're at + protected static final int MIN_COUNT_FOR_USING_PVALUE = 2; + public int getLocation() { return location; } @@ -84,7 +85,7 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location); } /** @@ -94,7 +95,7 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location, final int insertionsToTheRight) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location); } /** @@ -103,15 +104,13 @@ public class HeaderElement { * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param insertionsToTheRight number of insertions to the right of this HeaderElement - * @param nSoftClippedBases number of softclipped bases of this HeaderElement * @param location the reference location of this reference element * HeaderElement */ - public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) { + public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) { this.consensusBaseCounts = consensusBaseCounts; this.filteredBaseCounts = filteredBaseCounts; 
this.insertionsToTheRight = insertionsToTheRight; - this.nSoftClippedBases = nSoftClippedBases; this.location = location; } @@ -119,10 +118,13 @@ public class HeaderElement { * Whether or not the site represented by this HeaderElement is variant according to the definitions of variant * by insertion, deletion and mismatches. * + * @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches + * @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches + * @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels * @return true if site is variant by any definition. False otherwise. */ - public boolean isVariant(double minVariantPvalue, double minIndelProportion) { - return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); + public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) { + return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); } /** @@ -140,11 +142,9 @@ public class HeaderElement { public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); else 
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); - - nSoftClippedBases += isSoftClipped ? 1 : 0; } /** @@ -162,11 +162,9 @@ public class HeaderElement { public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); else filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); - - nSoftClippedBases -= isSoftClipped ? 1 : 0; } /** * Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions @@ -246,15 +244,15 @@ public class HeaderElement { /** * Whether or not the HeaderElement is variant due to excess mismatches * - * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage). + * @param minVariantProportion the minimum proportion to call a site variant (used with high coverage). * @return whether or not the HeaderElement is variant due to excess mismatches */ - protected boolean isVariantFromMismatches(double minVariantPvalue) { + protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) { final int totalCount = consensusBaseCounts.totalCountWithoutIndels(); final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon); - final double pvalue = countOfOtherBases == 0 ? 
0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases); - return pvalue > minVariantPvalue; + return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion); } /** @@ -264,6 +262,7 @@ public class HeaderElement { * @return true if we had more soft clipped bases contributing to this site than matches/mismatches. */ protected boolean isVariantFromSoftClips() { + final int nSoftClippedBases = consensusBaseCounts.nSoftclips(); return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases); } @@ -271,10 +270,11 @@ public class HeaderElement { * Calculates the number of alleles necessary to represent this site. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return the number of alleles necessary to represent this site or -1 if there are too many indels */ - public int getNumberOfBaseAlleles(final double minVariantPvalue) { - final ObjectArrayList alleles = getAlleles(minVariantPvalue); + public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) { + final ObjectArrayList alleles = getAlleles(minVariantPvalue, minVariantProportion); return alleles == null ? -1 : alleles.size(); } @@ -282,16 +282,18 @@ public class HeaderElement { * Calculates the alleles necessary to represent this site. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. 
* @return the list of alleles necessary to represent this site or null if there are too many indels */ - public ObjectArrayList getAlleles(final double minVariantPvalue) { + public ObjectArrayList getAlleles(final double minVariantPvalue, final double minVariantProportion) { // make sure we have bases at all final int totalBaseCount = consensusBaseCounts.totalCount(); if ( totalBaseCount == 0 ) return new ObjectArrayList(0); - // next, check for insertions - if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) ) + // next, check for insertions; technically, the insertion count can be greater than totalBaseCount + // (because of the way insertions are counted), so we need to account for that + if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) ) return null; // finally, check for the bases themselves (including deletions) @@ -301,9 +303,7 @@ public class HeaderElement { if ( baseCount == 0 ) continue; - final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount); - - if ( pvalue > minVariantPvalue ) { + if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) { if ( base == BaseIndex.D ) return null; alleles.add(base); @@ -316,26 +316,34 @@ public class HeaderElement { * Checks whether there are a significant number of softclips. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. 
* @return true if there are significant softclips, false otherwise */ - public boolean hasSignificantSoftclips(final double minVariantPvalue) { - return hasSignificantCount(nSoftClippedBases, minVariantPvalue); + public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) { + return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion); } /* * Checks whether there are a significant number of count. * - * @param count the count to test against + * @param count the count (k) to test against + * @param total the total (n) to test against * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return true if there is a significant count given the provided pvalue, false otherwise */ - private boolean hasSignificantCount(final int count, final double minVariantPvalue) { - final int totalBaseCount = consensusBaseCounts.totalCount(); - if ( count == 0 || totalBaseCount == 0 ) + private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) { + if ( count == 0 || total == 0 ) return false; - // technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that - final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount)); - return pvalue > minVariantPvalue; + // use p-values for low counts of k + if ( count <= MIN_COUNT_FOR_USING_PVALUE ) { + final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count); + return pvalue > minVariantPvalue; + } + + // otherwise, use straight proportions + final int minBaseCountForSignificance = (int)(minVariantProportion * total); + return count >= minBaseCountForSignificance; } } \ No newline at end of file diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java index 85aee9fc9..bdd407fba 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java @@ -97,13 +97,14 @@ public class MultiSampleCompressor { final int downsampleCoverage, final int minMappingQuality, final double minAltPValueToTriggerVariant, + final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final ReduceReads.DownsampleStrategy downsampleStrategy) { for ( String name : SampleUtils.getSAMFileSamples(header) ) { compressorsPerSample.put(name, new SingleSampleCompressor(contextSize, downsampleCoverage, - minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); + minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 4d90a83be..82a02ca55 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker, Redu /** * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be - * considered consensus. 
+ * considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that + * this value is used only in regions with high coverage. */ - @Deprecated + @Advanced @Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false) public double minAltProportionToTriggerVariant = 0.05; /** * Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region. - * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression). + * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to + * trigger polyploid compression). Note that this value is used only in regions with low coverage. */ @Advanced @Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false) @@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker, Redu if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 ) throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); + if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 ) + throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); + if ( known.isEmpty() ) knownSnpPositions = null; else @@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker, Redu */ @Override public ReduceReadsStash reduceInit() { - return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, 
minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java index ec041386c..61c34b6a0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java @@ -63,6 +63,7 @@ public class SingleSampleCompressor { final private int downsampleCoverage; final private int minMappingQuality; final private double minAltPValueToTriggerVariant; + final private double minAltProportionToTriggerVariant; final private double minIndelProportionToTriggerVariant; final private int minBaseQual; final private ReduceReads.DownsampleStrategy downsampleStrategy; @@ -76,6 +77,7 @@ public class SingleSampleCompressor { final int downsampleCoverage, final int minMappingQuality, final double minAltPValueToTriggerVariant, + final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final ReduceReads.DownsampleStrategy downsampleStrategy) { @@ -84,6 +86,7 @@ public class SingleSampleCompressor { this.minMappingQuality = minMappingQuality; this.slidingWindowCounter = 0; this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant; + this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant; this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant; this.minBaseQual = minBaseQual; this.downsampleStrategy = downsampleStrategy; @@ -114,7 +117,9 @@ public class SingleSampleCompressor { } if ( slidingWindow == null) { // this is the first read - slidingWindow = new SlidingWindow(read.getReferenceName(), 
read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities()); + slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), + slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, + minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities()); slidingWindowCounter++; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 5fd7724cb..d3ca037be 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -78,8 +77,8 @@ import java.util.*; public class SlidingWindow { // Sliding Window data - final private ObjectAVLTreeSet readsInWindow; - final private LinkedList windowHeader; + final protected PriorityQueue readsInWindow; + final protected LinkedList windowHeader; protected int contextSize; // the largest context size (between mismatches and 
indels) protected String contig; protected int contigIndex; @@ -99,6 +98,7 @@ public class SlidingWindow { // Additional parameters protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches + protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value protected int MIN_MAPPING_QUALITY; @@ -146,28 +146,33 @@ public class SlidingWindow { this.windowHeader = new LinkedList(); windowHeader.addFirst(new HeaderElement(startLocation)); - this.readsInWindow = new ObjectAVLTreeSet(); + this.readsInWindow = new PriorityQueue(100, new Comparator() { + @Override + public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { + return read1.getSoftEnd() - read2.getSoftEnd(); + } + }); } public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader, final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber, - final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant, + final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final int minMappingQuality, final int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) { this.contextSize = contextSize; this.downsampleCoverage = downsampleCoverage; this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant; + this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant; this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = 
minIndelProportionToTriggerVariant; this.MIN_BASE_QUAL_TO_COUNT = minBaseQual; this.MIN_MAPPING_QUALITY = minMappingQuality; this.windowHeader = new LinkedList(); - this.readsInWindow = new ObjectAVLTreeSet(new Comparator() { + this.readsInWindow = new PriorityQueue(1000, new Comparator() { @Override public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { - final int difference = read1.getSoftEnd() - read2.getSoftEnd(); - return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName()); + return read1.getSoftEnd() - read2.getSoftEnd(); } }); @@ -290,8 +295,8 @@ public class SlidingWindow { regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose); } - while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) { - readsInWindow.remove(readsInWindow.first()); + while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) { + readsInWindow.poll(); } return regions; @@ -353,7 +358,7 @@ public class SlidingWindow { /** * returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks) * - * @param stop check the window from start to stop (not-inclusive) + * @param stop check the window from start to stop (not-inclusive); given in global coordinates */ protected void markSites(final int stop) { @@ -363,21 +368,16 @@ public class SlidingWindow { // copy over as many bits as we can from the previous calculation. Note that we can't trust the // last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there. 
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1; - final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize); + final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize)); - // update the iterator to the correct position - Iterator headerElementIterator = windowHeader.iterator(); - for (int i = windowHeaderStartLocation; i < locationToProcess; i++) { - if (headerElementIterator.hasNext()) - headerElementIterator.next(); - } + final ListIterator headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation); // process a contextSize worth of region from scratch in case there's a variant there for (int i = locationToProcess; i < stop; i++) { if (headerElementIterator.hasNext()) { HeaderElement headerElement = headerElementIterator.next(); - if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT)) + if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT)) markVariantRegion(i - windowHeaderStartLocation); } else @@ -409,7 +409,7 @@ public class SlidingWindow { } /** - * Adds bases to the running consensus or filtered data accordingly + * Adds bases to the running consensus * * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * @@ -422,9 +422,10 @@ public class SlidingWindow { @Requires({"start >= 0 && (end >= start || end == 0)"}) @Ensures("result != null") protected ObjectArrayList addToSyntheticReads(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - ObjectArrayList reads = new ObjectArrayList(); - if (start < end) { - ListIterator headerElementIterator = header.listIterator(start); + final ObjectArrayList reads = new 
ObjectArrayList(); + + if ( start < end ) { + final ListIterator headerElementIterator = header.listIterator(start); if (!headerElementIterator.hasNext()) throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); @@ -432,37 +433,29 @@ public class SlidingWindow { HeaderElement headerElement = headerElementIterator.next(); if (headerElement.hasConsensusData()) { - reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); - - int endOfConsensus = findNextNonConsensusElement(header, start, end); - addToRunningConsensus(header, start, endOfConsensus, strandType); + // find the end of the consecutive consensus data in the window + final int endOfConsensus = findNextNonConsensusElement(header, start, end); if (endOfConsensus <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); + // add to running consensus and recurse + addToRunningConsensus(header, start, endOfConsensus, strandType); reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType)); - } else if (headerElement.hasFilteredData()) { + + } else { + + // add any outstanding consensus data reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); - int endOfFilteredData = findNextNonFilteredDataElement(header, start, end); - reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType)); - - if (endOfFilteredData <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start)); - - reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType)); - } else if (headerElement.isEmpty()) { - reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); - - int endOfEmptyData = findNextNonEmptyElement(header, start, end); - + // find the end of the consecutive empty data in the window + final int endOfEmptyData = 
findNextConsensusElement(header, start, end); if (endOfEmptyData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); + // recurse out of the empty region reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType)); - } else - throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start)); - + } } return reads; @@ -474,24 +467,21 @@ public class SlidingWindow { * @param type the synthetic reads you want to close * @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads */ - private ObjectArrayList finalizeAndAdd(ConsensusType type) { - GATKSAMRecord read = null; - ObjectArrayList list = new ObjectArrayList(); + private ObjectArrayList finalizeAndAdd(final ConsensusType type) { - switch (type) { - case CONSENSUS: - read = finalizeRunningConsensus(); - break; - case FILTERED: - read = finalizeFilteredDataConsensus(); - break; - case BOTH: - read = finalizeRunningConsensus(); - if (read != null) list.add(read); - read = finalizeFilteredDataConsensus(); + final ObjectArrayList list = new ObjectArrayList(); + + if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) { + final GATKSAMRecord read = finalizeRunningConsensus(); + if ( read != null ) + list.add(read); + } + + if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) { + final GATKSAMRecord read = finalizeFilteredDataConsensus(); + if ( read != null ) + list.add(read); } - if (read != null) - list.add(read); return list; } @@ -499,19 +489,145 @@ public class SlidingWindow { /** * Looks for the next position without consensus data * - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element * 
@return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position */ - private int findNextNonConsensusElement(LinkedList header, int start, int upTo) { - Iterator headerElementIterator = header.listIterator(start); + private int findNextNonConsensusElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("There are no more header elements in this window"); - HeaderElement headerElement = headerElementIterator.next(); + if (!headerElementIterator.next().hasConsensusData()) + break; + index++; + } + return index; + } + + /** + * Looks for the next position with consensus data + * + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position + */ + private int findNextConsensusElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); + int index = start; + while (index < upTo) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("There are no more header elements in this window"); + + if (headerElementIterator.next().hasConsensusData()) + break; + index++; + } + return index; + } + + /** + * Adds bases to the running consensus synthetic read. + * + * Different from the addToSyntheticReads method, this method assumes a contiguous sequence of consensus + * bases.
+ * + * @param header the window header + * @param start the first header index to add to consensus + * @param end the first header index NOT TO add to consensus + * @param strandType the strandedness that the synthetic read should be represented as having + */ + @Requires({"start >= 0 && (end >= start || end == 0)"}) + private void addToRunningConsensus(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { + if (runningConsensus == null) + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); + + final Iterator headerElementIterator = header.listIterator(start); + + for (int index = start; index < end; index++) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); + + final HeaderElement headerElement = headerElementIterator.next(); if (!headerElement.hasConsensusData()) + throw new ReviewedStingException("No CONSENSUS data in " + index); + + genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts()); + } + } + + /** + * Adds bases to the running filtered data accordingly + * + * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus + * + * @param header the window header + * @param start the first header index to add to consensus + * @param end the first header index NOT TO add to consensus + * @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated. 
+ */ + @Requires({"start >= 0 && (end >= start || end == 0)"}) + @Ensures("result != null") + protected ObjectArrayList addToFilteredReads(final LinkedList header, final int start, final int end) { + final ObjectArrayList reads = new ObjectArrayList(); + + if ( start < end ) { + final ListIterator headerElementIterator = header.listIterator(start); + + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); + + HeaderElement headerElement = headerElementIterator.next(); + + if (headerElement.hasFilteredData()) { + + // find the end of the consecutive filtered data in the window + final int endOfFiltered = findNextNonFilteredElement(header, start, end); + if (endOfFiltered <= start) + throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start)); + + // add to running filtered consensus and recurse + addToFilteredData(header, start, endOfFiltered); + reads.addAll(addToFilteredReads(header, endOfFiltered, end)); + + } else { + + // add any outstanding filtered data + reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); + + // find the end of the consecutive empty data in the window + final int endOfEmptyData = findNextFilteredElement(header, start, end); + if (endOfEmptyData <= start) + throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); + + // recurse out of the empty region + reads.addAll(addToFilteredReads(header, endOfEmptyData, end)); + } + } + + return reads; + } + + /** + * Looks for the next position without filtered data + * + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) without filtered data;
otherwise, the start position + */ + private int findNextNonFilteredElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); + int index = start; + while (index < upTo) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("There are no more header elements in this window"); + + if (!headerElementIterator.next().hasFilteredData()) break; index++; } @@ -519,43 +635,21 @@ } /** - * Looks for the next position without filtered data + * Looks for the next position with filtered data * - * @param start beginning of the region - * @param upTo limit to search for - * @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) with filtered data; otherwise, the start position */ - private int findNextNonFilteredDataElement(LinkedList header, int start, int upTo) { - Iterator headerElementIterator = header.listIterator(start); + private int findNextFilteredElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("There are no more header elements in this window"); - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.hasFilteredData() || headerElement.hasConsensusData()) - break; - index++; - } - return index; - } - - /** - * Looks for the next non-empty header element - * - * @param start beginning of the region - * @param upTo limit to search for - * @return next position in local coordinates (relative to the windowHeader) with
non-empty element; otherwise, the start position - */ - private int findNextNonEmptyElement(LinkedList header, int start, int upTo) { - ListIterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.isEmpty()) + if (headerElementIterator.next().hasFilteredData()) break; index++; } @@ -571,67 +665,26 @@ public class SlidingWindow { * @param header the window header * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having - * @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated. */ @Requires({"start >= 0 && (end >= start || end == 0)"}) @Ensures("result != null") - private ObjectArrayList addToFilteredData(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - ObjectArrayList result = new ObjectArrayList(); + private void addToFilteredData(final LinkedList header, final int start, final int end) { if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS); ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { if (!headerElementIterator.hasNext()) throw 
new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - HeaderElement headerElement = headerElementIterator.next(); - if (headerElement.hasConsensusData()) - throw new ReviewedStingException("Found consensus data inside region to add to filtered data."); + final HeaderElement headerElement = headerElementIterator.next(); if (!headerElement.hasFilteredData()) throw new ReviewedStingException("No filtered data in " + index); - if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) { - result.add(finalizeFilteredDataConsensus()); - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType); - } - genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts()); } - - return result; - } - - /** - * Adds bases to the filtered data synthetic read. - * - * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData - * bases. 
- * - * @param header the window header - * @param start the first header index to add to consensus - * @param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having - */ - @Requires({"start >= 0 && (end >= start || end == 0)"}) - private void addToRunningConsensus(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); - - Iterator headerElementIterator = header.listIterator(start); - for (int index = start; index < end; index++) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.hasConsensusData()) - throw new ReviewedStingException("No CONSENSUS data in " + index); - - genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts()); - } } /** @@ -726,7 +779,7 @@ public class SlidingWindow { for ( int i = start; i <= stop; i++ ) { - final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT); + final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT); // we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios if ( nAlleles > 2 || nAlleles == -1 ) @@ -760,8 +813,8 @@ public class SlidingWindow { if ( headerElement.getLocation() == positionToSkip ) continue; - if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) || - 
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 ) + if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) || + headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 ) return true; } @@ -784,6 +837,7 @@ public class SlidingWindow { final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS)); + result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1)); result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); return result; // finalized reads will be downsampled if necessary @@ -914,6 +968,7 @@ public class SlidingWindow { if (!windowHeader.isEmpty()) { finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS)); + finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size())); finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up } } @@ -983,7 +1038,7 @@ public class SlidingWindow { // initialize the mapping from base (allele) to header final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2); - for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) { + for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) { final int currentIndex = alleleHeaderMap.size(); if ( currentIndex > 1 ) throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus"); @@ -997,7 +1052,7 @@ public class SlidingWindow { if ( 
alleleHeaderMap.size() != 2 ) throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size()); - final ObjectList readsToRemoveFromHeader = new ObjectArrayList(); + final ObjectList readsToRemove = new ObjectArrayList(); for ( final GATKSAMRecord read : readsInWindow ) { @@ -1006,38 +1061,36 @@ public class SlidingWindow { continue; // remove all other reads from the read cache since we're going to use them here - readsInWindow.remove(read); + readsToRemove.add(read); - // if the read falls before the het position, we don't need to look at it - if ( read.getSoftEnd() < globalHetRefPosition ) + // if the read falls before the het position or has low MQ, we don't need to look at it + if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY) continue; // remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead removeFromHeader(windowHeader, read); - // make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus) - if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) { + // where on the read is the het position? + final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); - // where on the read is the het position? 
- final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); + // this is safe because indels are not supported + final byte base = read.getReadBases()[readPosOfHet]; - // this is safe because indels are not supported - final byte base = read.getReadBases()[readPosOfHet]; - final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet]; + // check which allele this read represents + final Integer allele = alleleHeaderMap.get(base); - // check which allele this read represents - final Integer allele = alleleHeaderMap.get(base); - - // ignore the read if it represents a base that's not part of the consensus - if ( allele != null ) { - // add to the appropriate polyploid header - final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele]; - header.reads.add(read); - addToHeader(header.consensus, read); - } + // ignore the read if it represents a base that's not part of the consensus + if ( allele != null ) { + // add to the appropriate polyploid header + final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? 
headersNegStrand[allele] : headersPosStrand[allele]; + header.reads.add(read); + addToHeader(header.consensus, read); } } + for ( final GATKSAMRecord read : readsToRemove ) + readsInWindow.remove(read); + // create the polyploid synthetic reads if we can final ObjectList hetReads = new ObjectArrayList(); @@ -1171,15 +1224,15 @@ public class SlidingWindow { int readBaseIndex = 0; HeaderElement headerElement; - for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) { + for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) { switch ( cigarElement.getOperator() ) { case H: break; case I: readBaseIndex += cigarElement.getLength(); - // special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it. - if ( removeRead && locationIndex == 0 ) + // special case, if we don't have the previous header element anymore, don't worry about it. + if ( locationIndex == 0 ) break; // insertions are added to the base to the left (previous element) @@ -1200,9 +1253,8 @@ public class SlidingWindow { headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); else headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); - - locationIndex++; } + locationIndex += nDeletionBases; break; case S: case M: @@ -1211,6 +1263,8 @@ public class SlidingWindow { case X: final int nBasesToAdd = cigarElement.getLength(); final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S; + final byte[] readBases = read.getReadBases(); + final byte[] readQuals = read.getBaseQualities(); final boolean readHasIndelQuals = read.hasBaseIndelQualities(); final byte[] insertionQuals = readHasIndelQuals ? 
read.getBaseInsertionQualities() : null; final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null; @@ -1219,14 +1273,15 @@ public class SlidingWindow { headerElement = headerElementIterator.next(); final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1; final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1; + if ( removeRead ) - headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); else - headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); readBaseIndex++; - locationIndex++; } + locationIndex += nBasesToAdd; break; default: break; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index 4e5652c45..a3bdc6691 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker implem @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false) public double 
qual_epsilon = 0.10; + @Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false) + public int excludeMQ = 0; + @Output protected PrintStream out; @@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker implem } private boolean isGoodRead(final PileupElement p) { - return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20; + return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ; } private int getTagIndex(final List tags) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java index 5ae6e86df..f988471a0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java @@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest { BaseCounts counts = new BaseCounts(); for ( int qual : test.quals ) - counts.incr(BaseIndex.A, (byte)qual, 20); + counts.incr(BaseIndex.A, (byte)qual, 20, false); final int actualSum = (int)counts.getSumQuals((byte)'A'); final int expectedSum = qualSum(test.quals); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java index d73a71855..32791dd97 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java @@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest { 
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual); Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual); Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ); - Assert.assertFalse(headerElement.isVariantFromMismatches(0.05)); - Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip); + Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05)); + Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip); } @@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest { headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false); } - final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue); + final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue); final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue); Assert.assertEquals(nAllelesSeen, nAllelesExpected); @@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest { if ( count == 0 ) continue; - final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count); + final boolean isSignificant; + if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) { + isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue; + } else { + isSignificant = (count >= targetPvalue * total); + } - if ( pvalue > targetPvalue ) { + if ( isSignificant ) { if ( index == BaseIndex.D.index ) return -1; result++; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 1ab001147..b5963498a 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false); + RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false); } @Test(enabled = true) public void testDefaultCompressionWithKnowns() { - RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true); + RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true); } private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; @Test(enabled = true) public void testMultipleIntervals() { - RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false); + RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false); } @Test(enabled = true) public void testMultipleIntervalsWithKnowns() { - RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true); + RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true); } - final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f"; - @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false); + RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false); } @Test(enabled = true) public void testHighCompressionWithKnowns() { - 
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true); + RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false); + RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false); } @Test(enabled = true) public void testLowCompressionWithKnowns() { - RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true); + RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true); } @Test(enabled = true) @@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testIndelCompression() { - final String md5 = "56154baed62be07008d3684a0a4c0996"; + final String md5 = "d20e6012300898a0315c795cab7583d8"; RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false); RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true); } @@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testFilteredDeletionCompression() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s "; - executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1"))); + executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), 
Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6"); } + private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d"; + @Test(enabled = true) public void testCoReduction() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; - executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8"))); + executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5); } @Test(enabled = true) public void testCoReductionWithKnowns() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; - executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b"))); + executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5); } @Test(enabled = true) public void testInsertionsAtEdgeOfConsensus() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; - executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0"))); + executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804"))); } /** @@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAddingReadAfterTailingTheStash() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; - 
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); + executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); } /** @@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d"))); } /** @@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testReadOffContig() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; - executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01"))); + executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248"))); } /** @@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testPairedReadsInVariantRegion() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) + " -o %s --downsample_coverage 250 -dcov 50 "; - executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d"))); + 
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 4bf67f5a2..56ad02084 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest { } private class ConsensusCreationTest { - public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression; + public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage; public final List myReads = new ArrayList(20); public final String description; - private ConsensusCreationTest(final List locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { + private ConsensusCreationTest(final List locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) { this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; - this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); + this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; + this.description = String.format("%d %d %d", expectedNumberOfReads, 
expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); // first, add the basic reads to the collection myReads.addAll(basicReads); @@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest { myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M)); } - private ConsensusCreationTest(final List locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { + private ConsensusCreationTest(final List locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) { this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; - this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); + this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; + this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); // first, add the basic reads to the collection myReads.addAll(basicReads); @@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest { private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295); private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309); private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310); + private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320); private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100); + private static final int DEEP_COVERAGE_ITERATIONS = 100; + @DataProvider(name = "ConsensusCreation") public Object[][] 
createConsensusCreationTestData() { List tests = new ArrayList(); // test high quality reads and bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 9, 6)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 11, 11)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))}); // test low quality reads - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), 
true, false, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 2, 2, 2)}); // test low quality bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 1, 1, 1)}); // test mixture - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 1, 1, 1)}); // test I/D operators - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), 
CigarOperator.D, 9, 9)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 11, 11)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 9, 9)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 11, 11)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); return tests.toArray(new Object[][]{}); } @@ -368,14 +374,14 @@ 
public class SlidingWindowUnitTest extends BaseTest { final ObjectAVLTreeSet knownSNPs = new ObjectAVLTreeSet(); // test WITHOUT het compression - SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); Pair, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads); // test WITH het compression at KNOWN sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); for ( int i = 0; i < 1200; i++ ) @@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); // test WITH het compression at ALL sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); result = slidingWindow.close(null); Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); 
+ + // test with deep coverage + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false); + for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) { + for ( final GATKSAMRecord read : test.myReads ) { + final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength); + copy.setReadBases(read.getReadBases()); + copy.setBaseQualities(read.getBaseQualities()); + copy.setMappingQuality(read.getMappingQuality()); + copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag()); + if ( read.getCigar() != null ) + copy.setCigar(read.getCigar()); + slidingWindow.addRead(copy); + } + } + result = slidingWindow.close(null); + Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage); } @Test @@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest { final ObjectAVLTreeSet knownSNPs = new ObjectAVLTreeSet(); // test WITHOUT het compression - SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); Pair, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all // test WITH het compression at KNOWN sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 
0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); for ( int i = 0; i < readLength; i++ ) @@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all // test WITH het compression at ALL sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); result = slidingWindow.close(knownSNPs); Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all } + @Test + public void testAddingReadPairWithSameCoordinates() { + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10); + + final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1); + read1.setReadBases(new byte[]{(byte)'A'}); + read1.setBaseQualities(new byte[]{(byte)'A'}); + read1.setMappingQuality(30); + read1.setReadNegativeStrandFlag(false); + slidingWindow.addRead(read1); + + final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1); + read2.setReadBases(new byte[]{(byte)'A'}); + read2.setBaseQualities(new byte[]{(byte)'A'}); + read2.setMappingQuality(30); + read2.setReadNegativeStrandFlag(true); + slidingWindow.addRead(read2); + + Assert.assertEquals(slidingWindow.readsInWindow.size(), 2); + } + + @Test + public void testOnlySpanningReadHasLowQual() { + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, 
ReduceReads.DownsampleStrategy.Normal, false); + + final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100); + final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100); + + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + read1.setReadBases(bases); + read2.setReadBases(bases); + + final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength); + baseQuals[80] = (byte)10; + read1.setBaseQualities(baseQuals); + read2.setBaseQualities(baseQuals); + + read1.setMappingQuality(30); + read2.setMappingQuality(30); + + slidingWindow.addRead(read1); + slidingWindow.addRead(read2); + + Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1); + } + /////////////////////////////////////////////////////////// //// This section tests the downsampling functionality //// @@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest { @Test(dataProvider = "Downsampling", enabled = true) public void testDownsamplingTest(final int dcov) { - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); final ObjectList result = slidingWindow.downsampleVariantRegion(basicReads); Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); @@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest { @Test(dataProvider = "ConsensusQuals", enabled = true) public void testConsensusQualsTest(QualsTest test) { - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, 
ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); final Pair, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet()); @@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read.setMappingQuality(30); - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start); Assert.assertEquals(newIndex, start > 0 ? 
start : 0); @@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setMappingQuality(30); // add the read - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start); for ( int i = 0; i < start; i++ ) Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); @@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); } - ////////////////////////////////////////////////////////////////////////////////// //// This section tests functionality related to polyploid consensus creation //// ////////////////////////////////////////////////////////////////////////////////// @@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setMappingQuality(30); // add the read - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0); // set up and add a soft-clipped read if requested diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index f4644036f..1cc798e36 100644 --- 
a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -383,7 +383,7 @@ public class MathUtils { for (int hits = k_start; hits <= k_end; hits++) { prevProb = cumProb; - double probability = binomialProbability(n, hits); + final double probability = binomialProbability(n, hits); cumProb += probability; if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision probCache = probCache.add(new BigDecimal(prevProb));