diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java index 416f66ec6..28a48c212 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseAndQualsCounts.java @@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts { * @param isLowQualBase true if the base is low quality */ public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false); + } + + /* + * Increments the count + * + * @param base the base + * @param baseQual the base quality + * @param insQual the insertion quality + * @param delQual the deletion quality + * @param baseMappingQual the mapping quality + * @param isLowQualBase true if the base is low quality + * @param isSoftClip true if is soft-clipped + */ + public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) { // if we already have high quality bases, ignore low quality ones if ( isLowQualBase && !isLowQuality() ) return; @@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts { } final BaseIndex i = BaseIndex.byteToBase(base); - super.incr(i, baseQual, baseMappingQual); + super.incr(i, baseQual, baseMappingQual, isSoftClip); switch (i) { case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break; case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break; @@ -114,13 +129,28 @@ public class BaseAndQualsCounts extends BaseCounts { * @param baseMappingQual the mapping quality * @param isLowQualBase true if the base is low quality */ - public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { + decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false); + } + + /* + * Decrements the count + * + * @param base the base + * @param baseQual the base quality + * @param insQual the insertion quality + * @param delQual the deletion quality + * @param baseMappingQual the mapping quality + * @param isLowQualBase true if the base is low quality + * @param isSoftClip true if is soft-clipped + */ + public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) { // if this is not the right type of base, ignore it if ( isLowQualBase != isLowQuality() ) return; final BaseIndex i = BaseIndex.byteToBase(base); - super.decr(i, baseQual, baseMappingQual); + super.decr(i, baseQual, baseMappingQual, isSoftClip); switch (i) { case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break; case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java index afcaf1510..e1329db3b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCounts.java @@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils; private int count_N = 0; private int sumQual_N = 0; private int totalCount = 0; // keeps track of total count since this is requested so often + private int nSoftClippedBases = 0; private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise @@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils; this.count_I += other.count_I; this.count_N += other.count_N; this.totalCount += other.totalCount; + this.nSoftClippedBases = other.nSoftClippedBases; this.mappingQualities.addAll(other.mappingQualities); } @@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils; this.count_I -= other.count_I; this.count_N -= other.count_N; this.totalCount -= other.totalCount; + this.nSoftClippedBases -= other.nSoftClippedBases; this.mappingQualities.removeAll(other.mappingQualities); } @@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils; } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1") - public void incr(final BaseIndex base, final byte qual, final int mappingQuality) { + public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) { switch (base) { case A: ++count_A; sumQual_A += qual; break; case C: ++count_C; sumQual_C += qual; break; @@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils; case N: ++count_N; sumQual_N += qual; break; } ++totalCount; + nSoftClippedBases += isSoftclip ? 1 : 0; mappingQualities.add(mappingQuality); } @@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils; } @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1") - public void decr(final BaseIndex base, final byte qual, final int mappingQuality) { + public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) { switch (base) { case A: --count_A; sumQual_A -= qual; break; case C: --count_C; sumQual_C -= qual; break; @@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils; case N: --count_N; sumQual_N -= qual; break; } --totalCount; + nSoftClippedBases -= isSoftclip ? 1 : 0; mappingQualities.remove((Integer) mappingQuality); } @@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils; return (byte) (sumQualsOfBase(base) / countOfBase(base)); } + @Ensures("result >= 0") + public int nSoftclips() { + return nSoftClippedBases; + } @Ensures("result >= 0") public int totalCount() { @@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils; return baseIndexWithMostCounts().getByte(); } + /** + * @return the base index for which the count is highest, including indel indexes + */ @Ensures("result != null") public BaseIndex baseIndexWithMostCounts() { - BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (final BaseIndex i : BaseIndex.values()) { - if (countOfBase(i) > countOfBase(maxI)) - maxI = i; - } - return maxI; + return baseIndexWithMostCounts(true); } + /** + * @return the base index for which the count is highest, excluding indel indexes + */ @Ensures("result != null") public BaseIndex baseIndexWithMostCountsWithoutIndels() { + return baseIndexWithMostCounts(false); + } + + /** + * Finds the base index with the most counts + * + * @param allowIndels should we allow base indexes representing indels? + * @return non-null base index + */ + @Ensures("result != null") + protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) { BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + int maxCount = countOfBase(maxI); + for (final BaseIndex i : BaseIndex.values()) { - if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI)) + if ( !allowIndels && !i.isNucleotide() ) + continue; + + final int myCount = countOfBase(i); + if (myCount > maxCount) { maxI = i; + maxCount = myCount; + } } return maxI; } @@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils; @Ensures("result != null") public BaseIndex baseIndexWithMostProbability() { - BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; - for (final BaseIndex i : BaseIndex.values()) { - if (getSumQuals(i) > getSumQuals(maxI)) - maxI = i; - } - return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts()); + return baseIndexWithMostProbability(true); } @Ensures("result != null") public BaseIndex baseIndexWithMostProbabilityWithoutIndels() { + return baseIndexWithMostProbability(false); + } + + /** + * Finds the base index with the most probability + * + * @param allowIndels should we allow base indexes representing indels? + * @return non-null base index + */ + @Ensures("result != null") + public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) { BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; + long maxSum = getSumQuals(maxI); + for (final BaseIndex i : BaseIndex.values()) { - if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI)) + if ( !allowIndels && !i.isNucleotide() ) + continue; + + final long mySum = getSumQuals(i); + if (mySum > maxSum) { maxI = i; + maxSum = mySum; + } } - return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels()); + return (maxSum > 0L ? maxI : baseIndexWithMostCounts(allowIndels)); } @Ensures("result >=0") @@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils; count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0; sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0; totalCount = 0; + nSoftClippedBases = 0; mappingQualities.clear(); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java index e41878a0b..665e3e7ce 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseIndex.java @@ -121,7 +121,7 @@ public enum BaseIndex { * * @return whether or not it is a nucleotide, given the definition above */ - public boolean isNucleotide() { + public final boolean isNucleotide() { return !isIndel(); } @@ -130,7 +130,7 @@ public enum BaseIndex { * * @return true for I or D, false otherwise */ - public boolean isIndel() { + public final boolean isIndel() { return this == D || this == I; } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index dec323213..38b9e957b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -62,9 +62,10 @@ public class HeaderElement { private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site. private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site. private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right - private int nSoftClippedBases; // How many bases in this site came from soft clipped bases private int location; // Genome location of this site (the sliding window knows which contig we're at + protected static final int MIN_COUNT_FOR_USING_PVALUE = 2; + public int getLocation() { return location; } @@ -84,7 +85,7 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location); } /** @@ -94,7 +95,7 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location, final int insertionsToTheRight) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location); } /** @@ -103,15 +104,13 @@ public class HeaderElement { * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param insertionsToTheRight number of insertions to the right of this HeaderElement - * @param nSoftClippedBases number of softclipped bases of this HeaderElement * @param location the reference location of this reference element * HeaderElement */ - public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) { + public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) { this.consensusBaseCounts = consensusBaseCounts; this.filteredBaseCounts = filteredBaseCounts; this.insertionsToTheRight = insertionsToTheRight; - this.nSoftClippedBases = nSoftClippedBases; this.location = location; } @@ -119,10 +118,13 @@ public class HeaderElement { * Whether or not the site represented by this HeaderElement is variant according to the definitions of variant * by insertion, deletion and mismatches. * + * @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches + * @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches + * @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels * @return true if site is variant by any definition. False otherwise. */ - public boolean isVariant(double minVariantPvalue, double minIndelProportion) { - return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); + public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) { + return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); } /** @@ -140,11 +142,9 @@ public class HeaderElement { public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); else filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); - - nSoftClippedBases += isSoftClipped ? 1 : 0; } /** @@ -162,11 +162,9 @@ public class HeaderElement { public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); else filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); - - nSoftClippedBases -= isSoftClipped ? 1 : 0; } /** * Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions @@ -246,15 +244,15 @@ public class HeaderElement { /** * Whether or not the HeaderElement is variant due to excess mismatches * - * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage). + * @param minVariantProportion the minimum proportion to call a site variant (used with high coverage). * @return whether or not the HeaderElement is variant due to excess mismatches */ - protected boolean isVariantFromMismatches(double minVariantPvalue) { + protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) { final int totalCount = consensusBaseCounts.totalCountWithoutIndels(); final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon); - final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases); - return pvalue > minVariantPvalue; + return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion); } /** @@ -264,6 +262,7 @@ public class HeaderElement { * @return true if we had more soft clipped bases contributing to this site than matches/mismatches. */ protected boolean isVariantFromSoftClips() { + final int nSoftClippedBases = consensusBaseCounts.nSoftclips(); return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases); } @@ -271,10 +270,11 @@ public class HeaderElement { * Calculates the number of alleles necessary to represent this site. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return the number of alleles necessary to represent this site or -1 if there are too many indels */ - public int getNumberOfBaseAlleles(final double minVariantPvalue) { - final ObjectArrayList alleles = getAlleles(minVariantPvalue); + public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) { + final ObjectArrayList alleles = getAlleles(minVariantPvalue, minVariantProportion); return alleles == null ? -1 : alleles.size(); } @@ -282,16 +282,18 @@ public class HeaderElement { * Calculates the alleles necessary to represent this site. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return the list of alleles necessary to represent this site or null if there are too many indels */ - public ObjectArrayList getAlleles(final double minVariantPvalue) { + public ObjectArrayList getAlleles(final double minVariantPvalue, final double minVariantProportion) { // make sure we have bases at all final int totalBaseCount = consensusBaseCounts.totalCount(); if ( totalBaseCount == 0 ) return new ObjectArrayList(0); - // next, check for insertions - if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) ) + // next, check for insertions; technically, the insertion count can be greater than totalBaseCount + // (because of the way insertions are counted), so we need to account for that + if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) ) return null; // finally, check for the bases themselves (including deletions) @@ -301,9 +303,7 @@ public class HeaderElement { if ( baseCount == 0 ) continue; - final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount); - - if ( pvalue > minVariantPvalue ) { + if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) { if ( base == BaseIndex.D ) return null; alleles.add(base); @@ -316,26 +316,34 @@ public class HeaderElement { * Checks whether there are a significant number of softclips. * * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return true if there are significant softclips, false otherwise */ - public boolean hasSignificantSoftclips(final double minVariantPvalue) { - return hasSignificantCount(nSoftClippedBases, minVariantPvalue); + public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) { + return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion); } /* * Checks whether there are a significant number of count. * - * @param count the count to test against + * @param count the count (k) to test against + * @param total the total (n) to test against * @param minVariantPvalue the minimum pvalue to call a site variant. + * @param minVariantProportion the minimum proportion to call a site variant. * @return true if there is a significant count given the provided pvalue, false otherwise */ - private boolean hasSignificantCount(final int count, final double minVariantPvalue) { - final int totalBaseCount = consensusBaseCounts.totalCount(); - if ( count == 0 || totalBaseCount == 0 ) + private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) { + if ( count == 0 || total == 0 ) return false; - // technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that - final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount)); - return pvalue > minVariantPvalue; + // use p-values for low counts of k + if ( count <= MIN_COUNT_FOR_USING_PVALUE ) { + final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count); + return pvalue > minVariantPvalue; + } + + // otherwise, use straight proportions + final int minBaseCountForSignificance = (int)(minVariantProportion * total); + return count >= minBaseCountForSignificance; } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java index 85aee9fc9..bdd407fba 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java @@ -97,13 +97,14 @@ public class MultiSampleCompressor { final int downsampleCoverage, final int minMappingQuality, final double minAltPValueToTriggerVariant, + final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final ReduceReads.DownsampleStrategy downsampleStrategy) { for ( String name : SampleUtils.getSAMFileSamples(header) ) { compressorsPerSample.put(name, new SingleSampleCompressor(contextSize, downsampleCoverage, - minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); + minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 4d90a83be..82a02ca55 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker, Redu /** * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be - * considered consensus. + * considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that + * this value is used only regions with high coverage. */ - @Deprecated + @Advanced @Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false) public double minAltProportionToTriggerVariant = 0.05; /** * Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region. - * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression). + * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to + * trigger polyploid compression). Note that this value is used only regions with low coverage. */ @Advanced @Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false) @@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker, Redu if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 ) throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); + if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 ) + throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); + if ( known.isEmpty() ) knownSnpPositions = null; else @@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker, Redu */ @Override public ReduceReadsStash reduceInit() { - return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); + return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java index ec041386c..61c34b6a0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java @@ -63,6 +63,7 @@ public class SingleSampleCompressor { final private int downsampleCoverage; final private int minMappingQuality; final private double minAltPValueToTriggerVariant; + final private double minAltProportionToTriggerVariant; final private double minIndelProportionToTriggerVariant; final private int minBaseQual; final private ReduceReads.DownsampleStrategy downsampleStrategy; @@ -76,6 +77,7 @@ public class SingleSampleCompressor { final int downsampleCoverage, final int minMappingQuality, final double minAltPValueToTriggerVariant, + final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final ReduceReads.DownsampleStrategy downsampleStrategy) { @@ -84,6 +86,7 @@ public class SingleSampleCompressor { this.minMappingQuality = minMappingQuality; this.slidingWindowCounter = 0; this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant; + this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant; this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant; this.minBaseQual = minBaseQual; this.downsampleStrategy = downsampleStrategy; @@ -114,7 +117,9 @@ public class SingleSampleCompressor { } if ( slidingWindow == null) { // this is the first read - slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities()); + slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), + slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, + minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities()); slidingWindowCounter++; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 5fd7724cb..d3ca037be 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -78,8 +77,8 @@ import java.util.*; public class SlidingWindow { // Sliding Window data - final private ObjectAVLTreeSet readsInWindow; - final private LinkedList windowHeader; + final protected PriorityQueue readsInWindow; + final protected LinkedList windowHeader; protected int contextSize; // the largest context size (between mismatches and indels) protected String contig; protected int contigIndex; @@ -99,6 +98,7 @@ public class SlidingWindow { // Additional parameters protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches + protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value protected int MIN_MAPPING_QUALITY; @@ -146,28 +146,33 @@ public class SlidingWindow { this.windowHeader = new LinkedList(); windowHeader.addFirst(new HeaderElement(startLocation)); - this.readsInWindow = new ObjectAVLTreeSet(); + this.readsInWindow = new PriorityQueue(100, new Comparator() { + @Override + public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { + return read1.getSoftEnd() - read2.getSoftEnd(); + } + }); } public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader, final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber, - final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant, + final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, final int minBaseQual, final int minMappingQuality, final int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) { this.contextSize = contextSize; this.downsampleCoverage = downsampleCoverage; this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant; + this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant; this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant; this.MIN_BASE_QUAL_TO_COUNT = minBaseQual; this.MIN_MAPPING_QUALITY = minMappingQuality; this.windowHeader = new LinkedList(); - this.readsInWindow = new ObjectAVLTreeSet(new Comparator() { + this.readsInWindow = new PriorityQueue(1000, new Comparator() { @Override public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { - final int difference = read1.getSoftEnd() - read2.getSoftEnd(); - return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName()); + return read1.getSoftEnd() - read2.getSoftEnd(); } }); @@ -290,8 +295,8 @@ public class SlidingWindow { regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose); } - while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) { - readsInWindow.remove(readsInWindow.first()); + while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) { + readsInWindow.poll(); } return regions; @@ -353,7 +358,7 @@ public class SlidingWindow { /** * returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks) * - * @param stop check the window from start to stop (not-inclusive) + * @param stop check the window from start to stop (not-inclusive); given in global coordinates */ protected void markSites(final int stop) { @@ -363,21 +368,16 @@ public class SlidingWindow { // copy over as many bits as we can from the previous calculation. Note that we can't trust the // last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there. final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1; - final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize); + final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize)); - // update the iterator to the correct position - Iterator headerElementIterator = windowHeader.iterator(); - for (int i = windowHeaderStartLocation; i < locationToProcess; i++) { - if (headerElementIterator.hasNext()) - headerElementIterator.next(); - } + final ListIterator headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation); // process a contextSize worth of region from scratch in case there's a variant there for (int i = locationToProcess; i < stop; i++) { if (headerElementIterator.hasNext()) { HeaderElement headerElement = headerElementIterator.next(); - if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT)) + if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT)) markVariantRegion(i - windowHeaderStartLocation); } else @@ -409,7 +409,7 @@ public class SlidingWindow { } /** - * Adds bases to the running consensus or filtered data accordingly + * Adds bases to the running consensus * * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * @@ -422,9 +422,10 @@ public class SlidingWindow { @Requires({"start >= 0 && (end >= start || end == 0)"}) @Ensures("result != null") protected ObjectArrayList addToSyntheticReads(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - ObjectArrayList reads = new ObjectArrayList(); - if (start < end) { - ListIterator headerElementIterator = header.listIterator(start); + final ObjectArrayList reads = new ObjectArrayList(); + + if ( start < end ) { + final ListIterator headerElementIterator = header.listIterator(start); if (!headerElementIterator.hasNext()) throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); @@ -432,37 +433,29 @@ public class SlidingWindow { HeaderElement headerElement = headerElementIterator.next(); if (headerElement.hasConsensusData()) { - reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); - - int endOfConsensus = findNextNonConsensusElement(header, start, end); - addToRunningConsensus(header, start, endOfConsensus, strandType); + // find the end of the consecutive consensus data in the window + final int endOfConsensus = findNextNonConsensusElement(header, start, end); if (endOfConsensus <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); + // add to running consensus and recurse + addToRunningConsensus(header, start, endOfConsensus, strandType); reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType)); - } else if (headerElement.hasFilteredData()) { + + } else { + + // add any outstanding consensus data reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); - int endOfFilteredData = findNextNonFilteredDataElement(header, start, end); - reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType)); - - if (endOfFilteredData <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start)); - - reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType)); - } else if (headerElement.isEmpty()) { - reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); - - int endOfEmptyData = findNextNonEmptyElement(header, start, end); - + // find the end of the consecutive empty data in the window + final int endOfEmptyData = findNextConsensusElement(header, start, end); if (endOfEmptyData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); + // recurse out of the empty region reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType)); - } else - throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start)); - + } } return reads; @@ -474,24 +467,21 @@ public class SlidingWindow { * @param type the synthetic reads you want to close * @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads */ - private ObjectArrayList finalizeAndAdd(ConsensusType type) { - GATKSAMRecord read = null; - ObjectArrayList list = new ObjectArrayList(); + private ObjectArrayList finalizeAndAdd(final ConsensusType type) { - switch (type) { - case CONSENSUS: - read = finalizeRunningConsensus(); - break; - case FILTERED: - read = finalizeFilteredDataConsensus(); - break; - case BOTH: - read = finalizeRunningConsensus(); - if (read != null) list.add(read); - read = finalizeFilteredDataConsensus(); + final ObjectArrayList list = new ObjectArrayList(); + + if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) { + final GATKSAMRecord read = finalizeRunningConsensus(); + if ( read != null ) + list.add(read); + } + + if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) { + final GATKSAMRecord read = finalizeFilteredDataConsensus(); + if ( read != null ) + list.add(read); } - if (read != null) - list.add(read); return list; } @@ -499,19 +489,145 @@ public class SlidingWindow { /** * Looks for the next position without consensus data * - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position */ - private int findNextNonConsensusElement(LinkedList header, int start, int upTo) { - Iterator headerElementIterator = header.listIterator(start); + private int findNextNonConsensusElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("There are no more header elements in this window"); - HeaderElement headerElement = headerElementIterator.next(); + if (!headerElementIterator.next().hasConsensusData()) + break; + index++; + } + return index; + } + + /** + * Looks for the next position witho consensus data + * + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position + */ + private int findNextConsensusElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); + int index = start; + while (index < upTo) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("There are no more header elements in this window"); + + if (headerElementIterator.next().hasConsensusData()) + break; + index++; + } + return index; + } + + /** + * Adds bases to the filtered data synthetic read. + * + * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData + * bases. + * + * @param header the window header + * @param start the first header index to add to consensus + * @param end the first header index NOT TO add to consensus + * @param strandType the strandedness that the synthetic read should be represented as having + */ + @Requires({"start >= 0 && (end >= start || end == 0)"}) + private void addToRunningConsensus(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { + if (runningConsensus == null) + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); + + final Iterator headerElementIterator = header.listIterator(start); + + for (int index = start; index < end; index++) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); + + final HeaderElement headerElement = headerElementIterator.next(); if (!headerElement.hasConsensusData()) + throw new ReviewedStingException("No CONSENSUS data in " + index); + + genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts()); + } + } + + /** + * Adds bases to the running filtered data accordingly + * + * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus + * + * @param header the window header + * @param start the first header index to add to consensus + * @param end the first header index NOT TO add to consensus + * @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated. + */ + @Requires({"start >= 0 && (end >= start || end == 0)"}) + @Ensures("result != null") + protected ObjectArrayList addToFilteredReads(final LinkedList header, final int start, final int end) { + final ObjectArrayList reads = new ObjectArrayList(); + + if ( start < end ) { + final ListIterator headerElementIterator = header.listIterator(start); + + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); + + HeaderElement headerElement = headerElementIterator.next(); + + if (headerElement.hasFilteredData()) { + + // find the end of the consecutive filtered data in the window + final int endOfFiltered = findNextNonFilteredElement(header, start, end); + if (endOfFiltered <= start) + throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start)); + + // add to running filtered consensus and recurse + addToFilteredData(header, start, endOfFiltered); + reads.addAll(addToFilteredReads(header, endOfFiltered, end)); + + } else { + + // add any outstanding filtered data + reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); + + // find the end of the consecutive empty data in the window + final int endOfEmptyData = findNextFilteredElement(header, start, end); + if (endOfEmptyData <= start) + throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); + + // recurse out of the empty region + reads.addAll(addToFilteredReads(header, endOfEmptyData, end)); + } + } + + return reads; + } + + /** + * Looks for the next position without consensus data + * + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position + */ + private int findNextNonFilteredElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); + int index = start; + while (index < upTo) { + if (!headerElementIterator.hasNext()) + throw new ReviewedStingException("There are no more header elements in this window"); + + if (!headerElementIterator.next().hasFilteredData()) break; index++; } @@ -519,43 +635,21 @@ public class SlidingWindow { } /** - * Looks for the next position without filtered data + * Looks for the next position witho consensus data * - * @param start beginning of the region - * @param upTo limit to search for - * @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position + * @param header the header to check + * @param start beginning of the filtered region + * @param upTo limit to search for another consensus element + * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position */ - private int findNextNonFilteredDataElement(LinkedList header, int start, int upTo) { - Iterator headerElementIterator = header.listIterator(start); + private int findNextFilteredElement(final LinkedList header, final int start, final int upTo) { + final Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("There are no more header elements in this window"); - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.hasFilteredData() || headerElement.hasConsensusData()) - break; - index++; - } - return index; - } - - /** - * Looks for the next non-empty header element - * - * @param start beginning of the region - * @param upTo limit to search for - * @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position - */ - private int findNextNonEmptyElement(LinkedList header, int start, int upTo) { - ListIterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.isEmpty()) + if (headerElementIterator.next().hasFilteredData()) break; index++; } @@ -571,67 +665,26 @@ public class SlidingWindow { * @param header the window header * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having - * @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated. */ @Requires({"start >= 0 && (end >= start || end == 0)"}) @Ensures("result != null") - private ObjectArrayList addToFilteredData(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - ObjectArrayList result = new ObjectArrayList(); + private void addToFilteredData(final LinkedList header, final int start, final int end) { if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS); ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - HeaderElement headerElement = headerElementIterator.next(); - if (headerElement.hasConsensusData()) - throw new ReviewedStingException("Found consensus data inside region to add to filtered data."); + final HeaderElement headerElement = headerElementIterator.next(); if (!headerElement.hasFilteredData()) throw new ReviewedStingException("No filtered data in " + index); - if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) { - result.add(finalizeFilteredDataConsensus()); - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType); - } - genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts()); } - - return result; - } - - /** - * Adds bases to the filtered data synthetic read. - * - * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData - * bases. - * - * @param header the window header - * @param start the first header index to add to consensus - * @param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having - */ - @Requires({"start >= 0 && (end >= start || end == 0)"}) - private void addToRunningConsensus(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); - - Iterator headerElementIterator = header.listIterator(start); - for (int index = start; index < end; index++) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - - HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.hasConsensusData()) - throw new ReviewedStingException("No CONSENSUS data in " + index); - - genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts()); - } } /** @@ -726,7 +779,7 @@ public class SlidingWindow { for ( int i = start; i <= stop; i++ ) { - final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT); + final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT); // we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios if ( nAlleles > 2 || nAlleles == -1 ) @@ -760,8 +813,8 @@ public class SlidingWindow { if ( headerElement.getLocation() == positionToSkip ) continue; - if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) || - headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 ) + if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) || + headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 ) return true; } @@ -784,6 +837,7 @@ public class SlidingWindow { final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS)); + result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1)); result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); return result; // finalized reads will be downsampled if necessary @@ -914,6 +968,7 @@ public class SlidingWindow { if (!windowHeader.isEmpty()) { finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS)); + finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size())); finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up } } @@ -983,7 +1038,7 @@ public class SlidingWindow { // initialize the mapping from base (allele) to header final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2); - for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) { + for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) { final int currentIndex = alleleHeaderMap.size(); if ( currentIndex > 1 ) throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus"); @@ -997,7 +1052,7 @@ public class SlidingWindow { if ( alleleHeaderMap.size() != 2 ) throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size()); - final ObjectList readsToRemoveFromHeader = new ObjectArrayList(); + final ObjectList readsToRemove = new ObjectArrayList(); for ( final GATKSAMRecord read : readsInWindow ) { @@ -1006,38 +1061,36 @@ public class SlidingWindow { continue; // remove all other reads from the read cache since we're going to use them here - readsInWindow.remove(read); + readsToRemove.add(read); - // if the read falls before the het position, we don't need to look at it - if ( read.getSoftEnd() < globalHetRefPosition ) + // if the read falls before the het position or has low MQ, we don't need to look at it + if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY) continue; // remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead removeFromHeader(windowHeader, read); - // make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus) - if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) { + // where on the read is the het position? + final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); - // where on the read is the het position? - final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); + // this is safe because indels are not supported + final byte base = read.getReadBases()[readPosOfHet]; - // this is safe because indels are not supported - final byte base = read.getReadBases()[readPosOfHet]; - final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet]; + // check which allele this read represents + final Integer allele = alleleHeaderMap.get(base); - // check which allele this read represents - final Integer allele = alleleHeaderMap.get(base); - - // ignore the read if it represents a base that's not part of the consensus - if ( allele != null ) { - // add to the appropriate polyploid header - final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele]; - header.reads.add(read); - addToHeader(header.consensus, read); - } + // ignore the read if it represents a base that's not part of the consensus + if ( allele != null ) { + // add to the appropriate polyploid header + final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele]; + header.reads.add(read); + addToHeader(header.consensus, read); } } + for ( final GATKSAMRecord read : readsToRemove ) + readsInWindow.remove(read); + // create the polyploid synthetic reads if we can final ObjectList hetReads = new ObjectArrayList(); @@ -1171,15 +1224,15 @@ public class SlidingWindow { int readBaseIndex = 0; HeaderElement headerElement; - for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) { + for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) { switch ( cigarElement.getOperator() ) { case H: break; case I: readBaseIndex += cigarElement.getLength(); - // special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it. - if ( removeRead && locationIndex == 0 ) + // special case, if we don't have the previous header element anymore, don't worry about it. + if ( locationIndex == 0 ) break; // insertions are added to the base to the left (previous element) @@ -1200,9 +1253,8 @@ public class SlidingWindow { headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); else headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); - - locationIndex++; } + locationIndex += nDeletionBases; break; case S: case M: @@ -1211,6 +1263,8 @@ public class SlidingWindow { case X: final int nBasesToAdd = cigarElement.getLength(); final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S; + final byte[] readBases = read.getReadBases(); + final byte[] readQuals = read.getBaseQualities(); final boolean readHasIndelQuals = read.hasBaseIndelQualities(); final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null; final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null; @@ -1219,14 +1273,15 @@ public class SlidingWindow { headerElement = headerElementIterator.next(); final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1; final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1; + if ( removeRead ) - headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); else - headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); readBaseIndex++; - locationIndex++; } + locationIndex += nBasesToAdd; break; default: break; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index 4e5652c45..a3bdc6691 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker implem @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false) public double qual_epsilon = 0.10; + @Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false) + public int excludeMQ = 0; + @Output protected PrintStream out; @@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker implem } private boolean isGoodRead(final PileupElement p) { - return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20; + return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ; } private int getTagIndex(final List tags) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java index 5ae6e86df..f988471a0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/BaseCountsUnitTest.java @@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest { BaseCounts counts = new BaseCounts(); for ( int qual : test.quals ) - counts.incr(BaseIndex.A, (byte)qual, 20); + counts.incr(BaseIndex.A, (byte)qual, 20, false); final int actualSum = (int)counts.getSumQuals((byte)'A'); final int expectedSum = qualSum(test.quals); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java index d73a71855..32791dd97 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java @@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest { Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual); Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual); Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ); - Assert.assertFalse(headerElement.isVariantFromMismatches(0.05)); - Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip); + Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05)); + Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip); } @@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest { headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false); } - final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue); + final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue); final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue); Assert.assertEquals(nAllelesSeen, nAllelesExpected); @@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest { if ( count == 0 ) continue; - final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count); + final boolean isSignificant; + if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) { + isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue; + } else { + isSignificant = (count >= targetPvalue * total); + } - if ( pvalue > targetPvalue ) { + if ( isSignificant ) { if ( index == BaseIndex.D.index ) return -1; result++; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 1ab001147..b5963498a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false); + RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false); } @Test(enabled = true) public void testDefaultCompressionWithKnowns() { - RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true); + RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true); } private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; @Test(enabled = true) public void testMultipleIntervals() { - RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false); + RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false); } @Test(enabled = true) public void testMultipleIntervalsWithKnowns() { - RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true); + RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true); } - final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f"; - @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false); + RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false); } @Test(enabled = true) public void testHighCompressionWithKnowns() { - RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true); + RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false); + RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false); } @Test(enabled = true) public void testLowCompressionWithKnowns() { - RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true); + RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true); } @Test(enabled = true) @@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testIndelCompression() { - final String md5 = "56154baed62be07008d3684a0a4c0996"; + final String md5 = "d20e6012300898a0315c795cab7583d8"; RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false); RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true); } @@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testFilteredDeletionCompression() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s "; - executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1"))); + executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6"); } + private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d"; + @Test(enabled = true) public void testCoReduction() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; - executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8"))); + executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5); } @Test(enabled = true) public void testCoReductionWithKnowns() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; - executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b"))); + executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5); } @Test(enabled = true) public void testInsertionsAtEdgeOfConsensus() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; - executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0"))); + executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804"))); } /** @@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAddingReadAfterTailingTheStash() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; - executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); + executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); } /** @@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d"))); } /** @@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testReadOffContig() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; - executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01"))); + executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248"))); } /** @@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testPairedReadsInVariantRegion() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) + " -o %s --downsample_coverage 250 -dcov 50 "; - executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d"))); + executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 4bf67f5a2..56ad02084 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest { } private class ConsensusCreationTest { - public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression; + public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage; public final List myReads = new ArrayList(20); public final String description; - private ConsensusCreationTest(final List locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { + private ConsensusCreationTest(final List locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) { this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; - this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); + this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; + this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); // first, add the basic reads to the collection myReads.addAll(basicReads); @@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest { myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M)); } - private ConsensusCreationTest(final List locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { + private ConsensusCreationTest(final List locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) { this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; - this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); + this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; + this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); // first, add the basic reads to the collection myReads.addAll(basicReads); @@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest { private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295); private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309); private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310); + private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320); private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100); + private static final int DEEP_COVERAGE_ITERATIONS = 100; + @DataProvider(name = "ConsensusCreation") public Object[][] createConsensusCreationTestData() { List tests = new ArrayList(); // test high quality reads and bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 9, 6)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 11, 11)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))}); // test low quality reads - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), true, false, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 2, 2, 2)}); // test low quality bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 1, 1, 1)}); // test mixture - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 1, 1, 1)}); // test I/D operators - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.D, 9, 9)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 11, 11)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 9, 9)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 10, 10)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 11, 11)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); return tests.toArray(new Object[][]{}); } @@ -368,14 +374,14 @@ public class SlidingWindowUnitTest extends BaseTest { final ObjectAVLTreeSet knownSNPs = new ObjectAVLTreeSet(); // test WITHOUT het compression - SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); Pair, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads); // test WITH het compression at KNOWN sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); for ( int i = 0; i < 1200; i++ ) @@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); // test WITH het compression at ALL sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); result = slidingWindow.close(null); Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); + + // test with deep coverage + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false); + for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) { + for ( final GATKSAMRecord read : test.myReads ) { + final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength); + copy.setReadBases(read.getReadBases()); + copy.setBaseQualities(read.getBaseQualities()); + copy.setMappingQuality(read.getMappingQuality()); + copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag()); + if ( read.getCigar() != null ) + copy.setCigar(read.getCigar()); + slidingWindow.addRead(copy); + } + } + result = slidingWindow.close(null); + Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage); } @Test @@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest { final ObjectAVLTreeSet knownSNPs = new ObjectAVLTreeSet(); // test WITHOUT het compression - SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); Pair, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all // test WITH het compression at KNOWN sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); for ( int i = 0; i < readLength; i++ ) @@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all // test WITH het compression at ALL sites - slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : myReads ) slidingWindow.addRead(read); result = slidingWindow.close(knownSNPs); Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all } + @Test + public void testAddingReadPairWithSameCoordinates() { + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10); + + final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1); + read1.setReadBases(new byte[]{(byte)'A'}); + read1.setBaseQualities(new byte[]{(byte)'A'}); + read1.setMappingQuality(30); + read1.setReadNegativeStrandFlag(false); + slidingWindow.addRead(read1); + + final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1); + read2.setReadBases(new byte[]{(byte)'A'}); + read2.setBaseQualities(new byte[]{(byte)'A'}); + read2.setMappingQuality(30); + read2.setReadNegativeStrandFlag(true); + slidingWindow.addRead(read2); + + Assert.assertEquals(slidingWindow.readsInWindow.size(), 2); + } + + @Test + public void testOnlySpanningReadHasLowQual() { + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + + final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100); + final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100); + + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + read1.setReadBases(bases); + read2.setReadBases(bases); + + final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength); + baseQuals[80] = (byte)10; + read1.setBaseQualities(baseQuals); + read2.setBaseQualities(baseQuals); + + read1.setMappingQuality(30); + read2.setMappingQuality(30); + + slidingWindow.addRead(read1); + slidingWindow.addRead(read2); + + Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1); + } + /////////////////////////////////////////////////////////// //// This section tests the downsampling functionality //// @@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest { @Test(dataProvider = "Downsampling", enabled = true) public void testDownsamplingTest(final int dcov) { - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); final ObjectList result = slidingWindow.downsampleVariantRegion(basicReads); Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); @@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest { @Test(dataProvider = "ConsensusQuals", enabled = true) public void testConsensusQualsTest(QualsTest test) { - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); for ( final GATKSAMRecord read : test.myReads ) slidingWindow.addRead(read); final Pair, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet()); @@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read.setMappingQuality(30); - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start); Assert.assertEquals(newIndex, start > 0 ? start : 0); @@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setMappingQuality(30); // add the read - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start); for ( int i = 0; i < start; i++ ) Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); @@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); } - ////////////////////////////////////////////////////////////////////////////////// //// This section tests functionality related to polyploid consensus creation //// ////////////////////////////////////////////////////////////////////////////////// @@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest { read.setMappingQuality(30); // add the read - final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0); // set up and add a soft-clipped read if requested diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index f4644036f..1cc798e36 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -383,7 +383,7 @@ public class MathUtils { for (int hits = k_start; hits <= k_end; hits++) { prevProb = cumProb; - double probability = binomialProbability(n, hits); + final double probability = binomialProbability(n, hits); cumProb += probability; if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision probCache = probCache.add(new BigDecimal(prevProb));