Merge pull request #181 from broadinstitute/eb_yet_more_rr_improvements_GSA-930

Various bug fixes for recent Reduce Reads additions plus solution implemented for low MQ reads.
This commit is contained in:
MauricioCarneiro 2013-04-24 15:40:03 -07:00
commit 27bb699e8b
14 changed files with 527 additions and 303 deletions

View File

@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts {
* @param isLowQualBase true if the base is low quality * @param isLowQualBase true if the base is low quality
*/ */
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
}
/*
* Increments the count
*
* @param base the base
* @param baseQual the base quality
* @param insQual the insertion quality
* @param delQual the deletion quality
* @param baseMappingQual the mapping quality
* @param isLowQualBase true if the base is low quality
* @param isSoftClip true if is soft-clipped
*/
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
// if we already have high quality bases, ignore low quality ones // if we already have high quality bases, ignore low quality ones
if ( isLowQualBase && !isLowQuality() ) if ( isLowQualBase && !isLowQuality() )
return; return;
@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts {
} }
final BaseIndex i = BaseIndex.byteToBase(base); final BaseIndex i = BaseIndex.byteToBase(base);
super.incr(i, baseQual, baseMappingQual); super.incr(i, baseQual, baseMappingQual, isSoftClip);
switch (i) { switch (i) {
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break; case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break; case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
@ -115,12 +130,27 @@ public class BaseAndQualsCounts extends BaseCounts {
* @param isLowQualBase true if the base is low quality * @param isLowQualBase true if the base is low quality
*/ */
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) { public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
}
/*
* Decrements the count
*
* @param base the base
* @param baseQual the base quality
* @param insQual the insertion quality
* @param delQual the deletion quality
* @param baseMappingQual the mapping quality
* @param isLowQualBase true if the base is low quality
* @param isSoftClip true if is soft-clipped
*/
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
// if this is not the right type of base, ignore it // if this is not the right type of base, ignore it
if ( isLowQualBase != isLowQuality() ) if ( isLowQualBase != isLowQuality() )
return; return;
final BaseIndex i = BaseIndex.byteToBase(base); final BaseIndex i = BaseIndex.byteToBase(base);
super.decr(i, baseQual, baseMappingQual); super.decr(i, baseQual, baseMappingQual, isSoftClip);
switch (i) { switch (i) {
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break; case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break; case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;

View File

@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils;
private int count_N = 0; private int count_N = 0;
private int sumQual_N = 0; private int sumQual_N = 0;
private int totalCount = 0; // keeps track of total count since this is requested so often private int totalCount = 0; // keeps track of total count since this is requested so often
private int nSoftClippedBases = 0;
private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils;
this.count_I += other.count_I; this.count_I += other.count_I;
this.count_N += other.count_N; this.count_N += other.count_N;
this.totalCount += other.totalCount; this.totalCount += other.totalCount;
this.nSoftClippedBases = other.nSoftClippedBases;
this.mappingQualities.addAll(other.mappingQualities); this.mappingQualities.addAll(other.mappingQualities);
} }
@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils;
this.count_I -= other.count_I; this.count_I -= other.count_I;
this.count_N -= other.count_N; this.count_N -= other.count_N;
this.totalCount -= other.totalCount; this.totalCount -= other.totalCount;
this.nSoftClippedBases -= other.nSoftClippedBases;
this.mappingQualities.removeAll(other.mappingQualities); this.mappingQualities.removeAll(other.mappingQualities);
} }
@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils;
} }
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1") @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
public void incr(final BaseIndex base, final byte qual, final int mappingQuality) { public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
switch (base) { switch (base) {
case A: ++count_A; sumQual_A += qual; break; case A: ++count_A; sumQual_A += qual; break;
case C: ++count_C; sumQual_C += qual; break; case C: ++count_C; sumQual_C += qual; break;
@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils;
case N: ++count_N; sumQual_N += qual; break; case N: ++count_N; sumQual_N += qual; break;
} }
++totalCount; ++totalCount;
nSoftClippedBases += isSoftclip ? 1 : 0;
mappingQualities.add(mappingQuality); mappingQualities.add(mappingQuality);
} }
@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils;
} }
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1") @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
public void decr(final BaseIndex base, final byte qual, final int mappingQuality) { public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
switch (base) { switch (base) {
case A: --count_A; sumQual_A -= qual; break; case A: --count_A; sumQual_A -= qual; break;
case C: --count_C; sumQual_C -= qual; break; case C: --count_C; sumQual_C -= qual; break;
@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils;
case N: --count_N; sumQual_N -= qual; break; case N: --count_N; sumQual_N -= qual; break;
} }
--totalCount; --totalCount;
nSoftClippedBases -= isSoftclip ? 1 : 0;
mappingQualities.remove((Integer) mappingQuality); mappingQualities.remove((Integer) mappingQuality);
} }
@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils;
return (byte) (sumQualsOfBase(base) / countOfBase(base)); return (byte) (sumQualsOfBase(base) / countOfBase(base));
} }
@Ensures("result >= 0")
public int nSoftclips() {
return nSoftClippedBases;
}
@Ensures("result >= 0") @Ensures("result >= 0")
public int totalCount() { public int totalCount() {
@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils;
return baseIndexWithMostCounts().getByte(); return baseIndexWithMostCounts().getByte();
} }
/**
* @return the base index for which the count is highest, including indel indexes
*/
@Ensures("result != null") @Ensures("result != null")
public BaseIndex baseIndexWithMostCounts() { public BaseIndex baseIndexWithMostCounts() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; return baseIndexWithMostCounts(true);
for (final BaseIndex i : BaseIndex.values()) {
if (countOfBase(i) > countOfBase(maxI))
maxI = i;
}
return maxI;
} }
/**
* @return the base index for which the count is highest, excluding indel indexes
*/
@Ensures("result != null") @Ensures("result != null")
public BaseIndex baseIndexWithMostCountsWithoutIndels() { public BaseIndex baseIndexWithMostCountsWithoutIndels() {
return baseIndexWithMostCounts(false);
}
/**
* Finds the base index with the most counts
*
* @param allowIndels should we allow base indexes representing indels?
* @return non-null base index
*/
@Ensures("result != null")
protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
int maxCount = countOfBase(maxI);
for (final BaseIndex i : BaseIndex.values()) { for (final BaseIndex i : BaseIndex.values()) {
if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI)) if ( !allowIndels && !i.isNucleotide() )
continue;
final int myCount = countOfBase(i);
if (myCount > maxCount) {
maxI = i; maxI = i;
maxCount = myCount;
}
} }
return maxI; return maxI;
} }
@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils;
@Ensures("result != null") @Ensures("result != null")
public BaseIndex baseIndexWithMostProbability() { public BaseIndex baseIndexWithMostProbability() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; return baseIndexWithMostProbability(true);
for (final BaseIndex i : BaseIndex.values()) {
if (getSumQuals(i) > getSumQuals(maxI))
maxI = i;
}
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
} }
@Ensures("result != null") @Ensures("result != null")
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() { public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS; return baseIndexWithMostProbability(false);
for (final BaseIndex i : BaseIndex.values()) {
if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
maxI = i;
} }
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
/**
* Finds the base index with the most probability
*
* @param allowIndels should we allow base indexes representing indels?
* @return non-null base index
*/
@Ensures("result != null")
public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
long maxSum = getSumQuals(maxI);
for (final BaseIndex i : BaseIndex.values()) {
if ( !allowIndels && !i.isNucleotide() )
continue;
final long mySum = getSumQuals(i);
if (mySum > maxSum) {
maxI = i;
maxSum = mySum;
}
}
return (maxSum > 0L ? maxI : baseIndexWithMostCounts(allowIndels));
} }
@Ensures("result >=0") @Ensures("result >=0")
@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils;
count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0; count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0; sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
totalCount = 0; totalCount = 0;
nSoftClippedBases = 0;
mappingQualities.clear(); mappingQualities.clear();
} }
} }

View File

@ -121,7 +121,7 @@ public enum BaseIndex {
* *
* @return whether or not it is a nucleotide, given the definition above * @return whether or not it is a nucleotide, given the definition above
*/ */
public boolean isNucleotide() { public final boolean isNucleotide() {
return !isIndel(); return !isIndel();
} }
@ -130,7 +130,7 @@ public enum BaseIndex {
* *
* @return true for I or D, false otherwise * @return true for I or D, false otherwise
*/ */
public boolean isIndel() { public final boolean isIndel() {
return this == D || this == I; return this == D || this == I;
} }
} }

View File

@ -62,9 +62,10 @@ public class HeaderElement {
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site. private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site. private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
private int nSoftClippedBases; // How many bases in this site came from soft clipped bases
private int location; // Genome location of this site (the sliding window knows which contig we're at private int location; // Genome location of this site (the sliding window knows which contig we're at
protected static final int MIN_COUNT_FOR_USING_PVALUE = 2;
public int getLocation() { public int getLocation() {
return location; return location;
} }
@ -84,7 +85,7 @@ public class HeaderElement {
* @param location the reference location for the new element * @param location the reference location for the new element
*/ */
public HeaderElement(final int location) { public HeaderElement(final int location) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location); this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
} }
/** /**
@ -94,7 +95,7 @@ public class HeaderElement {
* @param location the reference location for the new element * @param location the reference location for the new element
*/ */
public HeaderElement(final int location, final int insertionsToTheRight) { public HeaderElement(final int location, final int insertionsToTheRight) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location); this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
} }
/** /**
@ -103,15 +104,13 @@ public class HeaderElement {
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
* @param insertionsToTheRight number of insertions to the right of this HeaderElement * @param insertionsToTheRight number of insertions to the right of this HeaderElement
* @param nSoftClippedBases number of softclipped bases of this HeaderElement
* @param location the reference location of this reference element * @param location the reference location of this reference element
* HeaderElement * HeaderElement
*/ */
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) { public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
this.consensusBaseCounts = consensusBaseCounts; this.consensusBaseCounts = consensusBaseCounts;
this.filteredBaseCounts = filteredBaseCounts; this.filteredBaseCounts = filteredBaseCounts;
this.insertionsToTheRight = insertionsToTheRight; this.insertionsToTheRight = insertionsToTheRight;
this.nSoftClippedBases = nSoftClippedBases;
this.location = location; this.location = location;
} }
@ -119,10 +118,13 @@ public class HeaderElement {
* Whether or not the site represented by this HeaderElement is variant according to the definitions of variant * Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
* by insertion, deletion and mismatches. * by insertion, deletion and mismatches.
* *
* @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches
* @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches
* @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels
* @return true if site is variant by any definition. False otherwise. * @return true if site is variant by any definition. False otherwise.
*/ */
public boolean isVariant(double minVariantPvalue, double minIndelProportion) { public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
} }
/** /**
@ -140,11 +142,9 @@ public class HeaderElement {
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual ) if ( baseMappingQuality >= minMappingQual )
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else else
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
nSoftClippedBases += isSoftClipped ? 1 : 0;
} }
/** /**
@ -162,11 +162,9 @@ public class HeaderElement {
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual ) if ( baseMappingQuality >= minMappingQual )
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else else
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
nSoftClippedBases -= isSoftClipped ? 1 : 0;
} }
/** /**
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions * Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
@ -246,15 +244,15 @@ public class HeaderElement {
/** /**
* Whether or not the HeaderElement is variant due to excess mismatches * Whether or not the HeaderElement is variant due to excess mismatches
* *
* @param minVariantPvalue the minimum pvalue to call a site variant. * @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
* @return whether or not the HeaderElement is variant due to excess mismatches * @return whether or not the HeaderElement is variant due to excess mismatches
*/ */
protected boolean isVariantFromMismatches(double minVariantPvalue) { protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
final int totalCount = consensusBaseCounts.totalCountWithoutIndels(); final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon); final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases); return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
return pvalue > minVariantPvalue;
} }
/** /**
@ -264,6 +262,7 @@ public class HeaderElement {
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches. * @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
*/ */
protected boolean isVariantFromSoftClips() { protected boolean isVariantFromSoftClips() {
final int nSoftClippedBases = consensusBaseCounts.nSoftclips();
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases); return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
} }
@ -271,10 +270,11 @@ public class HeaderElement {
* Calculates the number of alleles necessary to represent this site. * Calculates the number of alleles necessary to represent this site.
* *
* @param minVariantPvalue the minimum pvalue to call a site variant. * @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return the number of alleles necessary to represent this site or -1 if there are too many indels * @return the number of alleles necessary to represent this site or -1 if there are too many indels
*/ */
public int getNumberOfBaseAlleles(final double minVariantPvalue) { public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) {
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue); final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue, minVariantProportion);
return alleles == null ? -1 : alleles.size(); return alleles == null ? -1 : alleles.size();
} }
@ -282,16 +282,18 @@ public class HeaderElement {
* Calculates the alleles necessary to represent this site. * Calculates the alleles necessary to represent this site.
* *
* @param minVariantPvalue the minimum pvalue to call a site variant. * @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return the list of alleles necessary to represent this site or null if there are too many indels * @return the list of alleles necessary to represent this site or null if there are too many indels
*/ */
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue) { public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
// make sure we have bases at all // make sure we have bases at all
final int totalBaseCount = consensusBaseCounts.totalCount(); final int totalBaseCount = consensusBaseCounts.totalCount();
if ( totalBaseCount == 0 ) if ( totalBaseCount == 0 )
return new ObjectArrayList<BaseIndex>(0); return new ObjectArrayList<BaseIndex>(0);
// next, check for insertions // next, check for insertions; technically, the insertion count can be greater than totalBaseCount
if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) ) // (because of the way insertions are counted), so we need to account for that
if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) )
return null; return null;
// finally, check for the bases themselves (including deletions) // finally, check for the bases themselves (including deletions)
@ -301,9 +303,7 @@ public class HeaderElement {
if ( baseCount == 0 ) if ( baseCount == 0 )
continue; continue;
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount); if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) {
if ( pvalue > minVariantPvalue ) {
if ( base == BaseIndex.D ) if ( base == BaseIndex.D )
return null; return null;
alleles.add(base); alleles.add(base);
@ -316,26 +316,34 @@ public class HeaderElement {
* Checks whether there are a significant number of softclips. * Checks whether there are a significant number of softclips.
* *
* @param minVariantPvalue the minimum pvalue to call a site variant. * @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return true if there are significant softclips, false otherwise * @return true if there are significant softclips, false otherwise
*/ */
public boolean hasSignificantSoftclips(final double minVariantPvalue) { public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
return hasSignificantCount(nSoftClippedBases, minVariantPvalue); return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
} }
/* /*
* Checks whether there are a significant number of count. * Checks whether there are a significant number of count.
* *
* @param count the count to test against * @param count the count (k) to test against
* @param total the total (n) to test against
* @param minVariantPvalue the minimum pvalue to call a site variant. * @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return true if there is a significant count given the provided pvalue, false otherwise * @return true if there is a significant count given the provided pvalue, false otherwise
*/ */
private boolean hasSignificantCount(final int count, final double minVariantPvalue) { private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) {
final int totalBaseCount = consensusBaseCounts.totalCount(); if ( count == 0 || total == 0 )
if ( count == 0 || totalBaseCount == 0 )
return false; return false;
// technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that // use p-values for low counts of k
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount)); if ( count <= MIN_COUNT_FOR_USING_PVALUE ) {
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
return pvalue > minVariantPvalue; return pvalue > minVariantPvalue;
} }
// otherwise, use straight proportions
final int minBaseCountForSignificance = (int)(minVariantProportion * total);
return count >= minBaseCountForSignificance;
}
} }

View File

@ -97,13 +97,14 @@ public class MultiSampleCompressor {
final int downsampleCoverage, final int downsampleCoverage,
final int minMappingQuality, final int minMappingQuality,
final double minAltPValueToTriggerVariant, final double minAltPValueToTriggerVariant,
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
final int minBaseQual, final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy) { final ReduceReads.DownsampleStrategy downsampleStrategy) {
for ( String name : SampleUtils.getSAMFileSamples(header) ) { for ( String name : SampleUtils.getSAMFileSamples(header) ) {
compressorsPerSample.put(name, compressorsPerSample.put(name,
new SingleSampleCompressor(contextSize, downsampleCoverage, new SingleSampleCompressor(contextSize, downsampleCoverage,
minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
} }
} }

View File

@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
/** /**
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
* considered consensus. * considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that
* this value is used only regions with high coverage.
*/ */
@Deprecated @Advanced
@Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false) @Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
public double minAltProportionToTriggerVariant = 0.05; public double minAltProportionToTriggerVariant = 0.05;
/** /**
* Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region. * Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region.
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression). * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to
* trigger polyploid compression). Note that this value is used only regions with low coverage.
*/ */
@Advanced @Advanced
@Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false) @Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 ) if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)"); throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
if ( known.isEmpty() ) if ( known.isEmpty() )
knownSnpPositions = null; knownSnpPositions = null;
else else
@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
*/ */
@Override @Override
public ReduceReadsStash reduceInit() { public ReduceReadsStash reduceInit() {
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy)); return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
} }
/** /**

View File

@ -63,6 +63,7 @@ public class SingleSampleCompressor {
final private int downsampleCoverage; final private int downsampleCoverage;
final private int minMappingQuality; final private int minMappingQuality;
final private double minAltPValueToTriggerVariant; final private double minAltPValueToTriggerVariant;
final private double minAltProportionToTriggerVariant;
final private double minIndelProportionToTriggerVariant; final private double minIndelProportionToTriggerVariant;
final private int minBaseQual; final private int minBaseQual;
final private ReduceReads.DownsampleStrategy downsampleStrategy; final private ReduceReads.DownsampleStrategy downsampleStrategy;
@ -76,6 +77,7 @@ public class SingleSampleCompressor {
final int downsampleCoverage, final int downsampleCoverage,
final int minMappingQuality, final int minMappingQuality,
final double minAltPValueToTriggerVariant, final double minAltPValueToTriggerVariant,
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
final int minBaseQual, final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy) { final ReduceReads.DownsampleStrategy downsampleStrategy) {
@ -84,6 +86,7 @@ public class SingleSampleCompressor {
this.minMappingQuality = minMappingQuality; this.minMappingQuality = minMappingQuality;
this.slidingWindowCounter = 0; this.slidingWindowCounter = 0;
this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant; this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant; this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
this.minBaseQual = minBaseQual; this.minBaseQual = minBaseQual;
this.downsampleStrategy = downsampleStrategy; this.downsampleStrategy = downsampleStrategy;
@ -114,7 +117,9 @@ public class SingleSampleCompressor {
} }
if ( slidingWindow == null) { // this is the first read if ( slidingWindow == null) { // this is the first read
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities()); slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(),
slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant,
minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
slidingWindowCounter++; slidingWindowCounter++;
} }

View File

@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -78,8 +77,8 @@ import java.util.*;
public class SlidingWindow { public class SlidingWindow {
// Sliding Window data // Sliding Window data
final private ObjectAVLTreeSet<GATKSAMRecord> readsInWindow; final protected PriorityQueue<GATKSAMRecord> readsInWindow;
final private LinkedList<HeaderElement> windowHeader; final protected LinkedList<HeaderElement> windowHeader;
protected int contextSize; // the largest context size (between mismatches and indels) protected int contextSize; // the largest context size (between mismatches and indels)
protected String contig; protected String contig;
protected int contigIndex; protected int contigIndex;
@ -99,6 +98,7 @@ public class SlidingWindow {
// Additional parameters // Additional parameters
protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches
protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
protected int MIN_MAPPING_QUALITY; protected int MIN_MAPPING_QUALITY;
@ -146,28 +146,33 @@ public class SlidingWindow {
this.windowHeader = new LinkedList<HeaderElement>(); this.windowHeader = new LinkedList<HeaderElement>();
windowHeader.addFirst(new HeaderElement(startLocation)); windowHeader.addFirst(new HeaderElement(startLocation));
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(); this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
@Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
return read1.getSoftEnd() - read2.getSoftEnd();
}
});
} }
public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader, public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber, final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant, final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
final int minBaseQual, final int minMappingQuality, final int downsampleCoverage, final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) { final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
this.contextSize = contextSize; this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage; this.downsampleCoverage = downsampleCoverage;
this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant; this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant; this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual; this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
this.MIN_MAPPING_QUALITY = minMappingQuality; this.MIN_MAPPING_QUALITY = minMappingQuality;
this.windowHeader = new LinkedList<HeaderElement>(); this.windowHeader = new LinkedList<HeaderElement>();
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() { this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
@Override @Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
final int difference = read1.getSoftEnd() - read2.getSoftEnd(); return read1.getSoftEnd() - read2.getSoftEnd();
return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName());
} }
}); });
@ -290,8 +295,8 @@ public class SlidingWindow {
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose); regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
} }
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) { while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
readsInWindow.remove(readsInWindow.first()); readsInWindow.poll();
} }
return regions; return regions;
@ -353,7 +358,7 @@ public class SlidingWindow {
/** /**
* returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks) * returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
* *
* @param stop check the window from start to stop (not-inclusive) * @param stop check the window from start to stop (not-inclusive); given in global coordinates
*/ */
protected void markSites(final int stop) { protected void markSites(final int stop) {
@ -363,21 +368,16 @@ public class SlidingWindow {
// copy over as many bits as we can from the previous calculation. Note that we can't trust the // copy over as many bits as we can from the previous calculation. Note that we can't trust the
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there. // last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1; final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize); final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize));
// update the iterator to the correct position final ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation);
Iterator<HeaderElement> headerElementIterator = windowHeader.iterator();
for (int i = windowHeaderStartLocation; i < locationToProcess; i++) {
if (headerElementIterator.hasNext())
headerElementIterator.next();
}
// process a contextSize worth of region from scratch in case there's a variant there // process a contextSize worth of region from scratch in case there's a variant there
for (int i = locationToProcess; i < stop; i++) { for (int i = locationToProcess; i < stop; i++) {
if (headerElementIterator.hasNext()) { if (headerElementIterator.hasNext()) {
HeaderElement headerElement = headerElementIterator.next(); HeaderElement headerElement = headerElementIterator.next();
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT)) if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
markVariantRegion(i - windowHeaderStartLocation); markVariantRegion(i - windowHeaderStartLocation);
} else } else
@ -409,7 +409,7 @@ public class SlidingWindow {
} }
/** /**
* Adds bases to the running consensus or filtered data accordingly * Adds bases to the running consensus
* *
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
* *
@ -422,9 +422,10 @@ public class SlidingWindow {
@Requires({"start >= 0 && (end >= start || end == 0)"}) @Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null") @Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) { protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>(); final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if (start < end) {
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start); if ( start < end ) {
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
if (!headerElementIterator.hasNext()) if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
@ -432,37 +433,29 @@ public class SlidingWindow {
HeaderElement headerElement = headerElementIterator.next(); HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData()) { if (headerElement.hasConsensusData()) {
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
int endOfConsensus = findNextNonConsensusElement(header, start, end);
addToRunningConsensus(header, start, endOfConsensus, strandType);
// find the end of the consecutive consensus data in the window
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
if (endOfConsensus <= start) if (endOfConsensus <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
// add to running consensus and recurse
addToRunningConsensus(header, start, endOfConsensus, strandType);
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType)); reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
} else if (headerElement.hasFilteredData()) {
} else {
// add any outstanding consensus data
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
int endOfFilteredData = findNextNonFilteredDataElement(header, start, end); // find the end of the consecutive empty data in the window
reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType)); final int endOfEmptyData = findNextConsensusElement(header, start, end);
if (endOfFilteredData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType));
} else if (headerElement.isEmpty()) {
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
int endOfEmptyData = findNextNonEmptyElement(header, start, end);
if (endOfEmptyData <= start) if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType)); reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
} else }
throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start));
} }
return reads; return reads;
@ -474,24 +467,21 @@ public class SlidingWindow {
* @param type the synthetic reads you want to close * @param type the synthetic reads you want to close
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads * @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
*/ */
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(ConsensusType type) { private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
GATKSAMRecord read = null;
ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
switch (type) { final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
case CONSENSUS:
read = finalizeRunningConsensus(); if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
break; final GATKSAMRecord read = finalizeRunningConsensus();
case FILTERED: if ( read != null )
read = finalizeFilteredDataConsensus();
break;
case BOTH:
read = finalizeRunningConsensus();
if (read != null) list.add(read);
read = finalizeFilteredDataConsensus();
}
if (read != null)
list.add(read); list.add(read);
}
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
final GATKSAMRecord read = finalizeFilteredDataConsensus();
if ( read != null )
list.add(read);
}
return list; return list;
} }
@ -499,19 +489,19 @@ public class SlidingWindow {
/** /**
* Looks for the next position without consensus data * Looks for the next position without consensus data
* *
* @param header the header to check
* @param start beginning of the filtered region * @param start beginning of the filtered region
* @param upTo limit to search for another consensus element * @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/ */
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) { private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start); final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start; int index = start;
while (index < upTo) { while (index < upTo) {
if (!headerElementIterator.hasNext()) if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window"); throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next(); if (!headerElementIterator.next().hasConsensusData())
if (!headerElement.hasConsensusData())
break; break;
index++; index++;
} }
@ -519,92 +509,27 @@ public class SlidingWindow {
} }
/** /**
* Looks for the next position without filtered data * Looks for the next position witho consensus data
* *
* @param start beginning of the region * @param header the header to check
* @param upTo limit to search for * @param start beginning of the filtered region
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position * @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/ */
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) { private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start); final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start; int index = start;
while (index < upTo) { while (index < upTo) {
if (!headerElementIterator.hasNext()) if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window"); throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next(); if (headerElementIterator.next().hasConsensusData())
if (!headerElement.hasFilteredData() || headerElement.hasConsensusData())
break; break;
index++; index++;
} }
return index; return index;
} }
/**
* Looks for the next non-empty header element
*
* @param start beginning of the region
* @param upTo limit to search for
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
*/
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.isEmpty())
break;
index++;
}
return index;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
private ObjectArrayList<GATKSAMRecord> addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
if (filteredDataConsensus == null)
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData())
throw new ReviewedStingException("Found consensus data inside region to add to filtered data.");
if (!headerElement.hasFilteredData())
throw new ReviewedStingException("No filtered data in " + index);
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
result.add(finalizeFilteredDataConsensus());
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType);
}
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
}
return result;
}
/** /**
* Adds bases to the filtered data synthetic read. * Adds bases to the filtered data synthetic read.
* *
@ -621,12 +546,13 @@ public class SlidingWindow {
if (runningConsensus == null) if (runningConsensus == null)
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
Iterator<HeaderElement> headerElementIterator = header.listIterator(start); final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) { for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext()) if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next(); final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData()) if (!headerElement.hasConsensusData())
throw new ReviewedStingException("No CONSENSUS data in " + index); throw new ReviewedStingException("No CONSENSUS data in " + index);
@ -634,6 +560,133 @@ public class SlidingWindow {
} }
} }
/**
* Adds bases to the running filtered data accordingly
*
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if ( start < end ) {
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasFilteredData()) {
// find the end of the consecutive filtered data in the window
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
if (endOfFiltered <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
// add to running filtered consensus and recurse
addToFilteredData(header, start, endOfFiltered);
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
} else {
// add any outstanding filtered data
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
// find the end of the consecutive empty data in the window
final int endOfEmptyData = findNextFilteredElement(header, start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
}
}
return reads;
}
/**
* Looks for the next position without consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (!headerElementIterator.next().hasFilteredData())
break;
index++;
}
return index;
}
/**
* Looks for the next position witho consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (headerElementIterator.next().hasFilteredData())
break;
index++;
}
return index;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
if (filteredDataConsensus == null)
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasFilteredData())
throw new ReviewedStingException("No filtered data in " + index);
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
}
}
/** /**
* Generic accessor to add base and qualities to a synthetic read * Generic accessor to add base and qualities to a synthetic read
* *
@ -726,7 +779,7 @@ public class SlidingWindow {
for ( int i = start; i <= stop; i++ ) { for ( int i = start; i <= stop; i++ ) {
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT); final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT);
// we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios // we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
if ( nAlleles > 2 || nAlleles == -1 ) if ( nAlleles > 2 || nAlleles == -1 )
@ -760,8 +813,8 @@ public class SlidingWindow {
if ( headerElement.getLocation() == positionToSkip ) if ( headerElement.getLocation() == positionToSkip )
continue; continue;
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) || if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 ) headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
return true; return true;
} }
@ -784,6 +837,7 @@ public class SlidingWindow {
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS)); result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
return result; // finalized reads will be downsampled if necessary return result; // finalized reads will be downsampled if necessary
@ -914,6 +968,7 @@ public class SlidingWindow {
if (!windowHeader.isEmpty()) { if (!windowHeader.isEmpty()) {
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS)); finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
} }
} }
@ -983,7 +1038,7 @@ public class SlidingWindow {
// initialize the mapping from base (allele) to header // initialize the mapping from base (allele) to header
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2); final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) { for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
final int currentIndex = alleleHeaderMap.size(); final int currentIndex = alleleHeaderMap.size();
if ( currentIndex > 1 ) if ( currentIndex > 1 )
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus"); throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");
@ -997,7 +1052,7 @@ public class SlidingWindow {
if ( alleleHeaderMap.size() != 2 ) if ( alleleHeaderMap.size() != 2 )
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size()); throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
final ObjectList<GATKSAMRecord> readsToRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
for ( final GATKSAMRecord read : readsInWindow ) { for ( final GATKSAMRecord read : readsInWindow ) {
@ -1006,24 +1061,20 @@ public class SlidingWindow {
continue; continue;
// remove all other reads from the read cache since we're going to use them here // remove all other reads from the read cache since we're going to use them here
readsInWindow.remove(read); readsToRemove.add(read);
// if the read falls before the het position, we don't need to look at it // if the read falls before the het position or has low MQ, we don't need to look at it
if ( read.getSoftEnd() < globalHetRefPosition ) if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY)
continue; continue;
// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead // remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
removeFromHeader(windowHeader, read); removeFromHeader(windowHeader, read);
// make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus)
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {
// where on the read is the het position? // where on the read is the het position?
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
// this is safe because indels are not supported // this is safe because indels are not supported
final byte base = read.getReadBases()[readPosOfHet]; final byte base = read.getReadBases()[readPosOfHet];
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];
// check which allele this read represents // check which allele this read represents
final Integer allele = alleleHeaderMap.get(base); final Integer allele = alleleHeaderMap.get(base);
@ -1036,7 +1087,9 @@ public class SlidingWindow {
addToHeader(header.consensus, read); addToHeader(header.consensus, read);
} }
} }
}
for ( final GATKSAMRecord read : readsToRemove )
readsInWindow.remove(read);
// create the polyploid synthetic reads if we can // create the polyploid synthetic reads if we can
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
@ -1171,15 +1224,15 @@ public class SlidingWindow {
int readBaseIndex = 0; int readBaseIndex = 0;
HeaderElement headerElement; HeaderElement headerElement;
for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) { for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) {
switch ( cigarElement.getOperator() ) { switch ( cigarElement.getOperator() ) {
case H: case H:
break; break;
case I: case I:
readBaseIndex += cigarElement.getLength(); readBaseIndex += cigarElement.getLength();
// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it. // special case, if we don't have the previous header element anymore, don't worry about it.
if ( removeRead && locationIndex == 0 ) if ( locationIndex == 0 )
break; break;
// insertions are added to the base to the left (previous element) // insertions are added to the base to the left (previous element)
@ -1200,9 +1253,8 @@ public class SlidingWindow {
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
else else
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
locationIndex++;
} }
locationIndex += nDeletionBases;
break; break;
case S: case S:
case M: case M:
@ -1211,6 +1263,8 @@ public class SlidingWindow {
case X: case X:
final int nBasesToAdd = cigarElement.getLength(); final int nBasesToAdd = cigarElement.getLength();
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S; final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
final byte[] readBases = read.getReadBases();
final byte[] readQuals = read.getBaseQualities();
final boolean readHasIndelQuals = read.hasBaseIndelQualities(); final boolean readHasIndelQuals = read.hasBaseIndelQualities();
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null; final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null; final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
@ -1219,14 +1273,15 @@ public class SlidingWindow {
headerElement = headerElementIterator.next(); headerElement = headerElementIterator.next();
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1; final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1; final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
if ( removeRead ) if ( removeRead )
headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
else else
headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
readBaseIndex++; readBaseIndex++;
locationIndex++;
} }
locationIndex += nBasesToAdd;
break; break;
default: default:
break; break;

View File

@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false) @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
public double qual_epsilon = 0.10; public double qual_epsilon = 0.10;
@Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false)
public int excludeMQ = 0;
@Output @Output
protected PrintStream out; protected PrintStream out;
@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
} }
private boolean isGoodRead(final PileupElement p) { private boolean isGoodRead(final PileupElement p) {
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20; return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ;
} }
private int getTagIndex(final List<String> tags) { private int getTagIndex(final List<String> tags) {

View File

@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest {
BaseCounts counts = new BaseCounts(); BaseCounts counts = new BaseCounts();
for ( int qual : test.quals ) for ( int qual : test.quals )
counts.incr(BaseIndex.A, (byte)qual, 20); counts.incr(BaseIndex.A, (byte)qual, 20, false);
final int actualSum = (int)counts.getSumQuals((byte)'A'); final int actualSum = (int)counts.getSumQuals((byte)'A');
final int expectedSum = qualSum(test.quals); final int expectedSum = qualSum(test.quals);

View File

@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest {
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual); Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual); Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ); Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05)); Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip); Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
} }
@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest {
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false); headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
} }
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue); final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue); final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);
Assert.assertEquals(nAllelesSeen, nAllelesExpected); Assert.assertEquals(nAllelesSeen, nAllelesExpected);
@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest {
if ( count == 0 ) if ( count == 0 )
continue; continue;
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count); final boolean isSignificant;
if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) {
isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue;
} else {
isSignificant = (count >= targetPvalue * total);
}
if ( pvalue > targetPvalue ) { if ( isSignificant ) {
if ( index == BaseIndex.D.index ) if ( index == BaseIndex.D.index )
return -1; return -1;
result++; result++;

View File

@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testDefaultCompression() { public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false); RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testDefaultCompressionWithKnowns() { public void testDefaultCompressionWithKnowns() {
RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true); RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
} }
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
@Test(enabled = true) @Test(enabled = true)
public void testMultipleIntervals() { public void testMultipleIntervals() {
RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false); RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testMultipleIntervalsWithKnowns() { public void testMultipleIntervalsWithKnowns() {
RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true); RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
} }
final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f";
@Test(enabled = true) @Test(enabled = true)
public void testHighCompression() { public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false); RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testHighCompressionWithKnowns() { public void testHighCompressionWithKnowns() {
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true); RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
} }
@Test(enabled = true) @Test(enabled = true)
public void testLowCompression() { public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false); RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testLowCompressionWithKnowns() { public void testLowCompressionWithKnowns() {
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true); RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
} }
@Test(enabled = true) @Test(enabled = true)
@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testIndelCompression() { public void testIndelCompression() {
final String md5 = "56154baed62be07008d3684a0a4c0996"; final String md5 = "d20e6012300898a0315c795cab7583d8";
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false); RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true); RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
} }
@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testFilteredDeletionCompression() { public void testFilteredDeletionCompression() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1"))); executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
} }
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
@Test(enabled = true) @Test(enabled = true)
public void testCoReduction() { public void testCoReduction() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8"))); executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
} }
@Test(enabled = true) @Test(enabled = true)
public void testCoReductionWithKnowns() { public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b"))); executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
} }
@Test(enabled = true) @Test(enabled = true)
public void testInsertionsAtEdgeOfConsensus() { public void testInsertionsAtEdgeOfConsensus() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0"))); executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
} }
/** /**
@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testAddingReadAfterTailingTheStash() { public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
} }
/** /**
@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() { public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests // we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d"))); executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
} }
/** /**
@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testReadOffContig() { public void testReadOffContig() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01"))); executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
} }
/** /**
@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testPairedReadsInVariantRegion() { public void testPairedReadsInVariantRegion() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) + String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
" -o %s --downsample_coverage 250 -dcov 50 "; " -o %s --downsample_coverage 250 -dcov 50 ";
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d"))); executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
} }
} }

View File

@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest {
} }
private class ConsensusCreationTest { private class ConsensusCreationTest {
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression; public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage;
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20); public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
public final String description; public final String description;
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
// first, add the basic reads to the collection // first, add the basic reads to the collection
myReads.addAll(basicReads); myReads.addAll(basicReads);
@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest {
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M)); myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
} }
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) { private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression); this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
// first, add the basic reads to the collection // first, add the basic reads to the collection
myReads.addAll(basicReads); myReads.addAll(basicReads);
@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest {
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295); private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309); private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310); private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320);
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100); private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);
private static final int DEEP_COVERAGE_ITERATIONS = 100;
@DataProvider(name = "ConsensusCreation") @DataProvider(name = "ConsensusCreation")
public Object[][] createConsensusCreationTestData() { public Object[][] createConsensusCreationTestData() {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<Object[]>();
// test high quality reads and bases // test high quality reads and bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
// test low quality reads // test low quality reads
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
// test low quality bases // test low quality bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
// test mixture // test mixture
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
// test I/D operators // test I/D operators
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -368,14 +374,14 @@ public class SlidingWindowUnitTest extends BaseTest {
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>(); final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
// test WITHOUT het compression // test WITHOUT het compression
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads ) for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads); Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
// test WITH het compression at KNOWN sites // test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads ) for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
for ( int i = 0; i < 1200; i++ ) for ( int i = 0; i < 1200; i++ )
@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
// test WITH het compression at ALL sites // test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads ) for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
result = slidingWindow.close(null); result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression); Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
// test with deep coverage
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false);
for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) {
for ( final GATKSAMRecord read : test.myReads ) {
final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength);
copy.setReadBases(read.getReadBases());
copy.setBaseQualities(read.getBaseQualities());
copy.setMappingQuality(read.getMappingQuality());
copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag());
if ( read.getCigar() != null )
copy.setCigar(read.getCigar());
slidingWindow.addRead(copy);
}
}
result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage);
} }
@Test @Test
@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest {
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>(); final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
// test WITHOUT het compression // test WITHOUT het compression
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads ) for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
// test WITH het compression at KNOWN sites // test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads ) for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
for ( int i = 0; i < readLength; i++ ) for ( int i = 0; i < readLength; i++ )
@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
// test WITH het compression at ALL sites // test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads ) for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
result = slidingWindow.close(knownSNPs); result = slidingWindow.close(knownSNPs);
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
} }
@Test
public void testAddingReadPairWithSameCoordinates() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
read1.setReadBases(new byte[]{(byte)'A'});
read1.setBaseQualities(new byte[]{(byte)'A'});
read1.setMappingQuality(30);
read1.setReadNegativeStrandFlag(false);
slidingWindow.addRead(read1);
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
read2.setReadBases(new byte[]{(byte)'A'});
read2.setBaseQualities(new byte[]{(byte)'A'});
read2.setMappingQuality(30);
read2.setReadNegativeStrandFlag(true);
slidingWindow.addRead(read2);
Assert.assertEquals(slidingWindow.readsInWindow.size(), 2);
}
@Test
public void testOnlySpanningReadHasLowQual() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100);
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
read1.setReadBases(bases);
read2.setReadBases(bases);
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
baseQuals[80] = (byte)10;
read1.setBaseQualities(baseQuals);
read2.setBaseQualities(baseQuals);
read1.setMappingQuality(30);
read2.setMappingQuality(30);
slidingWindow.addRead(read1);
slidingWindow.addRead(read2);
Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1);
}
/////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////
//// This section tests the downsampling functionality //// //// This section tests the downsampling functionality ////
@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest {
@Test(dataProvider = "Downsampling", enabled = true) @Test(dataProvider = "Downsampling", enabled = true)
public void testDownsamplingTest(final int dcov) { public void testDownsamplingTest(final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads); final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest {
@Test(dataProvider = "ConsensusQuals", enabled = true) @Test(dataProvider = "ConsensusQuals", enabled = true)
public void testConsensusQualsTest(QualsTest test) { public void testConsensusQualsTest(QualsTest test) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads ) for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read); slidingWindow.addRead(read);
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>()); final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>());
@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
read.setMappingQuality(30); read.setMappingQuality(30);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start); int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
Assert.assertEquals(newIndex, start > 0 ? start : 0); Assert.assertEquals(newIndex, start > 0 ? start : 0);
@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setMappingQuality(30); read.setMappingQuality(30);
// add the read // add the read
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
for ( int i = 0; i < start; i++ ) for ( int i = 0; i < start; i++ )
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
} }
////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////
//// This section tests functionality related to polyploid consensus creation //// //// This section tests functionality related to polyploid consensus creation ////
////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////
@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setMappingQuality(30); read.setMappingQuality(30);
// add the read // add the read
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0);
// set up and add a soft-clipped read if requested // set up and add a soft-clipped read if requested

View File

@ -383,7 +383,7 @@ public class MathUtils {
for (int hits = k_start; hits <= k_end; hits++) { for (int hits = k_start; hits <= k_end; hits++) {
prevProb = cumProb; prevProb = cumProb;
double probability = binomialProbability(n, hits); final double probability = binomialProbability(n, hits);
cumProb += probability; cumProb += probability;
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
probCache = probCache.add(new BigDecimal(prevProb)); probCache = probCache.add(new BigDecimal(prevProb));