Merge pull request #181 from broadinstitute/eb_yet_more_rr_improvements_GSA-930
Various bug fixes for recent Reduce Reads additions plus solution implemented for low MQ reads.
This commit is contained in:
commit
27bb699e8b
|
|
@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts {
|
|||
* @param isLowQualBase true if the base is low quality
|
||||
*/
|
||||
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
||||
// Convenience overload: forwards to the 7-argument incr, passing false for isSoftClip.
incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increments the count
|
||||
*
|
||||
* @param base the base
|
||||
* @param baseQual the base quality
|
||||
* @param insQual the insertion quality
|
||||
* @param delQual the deletion quality
|
||||
* @param baseMappingQual the mapping quality
|
||||
* @param isLowQualBase true if the base is low quality
|
||||
* @param isSoftClip true if is soft-clipped
|
||||
*/
|
||||
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
|
||||
// if we already have high quality bases, ignore low quality ones
|
||||
if ( isLowQualBase && !isLowQuality() )
|
||||
return;
|
||||
|
|
@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts {
|
|||
}
|
||||
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
super.incr(i, baseQual, baseMappingQual);
|
||||
super.incr(i, baseQual, baseMappingQual, isSoftClip);
|
||||
switch (i) {
|
||||
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
|
||||
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
|
||||
|
|
@ -114,13 +129,28 @@ public class BaseAndQualsCounts extends BaseCounts {
|
|||
* @param baseMappingQual the mapping quality
|
||||
* @param isLowQualBase true if the base is low quality
|
||||
*/
|
||||
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
||||
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
||||
decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrements the count
|
||||
*
|
||||
* @param base the base
|
||||
* @param baseQual the base quality
|
||||
* @param insQual the insertion quality
|
||||
* @param delQual the deletion quality
|
||||
* @param baseMappingQual the mapping quality
|
||||
* @param isLowQualBase true if the base is low quality
|
||||
* @param isSoftClip true if is soft-clipped
|
||||
*/
|
||||
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
|
||||
// if this is not the right type of base, ignore it
|
||||
if ( isLowQualBase != isLowQuality() )
|
||||
return;
|
||||
|
||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||
super.decr(i, baseQual, baseMappingQual);
|
||||
super.decr(i, baseQual, baseMappingQual, isSoftClip);
|
||||
switch (i) {
|
||||
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
|
||||
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
private int count_N = 0;
|
||||
private int sumQual_N = 0;
|
||||
private int totalCount = 0; // keeps track of total count since this is requested so often
|
||||
private int nSoftClippedBases = 0;
|
||||
private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
|
||||
private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
|
||||
|
||||
|
|
@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
this.count_I += other.count_I;
|
||||
this.count_N += other.count_N;
|
||||
this.totalCount += other.totalCount;
|
||||
this.nSoftClippedBases = other.nSoftClippedBases;
|
||||
this.mappingQualities.addAll(other.mappingQualities);
|
||||
}
|
||||
|
||||
|
|
@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
this.count_I -= other.count_I;
|
||||
this.count_N -= other.count_N;
|
||||
this.totalCount -= other.totalCount;
|
||||
this.nSoftClippedBases -= other.nSoftClippedBases;
|
||||
this.mappingQualities.removeAll(other.mappingQualities);
|
||||
}
|
||||
|
||||
|
|
@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
|
||||
public void incr(final BaseIndex base, final byte qual, final int mappingQuality) {
|
||||
public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
|
||||
switch (base) {
|
||||
case A: ++count_A; sumQual_A += qual; break;
|
||||
case C: ++count_C; sumQual_C += qual; break;
|
||||
|
|
@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
case N: ++count_N; sumQual_N += qual; break;
|
||||
}
|
||||
++totalCount;
|
||||
nSoftClippedBases += isSoftclip ? 1 : 0;
|
||||
mappingQualities.add(mappingQuality);
|
||||
}
|
||||
|
||||
|
|
@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
}
|
||||
|
||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
|
||||
public void decr(final BaseIndex base, final byte qual, final int mappingQuality) {
|
||||
public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
|
||||
switch (base) {
|
||||
case A: --count_A; sumQual_A -= qual; break;
|
||||
case C: --count_C; sumQual_C -= qual; break;
|
||||
|
|
@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
case N: --count_N; sumQual_N -= qual; break;
|
||||
}
|
||||
--totalCount;
|
||||
nSoftClippedBases -= isSoftclip ? 1 : 0;
|
||||
mappingQualities.remove((Integer) mappingQuality);
|
||||
}
|
||||
|
||||
|
|
@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
return (byte) (sumQualsOfBase(base) / countOfBase(base));
|
||||
}
|
||||
|
||||
/**
 * Returns the running tally of soft-clipped bases held in nSoftClippedBases.
 *
 * @return the current value of nSoftClippedBases
 */
@Ensures("result >= 0")
|
||||
public int nSoftclips() {
|
||||
return nSoftClippedBases;
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public int totalCount() {
|
||||
|
|
@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
return baseIndexWithMostCounts().getByte();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the base index for which the count is highest, including indel indexes
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostCounts() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (countOfBase(i) > countOfBase(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return maxI;
|
||||
return baseIndexWithMostCounts(true);
|
||||
}
|
||||
|
||||
/**
 * Convenience wrapper that calls baseIndexWithMostCounts(false), i.e. with indel base indexes disallowed.
 *
|
||||
* @return the base index for which the count is highest, excluding indel indexes
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
|
||||
return baseIndexWithMostCounts(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the base index with the most counts
|
||||
*
|
||||
* @param allowIndels should we allow base indexes representing indels?
|
||||
* @return non-null base index
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
int maxCount = countOfBase(maxI);
|
||||
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI))
|
||||
if ( !allowIndels && !i.isNucleotide() )
|
||||
continue;
|
||||
|
||||
final int myCount = countOfBase(i);
|
||||
if (myCount > maxCount) {
|
||||
maxI = i;
|
||||
maxCount = myCount;
|
||||
}
|
||||
}
|
||||
return maxI;
|
||||
}
|
||||
|
|
@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
|
||||
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostProbability() {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (getSumQuals(i) > getSumQuals(maxI))
|
||||
maxI = i;
|
||||
}
|
||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
|
||||
return baseIndexWithMostProbability(true);
|
||||
}
|
||||
|
||||
/**
 * Convenience wrapper that calls baseIndexWithMostProbability(false), i.e. with indel base indexes disallowed.
 *
 * @return the non-null base index chosen by baseIndexWithMostProbability when indels are not allowed
 */
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
|
||||
return baseIndexWithMostProbability(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the base index with the most probability
|
||||
*
|
||||
* @param allowIndels should we allow base indexes representing indels?
|
||||
* @return non-null base index
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) {
|
||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||
long maxSum = getSumQuals(maxI);
|
||||
|
||||
for (final BaseIndex i : BaseIndex.values()) {
|
||||
if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
|
||||
if ( !allowIndels && !i.isNucleotide() )
|
||||
continue;
|
||||
|
||||
final long mySum = getSumQuals(i);
|
||||
if (mySum > maxSum) {
|
||||
maxI = i;
|
||||
maxSum = mySum;
|
||||
}
|
||||
}
|
||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
|
||||
return (maxSum > 0L ? maxI : baseIndexWithMostCounts(allowIndels));
|
||||
}
|
||||
|
||||
@Ensures("result >=0")
|
||||
|
|
@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
|
||||
sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
|
||||
totalCount = 0;
|
||||
nSoftClippedBases = 0;
|
||||
mappingQualities.clear();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ public enum BaseIndex {
|
|||
*
|
||||
* @return whether or not it is a nucleotide, given the definition above
|
||||
*/
|
||||
public boolean isNucleotide() {
|
||||
public final boolean isNucleotide() {
|
||||
return !isIndel();
|
||||
}
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public enum BaseIndex {
|
|||
*
|
||||
* @return true for I or D, false otherwise
|
||||
*/
|
||||
public boolean isIndel() {
|
||||
public final boolean isIndel() {
|
||||
return this == D || this == I;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,9 +62,10 @@ public class HeaderElement {
|
|||
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
||||
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
|
||||
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
||||
private int nSoftClippedBases; // How many bases in this site came from soft clipped bases
|
||||
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
||||
|
||||
protected static final int MIN_COUNT_FOR_USING_PVALUE = 2;
|
||||
|
||||
/**
 * Returns the genome location stored for this site.
 *
 * @return the value of the location field
 */
public int getLocation() {
|
||||
return location;
|
||||
}
|
||||
|
|
@ -84,7 +85,7 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location);
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -94,7 +95,7 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location, final int insertionsToTheRight) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location);
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -103,15 +104,13 @@ public class HeaderElement {
|
|||
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
|
||||
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
||||
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
||||
* @param nSoftClippedBases number of softclipped bases of this HeaderElement
|
||||
* @param location the reference location of this reference element
|
||||
* HeaderElement
|
||||
*/
|
||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) {
|
||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
|
||||
this.consensusBaseCounts = consensusBaseCounts;
|
||||
this.filteredBaseCounts = filteredBaseCounts;
|
||||
this.insertionsToTheRight = insertionsToTheRight;
|
||||
this.nSoftClippedBases = nSoftClippedBases;
|
||||
this.location = location;
|
||||
}
|
||||
|
||||
|
|
@ -119,10 +118,13 @@ public class HeaderElement {
|
|||
* Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
|
||||
* by insertion, deletion and mismatches.
|
||||
*
|
||||
* @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches
|
||||
* @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches
|
||||
* @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels
|
||||
* @return true if site is variant by any definition. False otherwise.
|
||||
*/
|
||||
public boolean isVariant(double minVariantPvalue, double minIndelProportion) {
|
||||
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
|
||||
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -140,11 +142,9 @@ public class HeaderElement {
|
|||
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||
if ( baseMappingQuality >= minMappingQual )
|
||||
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
|
||||
nSoftClippedBases += isSoftClipped ? 1 : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -162,11 +162,9 @@ public class HeaderElement {
|
|||
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||
if ( baseMappingQuality >= minMappingQual )
|
||||
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
|
||||
nSoftClippedBases -= isSoftClipped ? 1 : 0;
|
||||
}
|
||||
/**
|
||||
* Adds an insertion to the right of the HeaderElement and updates all counts accordingly. All insertions
|
||||
|
|
@ -246,15 +244,15 @@ public class HeaderElement {
|
|||
/**
|
||||
* Whether or not the HeaderElement is variant due to excess mismatches
|
||||
*
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
|
||||
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
|
||||
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||
*/
|
||||
protected boolean isVariantFromMismatches(double minVariantPvalue) {
|
||||
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
|
||||
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
|
||||
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
|
||||
final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases);
|
||||
return pvalue > minVariantPvalue;
|
||||
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -264,6 +262,7 @@ public class HeaderElement {
|
|||
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||
*/
|
||||
protected boolean isVariantFromSoftClips() {
|
||||
// Read the soft-clip tally from the consensus base counts.
final int nSoftClippedBases = consensusBaseCounts.nSoftclips();
|
||||
// True when there is at least one soft clip and the soft clips are at least as numerous as the
// remaining bases in the consensus total (the comparison is >=, so a tie also counts).
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
||||
}
|
||||
|
||||
|
|
@ -271,10 +270,11 @@ public class HeaderElement {
|
|||
* Calculates the number of alleles necessary to represent this site.
|
||||
*
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||
* @return the number of alleles necessary to represent this site or -1 if there are too many indels
|
||||
*/
|
||||
public int getNumberOfBaseAlleles(final double minVariantPvalue) {
|
||||
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue);
|
||||
public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue, minVariantProportion);
|
||||
return alleles == null ? -1 : alleles.size();
|
||||
}
|
||||
|
||||
|
|
@ -282,16 +282,18 @@ public class HeaderElement {
|
|||
* Calculates the alleles necessary to represent this site.
|
||||
*
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||
* @return the list of alleles necessary to represent this site or null if there are too many indels
|
||||
*/
|
||||
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue) {
|
||||
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||
// make sure we have bases at all
|
||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
||||
if ( totalBaseCount == 0 )
|
||||
return new ObjectArrayList<BaseIndex>(0);
|
||||
|
||||
// next, check for insertions
|
||||
if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) )
|
||||
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
|
||||
// (because of the way insertions are counted), so we need to account for that
|
||||
if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) )
|
||||
return null;
|
||||
|
||||
// finally, check for the bases themselves (including deletions)
|
||||
|
|
@ -301,9 +303,7 @@ public class HeaderElement {
|
|||
if ( baseCount == 0 )
|
||||
continue;
|
||||
|
||||
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount);
|
||||
|
||||
if ( pvalue > minVariantPvalue ) {
|
||||
if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) {
|
||||
if ( base == BaseIndex.D )
|
||||
return null;
|
||||
alleles.add(base);
|
||||
|
|
@ -316,26 +316,34 @@ public class HeaderElement {
|
|||
* Checks whether there are a significant number of softclips.
|
||||
*
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||
* @return true if there are significant softclips, false otherwise
|
||||
*/
|
||||
public boolean hasSignificantSoftclips(final double minVariantPvalue) {
|
||||
return hasSignificantCount(nSoftClippedBases, minVariantPvalue);
|
||||
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
|
||||
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checks whether a count is significant.
|
||||
*
|
||||
* @param count the count to test against
|
||||
* @param count the count (k) to test against
|
||||
* @param total the total (n) to test against
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||
* @return true if there is a significant count given the provided pvalue, false otherwise
|
||||
*/
|
||||
private boolean hasSignificantCount(final int count, final double minVariantPvalue) {
|
||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
||||
if ( count == 0 || totalBaseCount == 0 )
|
||||
private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) {
|
||||
if ( count == 0 || total == 0 )
|
||||
return false;
|
||||
|
||||
// technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that
|
||||
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount));
|
||||
return pvalue > minVariantPvalue;
|
||||
// use p-values for low counts of k
|
||||
if ( count <= MIN_COUNT_FOR_USING_PVALUE ) {
|
||||
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
|
||||
return pvalue > minVariantPvalue;
|
||||
}
|
||||
|
||||
// otherwise, use straight proportions
|
||||
final int minBaseCountForSignificance = (int)(minVariantProportion * total);
|
||||
return count >= minBaseCountForSignificance;
|
||||
}
|
||||
}
|
||||
|
|
@ -97,13 +97,14 @@ public class MultiSampleCompressor {
|
|||
final int downsampleCoverage,
|
||||
final int minMappingQuality,
|
||||
final double minAltPValueToTriggerVariant,
|
||||
final double minAltProportionToTriggerVariant,
|
||||
final double minIndelProportionToTriggerVariant,
|
||||
final int minBaseQual,
|
||||
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
||||
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
||||
compressorsPerSample.put(name,
|
||||
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
||||
minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||
minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
|||
|
||||
/**
|
||||
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
|
||||
* considered consensus.
|
||||
* considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that
|
||||
* this value is used only in regions with high coverage.
|
||||
*/
|
||||
@Deprecated
|
||||
@Advanced
|
||||
@Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
|
||||
public double minAltProportionToTriggerVariant = 0.05;
|
||||
|
||||
/**
|
||||
* Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region.
|
||||
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression).
|
||||
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to
|
||||
* trigger polyploid compression). Note that this value is used only in regions with low coverage.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
|
||||
|
|
@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
|||
if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
|
||||
throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
||||
|
||||
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
|
||||
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
||||
|
||||
if ( known.isEmpty() )
|
||||
knownSnpPositions = null;
|
||||
else
|
||||
|
|
@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
|||
*/
|
||||
@Override
|
||||
public ReduceReadsStash reduceInit() {
|
||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ public class SingleSampleCompressor {
|
|||
final private int downsampleCoverage;
|
||||
final private int minMappingQuality;
|
||||
final private double minAltPValueToTriggerVariant;
|
||||
final private double minAltProportionToTriggerVariant;
|
||||
final private double minIndelProportionToTriggerVariant;
|
||||
final private int minBaseQual;
|
||||
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
||||
|
|
@ -76,6 +77,7 @@ public class SingleSampleCompressor {
|
|||
final int downsampleCoverage,
|
||||
final int minMappingQuality,
|
||||
final double minAltPValueToTriggerVariant,
|
||||
final double minAltProportionToTriggerVariant,
|
||||
final double minIndelProportionToTriggerVariant,
|
||||
final int minBaseQual,
|
||||
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
||||
|
|
@ -84,6 +86,7 @@ public class SingleSampleCompressor {
|
|||
this.minMappingQuality = minMappingQuality;
|
||||
this.slidingWindowCounter = 0;
|
||||
this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
|
||||
this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
|
||||
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
|
||||
this.minBaseQual = minBaseQual;
|
||||
this.downsampleStrategy = downsampleStrategy;
|
||||
|
|
@ -114,7 +117,9 @@ public class SingleSampleCompressor {
|
|||
}
|
||||
|
||||
if ( slidingWindow == null) { // this is the first read
|
||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
|
||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(),
|
||||
slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant,
|
||||
minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
|
||||
slidingWindowCounter++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -78,8 +77,8 @@ import java.util.*;
|
|||
public class SlidingWindow {
|
||||
|
||||
// Sliding Window data
|
||||
final private ObjectAVLTreeSet<GATKSAMRecord> readsInWindow;
|
||||
final private LinkedList<HeaderElement> windowHeader;
|
||||
final protected PriorityQueue<GATKSAMRecord> readsInWindow;
|
||||
final protected LinkedList<HeaderElement> windowHeader;
|
||||
protected int contextSize; // the largest context size (between mismatches and indels)
|
||||
protected String contig;
|
||||
protected int contigIndex;
|
||||
|
|
@ -99,6 +98,7 @@ public class SlidingWindow {
|
|||
|
||||
// Additional parameters
|
||||
protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches
|
||||
protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches
|
||||
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
|
||||
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
|
||||
protected int MIN_MAPPING_QUALITY;
|
||||
|
|
@ -146,28 +146,33 @@ public class SlidingWindow {
|
|||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>();
|
||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
|
||||
@Override
|
||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
|
||||
final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
|
||||
final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant,
|
||||
final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
|
||||
final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
|
||||
final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
|
||||
this.contextSize = contextSize;
|
||||
this.downsampleCoverage = downsampleCoverage;
|
||||
|
||||
this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
|
||||
this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
|
||||
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
|
||||
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
||||
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
||||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() {
|
||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
|
||||
@Override
|
||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
final int difference = read1.getSoftEnd() - read2.getSoftEnd();
|
||||
return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName());
|
||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -290,8 +295,8 @@ public class SlidingWindow {
|
|||
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
||||
}
|
||||
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.remove(readsInWindow.first());
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.poll();
|
||||
}
|
||||
|
||||
return regions;
|
||||
|
|
@ -353,7 +358,7 @@ public class SlidingWindow {
|
|||
/**
|
||||
* returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
|
||||
*
|
||||
* @param stop check the window from start to stop (not-inclusive)
|
||||
* @param stop check the window from start to stop (not-inclusive); given in global coordinates
|
||||
*/
|
||||
protected void markSites(final int stop) {
|
||||
|
||||
|
|
@ -363,21 +368,16 @@ public class SlidingWindow {
|
|||
// copy over as many bits as we can from the previous calculation. Note that we can't trust the
|
||||
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
|
||||
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
|
||||
final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize);
|
||||
final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize));
|
||||
|
||||
// update the iterator to the correct position
|
||||
Iterator<HeaderElement> headerElementIterator = windowHeader.iterator();
|
||||
for (int i = windowHeaderStartLocation; i < locationToProcess; i++) {
|
||||
if (headerElementIterator.hasNext())
|
||||
headerElementIterator.next();
|
||||
}
|
||||
final ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation);
|
||||
|
||||
// process a contextSize worth of region from scratch in case there's a variant there
|
||||
for (int i = locationToProcess; i < stop; i++) {
|
||||
if (headerElementIterator.hasNext()) {
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
|
||||
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
|
||||
markVariantRegion(i - windowHeaderStartLocation);
|
||||
|
||||
} else
|
||||
|
|
@ -409,7 +409,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the running consensus or filtered data accordingly
|
||||
* Adds bases to the running consensus
|
||||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
|
|
@ -422,9 +422,10 @@ public class SlidingWindow {
|
|||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
if (start < end) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
if ( start < end ) {
|
||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||
|
|
@ -432,37 +433,29 @@ public class SlidingWindow {
|
|||
HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (headerElement.hasConsensusData()) {
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
||||
|
||||
int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
||||
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
||||
|
||||
// find the end of the consecutive consensus data in the window
|
||||
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
||||
if (endOfConsensus <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
||||
|
||||
// add to running consensus and recurse
|
||||
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
||||
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
|
||||
} else if (headerElement.hasFilteredData()) {
|
||||
|
||||
} else {
|
||||
|
||||
// add any outstanding consensus data
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
||||
|
||||
int endOfFilteredData = findNextNonFilteredDataElement(header, start, end);
|
||||
reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType));
|
||||
|
||||
if (endOfFilteredData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
|
||||
|
||||
reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType));
|
||||
} else if (headerElement.isEmpty()) {
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
|
||||
int endOfEmptyData = findNextNonEmptyElement(header, start, end);
|
||||
|
||||
// find the end of the consecutive empty data in the window
|
||||
final int endOfEmptyData = findNextConsensusElement(header, start, end);
|
||||
if (endOfEmptyData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||
|
||||
// recurse out of the empty region
|
||||
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
|
||||
} else
|
||||
throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return reads;
|
||||
|
|
@ -474,24 +467,21 @@ public class SlidingWindow {
|
|||
* @param type the synthetic reads you want to close
|
||||
* @return a non-null, possibly empty list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||
*/
|
||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
||||
GATKSAMRecord read = null;
|
||||
ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
|
||||
|
||||
switch (type) {
|
||||
case CONSENSUS:
|
||||
read = finalizeRunningConsensus();
|
||||
break;
|
||||
case FILTERED:
|
||||
read = finalizeFilteredDataConsensus();
|
||||
break;
|
||||
case BOTH:
|
||||
read = finalizeRunningConsensus();
|
||||
if (read != null) list.add(read);
|
||||
read = finalizeFilteredDataConsensus();
|
||||
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
|
||||
final GATKSAMRecord read = finalizeRunningConsensus();
|
||||
if ( read != null )
|
||||
list.add(read);
|
||||
}
|
||||
|
||||
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
|
||||
final GATKSAMRecord read = finalizeFilteredDataConsensus();
|
||||
if ( read != null )
|
||||
list.add(read);
|
||||
}
|
||||
if (read != null)
|
||||
list.add(read);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
|
@ -499,19 +489,145 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Looks for the next position without consensus data
|
||||
*
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElementIterator.next().hasConsensusData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position with consensus data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (headerElementIterator.next().hasConsensusData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
||||
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
||||
final HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElement.hasConsensusData())
|
||||
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
||||
|
||||
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the running filtered data accordingly
|
||||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
if ( start < end ) {
|
||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (headerElement.hasFilteredData()) {
|
||||
|
||||
// find the end of the consecutive filtered data in the window
|
||||
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
|
||||
if (endOfFiltered <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
|
||||
|
||||
// add to running filtered consensus and recurse
|
||||
addToFilteredData(header, start, endOfFiltered);
|
||||
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
|
||||
|
||||
} else {
|
||||
|
||||
// add any outstanding filtered data
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
||||
|
||||
// find the end of the consecutive empty data in the window
|
||||
final int endOfEmptyData = findNextFilteredElement(header, start, end);
|
||||
if (endOfEmptyData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||
|
||||
// recurse out of the empty region
|
||||
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
|
||||
}
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position without filtered data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (!headerElementIterator.next().hasFilteredData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
|
|
@ -519,43 +635,21 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position without filtered data
|
||||
* Looks for the next position with filtered data
|
||||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElement.hasFilteredData() || headerElement.hasConsensusData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next non-empty header element
|
||||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElement.isEmpty())
|
||||
if (headerElementIterator.next().hasFilteredData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
|
|
@ -571,67 +665,26 @@ public class SlidingWindow {
|
|||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
private ObjectArrayList<GATKSAMRecord> addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
|
||||
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||
|
||||
if (filteredDataConsensus == null)
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
||||
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if (headerElement.hasConsensusData())
|
||||
throw new ReviewedStingException("Found consensus data inside region to add to filtered data.");
|
||||
final HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (!headerElement.hasFilteredData())
|
||||
throw new ReviewedStingException("No filtered data in " + index);
|
||||
|
||||
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
|
||||
result.add(finalizeFilteredDataConsensus());
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType);
|
||||
}
|
||||
|
||||
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
||||
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElement.hasConsensusData())
|
||||
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
||||
|
||||
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -726,7 +779,7 @@ public class SlidingWindow {
|
|||
|
||||
for ( int i = start; i <= stop; i++ ) {
|
||||
|
||||
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT);
|
||||
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT);
|
||||
|
||||
// we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
|
||||
if ( nAlleles > 2 || nAlleles == -1 )
|
||||
|
|
@ -760,8 +813,8 @@ public class SlidingWindow {
|
|||
if ( headerElement.getLocation() == positionToSkip )
|
||||
continue;
|
||||
|
||||
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ||
|
||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 )
|
||||
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
|
||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -784,6 +837,7 @@ public class SlidingWindow {
|
|||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
||||
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
|
||||
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
|
||||
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
|
|
@ -914,6 +968,7 @@ public class SlidingWindow {
|
|||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
|
||||
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
|
||||
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
||||
}
|
||||
}
|
||||
|
|
@ -983,7 +1038,7 @@ public class SlidingWindow {
|
|||
|
||||
// initialize the mapping from base (allele) to header
|
||||
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
||||
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) {
|
||||
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
|
||||
final int currentIndex = alleleHeaderMap.size();
|
||||
if ( currentIndex > 1 )
|
||||
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");
|
||||
|
|
@ -997,7 +1052,7 @@ public class SlidingWindow {
|
|||
if ( alleleHeaderMap.size() != 2 )
|
||||
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
||||
|
||||
final ObjectList<GATKSAMRecord> readsToRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||
|
||||
|
|
@ -1006,38 +1061,36 @@ public class SlidingWindow {
|
|||
continue;
|
||||
|
||||
// remove all other reads from the read cache since we're going to use them here
|
||||
readsInWindow.remove(read);
|
||||
readsToRemove.add(read);
|
||||
|
||||
// if the read falls before the het position, we don't need to look at it
|
||||
if ( read.getSoftEnd() < globalHetRefPosition )
|
||||
// if the read falls before the het position or has low MQ, we don't need to look at it
|
||||
if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY)
|
||||
continue;
|
||||
|
||||
// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
|
||||
removeFromHeader(windowHeader, read);
|
||||
|
||||
// make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus)
|
||||
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {
|
||||
// where on the read is the het position?
|
||||
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
// where on the read is the het position?
|
||||
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
// this is safe because indels are not supported
|
||||
final byte base = read.getReadBases()[readPosOfHet];
|
||||
|
||||
// this is safe because indels are not supported
|
||||
final byte base = read.getReadBases()[readPosOfHet];
|
||||
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];
|
||||
// check which allele this read represents
|
||||
final Integer allele = alleleHeaderMap.get(base);
|
||||
|
||||
// check which allele this read represents
|
||||
final Integer allele = alleleHeaderMap.get(base);
|
||||
|
||||
// ignore the read if it represents a base that's not part of the consensus
|
||||
if ( allele != null ) {
|
||||
// add to the appropriate polyploid header
|
||||
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
||||
header.reads.add(read);
|
||||
addToHeader(header.consensus, read);
|
||||
}
|
||||
// ignore the read if it represents a base that's not part of the consensus
|
||||
if ( allele != null ) {
|
||||
// add to the appropriate polyploid header
|
||||
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
||||
header.reads.add(read);
|
||||
addToHeader(header.consensus, read);
|
||||
}
|
||||
}
|
||||
|
||||
for ( final GATKSAMRecord read : readsToRemove )
|
||||
readsInWindow.remove(read);
|
||||
|
||||
// create the polyploid synthetic reads if we can
|
||||
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
|
|
@ -1171,15 +1224,15 @@ public class SlidingWindow {
|
|||
int readBaseIndex = 0;
|
||||
HeaderElement headerElement;
|
||||
|
||||
for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) {
|
||||
for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) {
|
||||
switch ( cigarElement.getOperator() ) {
|
||||
case H:
|
||||
break;
|
||||
case I:
|
||||
readBaseIndex += cigarElement.getLength();
|
||||
|
||||
// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
|
||||
if ( removeRead && locationIndex == 0 )
|
||||
// special case, if we don't have the previous header element anymore, don't worry about it.
|
||||
if ( locationIndex == 0 )
|
||||
break;
|
||||
|
||||
// insertions are added to the base to the left (previous element)
|
||||
|
|
@ -1200,9 +1253,8 @@ public class SlidingWindow {
|
|||
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||
else
|
||||
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||
|
||||
locationIndex++;
|
||||
}
|
||||
locationIndex += nDeletionBases;
|
||||
break;
|
||||
case S:
|
||||
case M:
|
||||
|
|
@ -1211,6 +1263,8 @@ public class SlidingWindow {
|
|||
case X:
|
||||
final int nBasesToAdd = cigarElement.getLength();
|
||||
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
|
||||
final byte[] readBases = read.getReadBases();
|
||||
final byte[] readQuals = read.getBaseQualities();
|
||||
final boolean readHasIndelQuals = read.hasBaseIndelQualities();
|
||||
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
|
||||
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
|
||||
|
|
@ -1219,14 +1273,15 @@ public class SlidingWindow {
|
|||
headerElement = headerElementIterator.next();
|
||||
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
|
||||
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
||||
|
||||
if ( removeRead )
|
||||
headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
else
|
||||
headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
|
||||
readBaseIndex++;
|
||||
locationIndex++;
|
||||
}
|
||||
locationIndex += nBasesToAdd;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
|||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
|
||||
public double qual_epsilon = 0.10;
|
||||
|
||||
@Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false)
|
||||
public int excludeMQ = 0;
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
|
|
@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
|||
}
|
||||
|
||||
private boolean isGoodRead(final PileupElement p) {
|
||||
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20;
|
||||
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ;
|
||||
}
|
||||
|
||||
private int getTagIndex(final List<String> tags) {
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest {
|
|||
BaseCounts counts = new BaseCounts();
|
||||
|
||||
for ( int qual : test.quals )
|
||||
counts.incr(BaseIndex.A, (byte)qual, 20);
|
||||
counts.incr(BaseIndex.A, (byte)qual, 20, false);
|
||||
|
||||
final int actualSum = (int)counts.getSumQuals((byte)'A');
|
||||
final int expectedSum = qualSum(test.quals);
|
||||
|
|
|
|||
|
|
@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
|
||||
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
|
||||
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
|
||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
|
||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
|
||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
|
||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
||||
}
|
||||
|
||||
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue);
|
||||
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
|
||||
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);
|
||||
|
||||
Assert.assertEquals(nAllelesSeen, nAllelesExpected);
|
||||
|
|
@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
if ( count == 0 )
|
||||
continue;
|
||||
|
||||
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
|
||||
final boolean isSignificant;
|
||||
if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) {
|
||||
isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue;
|
||||
} else {
|
||||
isSignificant = (count >= targetPvalue * total);
|
||||
}
|
||||
|
||||
if ( pvalue > targetPvalue ) {
|
||||
if ( isSignificant ) {
|
||||
if ( index == BaseIndex.D.index )
|
||||
return -1;
|
||||
result++;
|
||||
|
|
|
|||
|
|
@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompression() {
|
||||
RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false);
|
||||
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompressionWithKnowns() {
|
||||
RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true);
|
||||
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
|
||||
}
|
||||
|
||||
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervals() {
|
||||
RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false);
|
||||
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervalsWithKnowns() {
|
||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true);
|
||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
|
||||
}
|
||||
|
||||
final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompression() {
|
||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false);
|
||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompressionWithKnowns() {
|
||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true);
|
||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompression() {
|
||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false);
|
||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompressionWithKnowns() {
|
||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true);
|
||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testIndelCompression() {
|
||||
final String md5 = "56154baed62be07008d3684a0a4c0996";
|
||||
final String md5 = "d20e6012300898a0315c795cab7583d8";
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
||||
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
||||
}
|
||||
|
|
@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testFilteredDeletionCompression() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1")));
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
||||
}
|
||||
|
||||
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8")));
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReductionWithKnowns() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b")));
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testInsertionsAtEdgeOfConsensus() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0")));
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testAddingReadAfterTailingTheStash() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d")));
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testReadOffContig() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01")));
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testPairedReadsInVariantRegion() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
||||
" -o %s --downsample_coverage 250 -dcov 50 ";
|
||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d")));
|
||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private class ConsensusCreationTest {
|
||||
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression;
|
||||
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage;
|
||||
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
|
||||
public final String description;
|
||||
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
|
||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||
this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
|
||||
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
|
@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
||||
}
|
||||
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
|
||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||
this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
|
||||
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||
this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
|
@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
|
||||
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
|
||||
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
|
||||
private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320);
|
||||
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);
|
||||
|
||||
private static final int DEEP_COVERAGE_ITERATIONS = 100;
|
||||
|
||||
@DataProvider(name = "ConsensusCreation")
|
||||
public Object[][] createConsensusCreationTestData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// test high quality reads and bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
||||
|
||||
// test low quality reads
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
|
||||
|
||||
// test low quality bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
|
||||
|
||||
// test mixture
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
|
||||
|
||||
// test I/D operators
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
@ -368,14 +374,14 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
||||
|
||||
// test WITHOUT het compression
|
||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
|
||||
|
||||
// test WITH het compression at KNOWN sites
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
for ( int i = 0; i < 1200; i++ )
|
||||
|
|
@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
||||
|
||||
// test WITH het compression at ALL sites
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
result = slidingWindow.close(null);
|
||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
||||
|
||||
// test with deep coverage
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) {
|
||||
for ( final GATKSAMRecord read : test.myReads ) {
|
||||
final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength);
|
||||
copy.setReadBases(read.getReadBases());
|
||||
copy.setBaseQualities(read.getBaseQualities());
|
||||
copy.setMappingQuality(read.getMappingQuality());
|
||||
copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag());
|
||||
if ( read.getCigar() != null )
|
||||
copy.setCigar(read.getCigar());
|
||||
slidingWindow.addRead(copy);
|
||||
}
|
||||
}
|
||||
result = slidingWindow.close(null);
|
||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
||||
|
||||
// test WITHOUT het compression
|
||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||
|
||||
// test WITH het compression at KNOWN sites
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
|
|
@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||
|
||||
// test WITH het compression at ALL sites
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
result = slidingWindow.close(knownSNPs);
|
||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddingReadPairWithSameCoordinates() {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
|
||||
|
||||
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
|
||||
read1.setReadBases(new byte[]{(byte)'A'});
|
||||
read1.setBaseQualities(new byte[]{(byte)'A'});
|
||||
read1.setMappingQuality(30);
|
||||
read1.setReadNegativeStrandFlag(false);
|
||||
slidingWindow.addRead(read1);
|
||||
|
||||
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
|
||||
read2.setReadBases(new byte[]{(byte)'A'});
|
||||
read2.setBaseQualities(new byte[]{(byte)'A'});
|
||||
read2.setMappingQuality(30);
|
||||
read2.setReadNegativeStrandFlag(true);
|
||||
slidingWindow.addRead(read2);
|
||||
|
||||
Assert.assertEquals(slidingWindow.readsInWindow.size(), 2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOnlySpanningReadHasLowQual() {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
|
||||
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100);
|
||||
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100);
|
||||
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
read1.setReadBases(bases);
|
||||
read2.setReadBases(bases);
|
||||
|
||||
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
||||
baseQuals[80] = (byte)10;
|
||||
read1.setBaseQualities(baseQuals);
|
||||
read2.setBaseQualities(baseQuals);
|
||||
|
||||
read1.setMappingQuality(30);
|
||||
read2.setMappingQuality(30);
|
||||
|
||||
slidingWindow.addRead(read1);
|
||||
slidingWindow.addRead(read2);
|
||||
|
||||
Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
//// This section tests the downsampling functionality ////
|
||||
|
|
@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "Downsampling", enabled = true)
|
||||
public void testDownsamplingTest(final int dcov) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
||||
|
||||
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
||||
|
|
@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "ConsensusQuals", enabled = true)
|
||||
public void testConsensusQualsTest(QualsTest test) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : test.myReads )
|
||||
slidingWindow.addRead(read);
|
||||
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>());
|
||||
|
|
@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
|
||||
|
||||
Assert.assertEquals(newIndex, start > 0 ? start : 0);
|
||||
|
|
@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
read.setMappingQuality(30);
|
||||
|
||||
// add the read
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
||||
for ( int i = 0; i < start; i++ )
|
||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||
|
|
@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
//// This section tests functionality related to polyploid consensus creation ////
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
read.setMappingQuality(30);
|
||||
|
||||
// add the read
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0);
|
||||
|
||||
// set up and add a soft-clipped read if requested
|
||||
|
|
|
|||
|
|
@ -383,7 +383,7 @@ public class MathUtils {
|
|||
|
||||
for (int hits = k_start; hits <= k_end; hits++) {
|
||||
prevProb = cumProb;
|
||||
double probability = binomialProbability(n, hits);
|
||||
final double probability = binomialProbability(n, hits);
|
||||
cumProb += probability;
|
||||
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
|
||||
probCache = probCache.add(new BigDecimal(prevProb));
|
||||
|
|
|
|||
Loading…
Reference in New Issue