Merge pull request #181 from broadinstitute/eb_yet_more_rr_improvements_GSA-930
Various bug fixes for recent Reduce Reads additions plus solution implemented for low MQ reads.
This commit is contained in:
commit
27bb699e8b
|
|
@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts {
|
||||||
* @param isLowQualBase true if the base is low quality
|
* @param isLowQualBase true if the base is low quality
|
||||||
*/
|
*/
|
||||||
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
||||||
|
incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Increments the count
|
||||||
|
*
|
||||||
|
* @param base the base
|
||||||
|
* @param baseQual the base quality
|
||||||
|
* @param insQual the insertion quality
|
||||||
|
* @param delQual the deletion quality
|
||||||
|
* @param baseMappingQual the mapping quality
|
||||||
|
* @param isLowQualBase true if the base is low quality
|
||||||
|
* @param isSoftClip true if is soft-clipped
|
||||||
|
*/
|
||||||
|
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
|
||||||
// if we already have high quality bases, ignore low quality ones
|
// if we already have high quality bases, ignore low quality ones
|
||||||
if ( isLowQualBase && !isLowQuality() )
|
if ( isLowQualBase && !isLowQuality() )
|
||||||
return;
|
return;
|
||||||
|
|
@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts {
|
||||||
}
|
}
|
||||||
|
|
||||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||||
super.incr(i, baseQual, baseMappingQual);
|
super.incr(i, baseQual, baseMappingQual, isSoftClip);
|
||||||
switch (i) {
|
switch (i) {
|
||||||
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
|
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
|
||||||
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
|
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
|
||||||
|
|
@ -114,13 +129,28 @@ public class BaseAndQualsCounts extends BaseCounts {
|
||||||
* @param baseMappingQual the mapping quality
|
* @param baseMappingQual the mapping quality
|
||||||
* @param isLowQualBase true if the base is low quality
|
* @param isLowQualBase true if the base is low quality
|
||||||
*/
|
*/
|
||||||
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
|
||||||
|
decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Decrements the count
|
||||||
|
*
|
||||||
|
* @param base the base
|
||||||
|
* @param baseQual the base quality
|
||||||
|
* @param insQual the insertion quality
|
||||||
|
* @param delQual the deletion quality
|
||||||
|
* @param baseMappingQual the mapping quality
|
||||||
|
* @param isLowQualBase true if the base is low quality
|
||||||
|
* @param isSoftClip true if is soft-clipped
|
||||||
|
*/
|
||||||
|
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
|
||||||
// if this is not the right type of base, ignore it
|
// if this is not the right type of base, ignore it
|
||||||
if ( isLowQualBase != isLowQuality() )
|
if ( isLowQualBase != isLowQuality() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
final BaseIndex i = BaseIndex.byteToBase(base);
|
final BaseIndex i = BaseIndex.byteToBase(base);
|
||||||
super.decr(i, baseQual, baseMappingQual);
|
super.decr(i, baseQual, baseMappingQual, isSoftClip);
|
||||||
switch (i) {
|
switch (i) {
|
||||||
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
|
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
|
||||||
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
|
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
private int count_N = 0;
|
private int count_N = 0;
|
||||||
private int sumQual_N = 0;
|
private int sumQual_N = 0;
|
||||||
private int totalCount = 0; // keeps track of total count since this is requested so often
|
private int totalCount = 0; // keeps track of total count since this is requested so often
|
||||||
|
private int nSoftClippedBases = 0;
|
||||||
private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
|
private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
|
||||||
private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
|
private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
|
||||||
|
|
||||||
|
|
@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
this.count_I += other.count_I;
|
this.count_I += other.count_I;
|
||||||
this.count_N += other.count_N;
|
this.count_N += other.count_N;
|
||||||
this.totalCount += other.totalCount;
|
this.totalCount += other.totalCount;
|
||||||
|
this.nSoftClippedBases = other.nSoftClippedBases;
|
||||||
this.mappingQualities.addAll(other.mappingQualities);
|
this.mappingQualities.addAll(other.mappingQualities);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
this.count_I -= other.count_I;
|
this.count_I -= other.count_I;
|
||||||
this.count_N -= other.count_N;
|
this.count_N -= other.count_N;
|
||||||
this.totalCount -= other.totalCount;
|
this.totalCount -= other.totalCount;
|
||||||
|
this.nSoftClippedBases -= other.nSoftClippedBases;
|
||||||
this.mappingQualities.removeAll(other.mappingQualities);
|
this.mappingQualities.removeAll(other.mappingQualities);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
|
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
|
||||||
public void incr(final BaseIndex base, final byte qual, final int mappingQuality) {
|
public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case A: ++count_A; sumQual_A += qual; break;
|
case A: ++count_A; sumQual_A += qual; break;
|
||||||
case C: ++count_C; sumQual_C += qual; break;
|
case C: ++count_C; sumQual_C += qual; break;
|
||||||
|
|
@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
case N: ++count_N; sumQual_N += qual; break;
|
case N: ++count_N; sumQual_N += qual; break;
|
||||||
}
|
}
|
||||||
++totalCount;
|
++totalCount;
|
||||||
|
nSoftClippedBases += isSoftclip ? 1 : 0;
|
||||||
mappingQualities.add(mappingQuality);
|
mappingQualities.add(mappingQuality);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
|
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
|
||||||
public void decr(final BaseIndex base, final byte qual, final int mappingQuality) {
|
public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case A: --count_A; sumQual_A -= qual; break;
|
case A: --count_A; sumQual_A -= qual; break;
|
||||||
case C: --count_C; sumQual_C -= qual; break;
|
case C: --count_C; sumQual_C -= qual; break;
|
||||||
|
|
@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
case N: --count_N; sumQual_N -= qual; break;
|
case N: --count_N; sumQual_N -= qual; break;
|
||||||
}
|
}
|
||||||
--totalCount;
|
--totalCount;
|
||||||
|
nSoftClippedBases -= isSoftclip ? 1 : 0;
|
||||||
mappingQualities.remove((Integer) mappingQuality);
|
mappingQualities.remove((Integer) mappingQuality);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
return (byte) (sumQualsOfBase(base) / countOfBase(base));
|
return (byte) (sumQualsOfBase(base) / countOfBase(base));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public int nSoftclips() {
|
||||||
|
return nSoftClippedBases;
|
||||||
|
}
|
||||||
|
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public int totalCount() {
|
public int totalCount() {
|
||||||
|
|
@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
return baseIndexWithMostCounts().getByte();
|
return baseIndexWithMostCounts().getByte();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the base index for which the count is highest, including indel indexes
|
||||||
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public BaseIndex baseIndexWithMostCounts() {
|
public BaseIndex baseIndexWithMostCounts() {
|
||||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
return baseIndexWithMostCounts(true);
|
||||||
for (final BaseIndex i : BaseIndex.values()) {
|
|
||||||
if (countOfBase(i) > countOfBase(maxI))
|
|
||||||
maxI = i;
|
|
||||||
}
|
|
||||||
return maxI;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the base index for which the count is highest, excluding indel indexes
|
||||||
|
*/
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
|
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
|
||||||
|
return baseIndexWithMostCounts(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the base index with the most counts
|
||||||
|
*
|
||||||
|
* @param allowIndels should we allow base indexes representing indels?
|
||||||
|
* @return non-null base index
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) {
|
||||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||||
|
int maxCount = countOfBase(maxI);
|
||||||
|
|
||||||
for (final BaseIndex i : BaseIndex.values()) {
|
for (final BaseIndex i : BaseIndex.values()) {
|
||||||
if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI))
|
if ( !allowIndels && !i.isNucleotide() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final int myCount = countOfBase(i);
|
||||||
|
if (myCount > maxCount) {
|
||||||
maxI = i;
|
maxI = i;
|
||||||
|
maxCount = myCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return maxI;
|
return maxI;
|
||||||
}
|
}
|
||||||
|
|
@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public BaseIndex baseIndexWithMostProbability() {
|
public BaseIndex baseIndexWithMostProbability() {
|
||||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
return baseIndexWithMostProbability(true);
|
||||||
for (final BaseIndex i : BaseIndex.values()) {
|
|
||||||
if (getSumQuals(i) > getSumQuals(maxI))
|
|
||||||
maxI = i;
|
|
||||||
}
|
|
||||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
|
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
|
||||||
|
return baseIndexWithMostProbability(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the base index with the most probability
|
||||||
|
*
|
||||||
|
* @param allowIndels should we allow base indexes representing indels?
|
||||||
|
* @return non-null base index
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) {
|
||||||
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
|
||||||
|
long maxSum = getSumQuals(maxI);
|
||||||
|
|
||||||
for (final BaseIndex i : BaseIndex.values()) {
|
for (final BaseIndex i : BaseIndex.values()) {
|
||||||
if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
|
if ( !allowIndels && !i.isNucleotide() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final long mySum = getSumQuals(i);
|
||||||
|
if (mySum > maxSum) {
|
||||||
maxI = i;
|
maxI = i;
|
||||||
|
maxSum = mySum;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
|
return (maxSum > 0L ? maxI : baseIndexWithMostCounts(allowIndels));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result >=0")
|
@Ensures("result >=0")
|
||||||
|
|
@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
|
count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
|
||||||
sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
|
sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
|
||||||
totalCount = 0;
|
totalCount = 0;
|
||||||
|
nSoftClippedBases = 0;
|
||||||
mappingQualities.clear();
|
mappingQualities.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ public enum BaseIndex {
|
||||||
*
|
*
|
||||||
* @return whether or not it is a nucleotide, given the definition above
|
* @return whether or not it is a nucleotide, given the definition above
|
||||||
*/
|
*/
|
||||||
public boolean isNucleotide() {
|
public final boolean isNucleotide() {
|
||||||
return !isIndel();
|
return !isIndel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -130,7 +130,7 @@ public enum BaseIndex {
|
||||||
*
|
*
|
||||||
* @return true for I or D, false otherwise
|
* @return true for I or D, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean isIndel() {
|
public final boolean isIndel() {
|
||||||
return this == D || this == I;
|
return this == D || this == I;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,9 +62,10 @@ public class HeaderElement {
|
||||||
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
||||||
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
|
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
|
||||||
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
||||||
private int nSoftClippedBases; // How many bases in this site came from soft clipped bases
|
|
||||||
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
||||||
|
|
||||||
|
protected static final int MIN_COUNT_FOR_USING_PVALUE = 2;
|
||||||
|
|
||||||
public int getLocation() {
|
public int getLocation() {
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
|
@ -84,7 +85,7 @@ public class HeaderElement {
|
||||||
* @param location the reference location for the new element
|
* @param location the reference location for the new element
|
||||||
*/
|
*/
|
||||||
public HeaderElement(final int location) {
|
public HeaderElement(final int location) {
|
||||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location);
|
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -94,7 +95,7 @@ public class HeaderElement {
|
||||||
* @param location the reference location for the new element
|
* @param location the reference location for the new element
|
||||||
*/
|
*/
|
||||||
public HeaderElement(final int location, final int insertionsToTheRight) {
|
public HeaderElement(final int location, final int insertionsToTheRight) {
|
||||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location);
|
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -103,15 +104,13 @@ public class HeaderElement {
|
||||||
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
|
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
|
||||||
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
||||||
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
||||||
* @param nSoftClippedBases number of softclipped bases of this HeaderElement
|
|
||||||
* @param location the reference location of this reference element
|
* @param location the reference location of this reference element
|
||||||
* HeaderElement
|
* HeaderElement
|
||||||
*/
|
*/
|
||||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) {
|
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
|
||||||
this.consensusBaseCounts = consensusBaseCounts;
|
this.consensusBaseCounts = consensusBaseCounts;
|
||||||
this.filteredBaseCounts = filteredBaseCounts;
|
this.filteredBaseCounts = filteredBaseCounts;
|
||||||
this.insertionsToTheRight = insertionsToTheRight;
|
this.insertionsToTheRight = insertionsToTheRight;
|
||||||
this.nSoftClippedBases = nSoftClippedBases;
|
|
||||||
this.location = location;
|
this.location = location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,10 +118,13 @@ public class HeaderElement {
|
||||||
* Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
|
* Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
|
||||||
* by insertion, deletion and mismatches.
|
* by insertion, deletion and mismatches.
|
||||||
*
|
*
|
||||||
|
* @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches
|
||||||
|
* @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches
|
||||||
|
* @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels
|
||||||
* @return true if site is variant by any definition. False otherwise.
|
* @return true if site is variant by any definition. False otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean isVariant(double minVariantPvalue, double minIndelProportion) {
|
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
|
||||||
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -140,11 +142,9 @@ public class HeaderElement {
|
||||||
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||||
if ( baseMappingQuality >= minMappingQual )
|
if ( baseMappingQuality >= minMappingQual )
|
||||||
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
else
|
else
|
||||||
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||||
|
|
||||||
nSoftClippedBases += isSoftClipped ? 1 : 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -162,11 +162,9 @@ public class HeaderElement {
|
||||||
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||||
if ( baseMappingQuality >= minMappingQual )
|
if ( baseMappingQuality >= minMappingQual )
|
||||||
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
else
|
else
|
||||||
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||||
|
|
||||||
nSoftClippedBases -= isSoftClipped ? 1 : 0;
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
|
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
|
||||||
|
|
@ -246,15 +244,15 @@ public class HeaderElement {
|
||||||
/**
|
/**
|
||||||
* Whether or not the HeaderElement is variant due to excess mismatches
|
* Whether or not the HeaderElement is variant due to excess mismatches
|
||||||
*
|
*
|
||||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
|
||||||
* @return whether or not the HeaderElement is variant due to excess mismatches
|
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||||
*/
|
*/
|
||||||
protected boolean isVariantFromMismatches(double minVariantPvalue) {
|
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
|
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
|
||||||
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||||
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
|
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
|
||||||
final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases);
|
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
|
||||||
return pvalue > minVariantPvalue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -264,6 +262,7 @@ public class HeaderElement {
|
||||||
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||||
*/
|
*/
|
||||||
protected boolean isVariantFromSoftClips() {
|
protected boolean isVariantFromSoftClips() {
|
||||||
|
final int nSoftClippedBases = consensusBaseCounts.nSoftclips();
|
||||||
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -271,10 +270,11 @@ public class HeaderElement {
|
||||||
* Calculates the number of alleles necessary to represent this site.
|
* Calculates the number of alleles necessary to represent this site.
|
||||||
*
|
*
|
||||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||||
* @return the number of alleles necessary to represent this site or -1 if there are too many indels
|
* @return the number of alleles necessary to represent this site or -1 if there are too many indels
|
||||||
*/
|
*/
|
||||||
public int getNumberOfBaseAlleles(final double minVariantPvalue) {
|
public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue);
|
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue, minVariantProportion);
|
||||||
return alleles == null ? -1 : alleles.size();
|
return alleles == null ? -1 : alleles.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -282,16 +282,18 @@ public class HeaderElement {
|
||||||
* Calculates the alleles necessary to represent this site.
|
* Calculates the alleles necessary to represent this site.
|
||||||
*
|
*
|
||||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||||
* @return the list of alleles necessary to represent this site or null if there are too many indels
|
* @return the list of alleles necessary to represent this site or null if there are too many indels
|
||||||
*/
|
*/
|
||||||
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue) {
|
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
// make sure we have bases at all
|
// make sure we have bases at all
|
||||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
final int totalBaseCount = consensusBaseCounts.totalCount();
|
||||||
if ( totalBaseCount == 0 )
|
if ( totalBaseCount == 0 )
|
||||||
return new ObjectArrayList<BaseIndex>(0);
|
return new ObjectArrayList<BaseIndex>(0);
|
||||||
|
|
||||||
// next, check for insertions
|
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
|
||||||
if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) )
|
// (because of the way insertions are counted), so we need to account for that
|
||||||
|
if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// finally, check for the bases themselves (including deletions)
|
// finally, check for the bases themselves (including deletions)
|
||||||
|
|
@ -301,9 +303,7 @@ public class HeaderElement {
|
||||||
if ( baseCount == 0 )
|
if ( baseCount == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount);
|
if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) {
|
||||||
|
|
||||||
if ( pvalue > minVariantPvalue ) {
|
|
||||||
if ( base == BaseIndex.D )
|
if ( base == BaseIndex.D )
|
||||||
return null;
|
return null;
|
||||||
alleles.add(base);
|
alleles.add(base);
|
||||||
|
|
@ -316,26 +316,34 @@ public class HeaderElement {
|
||||||
* Checks whether there are a significant number of softclips.
|
* Checks whether there are a significant number of softclips.
|
||||||
*
|
*
|
||||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||||
* @return true if there are significant softclips, false otherwise
|
* @return true if there are significant softclips, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean hasSignificantSoftclips(final double minVariantPvalue) {
|
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
return hasSignificantCount(nSoftClippedBases, minVariantPvalue);
|
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Checks whether there are a significant number of count.
|
* Checks whether there are a significant number of count.
|
||||||
*
|
*
|
||||||
* @param count the count to test against
|
* @param count the count (k) to test against
|
||||||
|
* @param total the total (n) to test against
|
||||||
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
* @param minVariantPvalue the minimum pvalue to call a site variant.
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant.
|
||||||
* @return true if there is a significant count given the provided pvalue, false otherwise
|
* @return true if there is a significant count given the provided pvalue, false otherwise
|
||||||
*/
|
*/
|
||||||
private boolean hasSignificantCount(final int count, final double minVariantPvalue) {
|
private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) {
|
||||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
if ( count == 0 || total == 0 )
|
||||||
if ( count == 0 || totalBaseCount == 0 )
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that
|
// use p-values for low counts of k
|
||||||
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount));
|
if ( count <= MIN_COUNT_FOR_USING_PVALUE ) {
|
||||||
return pvalue > minVariantPvalue;
|
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
|
||||||
|
return pvalue > minVariantPvalue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, use straight proportions
|
||||||
|
final int minBaseCountForSignificance = (int)(minVariantProportion * total);
|
||||||
|
return count >= minBaseCountForSignificance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -97,13 +97,14 @@ public class MultiSampleCompressor {
|
||||||
final int downsampleCoverage,
|
final int downsampleCoverage,
|
||||||
final int minMappingQuality,
|
final int minMappingQuality,
|
||||||
final double minAltPValueToTriggerVariant,
|
final double minAltPValueToTriggerVariant,
|
||||||
|
final double minAltProportionToTriggerVariant,
|
||||||
final double minIndelProportionToTriggerVariant,
|
final double minIndelProportionToTriggerVariant,
|
||||||
final int minBaseQual,
|
final int minBaseQual,
|
||||||
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
||||||
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
|
||||||
compressorsPerSample.put(name,
|
compressorsPerSample.put(name,
|
||||||
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
new SingleSampleCompressor(contextSize, downsampleCoverage,
|
||||||
minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
|
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
|
||||||
* considered consensus.
|
* considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that
|
||||||
|
* this value is used only regions with high coverage.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Advanced
|
||||||
@Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
|
@Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
|
||||||
public double minAltProportionToTriggerVariant = 0.05;
|
public double minAltProportionToTriggerVariant = 0.05;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region.
|
* Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region.
|
||||||
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression).
|
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to
|
||||||
|
* trigger polyploid compression). Note that this value is used only regions with low coverage.
|
||||||
*/
|
*/
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
|
@Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
|
||||||
|
|
@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
||||||
if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
|
if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
|
||||||
throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
||||||
|
|
||||||
|
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
|
||||||
|
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
|
||||||
|
|
||||||
if ( known.isEmpty() )
|
if ( known.isEmpty() )
|
||||||
knownSnpPositions = null;
|
knownSnpPositions = null;
|
||||||
else
|
else
|
||||||
|
|
@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public ReduceReadsStash reduceInit() {
|
public ReduceReadsStash reduceInit() {
|
||||||
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ public class SingleSampleCompressor {
|
||||||
final private int downsampleCoverage;
|
final private int downsampleCoverage;
|
||||||
final private int minMappingQuality;
|
final private int minMappingQuality;
|
||||||
final private double minAltPValueToTriggerVariant;
|
final private double minAltPValueToTriggerVariant;
|
||||||
|
final private double minAltProportionToTriggerVariant;
|
||||||
final private double minIndelProportionToTriggerVariant;
|
final private double minIndelProportionToTriggerVariant;
|
||||||
final private int minBaseQual;
|
final private int minBaseQual;
|
||||||
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
final private ReduceReads.DownsampleStrategy downsampleStrategy;
|
||||||
|
|
@ -76,6 +77,7 @@ public class SingleSampleCompressor {
|
||||||
final int downsampleCoverage,
|
final int downsampleCoverage,
|
||||||
final int minMappingQuality,
|
final int minMappingQuality,
|
||||||
final double minAltPValueToTriggerVariant,
|
final double minAltPValueToTriggerVariant,
|
||||||
|
final double minAltProportionToTriggerVariant,
|
||||||
final double minIndelProportionToTriggerVariant,
|
final double minIndelProportionToTriggerVariant,
|
||||||
final int minBaseQual,
|
final int minBaseQual,
|
||||||
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
final ReduceReads.DownsampleStrategy downsampleStrategy) {
|
||||||
|
|
@ -84,6 +86,7 @@ public class SingleSampleCompressor {
|
||||||
this.minMappingQuality = minMappingQuality;
|
this.minMappingQuality = minMappingQuality;
|
||||||
this.slidingWindowCounter = 0;
|
this.slidingWindowCounter = 0;
|
||||||
this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
|
this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
|
||||||
|
this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
|
||||||
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
|
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
|
||||||
this.minBaseQual = minBaseQual;
|
this.minBaseQual = minBaseQual;
|
||||||
this.downsampleStrategy = downsampleStrategy;
|
this.downsampleStrategy = downsampleStrategy;
|
||||||
|
|
@ -114,7 +117,9 @@ public class SingleSampleCompressor {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( slidingWindow == null) { // this is the first read
|
if ( slidingWindow == null) { // this is the first read
|
||||||
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
|
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(),
|
||||||
|
slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant,
|
||||||
|
minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
|
||||||
slidingWindowCounter++;
|
slidingWindowCounter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -78,8 +77,8 @@ import java.util.*;
|
||||||
public class SlidingWindow {
|
public class SlidingWindow {
|
||||||
|
|
||||||
// Sliding Window data
|
// Sliding Window data
|
||||||
final private ObjectAVLTreeSet<GATKSAMRecord> readsInWindow;
|
final protected PriorityQueue<GATKSAMRecord> readsInWindow;
|
||||||
final private LinkedList<HeaderElement> windowHeader;
|
final protected LinkedList<HeaderElement> windowHeader;
|
||||||
protected int contextSize; // the largest context size (between mismatches and indels)
|
protected int contextSize; // the largest context size (between mismatches and indels)
|
||||||
protected String contig;
|
protected String contig;
|
||||||
protected int contigIndex;
|
protected int contigIndex;
|
||||||
|
|
@ -99,6 +98,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
// Additional parameters
|
// Additional parameters
|
||||||
protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches
|
protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches
|
||||||
|
protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches
|
||||||
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
|
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
|
||||||
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
|
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
|
||||||
protected int MIN_MAPPING_QUALITY;
|
protected int MIN_MAPPING_QUALITY;
|
||||||
|
|
@ -146,28 +146,33 @@ public class SlidingWindow {
|
||||||
|
|
||||||
this.windowHeader = new LinkedList<HeaderElement>();
|
this.windowHeader = new LinkedList<HeaderElement>();
|
||||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>();
|
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
|
||||||
|
@Override
|
||||||
|
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
|
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
|
public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
|
||||||
final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
|
final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
|
||||||
final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant,
|
final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
|
||||||
final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
|
final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
|
||||||
final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
|
final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
|
||||||
this.contextSize = contextSize;
|
this.contextSize = contextSize;
|
||||||
this.downsampleCoverage = downsampleCoverage;
|
this.downsampleCoverage = downsampleCoverage;
|
||||||
|
|
||||||
this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
|
this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
|
||||||
|
this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
|
||||||
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
|
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
|
||||||
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
||||||
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
||||||
|
|
||||||
this.windowHeader = new LinkedList<HeaderElement>();
|
this.windowHeader = new LinkedList<HeaderElement>();
|
||||||
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() {
|
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
final int difference = read1.getSoftEnd() - read2.getSoftEnd();
|
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||||
return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName());
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -290,8 +295,8 @@ public class SlidingWindow {
|
||||||
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
|
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
|
||||||
readsInWindow.remove(readsInWindow.first());
|
readsInWindow.poll();
|
||||||
}
|
}
|
||||||
|
|
||||||
return regions;
|
return regions;
|
||||||
|
|
@ -353,7 +358,7 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
|
* returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
|
||||||
*
|
*
|
||||||
* @param stop check the window from start to stop (not-inclusive)
|
* @param stop check the window from start to stop (not-inclusive); given in global coordinates
|
||||||
*/
|
*/
|
||||||
protected void markSites(final int stop) {
|
protected void markSites(final int stop) {
|
||||||
|
|
||||||
|
|
@ -363,21 +368,16 @@ public class SlidingWindow {
|
||||||
// copy over as many bits as we can from the previous calculation. Note that we can't trust the
|
// copy over as many bits as we can from the previous calculation. Note that we can't trust the
|
||||||
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
|
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
|
||||||
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
|
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
|
||||||
final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize);
|
final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize));
|
||||||
|
|
||||||
// update the iterator to the correct position
|
final ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation);
|
||||||
Iterator<HeaderElement> headerElementIterator = windowHeader.iterator();
|
|
||||||
for (int i = windowHeaderStartLocation; i < locationToProcess; i++) {
|
|
||||||
if (headerElementIterator.hasNext())
|
|
||||||
headerElementIterator.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
// process a contextSize worth of region from scratch in case there's a variant there
|
// process a contextSize worth of region from scratch in case there's a variant there
|
||||||
for (int i = locationToProcess; i < stop; i++) {
|
for (int i = locationToProcess; i < stop; i++) {
|
||||||
if (headerElementIterator.hasNext()) {
|
if (headerElementIterator.hasNext()) {
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
HeaderElement headerElement = headerElementIterator.next();
|
||||||
|
|
||||||
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
|
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
|
||||||
markVariantRegion(i - windowHeaderStartLocation);
|
markVariantRegion(i - windowHeaderStartLocation);
|
||||||
|
|
||||||
} else
|
} else
|
||||||
|
|
@ -409,7 +409,7 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds bases to the running consensus or filtered data accordingly
|
* Adds bases to the running consensus
|
||||||
*
|
*
|
||||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||||
*
|
*
|
||||||
|
|
@ -422,9 +422,10 @@ public class SlidingWindow {
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||||
ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||||
if (start < end) {
|
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
if ( start < end ) {
|
||||||
|
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
||||||
if (!headerElementIterator.hasNext())
|
if (!headerElementIterator.hasNext())
|
||||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||||
|
|
@ -432,37 +433,29 @@ public class SlidingWindow {
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
HeaderElement headerElement = headerElementIterator.next();
|
||||||
|
|
||||||
if (headerElement.hasConsensusData()) {
|
if (headerElement.hasConsensusData()) {
|
||||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
|
||||||
|
|
||||||
int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
|
||||||
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
|
||||||
|
|
||||||
|
// find the end of the consecutive consensus data in the window
|
||||||
|
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
||||||
if (endOfConsensus <= start)
|
if (endOfConsensus <= start)
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
||||||
|
|
||||||
|
// add to running consensus and recurse
|
||||||
|
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
||||||
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
|
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
|
||||||
} else if (headerElement.hasFilteredData()) {
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// add any outstanding consensus data
|
||||||
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
||||||
|
|
||||||
int endOfFilteredData = findNextNonFilteredDataElement(header, start, end);
|
// find the end of the consecutive empty data in the window
|
||||||
reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType));
|
final int endOfEmptyData = findNextConsensusElement(header, start, end);
|
||||||
|
|
||||||
if (endOfFilteredData <= start)
|
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
|
|
||||||
|
|
||||||
reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType));
|
|
||||||
} else if (headerElement.isEmpty()) {
|
|
||||||
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
|
||||||
|
|
||||||
int endOfEmptyData = findNextNonEmptyElement(header, start, end);
|
|
||||||
|
|
||||||
if (endOfEmptyData <= start)
|
if (endOfEmptyData <= start)
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||||
|
|
||||||
|
// recurse out of the empty region
|
||||||
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
|
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
|
||||||
} else
|
}
|
||||||
throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return reads;
|
return reads;
|
||||||
|
|
@ -474,24 +467,21 @@ public class SlidingWindow {
|
||||||
* @param type the synthetic reads you want to close
|
* @param type the synthetic reads you want to close
|
||||||
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||||
*/
|
*/
|
||||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
|
||||||
GATKSAMRecord read = null;
|
|
||||||
ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
|
||||||
|
|
||||||
switch (type) {
|
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
||||||
case CONSENSUS:
|
|
||||||
read = finalizeRunningConsensus();
|
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
|
||||||
break;
|
final GATKSAMRecord read = finalizeRunningConsensus();
|
||||||
case FILTERED:
|
if ( read != null )
|
||||||
read = finalizeFilteredDataConsensus();
|
list.add(read);
|
||||||
break;
|
}
|
||||||
case BOTH:
|
|
||||||
read = finalizeRunningConsensus();
|
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
|
||||||
if (read != null) list.add(read);
|
final GATKSAMRecord read = finalizeFilteredDataConsensus();
|
||||||
read = finalizeFilteredDataConsensus();
|
if ( read != null )
|
||||||
|
list.add(read);
|
||||||
}
|
}
|
||||||
if (read != null)
|
|
||||||
list.add(read);
|
|
||||||
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
@ -499,19 +489,145 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* Looks for the next position without consensus data
|
* Looks for the next position without consensus data
|
||||||
*
|
*
|
||||||
* @param start beginning of the filtered region
|
* @param header the header to check
|
||||||
* @param upTo limit to search for another consensus element
|
* @param start beginning of the filtered region
|
||||||
|
* @param upTo limit to search for another consensus element
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||||
*/
|
*/
|
||||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
int index = start;
|
int index = start;
|
||||||
while (index < upTo) {
|
while (index < upTo) {
|
||||||
if (!headerElementIterator.hasNext())
|
if (!headerElementIterator.hasNext())
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
throw new ReviewedStingException("There are no more header elements in this window");
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
if (!headerElementIterator.next().hasConsensusData())
|
||||||
|
break;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for the next position witho consensus data
|
||||||
|
*
|
||||||
|
* @param header the header to check
|
||||||
|
* @param start beginning of the filtered region
|
||||||
|
* @param upTo limit to search for another consensus element
|
||||||
|
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||||
|
*/
|
||||||
|
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||||
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
int index = start;
|
||||||
|
while (index < upTo) {
|
||||||
|
if (!headerElementIterator.hasNext())
|
||||||
|
throw new ReviewedStingException("There are no more header elements in this window");
|
||||||
|
|
||||||
|
if (headerElementIterator.next().hasConsensusData())
|
||||||
|
break;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds bases to the filtered data synthetic read.
|
||||||
|
*
|
||||||
|
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||||
|
* bases.
|
||||||
|
*
|
||||||
|
* @param header the window header
|
||||||
|
* @param start the first header index to add to consensus
|
||||||
|
* @param end the first header index NOT TO add to consensus
|
||||||
|
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||||
|
*/
|
||||||
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
|
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||||
|
if (runningConsensus == null)
|
||||||
|
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
||||||
|
|
||||||
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
||||||
|
for (int index = start; index < end; index++) {
|
||||||
|
if (!headerElementIterator.hasNext())
|
||||||
|
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||||
|
|
||||||
|
final HeaderElement headerElement = headerElementIterator.next();
|
||||||
if (!headerElement.hasConsensusData())
|
if (!headerElement.hasConsensusData())
|
||||||
|
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
||||||
|
|
||||||
|
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds bases to the running filtered data accordingly
|
||||||
|
*
|
||||||
|
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||||
|
*
|
||||||
|
* @param header the window header
|
||||||
|
* @param start the first header index to add to consensus
|
||||||
|
* @param end the first header index NOT TO add to consensus
|
||||||
|
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||||
|
*/
|
||||||
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||||
|
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||||
|
|
||||||
|
if ( start < end ) {
|
||||||
|
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
||||||
|
if (!headerElementIterator.hasNext())
|
||||||
|
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||||
|
|
||||||
|
HeaderElement headerElement = headerElementIterator.next();
|
||||||
|
|
||||||
|
if (headerElement.hasFilteredData()) {
|
||||||
|
|
||||||
|
// find the end of the consecutive filtered data in the window
|
||||||
|
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
|
||||||
|
if (endOfFiltered <= start)
|
||||||
|
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
|
||||||
|
|
||||||
|
// add to running filtered consensus and recurse
|
||||||
|
addToFilteredData(header, start, endOfFiltered);
|
||||||
|
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// add any outstanding filtered data
|
||||||
|
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
||||||
|
|
||||||
|
// find the end of the consecutive empty data in the window
|
||||||
|
final int endOfEmptyData = findNextFilteredElement(header, start, end);
|
||||||
|
if (endOfEmptyData <= start)
|
||||||
|
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||||
|
|
||||||
|
// recurse out of the empty region
|
||||||
|
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for the next position without consensus data
|
||||||
|
*
|
||||||
|
* @param header the header to check
|
||||||
|
* @param start beginning of the filtered region
|
||||||
|
* @param upTo limit to search for another consensus element
|
||||||
|
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||||
|
*/
|
||||||
|
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||||
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
int index = start;
|
||||||
|
while (index < upTo) {
|
||||||
|
if (!headerElementIterator.hasNext())
|
||||||
|
throw new ReviewedStingException("There are no more header elements in this window");
|
||||||
|
|
||||||
|
if (!headerElementIterator.next().hasFilteredData())
|
||||||
break;
|
break;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
@ -519,43 +635,21 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks for the next position without filtered data
|
* Looks for the next position witho consensus data
|
||||||
*
|
*
|
||||||
* @param start beginning of the region
|
* @param header the header to check
|
||||||
* @param upTo limit to search for
|
* @param start beginning of the filtered region
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
|
* @param upTo limit to search for another consensus element
|
||||||
|
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||||
*/
|
*/
|
||||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
int index = start;
|
int index = start;
|
||||||
while (index < upTo) {
|
while (index < upTo) {
|
||||||
if (!headerElementIterator.hasNext())
|
if (!headerElementIterator.hasNext())
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
throw new ReviewedStingException("There are no more header elements in this window");
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
if (headerElementIterator.next().hasFilteredData())
|
||||||
if (!headerElement.hasFilteredData() || headerElement.hasConsensusData())
|
|
||||||
break;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks for the next non-empty header element
|
|
||||||
*
|
|
||||||
* @param start beginning of the region
|
|
||||||
* @param upTo limit to search for
|
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
|
|
||||||
*/
|
|
||||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
int index = start;
|
|
||||||
while (index < upTo) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
|
||||||
if (!headerElement.isEmpty())
|
|
||||||
break;
|
break;
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
|
|
@ -571,67 +665,26 @@ public class SlidingWindow {
|
||||||
* @param header the window header
|
* @param header the window header
|
||||||
* @param start the first header index to add to consensus
|
* @param start the first header index to add to consensus
|
||||||
* @param end the first header index NOT TO add to consensus
|
* @param end the first header index NOT TO add to consensus
|
||||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
|
||||||
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
|
|
||||||
*/
|
*/
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
private ObjectArrayList<GATKSAMRecord> addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||||
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
|
|
||||||
|
|
||||||
if (filteredDataConsensus == null)
|
if (filteredDataConsensus == null)
|
||||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
||||||
|
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
for (int index = start; index < end; index++) {
|
for (int index = start; index < end; index++) {
|
||||||
if (!headerElementIterator.hasNext())
|
if (!headerElementIterator.hasNext())
|
||||||
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
final HeaderElement headerElement = headerElementIterator.next();
|
||||||
if (headerElement.hasConsensusData())
|
|
||||||
throw new ReviewedStingException("Found consensus data inside region to add to filtered data.");
|
|
||||||
|
|
||||||
if (!headerElement.hasFilteredData())
|
if (!headerElement.hasFilteredData())
|
||||||
throw new ReviewedStingException("No filtered data in " + index);
|
throw new ReviewedStingException("No filtered data in " + index);
|
||||||
|
|
||||||
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
|
|
||||||
result.add(finalizeFilteredDataConsensus());
|
|
||||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType);
|
|
||||||
}
|
|
||||||
|
|
||||||
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
|
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds bases to the filtered data synthetic read.
|
|
||||||
*
|
|
||||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
|
||||||
* bases.
|
|
||||||
*
|
|
||||||
* @param header the window header
|
|
||||||
* @param start the first header index to add to consensus
|
|
||||||
* @param end the first header index NOT TO add to consensus
|
|
||||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
|
||||||
*/
|
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
|
||||||
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
|
||||||
if (runningConsensus == null)
|
|
||||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
|
||||||
|
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
for (int index = start; index < end; index++) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
|
||||||
if (!headerElement.hasConsensusData())
|
|
||||||
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
|
||||||
|
|
||||||
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -726,7 +779,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
for ( int i = start; i <= stop; i++ ) {
|
for ( int i = start; i <= stop; i++ ) {
|
||||||
|
|
||||||
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT);
|
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT);
|
||||||
|
|
||||||
// we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
|
// we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
|
||||||
if ( nAlleles > 2 || nAlleles == -1 )
|
if ( nAlleles > 2 || nAlleles == -1 )
|
||||||
|
|
@ -760,8 +813,8 @@ public class SlidingWindow {
|
||||||
if ( headerElement.getLocation() == positionToSkip )
|
if ( headerElement.getLocation() == positionToSkip )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ||
|
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
|
||||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 )
|
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -784,6 +837,7 @@ public class SlidingWindow {
|
||||||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
||||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
||||||
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
|
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
|
||||||
|
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
|
||||||
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||||
|
|
||||||
return result; // finalized reads will be downsampled if necessary
|
return result; // finalized reads will be downsampled if necessary
|
||||||
|
|
@ -914,6 +968,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
if (!windowHeader.isEmpty()) {
|
if (!windowHeader.isEmpty()) {
|
||||||
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
|
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
|
||||||
|
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
|
||||||
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -983,7 +1038,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
// initialize the mapping from base (allele) to header
|
// initialize the mapping from base (allele) to header
|
||||||
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
||||||
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) {
|
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
|
||||||
final int currentIndex = alleleHeaderMap.size();
|
final int currentIndex = alleleHeaderMap.size();
|
||||||
if ( currentIndex > 1 )
|
if ( currentIndex > 1 )
|
||||||
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");
|
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");
|
||||||
|
|
@ -997,7 +1052,7 @@ public class SlidingWindow {
|
||||||
if ( alleleHeaderMap.size() != 2 )
|
if ( alleleHeaderMap.size() != 2 )
|
||||||
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
||||||
|
|
||||||
final ObjectList<GATKSAMRecord> readsToRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||||
|
|
||||||
|
|
@ -1006,38 +1061,36 @@ public class SlidingWindow {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// remove all other reads from the read cache since we're going to use them here
|
// remove all other reads from the read cache since we're going to use them here
|
||||||
readsInWindow.remove(read);
|
readsToRemove.add(read);
|
||||||
|
|
||||||
// if the read falls before the het position, we don't need to look at it
|
// if the read falls before the het position or has low MQ, we don't need to look at it
|
||||||
if ( read.getSoftEnd() < globalHetRefPosition )
|
if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
|
// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
|
||||||
removeFromHeader(windowHeader, read);
|
removeFromHeader(windowHeader, read);
|
||||||
|
|
||||||
// make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus)
|
// where on the read is the het position?
|
||||||
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {
|
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||||
|
|
||||||
// where on the read is the het position?
|
// this is safe because indels are not supported
|
||||||
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
final byte base = read.getReadBases()[readPosOfHet];
|
||||||
|
|
||||||
// this is safe because indels are not supported
|
// check which allele this read represents
|
||||||
final byte base = read.getReadBases()[readPosOfHet];
|
final Integer allele = alleleHeaderMap.get(base);
|
||||||
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];
|
|
||||||
|
|
||||||
// check which allele this read represents
|
// ignore the read if it represents a base that's not part of the consensus
|
||||||
final Integer allele = alleleHeaderMap.get(base);
|
if ( allele != null ) {
|
||||||
|
// add to the appropriate polyploid header
|
||||||
// ignore the read if it represents a base that's not part of the consensus
|
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
||||||
if ( allele != null ) {
|
header.reads.add(read);
|
||||||
// add to the appropriate polyploid header
|
addToHeader(header.consensus, read);
|
||||||
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
|
||||||
header.reads.add(read);
|
|
||||||
addToHeader(header.consensus, read);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for ( final GATKSAMRecord read : readsToRemove )
|
||||||
|
readsInWindow.remove(read);
|
||||||
|
|
||||||
// create the polyploid synthetic reads if we can
|
// create the polyploid synthetic reads if we can
|
||||||
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
||||||
|
|
||||||
|
|
@ -1171,15 +1224,15 @@ public class SlidingWindow {
|
||||||
int readBaseIndex = 0;
|
int readBaseIndex = 0;
|
||||||
HeaderElement headerElement;
|
HeaderElement headerElement;
|
||||||
|
|
||||||
for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) {
|
for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) {
|
||||||
switch ( cigarElement.getOperator() ) {
|
switch ( cigarElement.getOperator() ) {
|
||||||
case H:
|
case H:
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
readBaseIndex += cigarElement.getLength();
|
readBaseIndex += cigarElement.getLength();
|
||||||
|
|
||||||
// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
|
// special case, if we don't have the previous header element anymore, don't worry about it.
|
||||||
if ( removeRead && locationIndex == 0 )
|
if ( locationIndex == 0 )
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// insertions are added to the base to the left (previous element)
|
// insertions are added to the base to the left (previous element)
|
||||||
|
|
@ -1200,9 +1253,8 @@ public class SlidingWindow {
|
||||||
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||||
else
|
else
|
||||||
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||||
|
|
||||||
locationIndex++;
|
|
||||||
}
|
}
|
||||||
|
locationIndex += nDeletionBases;
|
||||||
break;
|
break;
|
||||||
case S:
|
case S:
|
||||||
case M:
|
case M:
|
||||||
|
|
@ -1211,6 +1263,8 @@ public class SlidingWindow {
|
||||||
case X:
|
case X:
|
||||||
final int nBasesToAdd = cigarElement.getLength();
|
final int nBasesToAdd = cigarElement.getLength();
|
||||||
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
|
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
|
||||||
|
final byte[] readBases = read.getReadBases();
|
||||||
|
final byte[] readQuals = read.getBaseQualities();
|
||||||
final boolean readHasIndelQuals = read.hasBaseIndelQualities();
|
final boolean readHasIndelQuals = read.hasBaseIndelQualities();
|
||||||
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
|
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
|
||||||
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
|
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
|
||||||
|
|
@ -1219,14 +1273,15 @@ public class SlidingWindow {
|
||||||
headerElement = headerElementIterator.next();
|
headerElement = headerElementIterator.next();
|
||||||
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
|
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
|
||||||
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
||||||
|
|
||||||
if ( removeRead )
|
if ( removeRead )
|
||||||
headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||||
else
|
else
|
||||||
headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||||
|
|
||||||
readBaseIndex++;
|
readBaseIndex++;
|
||||||
locationIndex++;
|
|
||||||
}
|
}
|
||||||
|
locationIndex += nBasesToAdd;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
||||||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
|
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
|
||||||
public double qual_epsilon = 0.10;
|
public double qual_epsilon = 0.10;
|
||||||
|
|
||||||
|
@Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false)
|
||||||
|
public int excludeMQ = 0;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
||||||
|
|
@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isGoodRead(final PileupElement p) {
|
private boolean isGoodRead(final PileupElement p) {
|
||||||
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20;
|
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getTagIndex(final List<String> tags) {
|
private int getTagIndex(final List<String> tags) {
|
||||||
|
|
|
||||||
|
|
@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest {
|
||||||
BaseCounts counts = new BaseCounts();
|
BaseCounts counts = new BaseCounts();
|
||||||
|
|
||||||
for ( int qual : test.quals )
|
for ( int qual : test.quals )
|
||||||
counts.incr(BaseIndex.A, (byte)qual, 20);
|
counts.incr(BaseIndex.A, (byte)qual, 20, false);
|
||||||
|
|
||||||
final int actualSum = (int)counts.getSumQuals((byte)'A');
|
final int actualSum = (int)counts.getSumQuals((byte)'A');
|
||||||
final int expectedSum = qualSum(test.quals);
|
final int expectedSum = qualSum(test.quals);
|
||||||
|
|
|
||||||
|
|
@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
|
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
|
||||||
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
|
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
|
||||||
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
|
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
|
||||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
|
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
|
||||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
|
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue);
|
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
|
||||||
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);
|
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);
|
||||||
|
|
||||||
Assert.assertEquals(nAllelesSeen, nAllelesExpected);
|
Assert.assertEquals(nAllelesSeen, nAllelesExpected);
|
||||||
|
|
@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
if ( count == 0 )
|
if ( count == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
|
final boolean isSignificant;
|
||||||
|
if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) {
|
||||||
|
isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue;
|
||||||
|
} else {
|
||||||
|
isSignificant = (count >= targetPvalue * total);
|
||||||
|
}
|
||||||
|
|
||||||
if ( pvalue > targetPvalue ) {
|
if ( isSignificant ) {
|
||||||
if ( index == BaseIndex.D.index )
|
if ( index == BaseIndex.D.index )
|
||||||
return -1;
|
return -1;
|
||||||
result++;
|
result++;
|
||||||
|
|
|
||||||
|
|
@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testDefaultCompression() {
|
public void testDefaultCompression() {
|
||||||
RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false);
|
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testDefaultCompressionWithKnowns() {
|
public void testDefaultCompressionWithKnowns() {
|
||||||
RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true);
|
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMultipleIntervals() {
|
public void testMultipleIntervals() {
|
||||||
RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false);
|
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMultipleIntervalsWithKnowns() {
|
public void testMultipleIntervalsWithKnowns() {
|
||||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true);
|
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f";
|
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHighCompression() {
|
public void testHighCompression() {
|
||||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false);
|
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHighCompressionWithKnowns() {
|
public void testHighCompressionWithKnowns() {
|
||||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true);
|
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testLowCompression() {
|
public void testLowCompression() {
|
||||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false);
|
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testLowCompressionWithKnowns() {
|
public void testLowCompressionWithKnowns() {
|
||||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true);
|
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testIndelCompression() {
|
public void testIndelCompression() {
|
||||||
final String md5 = "56154baed62be07008d3684a0a4c0996";
|
final String md5 = "d20e6012300898a0315c795cab7583d8";
|
||||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
||||||
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
||||||
}
|
}
|
||||||
|
|
@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testFilteredDeletionCompression() {
|
public void testFilteredDeletionCompression() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1")));
|
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReduction() {
|
public void testCoReduction() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8")));
|
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReductionWithKnowns() {
|
public void testCoReductionWithKnowns() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b")));
|
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testInsertionsAtEdgeOfConsensus() {
|
public void testInsertionsAtEdgeOfConsensus() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
||||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0")));
|
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testAddingReadAfterTailingTheStash() {
|
public void testAddingReadAfterTailingTheStash() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
public void testDivideByZero() {
|
public void testDivideByZero() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d")));
|
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testReadOffContig() {
|
public void testReadOffContig() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01")));
|
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
public void testPairedReadsInVariantRegion() {
|
public void testPairedReadsInVariantRegion() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
||||||
" -o %s --downsample_coverage 250 -dcov 50 ";
|
" -o %s --downsample_coverage 250 -dcov 50 ";
|
||||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d")));
|
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ConsensusCreationTest {
|
private class ConsensusCreationTest {
|
||||||
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression;
|
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage;
|
||||||
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
|
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
|
||||||
public final String description;
|
public final String description;
|
||||||
|
|
||||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
|
||||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||||
this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
|
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||||
|
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
||||||
|
|
||||||
// first, add the basic reads to the collection
|
// first, add the basic reads to the collection
|
||||||
myReads.addAll(basicReads);
|
myReads.addAll(basicReads);
|
||||||
|
|
@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
||||||
}
|
}
|
||||||
|
|
||||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
|
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
|
||||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||||
this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
|
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||||
|
this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
||||||
|
|
||||||
// first, add the basic reads to the collection
|
// first, add the basic reads to the collection
|
||||||
myReads.addAll(basicReads);
|
myReads.addAll(basicReads);
|
||||||
|
|
@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
|
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
|
||||||
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
|
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
|
||||||
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
|
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
|
||||||
|
private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320);
|
||||||
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);
|
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);
|
||||||
|
|
||||||
|
private static final int DEEP_COVERAGE_ITERATIONS = 100;
|
||||||
|
|
||||||
@DataProvider(name = "ConsensusCreation")
|
@DataProvider(name = "ConsensusCreation")
|
||||||
public Object[][] createConsensusCreationTestData() {
|
public Object[][] createConsensusCreationTestData() {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
// test high quality reads and bases
|
// test high quality reads and bases
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
|
|
||||||
// test low quality reads
|
// test low quality reads
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
|
||||||
|
|
||||||
// test low quality bases
|
// test low quality bases
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
|
||||||
|
|
||||||
// test mixture
|
// test mixture
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
|
||||||
|
|
||||||
// test I/D operators
|
// test I/D operators
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
@ -368,14 +374,14 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
||||||
|
|
||||||
// test WITHOUT het compression
|
// test WITHOUT het compression
|
||||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : test.myReads )
|
for ( final GATKSAMRecord read : test.myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
||||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
|
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
|
||||||
|
|
||||||
// test WITH het compression at KNOWN sites
|
// test WITH het compression at KNOWN sites
|
||||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : test.myReads )
|
for ( final GATKSAMRecord read : test.myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
for ( int i = 0; i < 1200; i++ )
|
for ( int i = 0; i < 1200; i++ )
|
||||||
|
|
@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
||||||
|
|
||||||
// test WITH het compression at ALL sites
|
// test WITH het compression at ALL sites
|
||||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : test.myReads )
|
for ( final GATKSAMRecord read : test.myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
result = slidingWindow.close(null);
|
result = slidingWindow.close(null);
|
||||||
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
|
||||||
|
|
||||||
|
// test with deep coverage
|
||||||
|
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
|
for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) {
|
||||||
|
for ( final GATKSAMRecord read : test.myReads ) {
|
||||||
|
final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength);
|
||||||
|
copy.setReadBases(read.getReadBases());
|
||||||
|
copy.setBaseQualities(read.getBaseQualities());
|
||||||
|
copy.setMappingQuality(read.getMappingQuality());
|
||||||
|
copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag());
|
||||||
|
if ( read.getCigar() != null )
|
||||||
|
copy.setCigar(read.getCigar());
|
||||||
|
slidingWindow.addRead(copy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result = slidingWindow.close(null);
|
||||||
|
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
|
||||||
|
|
||||||
// test WITHOUT het compression
|
// test WITHOUT het compression
|
||||||
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : myReads )
|
for ( final GATKSAMRecord read : myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
|
||||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||||
|
|
||||||
// test WITH het compression at KNOWN sites
|
// test WITH het compression at KNOWN sites
|
||||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : myReads )
|
for ( final GATKSAMRecord read : myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
for ( int i = 0; i < readLength; i++ )
|
for ( int i = 0; i < readLength; i++ )
|
||||||
|
|
@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||||
|
|
||||||
// test WITH het compression at ALL sites
|
// test WITH het compression at ALL sites
|
||||||
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : myReads )
|
for ( final GATKSAMRecord read : myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
result = slidingWindow.close(knownSNPs);
|
result = slidingWindow.close(knownSNPs);
|
||||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAddingReadPairWithSameCoordinates() {
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
|
||||||
|
|
||||||
|
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
|
||||||
|
read1.setReadBases(new byte[]{(byte)'A'});
|
||||||
|
read1.setBaseQualities(new byte[]{(byte)'A'});
|
||||||
|
read1.setMappingQuality(30);
|
||||||
|
read1.setReadNegativeStrandFlag(false);
|
||||||
|
slidingWindow.addRead(read1);
|
||||||
|
|
||||||
|
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
|
||||||
|
read2.setReadBases(new byte[]{(byte)'A'});
|
||||||
|
read2.setBaseQualities(new byte[]{(byte)'A'});
|
||||||
|
read2.setMappingQuality(30);
|
||||||
|
read2.setReadNegativeStrandFlag(true);
|
||||||
|
slidingWindow.addRead(read2);
|
||||||
|
|
||||||
|
Assert.assertEquals(slidingWindow.readsInWindow.size(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOnlySpanningReadHasLowQual() {
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
|
|
||||||
|
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100);
|
||||||
|
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100);
|
||||||
|
|
||||||
|
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||||
|
read1.setReadBases(bases);
|
||||||
|
read2.setReadBases(bases);
|
||||||
|
|
||||||
|
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
||||||
|
baseQuals[80] = (byte)10;
|
||||||
|
read1.setBaseQualities(baseQuals);
|
||||||
|
read2.setBaseQualities(baseQuals);
|
||||||
|
|
||||||
|
read1.setMappingQuality(30);
|
||||||
|
read2.setMappingQuality(30);
|
||||||
|
|
||||||
|
slidingWindow.addRead(read1);
|
||||||
|
slidingWindow.addRead(read2);
|
||||||
|
|
||||||
|
Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
//// This section tests the downsampling functionality ////
|
//// This section tests the downsampling functionality ////
|
||||||
|
|
@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(dataProvider = "Downsampling", enabled = true)
|
@Test(dataProvider = "Downsampling", enabled = true)
|
||||||
public void testDownsamplingTest(final int dcov) {
|
public void testDownsamplingTest(final int dcov) {
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
||||||
|
|
||||||
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
||||||
|
|
@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(dataProvider = "ConsensusQuals", enabled = true)
|
@Test(dataProvider = "ConsensusQuals", enabled = true)
|
||||||
public void testConsensusQualsTest(QualsTest test) {
|
public void testConsensusQualsTest(QualsTest test) {
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
for ( final GATKSAMRecord read : test.myReads )
|
for ( final GATKSAMRecord read : test.myReads )
|
||||||
slidingWindow.addRead(read);
|
slidingWindow.addRead(read);
|
||||||
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>());
|
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>());
|
||||||
|
|
@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||||
read.setMappingQuality(30);
|
read.setMappingQuality(30);
|
||||||
|
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
|
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
|
||||||
|
|
||||||
Assert.assertEquals(newIndex, start > 0 ? start : 0);
|
Assert.assertEquals(newIndex, start > 0 ? start : 0);
|
||||||
|
|
@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
read.setMappingQuality(30);
|
read.setMappingQuality(30);
|
||||||
|
|
||||||
// add the read
|
// add the read
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
||||||
for ( int i = 0; i < start; i++ )
|
for ( int i = 0; i < start; i++ )
|
||||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
|
|
@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////
|
||||||
//// This section tests functionality related to polyploid consensus creation ////
|
//// This section tests functionality related to polyploid consensus creation ////
|
||||||
//////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
read.setMappingQuality(30);
|
read.setMappingQuality(30);
|
||||||
|
|
||||||
// add the read
|
// add the read
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0);
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0);
|
||||||
|
|
||||||
// set up and add a soft-clipped read if requested
|
// set up and add a soft-clipped read if requested
|
||||||
|
|
|
||||||
|
|
@ -383,7 +383,7 @@ public class MathUtils {
|
||||||
|
|
||||||
for (int hits = k_start; hits <= k_end; hits++) {
|
for (int hits = k_start; hits <= k_end; hits++) {
|
||||||
prevProb = cumProb;
|
prevProb = cumProb;
|
||||||
double probability = binomialProbability(n, hits);
|
final double probability = binomialProbability(n, hits);
|
||||||
cumProb += probability;
|
cumProb += probability;
|
||||||
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
|
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
|
||||||
probCache = probCache.add(new BigDecimal(prevProb));
|
probCache = probCache.add(new BigDecimal(prevProb));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue