Merge pull request #181 from broadinstitute/eb_yet_more_rr_improvements_GSA-930

Various bug fixes for recent Reduce Reads additions plus solution implemented for low MQ reads.
This commit is contained in:
MauricioCarneiro 2013-04-24 15:40:03 -07:00
commit 27bb699e8b
14 changed files with 527 additions and 303 deletions

View File

@ -80,6 +80,21 @@ public class BaseAndQualsCounts extends BaseCounts {
* @param isLowQualBase true if the base is low quality
*/
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
// convenience overload: delegates to the 7-arg version, treating the base as not soft-clipped
incr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
}
/*
* Increments the count
*
* @param base the base
* @param baseQual the base quality
* @param insQual the insertion quality
* @param delQual the deletion quality
* @param baseMappingQual the mapping quality
* @param isLowQualBase true if the base is low quality
* @param isSoftClip true if is soft-clipped
*/
public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
// if we already have high quality bases, ignore low quality ones
if ( isLowQualBase && !isLowQuality() )
return;
@ -92,7 +107,7 @@ public class BaseAndQualsCounts extends BaseCounts {
}
final BaseIndex i = BaseIndex.byteToBase(base);
super.incr(i, baseQual, baseMappingQual);
super.incr(i, baseQual, baseMappingQual, isSoftClip);
switch (i) {
case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
@ -114,13 +129,28 @@ public class BaseAndQualsCounts extends BaseCounts {
* @param baseMappingQual the mapping quality
* @param isLowQualBase true if the base is low quality
*/
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
// convenience overload: delegates to the 7-arg version, treating the base as not soft-clipped
decr(base, baseQual, insQual, delQual, baseMappingQual, isLowQualBase, false);
}
/*
* Decrements the count
*
* @param base the base
* @param baseQual the base quality
* @param insQual the insertion quality
* @param delQual the deletion quality
* @param baseMappingQual the mapping quality
* @param isLowQualBase true if the base is low quality
* @param isSoftClip true if is soft-clipped
*/
public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase, final boolean isSoftClip) {
// if this is not the right type of base, ignore it
if ( isLowQualBase != isLowQuality() )
return;
final BaseIndex i = BaseIndex.byteToBase(base);
super.decr(i, baseQual, baseMappingQual);
super.decr(i, baseQual, baseMappingQual, isSoftClip);
switch (i) {
case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;

View File

@ -80,6 +80,7 @@ import org.broadinstitute.sting.utils.MathUtils;
private int count_N = 0;
private int sumQual_N = 0;
private int totalCount = 0; // keeps track of total count since this is requested so often
private int nSoftClippedBases = 0;
private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
@ -104,6 +105,7 @@ import org.broadinstitute.sting.utils.MathUtils;
this.count_I += other.count_I;
this.count_N += other.count_N;
this.totalCount += other.totalCount;
this.nSoftClippedBases = other.nSoftClippedBases;
this.mappingQualities.addAll(other.mappingQualities);
}
@ -117,6 +119,7 @@ import org.broadinstitute.sting.utils.MathUtils;
this.count_I -= other.count_I;
this.count_N -= other.count_N;
this.totalCount -= other.totalCount;
this.nSoftClippedBases -= other.nSoftClippedBases;
this.mappingQualities.removeAll(other.mappingQualities);
}
@ -126,7 +129,7 @@ import org.broadinstitute.sting.utils.MathUtils;
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
public void incr(final BaseIndex base, final byte qual, final int mappingQuality) {
public void incr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
switch (base) {
case A: ++count_A; sumQual_A += qual; break;
case C: ++count_C; sumQual_C += qual; break;
@ -137,6 +140,7 @@ import org.broadinstitute.sting.utils.MathUtils;
case N: ++count_N; sumQual_N += qual; break;
}
++totalCount;
nSoftClippedBases += isSoftclip ? 1 : 0;
mappingQualities.add(mappingQuality);
}
@ -159,7 +163,7 @@ import org.broadinstitute.sting.utils.MathUtils;
}
@Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
public void decr(final BaseIndex base, final byte qual, final int mappingQuality) {
public void decr(final BaseIndex base, final byte qual, final int mappingQuality, final boolean isSoftclip) {
switch (base) {
case A: --count_A; sumQual_A -= qual; break;
case C: --count_C; sumQual_C -= qual; break;
@ -170,6 +174,7 @@ import org.broadinstitute.sting.utils.MathUtils;
case N: --count_N; sumQual_N -= qual; break;
}
--totalCount;
nSoftClippedBases -= isSoftclip ? 1 : 0;
mappingQualities.remove((Integer) mappingQuality);
}
@ -231,6 +236,10 @@ import org.broadinstitute.sting.utils.MathUtils;
return (byte) (sumQualsOfBase(base) / countOfBase(base));
}
@Ensures("result >= 0")
public int nSoftclips() {
// number of bases counted with isSoftclip == true (incremented in incr, decremented in decr)
return nSoftClippedBases;
}
@Ensures("result >= 0")
public int totalCount() {
@ -281,22 +290,42 @@ import org.broadinstitute.sting.utils.MathUtils;
return baseIndexWithMostCounts().getByte();
}
/**
* @return the base index for which the count is highest, including indel indexes
*/
@Ensures("result != null")
public BaseIndex baseIndexWithMostCounts() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
if (countOfBase(i) > countOfBase(maxI))
maxI = i;
}
return maxI;
return baseIndexWithMostCounts(true);
}
/**
* @return the base index for which the count is highest, excluding indel indexes
*/
@Ensures("result != null")
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
return baseIndexWithMostCounts(false);
}
/**
* Finds the base index with the most counts
*
* @param allowIndels should we allow base indexes representing indels?
* @return non-null base index
*/
@Ensures("result != null")
protected BaseIndex baseIndexWithMostCounts(final boolean allowIndels) {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
int maxCount = countOfBase(maxI);
for (final BaseIndex i : BaseIndex.values()) {
if (i.isNucleotide() && countOfBase(i) > countOfBase(maxI))
if ( !allowIndels && !i.isNucleotide() )
continue;
final int myCount = countOfBase(i);
if (myCount > maxCount) {
maxI = i;
maxCount = myCount;
}
}
return maxI;
}
@ -307,22 +336,36 @@ import org.broadinstitute.sting.utils.MathUtils;
@Ensures("result != null")
public BaseIndex baseIndexWithMostProbability() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (final BaseIndex i : BaseIndex.values()) {
if (getSumQuals(i) > getSumQuals(maxI))
maxI = i;
}
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCounts());
return baseIndexWithMostProbability(true);
}
@Ensures("result != null")
public BaseIndex baseIndexWithMostProbabilityWithoutIndels() {
// convenience overload: excludes the indel indexes from the search (allowIndels = false)
return baseIndexWithMostProbability(false);
}
/**
* Finds the base index with the most probability
*
* @param allowIndels should we allow base indexes representing indels?
* @return non-null base index
*/
@Ensures("result != null")
public BaseIndex baseIndexWithMostProbability(final boolean allowIndels) {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
long maxSum = getSumQuals(maxI);
for (final BaseIndex i : BaseIndex.values()) {
if (i.isNucleotide() && getSumQuals(i) > getSumQuals(maxI))
if ( !allowIndels && !i.isNucleotide() )
continue;
final long mySum = getSumQuals(i);
if (mySum > maxSum) {
maxI = i;
maxSum = mySum;
}
}
return (getSumQuals(maxI) > 0L ? maxI : baseIndexWithMostCountsWithoutIndels());
return (maxSum > 0L ? maxI : baseIndexWithMostCounts(allowIndels));
}
@Ensures("result >=0")
@ -362,6 +405,7 @@ import org.broadinstitute.sting.utils.MathUtils;
count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
totalCount = 0;
nSoftClippedBases = 0;
mappingQualities.clear();
}
}

View File

@ -121,7 +121,7 @@ public enum BaseIndex {
*
* @return whether or not it is a nucleotide, given the definition above
*/
public boolean isNucleotide() {
public final boolean isNucleotide() {
return !isIndel();
}
@ -130,7 +130,7 @@ public enum BaseIndex {
*
* @return true for I or D, false otherwise
*/
public boolean isIndel() {
public final boolean isIndel() {
return this == D || this == I;
}
}

View File

@ -62,9 +62,10 @@ public class HeaderElement {
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
private int nSoftClippedBases; // How many bases in this site came from soft clipped bases
private int location; // Genome location of this site (the sliding window knows which contig we're at)
protected static final int MIN_COUNT_FOR_USING_PVALUE = 2;
public int getLocation() {
// genome (reference) coordinate of this header element
return location;
}
@ -84,7 +85,7 @@ public class HeaderElement {
* @param location the reference location for the new element
*/
public HeaderElement(final int location) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location);
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
}
/**
@ -94,7 +95,7 @@ public class HeaderElement {
* @param location the reference location for the new element
*/
public HeaderElement(final int location, final int insertionsToTheRight) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location);
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
}
/**
@ -103,15 +104,13 @@ public class HeaderElement {
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
* @param nSoftClippedBases number of softclipped bases of this HeaderElement
* @param location the reference location of this HeaderElement
*/
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) {
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
this.consensusBaseCounts = consensusBaseCounts;
this.filteredBaseCounts = filteredBaseCounts;
this.insertionsToTheRight = insertionsToTheRight;
this.nSoftClippedBases = nSoftClippedBases;
this.location = location;
}
@ -119,10 +118,13 @@ public class HeaderElement {
* Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
* by insertion, deletion and mismatches.
*
* @param minVariantPvalue min p-value for deciding that a position is or is not variable due to mismatches
* @param minVariantProportion min proportion for deciding that a position is or is not variable due to mismatches
* @param minIndelProportion min proportion for deciding that a position is or is not variable due to indels
* @return true if site is variant by any definition. False otherwise.
*/
public boolean isVariant(double minVariantPvalue, double minIndelProportion) {
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
}
/**
@ -140,11 +142,9 @@ public class HeaderElement {
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual )
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
nSoftClippedBases += isSoftClipped ? 1 : 0;
}
/**
@ -162,11 +162,9 @@ public class HeaderElement {
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual )
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
nSoftClippedBases -= isSoftClipped ? 1 : 0;
}
/**
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
@ -246,15 +244,15 @@ public class HeaderElement {
/**
* Whether or not the HeaderElement is variant due to excess mismatches
*
* @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
* @return whether or not the HeaderElement is variant due to excess mismatches
*/
protected boolean isVariantFromMismatches(double minVariantPvalue) {
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases);
return pvalue > minVariantPvalue;
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
}
/**
@ -264,6 +262,7 @@ public class HeaderElement {
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
*/
protected boolean isVariantFromSoftClips() {
    // A site is considered variant from soft clips when the soft-clipped bases are at
    // least as numerous as all other consensus bases here (and at least one exists).
    final int softClips = consensusBaseCounts.nSoftclips();
    if (softClips == 0)
        return false;
    final int otherBases = consensusBaseCounts.totalCount() - softClips;
    return softClips >= otherBases;
}
@ -271,10 +270,11 @@ public class HeaderElement {
* Calculates the number of alleles necessary to represent this site.
*
* @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return the number of alleles necessary to represent this site or -1 if there are too many indels
*/
public int getNumberOfBaseAlleles(final double minVariantPvalue) {
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue);
public int getNumberOfBaseAlleles(final double minVariantPvalue, final double minVariantProportion) {
// getAlleles returns null when there are too many indels; report that case as -1
final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue, minVariantProportion);
return alleles == null ? -1 : alleles.size();
}
@ -282,16 +282,18 @@ public class HeaderElement {
* Calculates the alleles necessary to represent this site.
*
* @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return the list of alleles necessary to represent this site or null if there are too many indels
*/
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue) {
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
// make sure we have bases at all
final int totalBaseCount = consensusBaseCounts.totalCount();
if ( totalBaseCount == 0 )
return new ObjectArrayList<BaseIndex>(0);
// next, check for insertions
if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) )
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
// (because of the way insertions are counted), so we need to account for that
if ( hasSignificantCount(Math.min(totalBaseCount, insertionsToTheRight), totalBaseCount, minVariantPvalue, minVariantProportion) )
return null;
// finally, check for the bases themselves (including deletions)
@ -301,9 +303,7 @@ public class HeaderElement {
if ( baseCount == 0 )
continue;
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount);
if ( pvalue > minVariantPvalue ) {
if ( hasSignificantCount(baseCount, totalBaseCount, minVariantPvalue, minVariantProportion) ) {
if ( base == BaseIndex.D )
return null;
alleles.add(base);
@ -316,26 +316,34 @@ public class HeaderElement {
* Checks whether there are a significant number of softclips.
*
* @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return true if there are significant softclips, false otherwise
*/
public boolean hasSignificantSoftclips(final double minVariantPvalue) {
return hasSignificantCount(nSoftClippedBases, minVariantPvalue);
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
// significance of soft clips is judged against the total consensus base count, using the
// same pvalue/proportion test (hasSignificantCount) applied to mismatches and insertions
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
}
/*
* Checks whether there is a significant number of counts.
*
* @param count the count to test against
* @param count the count (k) to test against
* @param total the total (n) to test against
* @param minVariantPvalue the minimum pvalue to call a site variant.
* @param minVariantProportion the minimum proportion to call a site variant.
* @return true if there is a significant count given the provided pvalue, false otherwise
*/
private boolean hasSignificantCount(final int count, final double minVariantPvalue) {
final int totalBaseCount = consensusBaseCounts.totalCount();
if ( count == 0 || totalBaseCount == 0 )
private boolean hasSignificantCount(final int count, final int total, final double minVariantPvalue, final double minVariantProportion) {
if ( count == 0 || total == 0 )
return false;
// technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that
final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount));
return pvalue > minVariantPvalue;
// use p-values for low counts of k
if ( count <= MIN_COUNT_FOR_USING_PVALUE ) {
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
return pvalue > minVariantPvalue;
}
// otherwise, use straight proportions
final int minBaseCountForSignificance = (int)(minVariantProportion * total);
return count >= minBaseCountForSignificance;
}
}

View File

@ -97,13 +97,14 @@ public class MultiSampleCompressor {
final int downsampleCoverage,
final int minMappingQuality,
final double minAltPValueToTriggerVariant,
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant,
final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy) {
for ( String name : SampleUtils.getSAMFileSamples(header) ) {
compressorsPerSample.put(name,
new SingleSampleCompressor(contextSize, downsampleCoverage,
minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
}
}

View File

@ -205,15 +205,17 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
/**
* Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
* considered consensus.
* considered consensus and reduced (otherwise we will try to trigger polyploid compression). Note that
this value is used only in regions with high coverage.
*/
@Deprecated
@Advanced
@Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
public double minAltProportionToTriggerVariant = 0.05;
/**
* Minimum p-value from binomial distribution of mismatches in a site to trigger a variant region.
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression).
* Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to
trigger polyploid compression). Note that this value is used only in regions with low coverage.
*/
@Advanced
@Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
@ -288,6 +290,9 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
if ( minAltProportionToTriggerVariant < 0.0 || minAltProportionToTriggerVariant > 1.0 )
throw new UserException.BadArgumentValue("--minimum_alt_proportion_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
if ( known.isEmpty() )
knownSnpPositions = null;
else
@ -412,7 +417,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
*/
@Override
public ReduceReadsStash reduceInit() {
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
}
/**

View File

@ -63,6 +63,7 @@ public class SingleSampleCompressor {
final private int downsampleCoverage;
final private int minMappingQuality;
final private double minAltPValueToTriggerVariant;
final private double minAltProportionToTriggerVariant;
final private double minIndelProportionToTriggerVariant;
final private int minBaseQual;
final private ReduceReads.DownsampleStrategy downsampleStrategy;
@ -76,6 +77,7 @@ public class SingleSampleCompressor {
final int downsampleCoverage,
final int minMappingQuality,
final double minAltPValueToTriggerVariant,
final double minAltProportionToTriggerVariant,
final double minIndelProportionToTriggerVariant,
final int minBaseQual,
final ReduceReads.DownsampleStrategy downsampleStrategy) {
@ -84,6 +86,7 @@ public class SingleSampleCompressor {
this.minMappingQuality = minMappingQuality;
this.slidingWindowCounter = 0;
this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
this.minBaseQual = minBaseQual;
this.downsampleStrategy = downsampleStrategy;
@ -114,7 +117,9 @@ public class SingleSampleCompressor {
}
if ( slidingWindow == null) { // this is the first read
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(),
slidingWindowCounter, minAltPValueToTriggerVariant, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant,
minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
slidingWindowCounter++;
}

View File

@ -60,7 +60,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -78,8 +77,8 @@ import java.util.*;
public class SlidingWindow {
// Sliding Window data
final private ObjectAVLTreeSet<GATKSAMRecord> readsInWindow;
final private LinkedList<HeaderElement> windowHeader;
final protected PriorityQueue<GATKSAMRecord> readsInWindow;
final protected LinkedList<HeaderElement> windowHeader;
protected int contextSize; // the largest context size (between mismatches and indels)
protected String contig;
protected int contigIndex;
@ -99,6 +98,7 @@ public class SlidingWindow {
// Additional parameters
protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT; // pvalue has to be greater than this value to trigger variant region due to mismatches
protected double MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to mismatches
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
protected int MIN_BASE_QUAL_TO_COUNT; // qual has to be greater than or equal to this value
protected int MIN_MAPPING_QUALITY;
@ -146,28 +146,33 @@ public class SlidingWindow {
this.windowHeader = new LinkedList<HeaderElement>();
windowHeader.addFirst(new HeaderElement(startLocation));
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>();
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
@Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
return read1.getSoftEnd() - read2.getSoftEnd();
}
});
}
public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant,
final double minAltPValueToTriggerVariant, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant,
final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;
this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
this.MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
this.MIN_MAPPING_QUALITY = minMappingQuality;
this.windowHeader = new LinkedList<HeaderElement>();
this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>(new Comparator<GATKSAMRecord>() {
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
@Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
final int difference = read1.getSoftEnd() - read2.getSoftEnd();
return difference != 0 ? difference : read1.getReadName().compareTo(read2.getReadName());
return read1.getSoftEnd() - read2.getSoftEnd();
}
});
@ -290,8 +295,8 @@ public class SlidingWindow {
regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
}
while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
readsInWindow.remove(readsInWindow.first());
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
readsInWindow.poll();
}
return regions;
@ -353,7 +358,7 @@ public class SlidingWindow {
/**
* returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
*
* @param stop check the window from start to stop (not-inclusive)
* @param stop check the window from start to stop (not-inclusive); given in global coordinates
*/
protected void markSites(final int stop) {
@ -363,21 +368,16 @@ public class SlidingWindow {
// copy over as many bits as we can from the previous calculation. Note that we can't trust the
// last (contextSize - 1) worth of bits because we may not have actually looked at variant regions there.
final int lastPositionMarked = markedSites.updateRegion(windowHeaderStartLocation, sizeOfMarkedRegion) - contextSize - 1;
final int locationToProcess = Math.min(lastPositionMarked, stop - contextSize);
final int locationToProcess = Math.max(windowHeaderStartLocation, Math.min(lastPositionMarked, stop - contextSize));
// update the iterator to the correct position
Iterator<HeaderElement> headerElementIterator = windowHeader.iterator();
for (int i = windowHeaderStartLocation; i < locationToProcess; i++) {
if (headerElementIterator.hasNext())
headerElementIterator.next();
}
final ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(locationToProcess - windowHeaderStartLocation);
// process a contextSize worth of region from scratch in case there's a variant there
for (int i = locationToProcess; i < stop; i++) {
if (headerElementIterator.hasNext()) {
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
markVariantRegion(i - windowHeaderStartLocation);
} else
@ -409,7 +409,7 @@ public class SlidingWindow {
}
/**
* Adds bases to the running consensus or filtered data accordingly
* Adds bases to the running consensus
*
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
*
@ -422,9 +422,10 @@ public class SlidingWindow {
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if (start < end) {
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if ( start < end ) {
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
@ -432,37 +433,29 @@ public class SlidingWindow {
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData()) {
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
int endOfConsensus = findNextNonConsensusElement(header, start, end);
addToRunningConsensus(header, start, endOfConsensus, strandType);
// find the end of the consecutive consensus data in the window
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
if (endOfConsensus <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
// add to running consensus and recurse
addToRunningConsensus(header, start, endOfConsensus, strandType);
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
} else if (headerElement.hasFilteredData()) {
} else {
// add any outstanding consensus data
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
int endOfFilteredData = findNextNonFilteredDataElement(header, start, end);
reads.addAll(addToFilteredData(header, start, endOfFilteredData, strandType));
if (endOfFilteredData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, strandType));
} else if (headerElement.isEmpty()) {
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
int endOfEmptyData = findNextNonEmptyElement(header, start, end);
// find the end of the consecutive empty data in the window
final int endOfEmptyData = findNextConsensusElement(header, start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
} else
throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start));
}
}
return reads;
@ -474,24 +467,21 @@ public class SlidingWindow {
* @param type the synthetic reads you want to close
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
*/
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
GATKSAMRecord read = null;
ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
switch (type) {
case CONSENSUS:
read = finalizeRunningConsensus();
break;
case FILTERED:
read = finalizeFilteredDataConsensus();
break;
case BOTH:
read = finalizeRunningConsensus();
if (read != null) list.add(read);
read = finalizeFilteredDataConsensus();
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
final GATKSAMRecord read = finalizeRunningConsensus();
if ( read != null )
list.add(read);
}
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
final GATKSAMRecord read = finalizeFilteredDataConsensus();
if ( read != null )
list.add(read);
}
if (read != null)
list.add(read);
return list;
}
@ -499,19 +489,145 @@ public class SlidingWindow {
/**
* Looks for the next position without consensus data
*
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
 * @return next position in local coordinates (relative to the windowHeader) without consensus data; otherwise, the start position
*/
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElementIterator.next().hasConsensusData())
break;
index++;
}
return index;
}
/**
 * Looks for the next position with consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (headerElementIterator.next().hasConsensusData())
break;
index++;
}
return index;
}
/**
 * Adds bases to the running consensus synthetic read.
 *
 * Different from the addToSyntheticReads method, this method assumes a contiguous sequence of consensus
 * bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
if (runningConsensus == null)
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData())
throw new ReviewedStingException("No CONSENSUS data in " + index);
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
}
}
/**
* Adds bases to the running filtered data accordingly
*
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if ( start < end ) {
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasFilteredData()) {
// find the end of the consecutive filtered data in the window
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
if (endOfFiltered <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
// add to running filtered consensus and recurse
addToFilteredData(header, start, endOfFiltered);
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
} else {
// add any outstanding filtered data
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
// find the end of the consecutive empty data in the window
final int endOfEmptyData = findNextFilteredElement(header, start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
}
}
return reads;
}
/**
 * Looks for the next position without filtered data
 *
 * @param header the header to check
 * @param start beginning of the filtered region
 * @param upTo limit to search for another non-filtered element
 * @return next position in local coordinates (relative to the windowHeader) without filtered data; otherwise, the start position
*/
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (!headerElementIterator.next().hasFilteredData())
break;
index++;
}
@ -519,43 +635,21 @@ public class SlidingWindow {
}
/**
* Looks for the next position without filtered data
 * Looks for the next position with filtered data
*
* @param start beginning of the region
* @param upTo limit to search for
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
* @param header the header to check
* @param start beginning of the filtered region
 * @param upTo limit to search for another filtered element
 * @return next position in local coordinates (relative to the windowHeader) with filtered data; otherwise, the start position
*/
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasFilteredData() || headerElement.hasConsensusData())
break;
index++;
}
return index;
}
/**
* Looks for the next non-empty header element
*
* @param start beginning of the region
* @param upTo limit to search for
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
*/
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.isEmpty())
if (headerElementIterator.next().hasFilteredData())
break;
index++;
}
@ -571,67 +665,26 @@ public class SlidingWindow {
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
private ObjectArrayList<GATKSAMRecord> addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
ObjectArrayList<GATKSAMRecord> result = new ObjectArrayList<GATKSAMRecord>();
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
if (filteredDataConsensus == null)
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData())
throw new ReviewedStingException("Found consensus data inside region to add to filtered data.");
final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasFilteredData())
throw new ReviewedStingException("No filtered data in " + index);
if ( filteredDataConsensus.getRefStart() + filteredDataConsensus.size() != headerElement.getLocation() ) {
result.add(finalizeFilteredDataConsensus());
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType);
}
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
}
return result;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
* bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
if (runningConsensus == null)
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData())
throw new ReviewedStingException("No CONSENSUS data in " + index);
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
}
}
/**
@ -726,7 +779,7 @@ public class SlidingWindow {
for ( int i = start; i <= stop; i++ ) {
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT);
final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT);
// we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
if ( nAlleles > 2 || nAlleles == -1 )
@ -760,8 +813,8 @@ public class SlidingWindow {
if ( headerElement.getLocation() == positionToSkip )
continue;
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 )
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
return true;
}
@ -784,6 +837,7 @@ public class SlidingWindow {
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
return result; // finalized reads will be downsampled if necessary
@ -914,6 +968,7 @@ public class SlidingWindow {
if (!windowHeader.isEmpty()) {
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
}
}
@ -983,7 +1038,7 @@ public class SlidingWindow {
// initialize the mapping from base (allele) to header
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) {
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
final int currentIndex = alleleHeaderMap.size();
if ( currentIndex > 1 )
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");
@ -997,7 +1052,7 @@ public class SlidingWindow {
if ( alleleHeaderMap.size() != 2 )
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
final ObjectList<GATKSAMRecord> readsToRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>();
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
for ( final GATKSAMRecord read : readsInWindow ) {
@ -1006,38 +1061,36 @@ public class SlidingWindow {
continue;
// remove all other reads from the read cache since we're going to use them here
readsInWindow.remove(read);
readsToRemove.add(read);
// if the read falls before the het position, we don't need to look at it
if ( read.getSoftEnd() < globalHetRefPosition )
// if the read falls before the het position or has low MQ, we don't need to look at it
if ( read.getSoftEnd() < globalHetRefPosition || read.getMappingQuality() < MIN_MAPPING_QUALITY)
continue;
// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
removeFromHeader(windowHeader, read);
// make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus)
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {
// where on the read is the het position?
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
// where on the read is the het position?
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
// this is safe because indels are not supported
final byte base = read.getReadBases()[readPosOfHet];
// this is safe because indels are not supported
final byte base = read.getReadBases()[readPosOfHet];
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];
// check which allele this read represents
final Integer allele = alleleHeaderMap.get(base);
// check which allele this read represents
final Integer allele = alleleHeaderMap.get(base);
// ignore the read if it represents a base that's not part of the consensus
if ( allele != null ) {
// add to the appropriate polyploid header
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
header.reads.add(read);
addToHeader(header.consensus, read);
}
// ignore the read if it represents a base that's not part of the consensus
if ( allele != null ) {
// add to the appropriate polyploid header
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
header.reads.add(read);
addToHeader(header.consensus, read);
}
}
for ( final GATKSAMRecord read : readsToRemove )
readsInWindow.remove(read);
// create the polyploid synthetic reads if we can
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
@ -1171,15 +1224,15 @@ public class SlidingWindow {
int readBaseIndex = 0;
HeaderElement headerElement;
for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) {
for ( final CigarElement cigarElement : read.getCigar().getCigarElements() ) {
switch ( cigarElement.getOperator() ) {
case H:
break;
case I:
readBaseIndex += cigarElement.getLength();
// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
if ( removeRead && locationIndex == 0 )
// special case, if we don't have the previous header element anymore, don't worry about it.
if ( locationIndex == 0 )
break;
// insertions are added to the base to the left (previous element)
@ -1200,9 +1253,8 @@ public class SlidingWindow {
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
else
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
locationIndex++;
}
locationIndex += nDeletionBases;
break;
case S:
case M:
@ -1211,6 +1263,8 @@ public class SlidingWindow {
case X:
final int nBasesToAdd = cigarElement.getLength();
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
final byte[] readBases = read.getReadBases();
final byte[] readQuals = read.getBaseQualities();
final boolean readHasIndelQuals = read.hasBaseIndelQualities();
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
@ -1219,14 +1273,15 @@ public class SlidingWindow {
headerElement = headerElementIterator.next();
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
if ( removeRead )
headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
else
headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
readBaseIndex++;
locationIndex++;
}
locationIndex += nBasesToAdd;
break;
default:
break;

View File

@ -102,6 +102,9 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
public double qual_epsilon = 0.10;
@Argument(fullName = "exclude_low_mq", shortName = "excludeMQ", doc = "ignore reads with mapping quality below this number", required = false)
public int excludeMQ = 0;
@Output
protected PrintStream out;
@ -146,7 +149,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
}
private boolean isGoodRead(final PileupElement p) {
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20;
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= excludeMQ;
}
private int getTagIndex(final List<String> tags) {

View File

@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest {
BaseCounts counts = new BaseCounts();
for ( int qual : test.quals )
counts.incr(BaseIndex.A, (byte)qual, 20);
counts.incr(BaseIndex.A, (byte)qual, 20, false);
final int actualSum = (int)counts.getSumQuals((byte)'A');
final int expectedSum = qualSum(test.quals);

View File

@ -128,8 +128,8 @@ public class HeaderElementUnitTest extends BaseTest {
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
}
@ -177,7 +177,7 @@ public class HeaderElementUnitTest extends BaseTest {
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
}
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue);
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);
Assert.assertEquals(nAllelesSeen, nAllelesExpected);
@ -195,9 +195,14 @@ public class HeaderElementUnitTest extends BaseTest {
if ( count == 0 )
continue;
final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);
final boolean isSignificant;
if ( count <= HeaderElement.MIN_COUNT_FOR_USING_PVALUE ) {
isSignificant = MathUtils.binomialCumulativeProbability(total, 0, count) > targetPvalue;
} else {
isSignificant = (count >= targetPvalue * total);
}
if ( pvalue > targetPvalue ) {
if ( isSignificant ) {
if ( index == BaseIndex.D.index )
return -1;
result++;

View File

@ -157,46 +157,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false);
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
}
@Test(enabled = true)
public void testDefaultCompressionWithKnowns() {
RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true);
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
}
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
@Test(enabled = true)
public void testMultipleIntervals() {
RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false);
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
}
@Test(enabled = true)
public void testMultipleIntervalsWithKnowns() {
RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true);
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
}
final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f";
@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false);
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
}
@Test(enabled = true)
public void testHighCompressionWithKnowns() {
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true);
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
}
@Test(enabled = true)
public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false);
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
}
@Test(enabled = true)
public void testLowCompressionWithKnowns() {
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true);
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
}
@Test(enabled = true)
@ -208,7 +206,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testIndelCompression() {
final String md5 = "56154baed62be07008d3684a0a4c0996";
final String md5 = "d20e6012300898a0315c795cab7583d8";
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
}
@ -216,25 +214,27 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testFilteredDeletionCompression() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1")));
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
}
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
@Test(enabled = true)
public void testCoReduction() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8")));
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
}
@Test(enabled = true)
public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b")));
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
}
@Test(enabled = true)
public void testInsertionsAtEdgeOfConsensus() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0")));
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
}
/**
@ -248,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
}
/**
@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d")));
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
}
/**
@ -269,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testReadOffContig() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01")));
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
}
/**
@ -279,7 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testPairedReadsInVariantRegion() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
" -o %s --downsample_coverage 250 -dcov 50 ";
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d")));
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
}
}

View File

@ -251,14 +251,15 @@ public class SlidingWindowUnitTest extends BaseTest {
}
private class ConsensusCreationTest {
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression;
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage;
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
public final String description;
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
// first, add the basic reads to the collection
myReads.addAll(basicReads);
@ -268,10 +269,11 @@ public class SlidingWindowUnitTest extends BaseTest {
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
}
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression, final int expectedNumberOfReadsAtDeepCoverage) {
this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
this.description = String.format("%s %d %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
// first, add the basic reads to the collection
myReads.addAll(basicReads);
@ -319,46 +321,50 @@ public class SlidingWindowUnitTest extends BaseTest {
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
private static final GenomeLoc loc320 = new UnvalidatingGenomeLoc("1", 0, 1000320, 1000320);
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);
private static final int DEEP_COVERAGE_ITERATIONS = 100;
@DataProvider(name = "ConsensusCreation")
public Object[][] createConsensusCreationTestData() {
List<Object[]> tests = new ArrayList<Object[]>();
// test high quality reads and bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
// test low quality reads
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
// test low quality bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
// test mixture
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
// test I/D operators
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
return tests.toArray(new Object[][]{});
}
@ -368,14 +374,14 @@ public class SlidingWindowUnitTest extends BaseTest {
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
// test WITHOUT het compression
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);
// test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
for ( int i = 0; i < 1200; i++ )
@ -384,11 +390,28 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
// test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
// test with deep coverage
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 0, ReduceReads.DownsampleStrategy.Normal, false);
for ( int i = 0; i < DEEP_COVERAGE_ITERATIONS; i++ ) {
for ( final GATKSAMRecord read : test.myReads ) {
final GATKSAMRecord copy = ArtificialSAMUtils.createArtificialRead(header, read.getReadName() + "_" + (i+1), 0, read.getAlignmentStart(), readLength);
copy.setReadBases(read.getReadBases());
copy.setBaseQualities(read.getBaseQualities());
copy.setMappingQuality(read.getMappingQuality());
copy.setReadNegativeStrandFlag(read.getReadNegativeStrandFlag());
if ( read.getCigar() != null )
copy.setCigar(read.getCigar());
slidingWindow.addRead(copy);
}
}
result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsAtDeepCoverage);
}
@Test
@ -412,14 +435,14 @@ public class SlidingWindowUnitTest extends BaseTest {
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();
// test WITHOUT het compression
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
// test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
for ( int i = 0; i < readLength; i++ )
@ -428,13 +451,59 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
// test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
result = slidingWindow.close(knownSNPs);
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
}
@Test
public void testAddingReadPairWithSameCoordinates() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
read1.setReadBases(new byte[]{(byte)'A'});
read1.setBaseQualities(new byte[]{(byte)'A'});
read1.setMappingQuality(30);
read1.setReadNegativeStrandFlag(false);
slidingWindow.addRead(read1);
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, globalStartPosition, 1);
read2.setReadBases(new byte[]{(byte)'A'});
read2.setBaseQualities(new byte[]{(byte)'A'});
read2.setMappingQuality(30);
read2.setReadNegativeStrandFlag(true);
slidingWindow.addRead(read2);
Assert.assertEquals(slidingWindow.readsInWindow.size(), 2);
}
@Test
public void testOnlySpanningReadHasLowQual() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
final GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header, "basicRead1", 0, globalStartPosition, 100);
final GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header, "basicRead2", 0, globalStartPosition + 50, 100);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
read1.setReadBases(bases);
read2.setReadBases(bases);
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
baseQuals[80] = (byte)10;
read1.setBaseQualities(baseQuals);
read2.setBaseQualities(baseQuals);
read1.setMappingQuality(30);
read2.setMappingQuality(30);
slidingWindow.addRead(read1);
slidingWindow.addRead(read2);
Assert.assertEquals(slidingWindow.close(null).getFirst().size(), 1);
}
///////////////////////////////////////////////////////////
//// This section tests the downsampling functionality ////
@ -452,7 +521,7 @@ public class SlidingWindowUnitTest extends BaseTest {
@Test(dataProvider = "Downsampling", enabled = true)
public void testDownsamplingTest(final int dcov) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false);
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
@ -500,7 +569,7 @@ public class SlidingWindowUnitTest extends BaseTest {
@Test(dataProvider = "ConsensusQuals", enabled = true)
public void testConsensusQualsTest(QualsTest test) {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, minUsableConsensusQual, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet<GenomeLoc>());
@ -569,7 +638,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
read.setMappingQuality(30);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
Assert.assertEquals(newIndex, start > 0 ? start : 0);
@ -613,7 +682,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setMappingQuality(30);
// add the read
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
for ( int i = 0; i < start; i++ )
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
@ -628,7 +697,6 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
}
//////////////////////////////////////////////////////////////////////////////////
//// This section tests functionality related to polyploid consensus creation ////
//////////////////////////////////////////////////////////////////////////////////
@ -691,7 +759,7 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setMappingQuality(30);
// add the read
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, 0);
// set up and add a soft-clipped read if requested

View File

@ -383,7 +383,7 @@ public class MathUtils {
for (int hits = k_start; hits <= k_end; hits++) {
prevProb = cumProb;
double probability = binomialProbability(n, hits);
final double probability = binomialProbability(n, hits);
cumProb += probability;
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
probCache = probCache.add(new BigDecimal(prevProb));