Merge pull request #172 from broadinstitute/eb_reducereads_het_improvements

Eb reducereads het improvements

commit b27706859a
@@ -69,10 +69,30 @@ public class BaseAndQualsCounts extends BaseCounts {
     private long sumInsertionQual_N = 0;
     private long sumDeletionQual_N = 0;
 
-    public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
+    /*
+     * Increments the count
+     *
+     * @param base the base
+     * @param baseQual the base quality
+     * @param insQual the insertion quality
+     * @param delQual the deletion quality
+     * @param baseMappingQual the mapping quality
+     * @param isLowQualBase true if the base is low quality
+     */
+    public void incr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
+        // if we already have high quality bases, ignore low quality ones
+        if ( isLowQualBase && !isLowQuality() )
+            return;
+
+        // if this is a high quality base then remove any low quality bases and start from scratch
+        if ( !isLowQualBase && isLowQuality() ) {
+            if ( totalCount() > 0 )
+                clear();
+            setLowQuality(false);
+        }
+
         final BaseIndex i = BaseIndex.byteToBase(base);
-        super.incr(i, baseQual);
+        super.incr(i, baseQual, baseMappingQual);
         switch (i) {
             case A: sumInsertionQual_A += insQual; sumDeletionQual_A += delQual; break;
             case C: sumInsertionQual_C += insQual; sumDeletionQual_C += delQual; break;
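For reference, here is a minimal standalone model (a hypothetical class, not part of the commit) of the low-quality bookkeeping the new `incr()` introduces: a counter starts in "low quality" mode and flips permanently to high-quality mode the first time a high-quality observation arrives, discarding whatever low-quality tallies it held until then.

```java
// Hypothetical restatement of the new low/high-quality state machine in incr()
public class LowQualityAwareCounter {
    private int count = 0;
    private boolean lowQuality = true; // represents low-quality data until told otherwise

    public void incr(final boolean isLowQualObservation) {
        // once we hold high-quality data, low-quality observations are ignored
        if (isLowQualObservation && !lowQuality)
            return;
        // first high-quality observation: drop the low-quality tallies and start over
        if (!isLowQualObservation && lowQuality) {
            count = 0;
            lowQuality = false;
        }
        count++;
    }

    public int count() { return count; }
}
```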
@@ -84,9 +104,23 @@ public class BaseAndQualsCounts extends BaseCounts {
         }
     }
 
-    public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual) {
+    /*
+     * Decrements the count
+     *
+     * @param base the base
+     * @param baseQual the base quality
+     * @param insQual the insertion quality
+     * @param delQual the deletion quality
+     * @param baseMappingQual the mapping quality
+     * @param isLowQualBase true if the base is low quality
+     */
+    public void decr(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQual, final boolean isLowQualBase) {
+        // if this is not the right type of base, ignore it
+        if ( isLowQualBase != isLowQuality() )
+            return;
+
         final BaseIndex i = BaseIndex.byteToBase(base);
-        super.decr(i, baseQual);
+        super.decr(i, baseQual, baseMappingQual);
         switch (i) {
             case A: sumInsertionQual_A -= insQual; sumDeletionQual_A -= delQual; break;
             case C: sumInsertionQual_C -= insQual; sumDeletionQual_C -= delQual; break;
@@ -131,4 +165,13 @@ public class BaseAndQualsCounts extends BaseCounts {
             default: throw new IllegalArgumentException(base.name());
         }
     }
+
+    /**
+     * Clears out all stored data in this object
+     */
+    public void clear() {
+        super.clear();
+        sumInsertionQual_A = sumInsertionQual_C = sumInsertionQual_G = sumInsertionQual_T = sumInsertionQual_D = sumInsertionQual_I = sumInsertionQual_N = 0;
+        sumDeletionQual_A = sumDeletionQual_C = sumDeletionQual_G = sumDeletionQual_T = sumDeletionQual_D = sumDeletionQual_I = sumDeletionQual_N = 0;
+    }
 }
@@ -48,6 +48,8 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import org.broadinstitute.sting.utils.MathUtils;
 
 
 /**

@@ -78,6 +80,8 @@ import com.google.java.contract.Requires;
     private int count_N = 0;
    private int sumQual_N = 0;
     private int totalCount = 0; // keeps track of total count since this is requested so often
+    private final IntArrayList mappingQualities = new IntArrayList(); // keeps the mapping quality of each read that contributed to this
+    private boolean isLowQuality = true; // this object represents low quality bases unless we are told otherwise
 
 
     public static BaseCounts createWithCounts(int[] countsACGT) {

@@ -100,6 +104,7 @@ import com.google.java.contract.Requires;
         this.count_I += other.count_I;
         this.count_N += other.count_N;
         this.totalCount += other.totalCount;
+        this.mappingQualities.addAll(other.mappingQualities);
     }
 
     @Requires("other != null")

@@ -112,6 +117,7 @@ import com.google.java.contract.Requires;
         this.count_I -= other.count_I;
         this.count_N -= other.count_N;
         this.totalCount -= other.totalCount;
+        this.mappingQualities.removeAll(other.mappingQualities);
     }
 
     @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")

@@ -120,7 +126,7 @@ import com.google.java.contract.Requires;
     }
 
     @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
-    public void incr(final BaseIndex base, final byte qual) {
+    public void incr(final BaseIndex base, final byte qual, final int mappingQuality) {
         switch (base) {
             case A: ++count_A; sumQual_A += qual; break;
             case C: ++count_C; sumQual_C += qual; break;

@@ -131,6 +137,7 @@ import com.google.java.contract.Requires;
             case N: ++count_N; sumQual_N += qual; break;
         }
         ++totalCount;
+        mappingQualities.add(mappingQuality);
     }
 
     @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")

@@ -152,7 +159,7 @@ import com.google.java.contract.Requires;
     }
 
     @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
-    public void decr(final BaseIndex base, final byte qual) {
+    public void decr(final BaseIndex base, final byte qual, final int mappingQuality) {
         switch (base) {
             case A: --count_A; sumQual_A -= qual; break;
             case C: --count_C; sumQual_C -= qual; break;

@@ -163,6 +170,7 @@ import com.google.java.contract.Requires;
             case N: --count_N; sumQual_N -= qual; break;
         }
         --totalCount;
+        mappingQualities.remove((Integer) mappingQuality);
     }
 
     @Ensures("result >= 0")
@@ -229,6 +237,15 @@ import com.google.java.contract.Requires;
         return totalCount;
     }
 
+    /**
+     * The RMS of the mapping qualities of all reads that contributed to this object
+     *
+     * @return the RMS of the mapping qualities of all reads that contributed to this object
+     */
+    public double getRMS() {
+        return MathUtils.rms(mappingQualities);
+    }
+
     /**
      * Given a base, it returns the proportional count of this base compared to all other bases
      *
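For context, `MathUtils.rms` over the stored mapping qualities is expected to compute a root mean square; a minimal sketch (not the GATK implementation) of that computation:

```java
// Sketch: root mean square of the stored mapping qualities --
// the square root of the mean of the squared values.
static double rms(final int[] mappingQualities) {
    if (mappingQualities.length == 0)
        return 0.0;
    double sumOfSquares = 0.0;
    for (final int mq : mappingQualities)
        sumOfSquares += (double) mq * mq;
    return Math.sqrt(sumOfSquares / mappingQualities.length);
}
// e.g. rms(new int[]{60, 60, 20}) ~ 50.3, noticeably higher than the plain mean (~46.7),
// since RMS weights high mapping qualities more heavily
```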
@@ -325,4 +342,26 @@ import com.google.java.contract.Requires;
         final int total = totalCountWithoutIndels();
         return (total == 0) ? 0.0 : (double)countOfBase(base) / (double)total;
     }
+
+    /**
+     * @return true if this instance represents low quality bases
+     */
+    public boolean isLowQuality() { return isLowQuality; }
+
+    /**
+     * Sets the low quality value
+     *
+     * @param value true if this instance represents low quality bases, false otherwise
+     */
+    public void setLowQuality(final boolean value) { isLowQuality = value; }
+
+    /**
+     * Clears out all stored data in this object
+     */
+    public void clear() {
+        count_A = count_C = count_G = count_T = count_D = count_I = count_N = 0;
+        sumQual_A = sumQual_C = sumQual_G = sumQual_T = sumQual_D = sumQual_I = sumQual_N = 0;
+        totalCount = 0;
+        mappingQualities.clear();
+    }
 }
@@ -46,14 +46,10 @@
 
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
-import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.objects.ObjectArrayList;
-import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
 
 
 /**
  * The element that describes the header of the sliding window.

@@ -68,7 +64,6 @@ public class HeaderElement {
     private int insertionsToTheRight;    // How many reads in this site had insertions to the immediate right
     private int nSoftClippedBases;       // How many bases in this site came from soft clipped bases
     private int location;                // Genome location of this site (the sliding window knows which contig we're at)
-    private IntArrayList mappingQuality; // keeps the mapping quality of each read that contributed to this element (site)
 
     public int getLocation() {
         return location;
@@ -89,7 +84,7 @@ public class HeaderElement {
      * @param location the reference location for the new element
      */
     public HeaderElement(final int location) {
-        this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new IntArrayList());
+        this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location);
     }
 
     /**

@@ -99,7 +94,7 @@ public class HeaderElement {
      * @param location the reference location for the new element
      */
     public HeaderElement(final int location, final int insertionsToTheRight) {
-        this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location, new IntArrayList());
+        this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, 0, location);
     }
 
     /**

@@ -110,16 +105,14 @@ public class HeaderElement {
      * @param insertionsToTheRight number of insertions to the right of this HeaderElement
      * @param nSoftClippedBases number of softclipped bases of this HeaderElement
      * @param location the reference location of this reference element
-     * @param mappingQuality the list of mapping quality values of all reads that contributed to this
-     *                       HeaderElement
      */
-    public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location, IntArrayList mappingQuality) {
+    public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location) {
         this.consensusBaseCounts = consensusBaseCounts;
         this.filteredBaseCounts = filteredBaseCounts;
         this.insertionsToTheRight = insertionsToTheRight;
         this.nSoftClippedBases = nSoftClippedBases;
         this.location = location;
-        this.mappingQuality = mappingQuality;
     }
 
     /**
@@ -128,35 +121,52 @@ public class HeaderElement {
      *
      * @return true if site is variant by any definition. False otherwise.
      */
-    public boolean isVariant(double minVariantProportion, double minIndelProportion) {
-        return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
+    public boolean isVariant(double minVariantPvalue, double minIndelProportion) {
+        return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
     }
 
     /**
      * Adds a new base to the HeaderElement updating all counts accordingly
      *
      * @param base the base to add
      * @param baseQual the base quality
      * @param insQual the base insertion quality
      * @param delQual the base deletion quality
      * @param baseMappingQuality the mapping quality of the read this base belongs to
      * @param minBaseQual the minimum base qual allowed to be a good base
      * @param minMappingQual the minimum mapping qual allowed to be a good read
      * @param isSoftClipped true if the base is soft-clipped in the original read
      */
     public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
-        if (basePassesFilters(baseQual, minBaseQual, baseMappingQuality, minMappingQual))
-            consensusBaseCounts.incr(base, baseQual, insQual, delQual); // If the base passes filters, it is included in the consensus base counts
+        // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
+        if ( baseMappingQuality >= minMappingQual )
+            consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
         else
-            filteredBaseCounts.incr(base, baseQual, insQual, delQual); // If the base fails filters, it is included with the filtered data base counts
+            filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
 
-        this.mappingQuality.add(baseMappingQuality); // Filtered or not, the RMS mapping quality includes all bases in this site
-        nSoftClippedBases += isSoftClipped ? 1 : 0; // if this base is softclipped, add the counter
+        nSoftClippedBases += isSoftClipped ? 1 : 0;
     }
 
     /**
      * Removes a base from the HeaderElement updating all counts accordingly
      *
      * @param base the base to remove
      * @param baseQual the base quality
      * @param insQual the base insertion quality
      * @param delQual the base deletion quality
      * @param baseMappingQuality the mapping quality of the read this base belongs to
      * @param minBaseQual the minimum base qual allowed to be a good base
      * @param minMappingQual the minimum mapping qual allowed to be a good read
      * @param isSoftClipped true if the base is soft-clipped in the original read
      */
     public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
-        if (basePassesFilters(baseQual, minBaseQual, baseMappingQuality, minMappingQual))
-            consensusBaseCounts.decr(base, baseQual, insQual, delQual); // If the base passes filters, it is included in the consensus base counts
+        // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
+        if ( baseMappingQuality >= minMappingQual )
+            consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
         else
-            filteredBaseCounts.decr(base, baseQual, insQual, delQual); // If the base fails filters, it is included with the filtered data base counts
+            filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
 
-        this.mappingQuality.remove((Integer) baseMappingQuality); // Filtered or not, the RMS mapping quality includes all bases in this site
-        nSoftClippedBases -= isSoftClipped ? 1 : 0; // if this base is softclipped, add the counter
+        nSoftClippedBases -= isSoftClipped ? 1 : 0;
     }
 
     /**
      * Adds an insertion to the right of the HeaderElement and updates all counts accordingly. All insertions
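The new routing in `addBase`/`removeBase` replaces the old all-or-nothing `basePassesFilters` check with a two-level scheme: mapping quality alone decides which bucket (consensus vs. filtered) a base is counted in, while base quality merely tags the observation as low quality inside that bucket. A minimal standalone restatement of that decision (hypothetical helper names, not part of the commit):

```java
// Hypothetical restatement of the new two-level filter in addBase()/removeBase()
enum Bucket { CONSENSUS, FILTERED }

static Bucket pickBucket(final int baseMappingQuality, final int minMappingQual) {
    // mapping quality alone decides where the base is counted
    return baseMappingQuality >= minMappingQual ? Bucket.CONSENSUS : Bucket.FILTERED;
}

static boolean isLowQual(final byte baseQual, final int minBaseQual) {
    // base quality no longer rejects the base outright; it just flags it as low quality,
    // and BaseAndQualsCounts.incr discards low-quality tallies once high-quality data arrives
    return baseQual < minBaseQual;
}
```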
@@ -193,15 +203,6 @@ public class HeaderElement {
         return (!hasFilteredData() && !hasConsensusData());
     }
 
-    /**
-     * The RMS of the mapping qualities of all reads that contributed to this HeaderElement
-     *
-     * @return the RMS of the mapping qualities of all reads that contributed to this HeaderElement
-     */
-    public double getRMS() {
-        return MathUtils.rms(mappingQuality);
-    }
-
     /**
      * removes an insertion from this element (if you removed a read that had an insertion)
      */
@@ -236,7 +237,7 @@ public class HeaderElement {
     /**
      * Whether or not the HeaderElement is variant due to excess deletions
      *
-     * @return whether or not the HeaderElement is variant due to excess insertions
+     * @return whether or not the HeaderElement is variant due to excess deletions
      */
     private boolean isVariantFromDeletions(double minIndelProportion) {
         return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
@@ -245,12 +246,15 @@ public class HeaderElement {
     /**
      * Whether or not the HeaderElement is variant due to excess mismatches
      *
-     * @return whether or not the HeaderElement is variant due to excess insertions
+     * @param minVariantPvalue the minimum pvalue to call a site variant.
+     * @return whether or not the HeaderElement is variant due to excess mismatches
      */
-    protected boolean isVariantFromMismatches(double minVariantProportion) {
-        BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
-        double mostCommonProportion = consensusBaseCounts.baseCountProportionWithoutIndels(mostCommon);
-        return mostCommonProportion != 0.0 && mostCommonProportion < (1 - minVariantProportion);
+    protected boolean isVariantFromMismatches(double minVariantPvalue) {
+        final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
+        final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
+        final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
+        final double pvalue = countOfOtherBases == 0 ? 0.0 : MathUtils.binomialCumulativeProbability(totalCount, 0, countOfOtherBases);
+        return pvalue > minVariantPvalue;
     }
 
     /**
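A sketch of what this p-value test is doing, assuming `binomialCumulativeProbability(n, 0, k)` returns P(X ≤ k) for X ~ Binomial(n, p). The success probability GATK uses internally is not visible in this diff, so p = 0.5 (the natural null model for a diploid het site) is an assumption here; the sketch uses Apache commons-math 2.x, which GATK of this era depended on:

```java
// Hedged sketch: flag a site as variant when its mismatch count is not wildly
// improbable under a Binomial(n, 0.5) het null (the p = 0.5 null is an assumption).
import org.apache.commons.math.distribution.BinomialDistributionImpl;

public class MismatchPvalueSketch {
    // P(X <= k) for X ~ Binomial(n, 0.5)
    static double cumulative(final int n, final int k) throws Exception {
        return new BinomialDistributionImpl(n, 0.5).cumulativeProbability(k);
    }

    public static void main(String[] args) throws Exception {
        final double minVariantPvalue = 0.01; // the walker's new default
        // 100 bases at the site, 3 disagree with the most common base:
        // P(X <= 3) ~ 1e-25 -> consensus, reduce it
        System.out.println(cumulative(100, 3) > minVariantPvalue);  // false
        // 100 bases, 40 disagree: P(X <= 40) ~ 0.028 -> plausibly a het,
        // trigger a variant region
        System.out.println(cumulative(100, 40) > minVariantPvalue); // true
    }
}
```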
@@ -263,42 +267,44 @@ public class HeaderElement {
         return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
     }
 
-    protected boolean basePassesFilters(byte baseQual, int minBaseQual, int baseMappingQuality, int minMappingQual) {
-        return baseQual >= minBaseQual && baseMappingQuality >= minMappingQual;
-    }
-
     /**
      * Calculates the number of alleles necessary to represent this site.
      *
-     * @param minVariantProportion the minimum proportion to call a site variant.
-     * @param allowDeletions should we allow deletions?
-     * @return the number of alleles necessary to represent this site or -1 if allowDeletions is false and there are a sufficient number of them
+     * @param minVariantPvalue the minimum pvalue to call a site variant.
+     * @return the number of alleles necessary to represent this site or -1 if there are too many indels
      */
-    public int getNumberOfAlleles(final double minVariantProportion, final boolean allowDeletions) {
-        final List<BaseIndex> alleles = getAlleles(minVariantProportion, allowDeletions);
+    public int getNumberOfBaseAlleles(final double minVariantPvalue) {
+        final ObjectArrayList<BaseIndex> alleles = getAlleles(minVariantPvalue);
         return alleles == null ? -1 : alleles.size();
     }
 
     /**
      * Calculates the alleles necessary to represent this site.
      *
-     * @param minVariantProportion the minimum proportion to call a site variant.
-     * @param allowDeletions should we allow deletions?
-     * @return the list of alleles necessary to represent this site or null if allowDeletions is false and there are a sufficient number of them
+     * @param minVariantPvalue the minimum pvalue to call a site variant.
+     * @return the list of alleles necessary to represent this site or null if there are too many indels
      */
-    public List<BaseIndex> getAlleles(final double minVariantProportion, final boolean allowDeletions) {
+    public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue) {
         // make sure we have bases at all
         final int totalBaseCount = consensusBaseCounts.totalCount();
         if ( totalBaseCount == 0 )
-            return Collections.emptyList();
+            return new ObjectArrayList<BaseIndex>(0);
 
-        final int minBaseCountForRelevantAlleles = Math.max(1, (int)(minVariantProportion * totalBaseCount));
+        // next, check for insertions
+        if ( hasSignificantCount(insertionsToTheRight, minVariantPvalue) )
+            return null;
 
-        final List<BaseIndex> alleles = new ArrayList<BaseIndex>(4);
+        // finally, check for the bases themselves (including deletions)
+        final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<BaseIndex>(4);
         for ( final BaseIndex base : BaseIndex.values() ) {
             final int baseCount = consensusBaseCounts.countOfBase(base);
+            if ( baseCount == 0 )
+                continue;
 
-            if ( baseCount >= minBaseCountForRelevantAlleles ) {
-                if ( !allowDeletions && base == BaseIndex.D )
+            final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, baseCount);
+
+            if ( pvalue > minVariantPvalue ) {
+                if ( base == BaseIndex.D )
                     return null;
                 alleles.add(base);
             }
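Continuing the Binomial(n, 0.5) assumption from the sketch above, a worked example (hypothetical counts, not from the commit) of how `getAlleles` separates real alleles from sequencing noise:

```java
// Worked example: a site with 100 consensus bases split A:55, G:42, T:3,
// tested at the default minVariantPvalue = 0.01 under the assumed Binomial(100, 0.5) null:
//   P(X <= 55) ~ 0.86  > 0.01  -> A is an allele
//   P(X <= 42) ~ 0.07  > 0.01  -> G is an allele
//   P(X <= 3)  ~ 1e-25 <= 0.01 -> T is noise, dropped
// getAlleles would return [A, G] and getNumberOfBaseAlleles returns 2,
// i.e. a candidate diploid het site; a D (deletion) allele surviving the
// test would instead make the method return null (too many indels).
```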
@@ -309,15 +315,27 @@ public class HeaderElement {
     /*
      * Checks whether there are a significant number of softclips.
      *
-     * @param minVariantProportion the minimum proportion to consider something significant.
+     * @param minVariantPvalue the minimum pvalue to call a site variant.
      * @return true if there are significant softclips, false otherwise
      */
-    public boolean hasSignificantSoftclips(final double minVariantProportion) {
+    public boolean hasSignificantSoftclips(final double minVariantPvalue) {
+        return hasSignificantCount(nSoftClippedBases, minVariantPvalue);
+    }
+
+    /*
+     * Checks whether a given count is significant.
+     *
+     * @param count the count to test against
+     * @param minVariantPvalue the minimum pvalue to call a site variant.
+     * @return true if there is a significant count given the provided pvalue, false otherwise
+     */
+    private boolean hasSignificantCount(final int count, final double minVariantPvalue) {
         final int totalBaseCount = consensusBaseCounts.totalCount();
-        if ( totalBaseCount == 0 )
+        if ( count == 0 || totalBaseCount == 0 )
             return false;
 
-        final int minBaseCountForSignificance = Math.max(1, (int)(minVariantProportion * totalBaseCount));
-        return nSoftClippedBases >= minBaseCountForSignificance;
+        // technically, count can be greater than totalBaseCount (because of the way insertions are counted) so we need to account for that
+        final double pvalue = MathUtils.binomialCumulativeProbability(totalBaseCount, 0, Math.min(count, totalBaseCount));
+        return pvalue > minVariantPvalue;
     }
 }
@@ -96,14 +96,14 @@ public class MultiSampleCompressor {
                                  final int contextSize,
                                  final int downsampleCoverage,
                                  final int minMappingQuality,
-                                 final double minAltProportionToTriggerVariant,
+                                 final double minAltPValueToTriggerVariant,
                                  final double minIndelProportionToTriggerVariant,
                                  final int minBaseQual,
                                  final ReduceReads.DownsampleStrategy downsampleStrategy) {
         for ( String name : SampleUtils.getSAMFileSamples(header) ) {
             compressorsPerSample.put(name,
                     new SingleSampleCompressor(contextSize, downsampleCoverage,
-                            minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
+                            minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
         }
     }
@@ -64,6 +64,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -139,19 +140,21 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
      * towards variable regions.
      */
     @Argument(fullName = "minimum_base_quality_to_consider", shortName = "minqual", doc = "", required = false)
-    public byte minBaseQual = 20;
+    public byte minBaseQual = 15;
 
     /**
-     * Reads have notoriously low quality bases on the tails (left and right). Consecutive bases with quality
-     * lower than this threshold will be hard clipped off before entering the reduce reads algorithm.
+     * Reads have notoriously low quality bases on the tails (left and right). Consecutive bases at the tails with
+     * quality at or lower than this threshold will be hard clipped off before entering the reduce reads algorithm.
      */
     @Argument(fullName = "minimum_tail_qualities", shortName = "mintail", doc = "", required = false)
     public byte minTailQuality = 2;
 
     /**
-     * Any number of VCF files representing known SNPs to be used for the experimental polyploid-based reduction.
+     * Any number of VCF files representing known SNPs to be used for the polyploid-based reduction.
      * Could be e.g. dbSNP and/or official 1000 Genomes SNP calls. Non-SNP variants in these files will be ignored.
-     * Note that polyploid ("het") compression will work only when a single SNP is present in a consensus window.
+     * If provided, the polyploid ("het") compression will work only when a single SNP from the known set is present
+     * in a consensus window (otherwise there will be no reduction); if not provided then polyploid compression will
+     * be triggered anywhere there is a single SNP present in a consensus window.
      */
     @Input(fullName="known_sites_for_polyploid_reduction", shortName = "known", doc="Input VCF file(s) with known SNPs", required=false)
     public List<RodBinding<VariantContext>> known = Collections.emptyList();
@@ -204,9 +207,18 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
      * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
      * considered consensus.
      */
+    @Deprecated
     @Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
     public double minAltProportionToTriggerVariant = 0.05;
 
+    /**
+     * Minimum p-value from the binomial distribution of mismatches in a site to trigger a variant region.
+     * Any site with a value falling below this will be considered consensus and reduced (otherwise we will try to trigger polyploid compression).
+     */
+    @Advanced
+    @Argument(fullName = "minimum_alt_pvalue_to_trigger_variant", shortName = "min_pvalue", doc = "", required = false)
+    public double minAltPValueToTriggerVariant = 0.01;
+
     /**
      * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be
      * considered consensus.
@@ -253,7 +265,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
 
     ObjectSortedSet<GenomeLoc> intervalList;
 
-    final ObjectSortedSet<GenomeLoc> knownSnpPositions = new ObjectAVLTreeSet<GenomeLoc>();
+    ObjectSortedSet<GenomeLoc> knownSnpPositions;
 
     // IMPORTANT: DO NOT CHANGE THE VALUE OF THIS CONSTANT VARIABLE; IT IS NOW PERMANENTLY THE @PG NAME THAT EXTERNAL TOOLS LOOK FOR IN THE BAM HEADER
     public static final String PROGRAM_RECORD_NAME = "GATK ReduceReads"; // The name that will go in the @PG tag
@@ -273,6 +285,14 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
         if ( nwayout && out != null )
             throw new UserException.CommandLineException("--out and --nwayout can not be used simultaneously; please use one or the other");
 
+        if ( minAltPValueToTriggerVariant < 0.0 || minAltPValueToTriggerVariant > 1.0 )
+            throw new UserException.BadArgumentValue("--minimum_alt_pvalue_to_trigger_variant", "must be a value between 0 and 1 (inclusive)");
+
+        if ( known.isEmpty() )
+            knownSnpPositions = null;
+        else
+            knownSnpPositions = new ObjectAVLTreeSet<GenomeLoc>();
+
         GenomeAnalysisEngine toolkit = getToolkit();
         readNameHash = new Object2LongOpenHashMap<String>(100000); // prepare the read name hash to keep track of what reads have had their read names compressed
         intervalList = new ObjectAVLTreeSet<GenomeLoc>();          // get the interval list from the engine. If no interval list was provided, the walker will work in WGS mode
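The empty-vs-null split above is load-bearing: `null` means no `-known` files were given and het compression may trigger at any single-SNP window, while a non-null set restricts it to the listed positions. A hypothetical one-line restatement of the convention (mirroring `matchesKnownPosition` in SlidingWindow further down):

```java
// Hypothetical helper, not part of the commit, restating the knownSnpPositions convention:
//   null     -> no -known files given; het compression may trigger anywhere
//   non-null -> het compression allowed only at positions contained in the set
static boolean allowHetCompressionAt(final java.util.Set<Integer> knownSnpPositions, final int position) {
    return knownSnpPositions == null || knownSnpPositions.contains(position);
}
```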
@@ -392,7 +412,7 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
      */
     @Override
     public ReduceReadsStash reduceInit() {
-        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
+        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
     }
 
     /**
@@ -470,8 +490,8 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
      * @param read the current read, used for checking whether there are stale positions we can remove
      */
     protected void clearStaleKnownPositions(final GATKSAMRecord read) {
-        // nothing to clear if empty
-        if ( knownSnpPositions.isEmpty() )
+        // nothing to clear if not used or empty
+        if ( knownSnpPositions == null || knownSnpPositions.isEmpty() )
             return;
 
         // not ready to be cleared until we encounter a read from a different contig
@@ -62,7 +62,7 @@ public class SingleSampleCompressor {
     final private int contextSize;
     final private int downsampleCoverage;
     final private int minMappingQuality;
-    final private double minAltProportionToTriggerVariant;
+    final private double minAltPValueToTriggerVariant;
     final private double minIndelProportionToTriggerVariant;
     final private int minBaseQual;
     final private ReduceReads.DownsampleStrategy downsampleStrategy;

@@ -75,7 +75,7 @@ public class SingleSampleCompressor {
     public SingleSampleCompressor(final int contextSize,
                                   final int downsampleCoverage,
                                   final int minMappingQuality,
-                                  final double minAltProportionToTriggerVariant,
+                                  final double minAltPValueToTriggerVariant,
                                   final double minIndelProportionToTriggerVariant,
                                   final int minBaseQual,
                                   final ReduceReads.DownsampleStrategy downsampleStrategy) {

@@ -83,7 +83,7 @@ public class SingleSampleCompressor {
         this.downsampleCoverage = downsampleCoverage;
         this.minMappingQuality = minMappingQuality;
         this.slidingWindowCounter = 0;
-        this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
+        this.minAltPValueToTriggerVariant = minAltPValueToTriggerVariant;
         this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
         this.minBaseQual = minBaseQual;
         this.downsampleStrategy = downsampleStrategy;

@@ -114,7 +114,7 @@ public class SingleSampleCompressor {
         }
 
         if ( slidingWindow == null) { // this is the first read
-            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
+            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltPValueToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
             slidingWindowCounter++;
         }
@@ -97,9 +97,8 @@ public class SlidingWindow {
     protected int filteredDataConsensusCounter;
     protected String filteredDataReadName;
 
-
     // Additional parameters
-    protected double MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT;   // proportion has to be greater than this value to trigger variant region due to mismatches
+    protected double MIN_ALT_PVALUE_TO_TRIGGER_VARIANT;            // pvalue has to be greater than this value to trigger variant region due to mismatches
     protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
     protected int MIN_BASE_QUAL_TO_COUNT;                          // qual has to be greater than or equal to this value
     protected int MIN_MAPPING_QUALITY;

@@ -150,11 +149,15 @@ public class SlidingWindow {
         this.readsInWindow = new ObjectAVLTreeSet<GATKSAMRecord>();
     }
 
-    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) {
+    public SlidingWindow(final String contig, final int contigIndex, final int contextSize, final SAMFileHeader samHeader,
+                         final GATKSAMReadGroupRecord readGroupAttribute, final int windowNumber,
+                         final double minAltPValueToTriggerVariant, final double minIndelProportionToTriggerVariant,
+                         final int minBaseQual, final int minMappingQuality, final int downsampleCoverage,
+                         final ReduceReads.DownsampleStrategy downsampleStrategy, final boolean hasIndelQualities) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
 
-        this.MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
+        this.MIN_ALT_PVALUE_TO_TRIGGER_VARIANT = minAltPValueToTriggerVariant;
         this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
         this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
         this.MIN_MAPPING_QUALITY = minMappingQuality;
@@ -341,8 +344,14 @@ public class SlidingWindow {
     private final MarkedSites markedSites = new MarkedSites();
 
     /**
-     * returns an array marked with variant and non-variant regions (it uses
-     * markVariantRegions to make the marks)
+     * returns the MarkedSites object so that it can be tested after adding data to the Sliding Window
+     *
+     * @return the Marked Sites object used by this Sliding Window
+     */
+    protected MarkedSites getMarkedSitesForTesting() { return markedSites; }
+
+    /**
+     * returns an array marked with variant and non-variant regions (it uses markVariantRegion to make the marks)
      *
      * @param stop check the window from start to stop (not-inclusive)
      */

@@ -368,8 +377,8 @@ public class SlidingWindow {
         if (headerElementIterator.hasNext()) {
             HeaderElement headerElement = headerElementIterator.next();
 
-            if (headerElement.isVariant(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
-                markVariantRegion(markedSites, i - windowHeaderStartLocation);
+            if (headerElement.isVariant(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
+                markVariantRegion(i - windowHeaderStartLocation);
 
         } else
             break;
@@ -379,14 +388,24 @@ public class SlidingWindow {
     /**
      * Marks the sites around the variant site (as true)
      *
-     * @param markedSites the boolean array to bear the marks
      * @param variantSiteLocation the location where a variant site was found
      */
-    protected void markVariantRegion(final MarkedSites markedSites, final int variantSiteLocation) {
+    protected void markVariantRegion(final int variantSiteLocation) {
         int from = (variantSiteLocation < contextSize) ? 0 : variantSiteLocation - contextSize;
-        int to = (variantSiteLocation + contextSize + 1 > markedSites.getVariantSiteBitSet().length) ? markedSites.getVariantSiteBitSet().length : variantSiteLocation + contextSize + 1;
-        for (int i = from; i < to; i++)
-            markedSites.getVariantSiteBitSet()[i] = true;
+        int to = (variantSiteLocation + contextSize + 1 > markedSites.getVariantSiteBitSet().length) ? markedSites.getVariantSiteBitSet().length - 1 : variantSiteLocation + contextSize;
+        markRegionAs(from, to, true);
+    }
+
+    /**
+     * Marks the sites in the given range with the given value
+     *
+     * @param from the start index (inclusive) to mark
+     * @param to the end index (inclusive) to mark
+     * @param isVariant mark the region with this boolean value
+     */
+    private void markRegionAs(final int from, final int to, final boolean isVariant) {
+        for (int i = from; i <= to; i++)
+            markedSites.getVariantSiteBitSet()[i] = isVariant;
     }
 
     /**
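The bound arithmetic here changed from exclusive to inclusive; a quick standalone check (hypothetical numbers) that the two forms mark the same cells:

```java
// The old loop used an exclusive bound, the new markRegionAs uses an inclusive one,
// so 'to' had to shrink by one everywhere.
boolean[] bitset = new boolean[10];
int from = 2;
// old form: for (int i = from; i < 7; i++)  marks indices 2..6
// new form: the inclusive 'to' must therefore be 6, not 7
int to = 6;
for (int i = from; i <= to; i++)
    bitset[i] = true; // marks exactly indices 2..6, the same cells as before
```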
@@ -580,7 +599,7 @@ public class SlidingWindow {
             filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, headerElement.getLocation(), hasIndelQualities, strandType);
         }
 
-        genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS());
+        genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
     }
 
     return result;

@@ -611,7 +630,7 @@ public class SlidingWindow {
         if (!headerElement.hasConsensusData())
             throw new ReviewedStingException("No CONSENSUS data in " + index);
 
-        genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts(), headerElement.getRMS());
+        genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
     }
 }
@@ -620,15 +639,14 @@ public class SlidingWindow {
      *
      * @param syntheticRead the synthetic read to add to
      * @param baseCounts the base counts object in the header element
-     * @param rms the rms mapping quality in the header element
      */
-    private void genericAddBaseToConsensus(SyntheticRead syntheticRead, BaseAndQualsCounts baseCounts, double rms) {
+    private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) {
         final BaseIndex base = baseCounts.baseIndexWithMostProbability();
         byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE);
         byte qual = baseCounts.averageQualsOfBase(base);
         byte insQual = baseCounts.averageInsertionQualsOfBase(base);
         byte delQual = baseCounts.averageDeletionQualsOfBase(base);
-        syntheticRead.add(base, count, qual, insQual, delQual, rms);
+        syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS());
     }
 
     /**
@@ -636,39 +654,30 @@ public class SlidingWindow {
      *
      * @param start the first window header index in the variant region (inclusive)
      * @param stop the last window header index of the variant region (inclusive)
-     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here
-     * @return a non-null list of all reads contained in the variant region
+     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here; can be null (to allow polyploid consensus anywhere)
+     * @return a non-null object representing all reads contained in the variant region
      */
     @Requires({"start >= 0 && (stop >= start || stop == 0)"})
     @Ensures("result != null")
-    protected ObjectList<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
-        ObjectList<GATKSAMRecord> allReads = new ObjectArrayList<GATKSAMRecord>();
+    protected CloseVariantRegionResult compressVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
+        final CloseVariantRegionResult allReads = new CloseVariantRegionResult(stop);
 
         // Try to compress into a polyploid consensus
-        // Optimization: don't bother if there are no known SNPs
-        final int hetRefPosition = knownSnpPositions.isEmpty() ? -1 : findSinglePolyploidCompressiblePosition(start, stop);
-
-        boolean successfullyCreatedPolyploidConsensus = false;
+        // Optimization: don't bother if there are no known SNPs here
+        final int hetRefPosition = (knownSnpPositions != null && knownSnpPositions.isEmpty()) ? -1 : findSinglePolyploidCompressiblePosition(start, stop);
 
         // Note that using the hetRefPosition protects us from trying to compress variant regions that are created by
         // insertions (which we don't want because we can't confirm that they represent the same allele).
-        // Also, we only allow polyploid consensus creation at known sites.
+        // Also, we only allow polyploid consensus creation at known sites if provided.
         if ( hetRefPosition != -1 && matchesKnownPosition(windowHeader.get(hetRefPosition).getLocation(), knownSnpPositions) ) {
-
-            // try to create the polyploid consensus
-            final ObjectList<GATKSAMRecord> polyploidReads = createPolyploidConsensus(start, stop, hetRefPosition);
-
-            // if successful we are good to go!
-            if ( polyploidReads != null ) {
-                allReads.addAll(polyploidReads);
-                successfullyCreatedPolyploidConsensus = true;
-            }
+            allReads.reads.addAll(createPolyploidConsensus(hetRefPosition));
+            allReads.stopPerformed = hetRefPosition; // we stopped at the het position
         }
 
         // if we can't create a polyploid consensus here, return all reads that overlap the variant region and remove them
         // from the window header entirely; also remove all reads preceding the variant region (since they will be output
         // as consensus right after compression)
-        if ( !successfullyCreatedPolyploidConsensus ) {
+        else {
             final int refStart = windowHeader.get(start).getLocation();
             final int refStop = windowHeader.get(stop).getLocation();
 
@@ -676,7 +685,7 @@ public class SlidingWindow {
             for ( final GATKSAMRecord read : readsInWindow ) {
                 if ( read.getSoftStart() <= refStop ) {
                     if ( read.getAlignmentEnd() >= refStart ) {
-                        allReads.add(read);
+                        allReads.reads.add(read);
                         removeFromHeader(windowHeader, read);
                     }
                     toRemove.add(read);

@@ -687,6 +696,7 @@ public class SlidingWindow {
             for ( final GATKSAMRecord read : toRemove )
                 readsInWindow.remove(read);
         }
+
         return allReads;
     }
@@ -694,13 +704,13 @@ public class SlidingWindow {
      * Determines whether the given position matches one of the known sites
      *
      * @param targetPosition the position of the het site
-     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here
+     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here; can be null (to allow polyploid consensus anywhere)
      * @return true if the targetPosition matches a known SNP position, false otherwise
      */
     @Requires({"targetPosition >= 1 && knownSnpPositions != null"})
     protected boolean matchesKnownPosition(final int targetPosition, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
         final GenomeLoc targetLoc = new UnvalidatingGenomeLoc(contig, contigIndex, targetPosition, targetPosition);
-        return knownSnpPositions.contains(targetLoc);
+        return knownSnpPositions == null || knownSnpPositions.contains(targetLoc);
     }
 
     /*
@@ -716,7 +726,7 @@ public class SlidingWindow {
 
         for ( int i = start; i <= stop; i++ ) {
 
-            final int nAlleles = windowHeader.get(i).getNumberOfAlleles(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT, false);
+            final int nAlleles = windowHeader.get(i).getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT);
 
             // we will only work on diploid non-indel cases because we just don't want to handle/test other scenarios
             if ( nAlleles > 2 || nAlleles == -1 )
@@ -736,21 +746,22 @@ public class SlidingWindow {
     }
 
     /*
-     * Checks whether there's a position in the header with a significant number of softclips.
+     * Checks whether there's a position in the header with a significant number of softclips or a variant.
      *
      * @param header the window header to examine
      * @param positionToSkip the global position to skip in the examination (use negative number if you don't want to make use of this argument)
      * @return true if there exists a position with significant softclips, false otherwise
      */
     @Requires("header != null")
-    protected boolean hasSignificantSoftclipPosition(final List<HeaderElement> header, final int positionToSkip) {
+    protected boolean hasPositionWithSignificantSoftclipsOrVariant(final List<HeaderElement> header, final int positionToSkip) {
 
         for ( final HeaderElement headerElement : header ) {
 
             if ( headerElement.getLocation() == positionToSkip )
                 continue;
 
-            if ( headerElement.hasSignificantSoftclips(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT) )
+            if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ||
+                 headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) > 1 )
                 return true;
         }
@@ -762,29 +773,45 @@ public class SlidingWindow {
      *
      * @param start the first window header index in the variant region (inclusive)
      * @param stop the last window header index of the variant region (inclusive)
-     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here
-     * @return a non-null list of all reads contained in the variant region plus any adjacent synthetic reads
+     * @param knownSnpPositions the set of known SNPs used to determine whether to allow polyploid consensus creation here; can be null (to allow polyploid consensus anywhere)
+     * @return a non-null object representing all reads contained in the variant region plus any adjacent synthetic reads
      */
     @Requires({"start >= 0 && (stop >= start || stop == 0)"})
     @Ensures("result != null")
-    protected ObjectList<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
-        ObjectList<GATKSAMRecord> allReads = compressVariantRegion(start, stop, knownSnpPositions);
+    protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
+        final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions);
 
-        ObjectList<GATKSAMRecord> result = (downsampleCoverage > 0) ? downsampleVariantRegion(allReads) : allReads;
-        result.addAll(addToSyntheticReads(windowHeader, 0, stop+1, SyntheticRead.StrandType.STRANDLESS));
-        result.addAll(finalizeAndAdd(ConsensusType.BOTH));
+        final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
+        result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
+        result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
+        result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
 
         return result; // finalized reads will be downsampled if necessary
     }
 
+    /*
+     * @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false
+     */
+    public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
+        return closeVariantRegions(regions, knownSnpPositions, false);
+    }
+
+    private static final class CloseVariantRegionResult {
+        final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
+        private int stopPerformed;
+
+        public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; }
+    }
+
     /*
      * Finalizes the list of regions requested (and any regions preceding them)
      *
      * @param regions the list of regions to finalize
-     * @param knownSnpPositions the set of known SNP positions
+     * @param knownSnpPositions the set of known SNP positions; can be null (to allow polyploid consensus anywhere)
+     * @param forceCloseFullRegions if true, requires this method to make sure all regions are fully closed; otherwise, we may decide not to close up to the very end (e.g. during het compression)
      * @return a non-null set of reduced reads representing the finalized regions
      */
-    public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
+    public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) {
         final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
         if ( !regions.isEmpty() ) {
@@ -794,9 +821,33 @@ public class SlidingWindow {
             for ( final GenomeLoc region : regions ) {
                 if (((FinishedGenomeLoc)region).isFinished() && region.getContig().equals(contig) && region.getStart() >= windowHeaderStart && region.getStop() < windowHeaderStart + windowHeader.size()) {
                     final int start = region.getStart() - windowHeaderStart;
-                    final int stop = region.getStop() - windowHeaderStart;
+                    int stop = region.getStop() - windowHeaderStart;
 
-                    allReads.addAll(closeVariantRegion(start, stop, knownSnpPositions));
+                    CloseVariantRegionResult closeVariantRegionResult = closeVariantRegion(start, stop, knownSnpPositions);
+                    allReads.addAll(closeVariantRegionResult.reads);
+
+                    // check whether we didn't close the whole region that was requested
+                    if ( stop > 0 && closeVariantRegionResult.stopPerformed < stop ) {
+                        // we should update the variant sites bitset because the context size's worth of bases after the variant position are no longer "variant"
+                        markRegionAs(closeVariantRegionResult.stopPerformed + 1, stop, false);
+
+                        // if the calling method said that it didn't care then we are okay so update the stop
+                        if ( !forceCloseFullRegions ) {
+                            stop = closeVariantRegionResult.stopPerformed;
+                        }
+                        // otherwise, we need to forcibly push the stop that we originally requested
+                        else {
+                            while ( closeVariantRegionResult.stopPerformed < stop ) {
+                                // first clean up used header elements so they don't get reused
+                                for ( int i = 0; i <= closeVariantRegionResult.stopPerformed; i++ )
+                                    windowHeader.remove();
+                                stop -= (closeVariantRegionResult.stopPerformed + 1);
+
+                                closeVariantRegionResult = closeVariantRegion(0, stop, knownSnpPositions);
+                                allReads.addAll(closeVariantRegionResult.reads);
+                            }
+                        }
+                    }
 
                     // We need to clean up the window header elements up until the end of the requested region so that they don't get used for future regions.
                     // Note that this cleanup used to happen outside the above for-loop, but that was causing an occasional doubling of the reduced reads
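A hypothetical numeric trace of the rebasing invariant in the force-close loop above, to make the index arithmetic concrete:

```java
// Suppose a region spans header indices [0, 10] but het compression stopped at index 4 (the het site).
int stop = 10;
int stopPerformed = 4;
// pass 1: indices 0..stopPerformed are consumed and removed from the header,
// so the remaining region shifts left by stopPerformed + 1 = 5 positions:
stop -= (stopPerformed + 1); // stop == 5: the old index 10 is now index 5
// closeVariantRegion(0, stop, ...) is then retried on the rebased window, and the
// loop repeats until stopPerformed reaches stop, i.e. the originally requested
// region is fully closed.
```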
@@ -847,7 +898,7 @@ public class SlidingWindow {
      * regions that still exist regardless of being able to fulfill the
      * context size requirement in the end.
      *
-     * @param knownSnpPositions the set of known SNP positions
+     * @param knownSnpPositions the set of known SNP positions; can be null (to allow polyploid consensus anywhere)
      * @return A non-null set/list of all reads generated
      */
     @Ensures("result != null")
@@ -855,12 +906,11 @@ public class SlidingWindow {
         // mark variant regions
         ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
         CompressionStash regions = new CompressionStash();
-        boolean forceCloseUnfinishedRegions = true;
 
         if (!windowHeader.isEmpty()) {
             markSites(getStopLocation(windowHeader) + 1);
-            regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), forceCloseUnfinishedRegions);
-            finalizedReads = closeVariantRegions(regions, knownSnpPositions);
+            regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true);
+            finalizedReads = closeVariantRegions(regions, knownSnpPositions, true);
 
         if (!windowHeader.isEmpty()) {
             finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
@@ -908,117 +958,105 @@ public class SlidingWindow {
}

// define this so that we can use Java generics below
private static class HeaderElementList extends LinkedList<HeaderElement> {}
private final static class HeaderElementList extends LinkedList<HeaderElement> {}

private final static class SingleStrandConsensusData {
final HeaderElementList consensus = new HeaderElementList();
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
}

/**
* Finalizes a variant region for point mutations, and any adjacent synthetic reads. Indel sites are not supported.
* Finalizes a variant region - and any adjacent synthetic reads - for point mutations (indel sites are not
* supported) with polyploid compression.
*
* @param start the first window header index of the variant region (inclusive)
* @param stop the last window header index of the variant region (inclusive)
* @param hetRefPosition window header index of the het site; MUST NOT BE AN INDEL SITE!
* @return a list of all reads contained in the variant region as a polyploid consensus, or null if not possible
* @return a non-null list of all reads contained in the variant region as a polyploid consensus
*/
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
protected ObjectList<GATKSAMRecord> createPolyploidConsensus(final int start, final int stop, final int hetRefPosition) {
@Ensures({"result != null"})
protected ObjectList<GATKSAMRecord> createPolyploidConsensus(final int hetRefPosition) {
// we will create two (positive strand, negative strand) headers for each haplotype
final HeaderElementList[] headersPosStrand = new HeaderElementList[2];
final HeaderElementList[] headersNegStrand = new HeaderElementList[2];
final SingleStrandConsensusData[] headersPosStrand = new SingleStrandConsensusData[2];
final SingleStrandConsensusData[] headersNegStrand = new SingleStrandConsensusData[2];

final int refStart = windowHeader.get(start).getLocation();
final int refStop = windowHeader.get(stop).getLocation();
final int globalHetRefPosition = windowHeader.get(hetRefPosition).getLocation();

// initialize the mapping from base (allele) to header
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT, false) ) {
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT) ) {
final int currentIndex = alleleHeaderMap.size();
if ( currentIndex > 1 )
throw new IllegalStateException("There are more than 2 alleles present when creating a diploid consensus");

alleleHeaderMap.put(allele.b, currentIndex);
headersPosStrand[currentIndex] = new HeaderElementList();
headersNegStrand[currentIndex] = new HeaderElementList();
headersPosStrand[currentIndex] = new SingleStrandConsensusData();
headersNegStrand[currentIndex] = new SingleStrandConsensusData();
}

// sanity check that we saw 2 alleles
if ( alleleHeaderMap.size() != 2 )
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());

final ObjectList<GATKSAMRecord> toRemoveFromReadCache = new ObjectArrayList<GATKSAMRecord>();
final ObjectList<GATKSAMRecord> toRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>();
final ObjectList<GATKSAMRecord> readsToRemoveFromHeader = new ObjectArrayList<GATKSAMRecord>();

for ( final GATKSAMRecord read : readsInWindow ) {

// if the read falls after the region, just skip it for now (we'll get to it later)
if ( read.getSoftStart() > refStop )
// if the read falls after the het position, just skip it for now (we'll get to it later)
if ( read.getSoftStart() > globalHetRefPosition )
continue;

// if the read falls before the region, remove it
if ( read.getSoftEnd() < refStart ) {
toRemoveFromReadCache.add(read);
continue;
}

// check whether the read spans the het site
if ( read.getSoftStart() <= globalHetRefPosition && read.getSoftEnd() >= globalHetRefPosition ) {

// make sure it meets the minimum mapping quality requirement (if not, we'll remove it and not use it for the consensuses)
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {

// where on the read is the het position?
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);

// this is safe because indels are not supported
final byte base = read.getReadBases()[readPosOfHet];
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];

// make sure that the base passes filters (if not, we'll remove it and not use it for the consensuses)
if ( qual >= MIN_BASE_QUAL_TO_COUNT ) {

// check which allele this read represents
final Integer allele = alleleHeaderMap.get(base);

// ignore the read if it represents a base that's not part of the consensus
if ( allele != null ) {
// add to the appropriate polyploid header
final LinkedList<HeaderElement> header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
addToHeader(header, read);
}
}
}

// remove from the standard header so that we don't double count it
toRemoveFromHeader.add(read);
}

// we remove all reads falling inside the variant region from the window
toRemoveFromReadCache.add(read);
}

// sanity check that no new "variant region" exists on just a single consensus strand
// due to softclips now that we've broken everything out into their component parts
for ( final LinkedList<HeaderElement> header : headersPosStrand ) {
if ( hasSignificantSoftclipPosition(header, globalHetRefPosition) )
return null;
}
for ( final LinkedList<HeaderElement> header : headersNegStrand ) {
if ( hasSignificantSoftclipPosition(header, globalHetRefPosition) )
return null;
}

// create the polyploid synthetic reads
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
for ( final LinkedList<HeaderElement> header : headersPosStrand )
finalizeHetConsensus(header, false, hetReads);
for ( final LinkedList<HeaderElement> header : headersNegStrand )
finalizeHetConsensus(header, true, hetReads);

// remove all used reads
for ( final GATKSAMRecord read : toRemoveFromReadCache )
// remove all other reads from the read cache since we're going to use them here
readsInWindow.remove(read);
for ( final GATKSAMRecord read : toRemoveFromHeader )

// if the read falls before the het position, we don't need to look at it
if ( read.getSoftEnd() < globalHetRefPosition )
continue;

// remove all spanning reads from the consensus header since we're going to incorporate them into a consensus here instead
removeFromHeader(windowHeader, read);

// make sure it meets the minimum mapping quality requirement (if not, we won't use it for the consensus)
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) {

// where on the read is the het position?
final int readPosOfHet = ReadUtils.getReadCoordinateForReferenceCoordinate(read, globalHetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);

// this is safe because indels are not supported
final byte base = read.getReadBases()[readPosOfHet];
final byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPosOfHet];

// check which allele this read represents
final Integer allele = alleleHeaderMap.get(base);

// ignore the read if it represents a base that's not part of the consensus
if ( allele != null ) {
// add to the appropriate polyploid header
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
header.reads.add(read);
addToHeader(header.consensus, read);
}
}
}

// create the polyploid synthetic reads if we can
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();

// sanity check that no new "variant region" exists on just a single consensus strand due to softclips
// or multi-allelic sites now that we've broken everything out into their component parts. if one does
// exist then we need to back out the consensus for that strand only.
for ( final SingleStrandConsensusData header : headersPosStrand ) {
if ( hasPositionWithSignificantSoftclipsOrVariant(header.consensus, globalHetRefPosition) )
hetReads.addAll(header.reads);
else
finalizeHetConsensus(header.consensus, false, hetReads);
}
for ( final SingleStrandConsensusData header : headersNegStrand ) {
if ( hasPositionWithSignificantSoftclipsOrVariant(header.consensus, globalHetRefPosition) )
hetReads.addAll(header.reads);
else
finalizeHetConsensus(header.consensus, true, hetReads);
}

return hetReads;
}
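The net effect of the rewritten method is easiest to see in isolation: every read spanning the het site that survives the mapping-quality filter is routed into one of four consensus buckets keyed by (allele, strand), and reads carrying a third base are simply left out. Below is a minimal standalone sketch of that routing in plain Java; the Read record, method names, and thresholds are illustrative stand-ins for GATKSAMRecord and the SlidingWindow machinery, not code from this commit.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class PolyploidBucketingSketch {
    // Stand-in for GATKSAMRecord: the base observed at the het site, plus filters we need.
    record Read(byte baseAtHet, int mappingQuality, boolean negativeStrand) {}

    // Buckets 0/1 hold allele A/B on the positive strand; 2/3 hold the same alleles on the negative strand.
    static List<List<Read>> bucketByAlleleAndStrand(final List<Read> reads,
                                                    final byte alleleA, final byte alleleB,
                                                    final int minMappingQuality) {
        final List<List<Read>> buckets = new ArrayList<>();
        for (int i = 0; i < 4; i++) buckets.add(new ArrayList<>());

        final Map<Byte, Integer> alleleIndex = new HashMap<>();
        alleleIndex.put(alleleA, 0);
        alleleIndex.put(alleleB, 1);

        for (final Read read : reads) {
            if (read.mappingQuality() < minMappingQuality)
                continue; // filtered reads never contribute to a consensus
            final Integer allele = alleleIndex.get(read.baseAtHet());
            if (allele == null)
                continue; // base is not one of the two consensus alleles
            buckets.get(allele + (read.negativeStrand() ? 2 : 0)).add(read);
        }
        return buckets;
    }

    public static void main(final String[] args) {
        final List<Read> reads = List.of(
                new Read((byte) 'A', 60, false),
                new Read((byte) 'C', 60, true),
                new Read((byte) 'A', 5, true),    // dropped: mapping quality too low
                new Read((byte) 'T', 60, false)); // dropped: not a consensus allele
        final List<List<Read>> buckets = bucketByAlleleAndStrand(reads, (byte) 'A', (byte) 'C', 20);
        System.out.println(buckets.stream().map(List::size).toList()); // [1, 0, 0, 1]
    }
}

Keeping the contributing reads alongside each per-strand header (SingleStrandConsensusData above) is what allows the new code to back out a single strand's consensus and return its raw reads instead of returning null for the whole region.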
@@ -1058,7 +1096,7 @@ public class SlidingWindow {
int locationIndex = headerStart < 0 ? 0 : readStart - headerStart;

if ( removeRead && locationIndex < 0 )
throw new ReviewedStingException("Provided read is behind the Sliding Window! Read = " + read + ", readStart = " + readStart + ", cigar = " + read.getCigarString() + ", window = " + headerStart + "-" + getStopLocation(header));
throw new IllegalStateException("Provided read is behind the Sliding Window! Read = " + read + ", readStart = " + readStart + ", cigar = " + read.getCigarString() + ", window = " + headerStart + "-" + getStopLocation(header));

// we only need to create new header elements if we are adding the read, not when we're removing it
if ( !removeRead )
@@ -1138,6 +1176,8 @@ public class SlidingWindow {
case H:
break;
case I:
readBaseIndex += cigarElement.getLength();

// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
if ( removeRead && locationIndex == 0 )
break;
@@ -1150,7 +1190,6 @@ public class SlidingWindow {
else
headerElement.addInsertionToTheRight();

readBaseIndex += cigarElement.getLength();
break;
case D:
// deletions are added to the baseCounts with the read mapping quality as its quality score
@@ -610,20 +610,8 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts;
}

private final static double[] binomialProbabilityDepthCache = new double[10000];
private final static double REF_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5);

static {
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
binomialProbabilityDepthCache[i] = MathUtils.log10BinomialProbability(i, 0, REF_BINOMIAL_PROB_LOG10_0_5);
}
}

private final double getRefBinomialProbLog10(final int depth) {
if ( depth < binomialProbabilityDepthCache.length )
return binomialProbabilityDepthCache[depth];
else
return MathUtils.log10BinomialProbability(depth, 0, REF_BINOMIAL_PROB_LOG10_0_5);
return MathUtils.log10BinomialProbability(depth, 0);
}

private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
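A worked step behind this simplification: the cached quantity is the probability of zero successes in depth fair trials, and since log10 C(depth, 0) = 0, the new fair-coin overload reduces to log10BinomialProbability(depth, 0) = depth * log10(0.5). That closed form is why the 10,000-entry binomialProbabilityDepthCache and the local log10(0.5) constant can be replaced by a direct MathUtils call (see the MathUtils hunks further below).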
@@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -99,7 +100,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
public int sufficientQualSum = 600;

@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > (epsilon * Quals_original_bam) we output this interval", required = false)
public double qual_epsilon = 0.25;
public double qual_epsilon = 0.10;

@Output
protected PrintStream out;
@@ -145,7 +146,7 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
}

private boolean isGoodRead(final PileupElement p) {
return !p.isDeletion() && (int)p.getQual() >= 20 && p.getMappingQual() >= 20;
return !p.isDeletion() && (int)p.getQual() >= 15 && p.getMappingQual() >= 20;
}

private int getTagIndex(final List<String> tags) {
@@ -179,7 +179,7 @@ public class BaseCountsUnitTest extends BaseTest {
BaseCounts counts = new BaseCounts();

for ( int qual : test.quals )
counts.incr(BaseIndex.A, (byte)qual);
counts.incr(BaseIndex.A, (byte)qual, 20);

final int actualSum = (int)counts.getSumQuals((byte)'A');
final int expectedSum = qualSum(test.quals);
@@ -48,12 +48,12 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;

import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class HeaderElementUnitTest extends BaseTest {
@@ -119,16 +119,15 @@ public class HeaderElementUnitTest extends BaseTest {
Assert.assertFalse(headerElement.hasFilteredData());
Assert.assertFalse(headerElement.hasInsertionToTheRight());
Assert.assertTrue(headerElement.isEmpty());
Assert.assertEquals(headerElement.getRMS(), 0.0);
}

private void testHeaderData(final HeaderElement headerElement, final HETest test) {
Assert.assertEquals(headerElement.getRMS(), (double)test.MQ);
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
Assert.assertFalse(headerElement.isEmpty());
Assert.assertFalse(headerElement.hasInsertionToTheRight());
Assert.assertEquals(headerElement.hasConsensusData(), headerElement.basePassesFilters(test.baseQual, minBaseQual, test.MQ, minMappingQual));
Assert.assertEquals(headerElement.hasFilteredData(), !headerElement.basePassesFilters(test.baseQual, minBaseQual, test.MQ, minMappingQual));
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
}
@@ -136,13 +135,11 @@ public class HeaderElementUnitTest extends BaseTest {

private class AllelesTest {
public final int[] counts;
public final double proportion;
public final boolean allowDeletions;
public final double pvalue;

private AllelesTest(final int[] counts, final double proportion, final boolean allowDeletions) {
private AllelesTest(final int[] counts, final double pvalue) {
this.counts = counts;
this.proportion = proportion;
this.allowDeletions = allowDeletions;
this.pvalue = pvalue;
}
}

@@ -151,17 +148,15 @@ public class HeaderElementUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>();

final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
final double [] proportions = new double[]{ 0.0, 0.05, 0.10, 0.50, 1.0 };
final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 };

for ( final int countA : counts ) {
for ( final int countC : counts ) {
for ( final int countG : counts ) {
for ( final int countT : counts ) {
for ( final int countD : counts ) {
for ( final double proportion : proportions ) {
for ( final boolean allowDeletions : Arrays.asList(true, false) ) {
tests.add(new Object[]{new AllelesTest(new int[]{countA, countC, countG, countT, countD}, proportion, allowDeletions)});
}
for ( final double pvalue : pvalues ) {
tests.add(new Object[]{new AllelesTest(new int[]{countA, countC, countG, countT, countD}, pvalue)});
}
}
}
@@ -182,28 +177,33 @@ public class HeaderElementUnitTest extends BaseTest {
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
}

final int nAllelesSeen = headerElement.getNumberOfAlleles(test.proportion, test.allowDeletions);
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.proportion, test.allowDeletions);
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue);
final int nAllelesExpected = calculateExpectedAlleles(test.counts, test.pvalue);

Assert.assertEquals(nAllelesSeen, nAllelesExpected);
}

private static int calculateExpectedAlleles(final int[] counts, final double proportion, final boolean allowDeletions) {
double total = 0.0;
private static int calculateExpectedAlleles(final int[] counts, final double targetPvalue) {
int total = 0;
for ( final int count : counts ) {
total += count;
}

final int minCount = Math.max(1, (int)(proportion * total));

if ( !allowDeletions && counts[BaseIndex.D.index] >= minCount )
return -1;

int result = 0;
for ( final int count : counts ) {
if ( count > 0 && count >= minCount )
for ( int index = 0; index < counts.length; index++ ) {
final int count = counts[index];
if ( count == 0 )
continue;

final double pvalue = MathUtils.binomialCumulativeProbability(total, 0, count);

if ( pvalue > targetPvalue ) {
if ( index == BaseIndex.D.index )
return -1;
result++;
}
}

return result;
}
}

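To make the new expectation concrete: for each base with a nonzero count, calculateExpectedAlleles computes the binomial tail P(X <= count | n = total, p = 0.5) and counts the base as an allele when that tail exceeds targetPvalue, returning -1 if a deletion qualifies. A hand-checked example (not taken from the test data): with total = 10 and targetPvalue = 0.05, a base seen twice gives (C(10,0) + C(10,1) + C(10,2)) / 2^10 = 56/1024 = 0.0547, which exceeds 0.05 and is counted, while a base seen once gives 11/1024 = 0.0107 and is not.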
@@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;

import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;

import java.io.File;
@@ -74,10 +75,10 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
final static String emptyFileMd5 = "d41d8cd98f00b204e9800998ecf8427e";

protected Pair<List<File>, List<String>> executeTest(final String name, final WalkerTestSpec spec) {
return executeTest(name, spec, false);
return executeTest(name, spec, emptyFileMd5);
}

protected Pair<List<File>, List<String>> executeTest(final String name, final WalkerTestSpec spec, final boolean disableQualsTest) {
protected Pair<List<File>, List<String>> executeTest(final String name, final WalkerTestSpec spec, final String qualsTestMD5) {
final Pair<List<File>, List<String>> result = super.executeTest(name, spec);

// perform some Reduce Reads specific testing now
@@ -93,15 +94,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
reducedInputs.append(file.getAbsolutePath());
}

// run the coverage test
final String coverageCommand = createCommandLine("AssessReducedCoverage", originalArgs);
super.executeTest(name + " : COVERAGE_TEST", new WalkerTestSpec(coverageCommand + reducedInputs.toString(), Arrays.asList(emptyFileMd5)));
// the coverage test is a less strict version of the quals test so we can safely ignore it for now
//final String coverageCommand = createCommandLine("AssessReducedCoverage", originalArgs);
//super.executeTest(name + " : COVERAGE_TEST", new WalkerTestSpec(coverageCommand + reducedInputs.toString(), Arrays.asList(emptyFileMd5)));

// run the quals test
if ( !disableQualsTest ) {
final String qualsCommand = createCommandLine("AssessReducedQuals", originalArgs);
super.executeTest(name + " : QUALS_TEST", new WalkerTestSpec(qualsCommand + reducedInputs.toString(), Arrays.asList(emptyFileMd5)));
}
final String qualsCommand = createCommandLine("AssessReducedQuals", originalArgs);
super.executeTest(name + " : QUALS_TEST", new WalkerTestSpec(qualsCommand + reducedInputs.toString(), Arrays.asList(qualsTestMD5)));
}

return result;
@@ -147,62 +146,69 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
}

private void RRTest(final String testName, final String args, final String md5, final boolean useKnowns) {
this.RRTest(testName, args, md5, useKnowns, false);
this.RRTest(testName, args, md5, useKnowns, emptyFileMd5);
}

private void RRTest(final String testName, final String args, final String md5, final boolean useKnowns, final boolean disableQualsTest) {
private void RRTest(final String testName, final String args, final String md5, final boolean useKnowns, final String qualsTestMD5) {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s" + (useKnowns ? " -known " + DBSNP : "") + " ";
WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList("bam"), Arrays.asList(md5));
executeTest(testName, spec, disableQualsTest);
executeTest(testName, spec, qualsTestMD5);
}

@Test(enabled = true)
public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "538362abd504200800145720b23c98ce", false);
RRTest("testDefaultCompression ", L, "62f8cdb85a424e42e9c56f36302d1dba", false);
}

@Test(enabled = true)
public void testDefaultCompressionWithKnowns() {
RRTest("testDefaultCompressionWithKnowns ", L, "79cdbd997196957af63f46353cff710b", true);
RRTest("testDefaultCompressionWithKnowns ", L, "874c0e0a54c3db67f5e9d7c0d45b7844", true);
}

private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";

@Test(enabled = true)
public void testMultipleIntervals() {
RRTest("testMultipleIntervals ", intervals, "6733b25e87e3fce5753cf7936ccf934f", false);
RRTest("testMultipleIntervals ", intervals, "2e849f8324b27af36bae8cb9b01722e6", false);
}

@Test(enabled = true)
public void testMultipleIntervalsWithKnowns() {
RRTest("testMultipleIntervalsWithKnowns ", intervals, "99e2a79befc71eaadb4197c66a0d6df8", true);
RRTest("testMultipleIntervalsWithKnowns ", intervals, "71bc2167cc6916288bd34dcf099feebc", true);
}

final String highCompressionMD5 = "c83256fa2d6785d5188f50dd45c77e0f";

@Test(enabled = true)
public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e3b7e14655973c8950d7fec96321e483", false);
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, false);
}

@Test(enabled = true)
public void testHighCompressionWithKnowns() {
RRTest("testHighCompressionWithKnowns ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "30a7ed079b3a41ed63e520260fa6afe3", true);
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -mindel 0.3 " + L, highCompressionMD5, true);
}

@Test(enabled = true)
public void testLowCompression() {
// too much downsampling for quals test
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "e4cedfcf45cb747e58a7e729eec56de2", false, true);
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a903558ef284381d74b0ad837deb19f6", false);
}

@Test(enabled = true)
public void testLowCompressionWithKnowns() {
// too much downsampling for quals test
RRTest("testLowCompressionWithKnowns ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "e4cedfcf45cb747e58a7e729eec56de2", true, true);
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a4c5aa158c6ebbc703134cbe2d48619c", true);
}

@Test(enabled = true)
public void testBadPvalueInput() {
final String cmd = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + "-o %s -min_pvalue -0.01";
WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, UserException.BadArgumentValue.class);
executeTest("testBadPvalueInput", spec);
}

@Test(enabled = true)
public void testIndelCompression() {
final String md5 = "f58ae2154e0e5716be0e850b7605856e";
final String md5 = "56154baed62be07008d3684a0a4c0996";
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
}
@@ -210,28 +216,25 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testFilteredDeletionCompression() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
// don't use quals test here (there's one location with a weird layout that won't pass; signed off by EB)
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("bfe0693aea74634f1035a9bd11302517")), true);
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("d7655de41d90aecb716f79e32d53b2d1")));
}

@Test(enabled = true)
public void testCoReduction() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
// don't use quals test here (there's one location with a weird layout that won't pass; signed off by EB)
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("930ec2e2c3b62bec7a2425a82c64f022")), true);
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fa549ba96ca0ce5fbf3553ba173167e8")));
}

@Test(enabled = true)
public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
// don't use quals test here (there's one location with a weird layout that won't pass; signed off by EB)
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fe7c9fd35e50a828e0f38a7ae25b60a7")), true);
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9edcf09b21a4ae8d9fc25222bcb0486b")));
}

@Test(enabled = true)
public void testInsertionsAtEdgeOfConsensus() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b4445db7aeddaf2f1d86e1af0cdc74c8")));
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("27cc8f1a336b2d0a29855ceb8fc988b0")));
}

/**

@@ -245,7 +248,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("f118e83c394d21d901a24230379864fc")));
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("34baf99904b676d5f132d3791030ed0a")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
}

/**

@@ -256,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("bd5198a3e21034887b741faaaa3964bf")));
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("985c4f15a1d45267abb2f6790267930d")));
}

/**

@@ -266,7 +269,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testReadOffContig() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b4dc66445ddf5f467f67860bed023ef8")));
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("388ef48791965d637e4bdb45d5d7cf01")));
}

/**

@@ -276,8 +279,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testPairedReadsInVariantRegion() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
" -o %s --downsample_coverage 250 -dcov 50 ";
// don't use quals test here (there's one location with low quals that won't pass; signed off by EB)
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("9bed260b6245f5ff47db8541405504aa")), true);
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("cfa2588f5edf74c5ddf3d190f5ac6f2d")));
}
}
@@ -46,9 +46,7 @@

package org.broadinstitute.sting.gatk.walkers.compression.reducereads;

import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.*;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.BaseTest;
@@ -58,6 +56,7 @@ import org.broadinstitute.sting.gatk.refdata.RODRecordListImpl;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

@@ -198,6 +197,7 @@ public class ReduceReadsUnitTest extends BaseTest {
final ReduceReads rr = new ReduceReads();
RodBinding.resetNameCounter();
rr.known = Arrays.<RodBinding<VariantContext>>asList(new RodBinding(VariantContext.class, "known"));
rr.knownSnpPositions = new ObjectAVLTreeSet<GenomeLoc>();

final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
@@ -200,17 +200,16 @@ public class SlidingWindowUnitTest extends BaseTest {
@Test(enabled = true)
public void testMarkVariantRegion() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, globalStartPosition);
SlidingWindow.MarkedSites markedSites = slidingWindow.new MarkedSites();
markedSites.updateRegion(100, 100);
slidingWindow.getMarkedSitesForTesting().updateRegion(100, 100);

slidingWindow.markVariantRegion(markedSites, 40);
Assert.assertEquals(countTrueBits(markedSites.getVariantSiteBitSet()), 21);
slidingWindow.markVariantRegion(40);
Assert.assertEquals(countTrueBits(slidingWindow.getMarkedSitesForTesting().getVariantSiteBitSet()), 21);

slidingWindow.markVariantRegion(markedSites, 5);
Assert.assertEquals(countTrueBits(markedSites.getVariantSiteBitSet()), 37);
slidingWindow.markVariantRegion(5);
Assert.assertEquals(countTrueBits(slidingWindow.getMarkedSitesForTesting().getVariantSiteBitSet()), 37);

slidingWindow.markVariantRegion(markedSites, 95);
Assert.assertEquals(countTrueBits(markedSites.getVariantSiteBitSet()), 52);
slidingWindow.markVariantRegion(95);
Assert.assertEquals(countTrueBits(slidingWindow.getMarkedSitesForTesting().getVariantSiteBitSet()), 52);
}

private static int countTrueBits(final boolean[] bitset) {
@@ -254,10 +253,12 @@ public class SlidingWindowUnitTest extends BaseTest {
private class ConsensusCreationTest {
public final int expectedNumberOfReads, expectedNumberOfReadsWithHetCompression;
public final List<GATKSAMRecord> myReads = new ArrayList<GATKSAMRecord>(20);
public final String description;

private ConsensusCreationTest(final List<GenomeLoc> locs, final boolean readsShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);

// first, add the basic reads to the collection
myReads.addAll(basicReads);

@@ -270,6 +271,7 @@ public class SlidingWindowUnitTest extends BaseTest {
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads, final int expectedNumberOfReadsWithHetCompression) {
this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.description = String.format("%s %d %d", operator.toString(), expectedNumberOfReads, expectedNumberOfReadsWithHetCompression);

// first, add the basic reads to the collection
myReads.addAll(basicReads);

@@ -279,6 +281,8 @@ public class SlidingWindowUnitTest extends BaseTest {
myReads.add(createVariantRead(loc, false, false, operator));
}

public String toString() { return description; }

private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality,
final boolean variantBaseShouldBeLowQuality, final CigarOperator operator) {

@@ -315,7 +319,6 @@ public class SlidingWindowUnitTest extends BaseTest {
private static final GenomeLoc loc295 = new UnvalidatingGenomeLoc("1", 0, 1000295, 1000295);
private static final GenomeLoc loc309 = new UnvalidatingGenomeLoc("1", 0, 1000309, 1000309);
private static final GenomeLoc loc310 = new UnvalidatingGenomeLoc("1", 0, 1000310, 1000310);
private static final GenomeLoc loc312 = new UnvalidatingGenomeLoc("1", 0, 1000312, 1000312);
private static final GenomeLoc loc1100 = new UnvalidatingGenomeLoc("1", 0, 1001100, 1001100);

@DataProvider(name = "ConsensusCreation")

@@ -328,7 +331,6 @@ public class SlidingWindowUnitTest extends BaseTest {
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc312), false, false, 11, 8)});

// test low quality reads
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1)});
@@ -346,7 +348,7 @@ public class SlidingWindowUnitTest extends BaseTest {

// test mixture
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3, 3)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1)});

// test I/D operators
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9)});
@@ -370,17 +372,22 @@ public class SlidingWindowUnitTest extends BaseTest {
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty

Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReads);

// test WITH het compression
// test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
for ( int i = 0; i < 1200; i++ )
knownSNPs.add(new UnvalidatingGenomeLoc("1", 0, globalStartPosition + i, globalStartPosition + i));
result = slidingWindow.close(knownSNPs);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);

// test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : test.myReads )
slidingWindow.addRead(read);
result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), test.expectedNumberOfReadsWithHetCompression);
}
@@ -405,21 +412,26 @@ public class SlidingWindowUnitTest extends BaseTest {
final ObjectAVLTreeSet<GenomeLoc> knownSNPs = new ObjectAVLTreeSet<GenomeLoc>();

// test WITHOUT het compression
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(knownSNPs); // currently empty

Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all

// test WITH het compression
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
// test WITH het compression at KNOWN sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
for ( int i = 0; i < readLength; i++ )
knownSNPs.add(new UnvalidatingGenomeLoc("1", 0, globalStartPosition + i, globalStartPosition + i));
result = slidingWindow.close(knownSNPs);
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all

// test WITH het compression at ALL sites
slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
result = slidingWindow.close(knownSNPs);
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
}
@@ -692,7 +704,7 @@ public class SlidingWindowUnitTest extends BaseTest {
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, softclippedRead, false, indexWithSoftclips);
}

final boolean result = slidingWindow.hasSignificantSoftclipPosition(windowHeader, currentHeaderStart + indexToSkip);
final boolean result = slidingWindow.hasPositionWithSignificantSoftclipsOrVariant(windowHeader, currentHeaderStart + indexToSkip);
Assert.assertEquals(result, indexWithSoftclips != -1 && indexWithSoftclips != indexToSkip);
}
}
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils;

import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;

@@ -63,6 +62,7 @@ public class MathUtils {
* where the real-space value is 0.0.
*/
public final static double LOG10_P_OF_ZERO = -1000000.0;
public final static double FAIR_BINOMIAL_PROB_LOG10_0_5 = Math.log10(0.5);

static {
log10Cache = new double[LOG10_CACHE_SIZE];

@@ -70,6 +70,7 @@ public class MathUtils {
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];

log10Cache[0] = Double.NEGATIVE_INFINITY;
log10FactorialCache[0] = 0.0;
for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
log10Cache[k] = Math.log10(k);
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
@@ -306,10 +307,25 @@ public class MathUtils {
return a * b;
}

/**
* Calculates the binomial coefficient. Designed to prevent
* overflows even with very large numbers.
*
* @param n total number of trials
* @param k number of successes
* @return the binomial coefficient
*/
public static double binomialCoefficient(final int n, final int k) {
return Math.pow(10, log10BinomialCoefficient(n, k));
}

/**
* @see #binomialCoefficient(int, int) with log10 applied to result
*/
public static double log10BinomialCoefficient(final int n, final int k) {
return log10Factorial(n) - log10Factorial(k) - log10Factorial(n - k);
}

/**
* Computes a binomial probability. This is computed using the formula
* <p/>
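As a quick check on the identity used here: log10 C(n, k) = log10 n! - log10 k! - log10 (n-k)!, so for n = 5 and k = 2 we get C(5, 2) = 120 / (2 * 6) = 10, i.e. log10BinomialCoefficient(5, 2) = 1 and binomialCoefficient(5, 2) returns 10 up to floating-point rounding.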
@@ -326,23 +342,48 @@ public class MathUtils {
return Math.pow(10, log10BinomialProbability(n, k, Math.log10(p)));
}

/**
* @see #binomialProbability(int, int, double) with log10 applied to result
*/
public static double log10BinomialProbability(final int n, final int k, final double log10p) {
double log10OneMinusP = Math.log10(1 - Math.pow(10, log10p));
return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k);
}

/**
* @see #binomialProbability(int, int, double) with p=0.5
*/
public static double binomialProbability(final int n, final int k) {
return Math.pow(10, log10BinomialProbability(n, k));
}

/**
* @see #binomialProbability(int, int, double) with p=0.5 and log10 applied to result
*/
public static double log10BinomialProbability(final int n, final int k) {
return log10BinomialCoefficient(n, k) + (n * FAIR_BINOMIAL_PROB_LOG10_0_5);
}

/**
* Performs the cumulative sum of binomial probabilities, where the probability calculation is done in log space.
* Assumes that the probability of a successful hit is fair (i.e. 0.5).
*
* @param start - start of the cumulant sum (over hits)
* @param end - end of the cumulant sum (over hits)
* @param total - number of attempts for the number of hits
* @param probHit - probability of a successful hit
* @param n number of attempts for the number of hits
* @param k_start start (inclusive) of the cumulant sum (over hits)
* @param k_end end (inclusive) of the cumulant sum (over hits)
* @return - returns the cumulative probability
*/
public static double binomialCumulativeProbability(final int start, final int end, final int total, final double probHit) {
public static double binomialCumulativeProbability(final int n, final int k_start, final int k_end) {
if ( k_end > n )
throw new IllegalArgumentException(String.format("Value for k_end (%d) is greater than n (%d)", k_end, n));

double cumProb = 0.0;
double prevProb;
BigDecimal probCache = BigDecimal.ZERO;

for (int hits = start; hits < end; hits++) {
for (int hits = k_start; hits <= k_end; hits++) {
prevProb = cumProb;
double probability = binomialProbability(total, hits, probHit);
double probability = binomialProbability(n, hits);
cumProb += probability;
if (probability > 0 && cumProb - prevProb < probability / 2) { // loss of precision
probCache = probCache.add(new BigDecimal(prevProb));
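A usage sketch for the reworked fair-coin signature (the driver class below is hypothetical; the expected value is a hand calculation): summing the probabilities for k = 0..2 out of 10 fair trials gives (C(10,0) + C(10,1) + C(10,2)) / 2^10 = 56/1024.

import org.broadinstitute.sting.utils.MathUtils;

// Hypothetical driver class, not part of the commit.
public class BinomialTailSketch {
    public static void main(final String[] args) {
        // P(X <= 2 | n = 10, p = 0.5) = (1 + 10 + 45) / 1024 = 0.0546875
        final double tail = MathUtils.binomialCumulativeProbability(10, 0, 2);
        System.out.printf("P(X <= 2 | n=10, p=0.5) = %.7f%n", tail);
    }
}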
@@ -355,6 +396,41 @@ public class MathUtils {
return probCache.add(new BigDecimal(cumProb)).doubleValue();
}

/**
* Calculates the log10 of the multinomial coefficient. Designed to prevent
* overflows even with very large numbers.
*
* @param n total number of trials
* @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
* @return the log10 of the multinomial coefficient
*/
public static double log10MultinomialCoefficient(final int n, final int[] k) {
double denominator = 0.0;
for (int x : k) {
denominator += log10Factorial(x);
}
return log10Factorial(n) - denominator;
}

/**
* Computes the log10 of the multinomial distribution probability given a vector
* of log10 probabilities. Designed to prevent overflows even with very large numbers.
*
* @param n number of trials
* @param k array of number of successes for each possibility
* @param log10p array of log10 probabilities
* @return the log10 of the multinomial probability
*/
public static double log10MultinomialProbability(final int n, final int[] k, final double[] log10p) {
if (log10p.length != k.length)
throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of successes: " + log10p.length + ", " + k.length);
double log10Prod = 0.0;
for (int i = 0; i < log10p.length; i++) {
log10Prod += log10p[i] * k[i];
}
return log10MultinomialCoefficient(n, k) + log10Prod;
}

/**
* Computes a multinomial coefficient efficiently avoiding overflow even for large numbers.
* This is computed using the formula:
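A worked instance of the multinomial formula above (a hand calculation, not from the commit): for n = 4 trials split as k = {2, 1, 1} with probabilities {0.5, 0.25, 0.25}, the coefficient is 4!/(2! * 1! * 1!) = 12 and the probability is 12 * 0.5^2 * 0.25 * 0.25 = 0.1875, so log10MultinomialProbability(4, {2,1,1}, {log10 0.5, log10 0.25, log10 0.25}) = log10(0.1875), roughly -0.727.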
@@ -1120,58 +1196,6 @@ public class MathUtils {
return lnToLog10(lnGamma(x));
}

/**
* Calculates the log10 of the binomial coefficient. Designed to prevent
* overflows even with very large numbers.
*
* @param n total number of trials
* @param k number of successes
* @return the log10 of the binomial coefficient
*/
public static double log10BinomialCoefficient(final int n, final int k) {
return log10Factorial(n) - log10Factorial(k) - log10Factorial(n - k);
}

public static double log10BinomialProbability(final int n, final int k, final double log10p) {
double log10OneMinusP = Math.log10(1 - Math.pow(10, log10p));
return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k);
}

/**
* Calculates the log10 of the multinomial coefficient. Designed to prevent
* overflows even with very large numbers.
*
* @param n total number of trials
* @param k array of any size with the number of successes for each grouping (k1, k2, k3, ..., km)
* @return
*/
public static double log10MultinomialCoefficient(final int n, final int[] k) {
double denominator = 0.0;
for (int x : k) {
denominator += log10Factorial(x);
}
return log10Factorial(n) - denominator;
}

/**
* Computes the log10 of the multinomial distribution probability given a vector
* of log10 probabilities. Designed to prevent overflows even with very large numbers.
*
* @param n number of trials
* @param k array of number of successes for each possibility
* @param log10p array of log10 probabilities
* @return
*/
public static double log10MultinomialProbability(final int n, final int[] k, final double[] log10p) {
if (log10p.length != k.length)
throw new UserException.BadArgumentValue("p and k", "Array of log10 probabilities must have the same size as the array of number of sucesses: " + log10p.length + ", " + k.length);
double log10Prod = 0.0;
for (int i = 0; i < log10p.length; i++) {
log10Prod += log10p[i] * k[i];
}
return log10MultinomialCoefficient(n, k) + log10Prod;
}

public static double factorial(final int x) {
// avoid rounding errors caused by fact that 10^log(x) might be slightly lower than x and flooring may produce 1 less than real value
return (double)Math.round(Math.pow(10, log10Factorial(x)));
@@ -56,6 +56,22 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertEquals(MathUtils.binomialProbability(300, 112, 0.98), 2.34763e-236, 1e-237);
}

/**
* Tests that we get the right values from the binomial distribution
*/
@Test
public void testCumulativeBinomialProbability() {
logger.warn("Executing testCumulativeBinomialProbability");

final int numTrials = 10;
for ( int i = 0; i < numTrials; i++ )
Assert.assertEquals(MathUtils.binomialCumulativeProbability(numTrials, i, i), MathUtils.binomialProbability(numTrials, i), 1e-10, String.format("k=%d, n=%d", i, numTrials));

Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 2), 0.05468750, 1e-7);
Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 5), 0.62304687, 1e-7);
Assert.assertEquals(MathUtils.binomialCumulativeProbability(10, 0, 10), 1.0, 1e-7);
}

/**
* Tests that we get the right values from the multinomial distribution
*/
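The expected values in testCumulativeBinomialProbability check out by hand: for n = 10 fair trials, P(X <= 2) = (1 + 10 + 45)/1024 = 0.0546875, P(X <= 5) = (1 + 10 + 45 + 120 + 210 + 252)/1024 = 638/1024 = 0.623046875, and P(X <= 10) = 1.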