Fully stranded implementation of RR (plus bug fix for insertions and het compression).
Now only filtered reads are unstranded. All consensus reads have strand, so that we emit 2 consensus reads in general now: one for each strand. This involved some refactoring of the sliding window which cleaned it up a lot. Also included is a bug fix: insertions downstream of a variant region weren't triggering a stop to the compression.
This commit is contained in:
parent
e5aab22680
commit
5dfa863caa
|
|
@ -59,7 +59,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
* out due to mapping or base quality.
|
* out due to mapping or base quality.
|
||||||
*/
|
*/
|
||||||
public class HeaderElement {
|
public class HeaderElement {
|
||||||
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
private BaseAndQualsCounts positiveConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
||||||
|
private BaseAndQualsCounts negativeConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
||||||
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
|
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
|
||||||
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
||||||
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
private int location; // Genome location of this site (the sliding window knows which contig we're at
|
||||||
|
|
@ -70,14 +71,20 @@ public class HeaderElement {
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BaseAndQualsCounts getFilteredBaseCounts() {
|
/**
|
||||||
|
* Get the base counts object for the consensus type
|
||||||
|
*
|
||||||
|
* @param consensusType the type to use
|
||||||
|
* @return non-null base counts
|
||||||
|
*/
|
||||||
|
public BaseAndQualsCounts getBaseCounts(final SlidingWindow.ConsensusType consensusType) {
|
||||||
|
if ( consensusType == SlidingWindow.ConsensusType.POSITIVE_CONSENSUS )
|
||||||
|
return positiveConsensusBaseCounts;
|
||||||
|
if ( consensusType == SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS )
|
||||||
|
return negativeConsensusBaseCounts;
|
||||||
return filteredBaseCounts;
|
return filteredBaseCounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
public BaseAndQualsCounts getConsensusBaseCounts() {
|
|
||||||
return consensusBaseCounts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty
|
* Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty
|
||||||
* filteredBaseCounts - 0 insertions to the right - empty mappingQuality list
|
* filteredBaseCounts - 0 insertions to the right - empty mappingQuality list
|
||||||
|
|
@ -85,7 +92,7 @@ public class HeaderElement {
|
||||||
* @param location the reference location for the new element
|
* @param location the reference location for the new element
|
||||||
*/
|
*/
|
||||||
public HeaderElement(final int location) {
|
public HeaderElement(final int location) {
|
||||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -95,20 +102,22 @@ public class HeaderElement {
|
||||||
* @param location the reference location for the new element
|
* @param location the reference location for the new element
|
||||||
*/
|
*/
|
||||||
public HeaderElement(final int location, final int insertionsToTheRight) {
|
public HeaderElement(final int location, final int insertionsToTheRight) {
|
||||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new HeaderElement with all given parameters
|
* Creates a new HeaderElement with all given parameters
|
||||||
*
|
*
|
||||||
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
|
* @param positiveConsensusBaseCounts the BaseCounts object for the running positive consensus synthetic read
|
||||||
|
* @param negativeConsensusBaseCounts the BaseCounts object for the running negative consensus synthetic read
|
||||||
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
||||||
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
||||||
* @param location the reference location of this reference element
|
* @param location the reference location of this reference element
|
||||||
* HeaderElement
|
* HeaderElement
|
||||||
*/
|
*/
|
||||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
|
public HeaderElement(final BaseAndQualsCounts positiveConsensusBaseCounts, final BaseAndQualsCounts negativeConsensusBaseCounts, final BaseAndQualsCounts filteredBaseCounts, final int insertionsToTheRight, final int location) {
|
||||||
this.consensusBaseCounts = consensusBaseCounts;
|
this.positiveConsensusBaseCounts = positiveConsensusBaseCounts;
|
||||||
|
this.negativeConsensusBaseCounts = negativeConsensusBaseCounts;
|
||||||
this.filteredBaseCounts = filteredBaseCounts;
|
this.filteredBaseCounts = filteredBaseCounts;
|
||||||
this.insertionsToTheRight = insertionsToTheRight;
|
this.insertionsToTheRight = insertionsToTheRight;
|
||||||
this.location = location;
|
this.location = location;
|
||||||
|
|
@ -124,7 +133,8 @@ public class HeaderElement {
|
||||||
* @return true if site is variant by any definition. False otherwise.
|
* @return true if site is variant by any definition. False otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
|
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
|
||||||
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
return ( hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) )
|
||||||
|
&& (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -138,13 +148,18 @@ public class HeaderElement {
|
||||||
* @param minBaseQual the minimum base qual allowed to be a good base
|
* @param minBaseQual the minimum base qual allowed to be a good base
|
||||||
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
||||||
* @param isSoftClipped true if the base is soft-clipped in the original read
|
* @param isSoftClipped true if the base is soft-clipped in the original read
|
||||||
|
* @param isNegativeStrand true if the base comes from a read on the negative strand
|
||||||
*/
|
*/
|
||||||
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
public void addBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
|
||||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||||
if ( baseMappingQuality >= minMappingQual )
|
if ( baseMappingQuality >= minMappingQual ) {
|
||||||
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
if ( isNegativeStrand )
|
||||||
else
|
negativeConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
|
else
|
||||||
|
positiveConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
|
} else {
|
||||||
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -158,14 +173,20 @@ public class HeaderElement {
|
||||||
* @param minBaseQual the minimum base qual allowed to be a good base
|
* @param minBaseQual the minimum base qual allowed to be a good base
|
||||||
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
||||||
* @param isSoftClipped true if the base is soft-clipped in the original read
|
* @param isSoftClipped true if the base is soft-clipped in the original read
|
||||||
|
* @param isNegativeStrand true if the base comes from a read on the negative strand
|
||||||
*/
|
*/
|
||||||
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
public void removeBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
|
||||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||||
if ( baseMappingQuality >= minMappingQual )
|
if ( baseMappingQuality >= minMappingQual ) {
|
||||||
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
if ( isNegativeStrand )
|
||||||
else
|
negativeConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
|
else
|
||||||
|
positiveConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||||
|
} else {
|
||||||
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
|
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
|
||||||
* should be added to the right of the element.
|
* should be added to the right of the element.
|
||||||
|
|
@ -177,19 +198,11 @@ public class HeaderElement {
|
||||||
/**
|
/**
|
||||||
* Does this HeaderElement contain consensus data?
|
* Does this HeaderElement contain consensus data?
|
||||||
*
|
*
|
||||||
|
* @param consensusType the type to use
|
||||||
* @return whether or not this HeaderElement contains consensus data
|
* @return whether or not this HeaderElement contains consensus data
|
||||||
*/
|
*/
|
||||||
public boolean hasConsensusData() {
|
public boolean hasConsensusData(final SlidingWindow.ConsensusType consensusType) {
|
||||||
return consensusBaseCounts.totalCount() > 0;
|
return getBaseCounts(consensusType).totalCount() > 0;
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Does this HeaderElement contain filtered data?
|
|
||||||
*
|
|
||||||
* @return whether or not this HeaderElement contains filtered data
|
|
||||||
*/
|
|
||||||
public boolean hasFilteredData() {
|
|
||||||
return filteredBaseCounts.totalCount() > 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -198,7 +211,7 @@ public class HeaderElement {
|
||||||
* @return whether or not this HeaderElement has no data
|
* @return whether or not this HeaderElement has no data
|
||||||
*/
|
*/
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
return (!hasFilteredData() && !hasConsensusData());
|
return !hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.FILTERED);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -224,7 +237,7 @@ public class HeaderElement {
|
||||||
* @return whether or not the HeaderElement is variant due to excess insertions
|
* @return whether or not the HeaderElement is variant due to excess insertions
|
||||||
*/
|
*/
|
||||||
private boolean isVariantFromInsertions(double minIndelProportion) {
|
private boolean isVariantFromInsertions(double minIndelProportion) {
|
||||||
final int numberOfBases = consensusBaseCounts.totalCount();
|
final int numberOfBases = totalCountForBothStrands();
|
||||||
if (numberOfBases == 0)
|
if (numberOfBases == 0)
|
||||||
return (insertionsToTheRight > 0); // do we only have insertions?
|
return (insertionsToTheRight > 0); // do we only have insertions?
|
||||||
|
|
||||||
|
|
@ -232,13 +245,18 @@ public class HeaderElement {
|
||||||
return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion;
|
return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int totalCountForBothStrands() {
|
||||||
|
return positiveConsensusBaseCounts.totalCount() + negativeConsensusBaseCounts.totalCount();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether or not the HeaderElement is variant due to excess deletions
|
* Whether or not the HeaderElement is variant due to excess deletions
|
||||||
*
|
*
|
||||||
* @return whether or not the HeaderElement is variant due to excess deletions
|
* @return whether or not the HeaderElement is variant due to excess deletions
|
||||||
*/
|
*/
|
||||||
private boolean isVariantFromDeletions(double minIndelProportion) {
|
private boolean isVariantFromDeletions(double minIndelProportion) {
|
||||||
return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
|
return positiveConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || positiveConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion
|
||||||
|
|| negativeConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || negativeConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -249,9 +267,23 @@ public class HeaderElement {
|
||||||
* @return whether or not the HeaderElement is variant due to excess mismatches
|
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||||
*/
|
*/
|
||||||
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
|
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
|
return isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ||
|
||||||
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
|
||||||
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not the HeaderElement is variant due to excess mismatches
|
||||||
|
*
|
||||||
|
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
|
||||||
|
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
|
||||||
|
* @param consensusType the consensus type to use
|
||||||
|
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||||
|
*/
|
||||||
|
private boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion, final SlidingWindow.ConsensusType consensusType) {
|
||||||
|
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
|
||||||
|
final int totalCount = baseAndQualsCounts.totalCountWithoutIndels();
|
||||||
|
final BaseIndex mostCommon = baseAndQualsCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||||
|
final int countOfOtherBases = totalCount - baseAndQualsCounts.countOfBase(mostCommon);
|
||||||
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
|
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -262,8 +294,20 @@ public class HeaderElement {
|
||||||
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||||
*/
|
*/
|
||||||
protected boolean isVariantFromSoftClips() {
|
protected boolean isVariantFromSoftClips() {
|
||||||
final int nSoftClippedBases = consensusBaseCounts.nSoftclips();
|
return isVariantFromSoftClips(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || isVariantFromSoftClips(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
|
||||||
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This handles the special case where we have more bases that came from soft clips than bases that came from
|
||||||
|
* normal bases by forcing it to become a variant region. We don't want a consensus based on too little information.
|
||||||
|
*
|
||||||
|
* @param consensusType the consensus type to use
|
||||||
|
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||||
|
*/
|
||||||
|
private boolean isVariantFromSoftClips(final SlidingWindow.ConsensusType consensusType) {
|
||||||
|
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
|
||||||
|
final int nSoftClippedBases = baseAndQualsCounts.nSoftclips();
|
||||||
|
return nSoftClippedBases > 0 && nSoftClippedBases >= (baseAndQualsCounts.totalCount() - nSoftClippedBases);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -287,9 +331,9 @@ public class HeaderElement {
|
||||||
*/
|
*/
|
||||||
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
// make sure we have bases at all
|
// make sure we have bases at all
|
||||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
final int totalBaseCount = totalCountForBothStrands();
|
||||||
if ( totalBaseCount == 0 )
|
if ( totalBaseCount == 0 )
|
||||||
return new ObjectArrayList<BaseIndex>(0);
|
return new ObjectArrayList<>(0);
|
||||||
|
|
||||||
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
|
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
|
||||||
// (because of the way insertions are counted), so we need to account for that
|
// (because of the way insertions are counted), so we need to account for that
|
||||||
|
|
@ -297,9 +341,9 @@ public class HeaderElement {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// finally, check for the bases themselves (including deletions)
|
// finally, check for the bases themselves (including deletions)
|
||||||
final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<BaseIndex>(4);
|
final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<>(4);
|
||||||
for ( final BaseIndex base : BaseIndex.values() ) {
|
for ( final BaseIndex base : BaseIndex.values() ) {
|
||||||
final int baseCount = consensusBaseCounts.countOfBase(base);
|
final int baseCount = positiveConsensusBaseCounts.countOfBase(base) + negativeConsensusBaseCounts.countOfBase(base);
|
||||||
if ( baseCount == 0 )
|
if ( baseCount == 0 )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
@ -320,7 +364,7 @@ public class HeaderElement {
|
||||||
* @return true if there are significant softclips, false otherwise
|
* @return true if there are significant softclips, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
|
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
|
||||||
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
|
return hasSignificantCount(positiveConsensusBaseCounts.nSoftclips() + negativeConsensusBaseCounts.nSoftclips(), totalCountForBothStrands(), minVariantPvalue, minVariantProportion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -87,12 +86,10 @@ public class SlidingWindow {
|
||||||
protected int downsampleCoverage;
|
protected int downsampleCoverage;
|
||||||
|
|
||||||
// Running consensus data
|
// Running consensus data
|
||||||
protected SyntheticRead runningConsensus;
|
|
||||||
protected int consensusCounter;
|
protected int consensusCounter;
|
||||||
protected String consensusReadName;
|
protected String consensusReadName;
|
||||||
|
|
||||||
// Filtered Data Consensus data
|
// Filtered Data Consensus data
|
||||||
protected SyntheticRead filteredDataConsensus;
|
|
||||||
protected int filteredDataConsensusCounter;
|
protected int filteredDataConsensusCounter;
|
||||||
protected String filteredDataReadName;
|
protected String filteredDataReadName;
|
||||||
|
|
||||||
|
|
@ -109,12 +106,12 @@ public class SlidingWindow {
|
||||||
private static CompressionStash emptyRegions = new CompressionStash();
|
private static CompressionStash emptyRegions = new CompressionStash();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The types of synthetic reads to use in the finalizeAndAdd method
|
* The types of synthetic reads
|
||||||
*/
|
*/
|
||||||
private enum ConsensusType {
|
protected enum ConsensusType {
|
||||||
CONSENSUS,
|
POSITIVE_CONSENSUS,
|
||||||
FILTERED,
|
NEGATIVE_CONSENSUS,
|
||||||
BOTH
|
FILTERED
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getStopLocation() {
|
public int getStopLocation() {
|
||||||
|
|
@ -144,9 +141,9 @@ public class SlidingWindow {
|
||||||
|
|
||||||
contextSize = 10;
|
contextSize = 10;
|
||||||
|
|
||||||
this.windowHeader = new LinkedList<HeaderElement>();
|
this.windowHeader = new LinkedList<>();
|
||||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
|
this.readsInWindow = new PriorityQueue<>(100, new Comparator<GATKSAMRecord>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||||
|
|
@ -168,8 +165,8 @@ public class SlidingWindow {
|
||||||
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
||||||
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
||||||
|
|
||||||
this.windowHeader = new LinkedList<HeaderElement>();
|
this.windowHeader = new LinkedList<>();
|
||||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
|
this.readsInWindow = new PriorityQueue<>(1000, new Comparator<GATKSAMRecord>() {
|
||||||
@Override
|
@Override
|
||||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||||
|
|
@ -187,9 +184,6 @@ public class SlidingWindow {
|
||||||
this.filteredDataConsensusCounter = 0;
|
this.filteredDataConsensusCounter = 0;
|
||||||
this.filteredDataReadName = "Filtered-" + windowNumber + "-";
|
this.filteredDataReadName = "Filtered-" + windowNumber + "-";
|
||||||
|
|
||||||
this.runningConsensus = null;
|
|
||||||
this.filteredDataConsensus = null;
|
|
||||||
|
|
||||||
this.downsampleStrategy = downsampleStrategy;
|
this.downsampleStrategy = downsampleStrategy;
|
||||||
this.hasIndelQualities = hasIndelQualities;
|
this.hasIndelQualities = hasIndelQualities;
|
||||||
}
|
}
|
||||||
|
|
@ -209,7 +203,9 @@ public class SlidingWindow {
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public CompressionStash addRead(GATKSAMRecord read) {
|
public CompressionStash addRead(GATKSAMRecord read) {
|
||||||
addToHeader(windowHeader, read); // update the window header counts
|
addToHeader(windowHeader, read); // update the window header counts
|
||||||
readsInWindow.add(read); // add read to sliding reads
|
// no need to track low mapping quality reads
|
||||||
|
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY )
|
||||||
|
readsInWindow.add(read); // add read to sliding reads
|
||||||
return slideWindow(read.getUnclippedStart());
|
return slideWindow(read.getUnclippedStart());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -296,7 +292,7 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
|
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
|
||||||
readsInWindow.poll();
|
readsInWindow.poll();
|
||||||
}
|
}
|
||||||
|
|
||||||
return regions;
|
return regions;
|
||||||
|
|
@ -413,280 +409,83 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||||
*
|
*
|
||||||
* @param header the window header
|
* @param header the header to use
|
||||||
* @param start the first header index to add to consensus
|
* @param start the first header index to add to consensus
|
||||||
* @param end the first header index NOT TO add to consensus
|
* @param end the first header index NOT TO add to consensus
|
||||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
* @param consensusType the consensus type to use
|
||||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||||
*/
|
*/
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final ConsensusType consensusType) {
|
||||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||||
|
|
||||||
if ( start < end ) {
|
SyntheticRead consensus = null;
|
||||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
boolean wasInConsensus = false;
|
||||||
|
|
||||||
if (!headerElementIterator.hasNext())
|
for ( int currentPosition = start; currentPosition < end; currentPosition++ ) {
|
||||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
if ( ! headerElementIterator.hasNext() )
|
||||||
|
throw new IllegalStateException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, windowHeader.size(), end));
|
||||||
|
final HeaderElement headerElement = headerElementIterator.next();
|
||||||
|
|
||||||
if (headerElement.hasConsensusData()) {
|
if ( headerElement.hasConsensusData(consensusType) ) {
|
||||||
|
wasInConsensus = true;
|
||||||
|
|
||||||
// find the end of the consecutive consensus data in the window
|
// add to running consensus
|
||||||
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
if ( consensus == null )
|
||||||
if (endOfConsensus <= start)
|
consensus = createNewConsensus(consensusType, headerElement.getLocation());
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
|
||||||
|
|
||||||
// add to running consensus and recurse
|
genericAddBaseToConsensus(consensus, headerElement.getBaseCounts(consensusType));
|
||||||
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
|
||||||
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
// add any outstanding consensus data
|
// add any outstanding consensus data
|
||||||
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
if ( wasInConsensus ) {
|
||||||
|
reads.addAll(finalizeAndAdd(consensus, consensusType));
|
||||||
|
consensus = null;
|
||||||
|
}
|
||||||
|
|
||||||
// find the end of the consecutive empty data in the window
|
wasInConsensus = false;
|
||||||
final int endOfEmptyData = findNextConsensusElement(header, start, end);
|
|
||||||
if (endOfEmptyData <= start)
|
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
|
||||||
|
|
||||||
// recurse out of the empty region
|
|
||||||
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add any outstanding consensus data
|
||||||
|
reads.addAll(finalizeAndAdd(consensus, consensusType));
|
||||||
|
|
||||||
return reads;
|
return reads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SyntheticRead createNewConsensus(final ConsensusType consensusType, final int start) {
|
||||||
|
if ( consensusType == ConsensusType.FILTERED )
|
||||||
|
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, start, hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
||||||
|
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, start, hasIndelQualities, consensusType == ConsensusType.POSITIVE_CONSENSUS ? SyntheticRead.StrandType.POSITIVE : SyntheticRead.StrandType.NEGATIVE);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finalizes one or more synthetic reads.
|
* Finalizes a synthetic read.
|
||||||
*
|
*
|
||||||
|
* @param consensus the consensus to finalize
|
||||||
* @param type the synthetic reads you want to close
|
* @param type the synthetic reads you want to close
|
||||||
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
* @return a possibly empty list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||||
*/
|
*/
|
||||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
|
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final SyntheticRead consensus, final ConsensusType type) {
|
||||||
|
|
||||||
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<>();
|
||||||
|
|
||||||
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
|
final GATKSAMRecord read;
|
||||||
final GATKSAMRecord read = finalizeRunningConsensus();
|
if ( type == ConsensusType.FILTERED )
|
||||||
if ( read != null )
|
read = finalizeFilteredDataConsensus(consensus);
|
||||||
list.add(read);
|
else
|
||||||
}
|
read = finalizeRunningConsensus(consensus);
|
||||||
|
|
||||||
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
|
if ( read != null )
|
||||||
final GATKSAMRecord read = finalizeFilteredDataConsensus();
|
list.add(read);
|
||||||
if ( read != null )
|
|
||||||
list.add(read);
|
|
||||||
}
|
|
||||||
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks for the next position without consensus data
|
|
||||||
*
|
|
||||||
* @param header the header to check
|
|
||||||
* @param start beginning of the filtered region
|
|
||||||
* @param upTo limit to search for another consensus element
|
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
|
||||||
*/
|
|
||||||
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
int index = start;
|
|
||||||
while (index < upTo) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
|
||||||
|
|
||||||
if (!headerElementIterator.next().hasConsensusData())
|
|
||||||
break;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks for the next position witho consensus data
|
|
||||||
*
|
|
||||||
* @param header the header to check
|
|
||||||
* @param start beginning of the filtered region
|
|
||||||
* @param upTo limit to search for another consensus element
|
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
|
||||||
*/
|
|
||||||
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
int index = start;
|
|
||||||
while (index < upTo) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
|
||||||
|
|
||||||
if (headerElementIterator.next().hasConsensusData())
|
|
||||||
break;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds bases to the filtered data synthetic read.
|
|
||||||
*
|
|
||||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
|
||||||
* bases.
|
|
||||||
*
|
|
||||||
* @param header the window header
|
|
||||||
* @param start the first header index to add to consensus
|
|
||||||
* @param end the first header index NOT TO add to consensus
|
|
||||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
|
||||||
*/
|
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
|
||||||
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
|
||||||
if (runningConsensus == null)
|
|
||||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
|
||||||
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
|
|
||||||
for (int index = start; index < end; index++) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
|
||||||
|
|
||||||
final HeaderElement headerElement = headerElementIterator.next();
|
|
||||||
if (!headerElement.hasConsensusData())
|
|
||||||
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
|
||||||
|
|
||||||
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds bases to the running filtered data accordingly
|
|
||||||
*
|
|
||||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
|
||||||
*
|
|
||||||
* @param header the window header
|
|
||||||
* @param start the first header index to add to consensus
|
|
||||||
* @param end the first header index NOT TO add to consensus
|
|
||||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
|
||||||
*/
|
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
|
||||||
@Ensures("result != null")
|
|
||||||
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
|
|
||||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
|
||||||
|
|
||||||
if ( start < end ) {
|
|
||||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
|
||||||
|
|
||||||
HeaderElement headerElement = headerElementIterator.next();
|
|
||||||
|
|
||||||
if (headerElement.hasFilteredData()) {
|
|
||||||
|
|
||||||
// find the end of the consecutive filtered data in the window
|
|
||||||
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
|
|
||||||
if (endOfFiltered <= start)
|
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
|
|
||||||
|
|
||||||
// add to running filtered consensus and recurse
|
|
||||||
addToFilteredData(header, start, endOfFiltered);
|
|
||||||
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
// add any outstanding filtered data
|
|
||||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
|
||||||
|
|
||||||
// find the end of the consecutive empty data in the window
|
|
||||||
final int endOfEmptyData = findNextFilteredElement(header, start, end);
|
|
||||||
if (endOfEmptyData <= start)
|
|
||||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
|
||||||
|
|
||||||
// recurse out of the empty region
|
|
||||||
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return reads;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks for the next position without consensus data
|
|
||||||
*
|
|
||||||
* @param header the header to check
|
|
||||||
* @param start beginning of the filtered region
|
|
||||||
* @param upTo limit to search for another consensus element
|
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
|
||||||
*/
|
|
||||||
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
int index = start;
|
|
||||||
while (index < upTo) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
|
||||||
|
|
||||||
if (!headerElementIterator.next().hasFilteredData())
|
|
||||||
break;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks for the next position witho consensus data
|
|
||||||
*
|
|
||||||
* @param header the header to check
|
|
||||||
* @param start beginning of the filtered region
|
|
||||||
* @param upTo limit to search for another consensus element
|
|
||||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
|
||||||
*/
|
|
||||||
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
int index = start;
|
|
||||||
while (index < upTo) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("There are no more header elements in this window");
|
|
||||||
|
|
||||||
if (headerElementIterator.next().hasFilteredData())
|
|
||||||
break;
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds bases to the filtered data synthetic read.
|
|
||||||
*
|
|
||||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
|
|
||||||
*
|
|
||||||
* @param header the window header
|
|
||||||
* @param start the first header index to add to consensus
|
|
||||||
* @param end the first header index NOT TO add to consensus
|
|
||||||
*/
|
|
||||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
|
||||||
@Ensures("result != null")
|
|
||||||
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
|
|
||||||
|
|
||||||
if (filteredDataConsensus == null)
|
|
||||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
|
||||||
|
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
|
||||||
for (int index = start; index < end; index++) {
|
|
||||||
if (!headerElementIterator.hasNext())
|
|
||||||
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
|
||||||
|
|
||||||
final HeaderElement headerElement = headerElementIterator.next();
|
|
||||||
|
|
||||||
if (!headerElement.hasFilteredData())
|
|
||||||
throw new ReviewedStingException("No filtered data in " + index);
|
|
||||||
|
|
||||||
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic accessor to add base and qualities to a synthetic read
|
* Generic accessor to add base and qualities to a synthetic read
|
||||||
*
|
*
|
||||||
|
|
@ -734,7 +533,7 @@ public class SlidingWindow {
|
||||||
final int refStart = windowHeader.get(start).getLocation();
|
final int refStart = windowHeader.get(start).getLocation();
|
||||||
final int refStop = windowHeader.get(stop).getLocation();
|
final int refStop = windowHeader.get(stop).getLocation();
|
||||||
|
|
||||||
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
|
||||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||||
if ( read.getSoftStart() <= refStop ) {
|
if ( read.getSoftStart() <= refStop ) {
|
||||||
if ( read.getAlignmentEnd() >= refStart ) {
|
if ( read.getAlignmentEnd() >= refStart ) {
|
||||||
|
|
@ -814,7 +613,7 @@ public class SlidingWindow {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
|
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
|
||||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
|
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) != 1 )
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -836,13 +635,26 @@ public class SlidingWindow {
|
||||||
|
|
||||||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
||||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
||||||
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
|
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
||||||
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
|
|
||||||
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
|
||||||
|
|
||||||
return result; // finalized reads will be downsampled if necessary
|
return result; // finalized reads will be downsampled if necessary
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds reads for all possible strands (positive, negative, filtered) from the global windowHeader object
|
||||||
|
*
|
||||||
|
* @param start the start position (inclusive)
|
||||||
|
* @param end the end position (exclusive)
|
||||||
|
* @return non-null but possibly empty array list with reduced reads
|
||||||
|
*/
|
||||||
|
private ObjectArrayList<GATKSAMRecord> addAllSyntheticReadTypes(final int start, final int end) {
|
||||||
|
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||||
|
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.POSITIVE_CONSENSUS));
|
||||||
|
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.NEGATIVE_CONSENSUS));
|
||||||
|
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.FILTERED));
|
||||||
|
return reads;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false
|
* @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false
|
||||||
*/
|
*/
|
||||||
|
|
@ -851,7 +663,7 @@ public class SlidingWindow {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class CloseVariantRegionResult {
|
private static final class CloseVariantRegionResult {
|
||||||
final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||||
private int stopPerformed;
|
private int stopPerformed;
|
||||||
|
|
||||||
public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; }
|
public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; }
|
||||||
|
|
@ -866,7 +678,7 @@ public class SlidingWindow {
|
||||||
* @return a non-null set of reduced reads representing the finalized regions
|
* @return a non-null set of reduced reads representing the finalized regions
|
||||||
*/
|
*/
|
||||||
public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) {
|
public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) {
|
||||||
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
|
||||||
if ( !regions.isEmpty() ) {
|
if ( !regions.isEmpty() ) {
|
||||||
|
|
||||||
int windowHeaderStart = getStartLocation(windowHeader);
|
int windowHeaderStart = getStartLocation(windowHeader);
|
||||||
|
|
@ -945,9 +757,9 @@ public class SlidingWindow {
|
||||||
if (downsampleCoverage >= nReads)
|
if (downsampleCoverage >= nReads)
|
||||||
return allReads;
|
return allReads;
|
||||||
|
|
||||||
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<GATKSAMRecord>(downsampleCoverage);
|
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<>(downsampleCoverage);
|
||||||
downsampler.submit(allReads);
|
downsampler.submit(allReads);
|
||||||
return new ObjectArrayList<GATKSAMRecord>(downsampler.consumeFinalizedItems());
|
return new ObjectArrayList<>(downsampler.consumeFinalizedItems());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -962,7 +774,7 @@ public class SlidingWindow {
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close(final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close(final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
||||||
// mark variant regions
|
// mark variant regions
|
||||||
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
|
||||||
CompressionStash regions = new CompressionStash();
|
CompressionStash regions = new CompressionStash();
|
||||||
|
|
||||||
if (!windowHeader.isEmpty()) {
|
if (!windowHeader.isEmpty()) {
|
||||||
|
|
@ -970,48 +782,45 @@ public class SlidingWindow {
|
||||||
regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true);
|
regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true);
|
||||||
finalizedReads = closeVariantRegions(regions, knownSnpPositions, true);
|
finalizedReads = closeVariantRegions(regions, knownSnpPositions, true);
|
||||||
|
|
||||||
if (!windowHeader.isEmpty()) {
|
if (!windowHeader.isEmpty())
|
||||||
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
|
finalizedReads.addAll(addAllSyntheticReadTypes(0, windowHeader.size()));
|
||||||
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
|
|
||||||
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Pair<ObjectSet<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
|
return new Pair<>(finalizedReads, regions);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* generates the SAM record for the running consensus read and resets it (to null)
|
* generates the SAM record for the running consensus read and resets it (to null)
|
||||||
*
|
*
|
||||||
|
* @param runningConsensus the consensus to finalize
|
||||||
* @return the read contained in the running consensus or null
|
* @return the read contained in the running consensus or null
|
||||||
*/
|
*/
|
||||||
protected GATKSAMRecord finalizeRunningConsensus() {
|
protected GATKSAMRecord finalizeRunningConsensus(final SyntheticRead runningConsensus) {
|
||||||
GATKSAMRecord finalizedRead = null;
|
GATKSAMRecord finalizedRead = null;
|
||||||
if (runningConsensus != null) {
|
|
||||||
if (runningConsensus.size() > 0)
|
if ( runningConsensus != null ) {
|
||||||
|
if ( runningConsensus.size() > 0 )
|
||||||
finalizedRead = runningConsensus.close();
|
finalizedRead = runningConsensus.close();
|
||||||
else
|
else
|
||||||
consensusCounter--;
|
consensusCounter--;
|
||||||
|
|
||||||
runningConsensus = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return finalizedRead;
|
return finalizedRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* generates the SAM record for the filtered data consensus and resets it (to null)
|
* generates the SAM record for the filtered data consensus and resets it (to null)
|
||||||
*
|
*
|
||||||
|
* @param filteredDataConsensus the consensus to finalize
|
||||||
* @return the read contained in the running consensus or null
|
* @return the read contained in the running consensus or null
|
||||||
*/
|
*/
|
||||||
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
protected GATKSAMRecord finalizeFilteredDataConsensus(final SyntheticRead filteredDataConsensus) {
|
||||||
GATKSAMRecord finalizedRead = null;
|
GATKSAMRecord finalizedRead = null;
|
||||||
if (filteredDataConsensus != null) {
|
if (filteredDataConsensus != null) {
|
||||||
if (filteredDataConsensus.size() > 0)
|
if (filteredDataConsensus.size() > 0)
|
||||||
finalizedRead = filteredDataConsensus.close();
|
finalizedRead = filteredDataConsensus.close();
|
||||||
else
|
else
|
||||||
filteredDataConsensusCounter--;
|
filteredDataConsensusCounter--;
|
||||||
|
|
||||||
filteredDataConsensus = null;
|
|
||||||
}
|
}
|
||||||
return finalizedRead;
|
return finalizedRead;
|
||||||
}
|
}
|
||||||
|
|
@ -1021,7 +830,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
private final static class SingleStrandConsensusData {
|
private final static class SingleStrandConsensusData {
|
||||||
final HeaderElementList consensus = new HeaderElementList();
|
final HeaderElementList consensus = new HeaderElementList();
|
||||||
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -1042,6 +851,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
// initialize the mapping from base (allele) to header
|
// initialize the mapping from base (allele) to header
|
||||||
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
||||||
|
alleleHeaderMap.defaultReturnValue(-1);
|
||||||
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
|
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
|
||||||
final int currentIndex = alleleHeaderMap.size();
|
final int currentIndex = alleleHeaderMap.size();
|
||||||
if ( currentIndex > 1 )
|
if ( currentIndex > 1 )
|
||||||
|
|
@ -1056,7 +866,7 @@ public class SlidingWindow {
|
||||||
if ( alleleHeaderMap.size() != 2 )
|
if ( alleleHeaderMap.size() != 2 )
|
||||||
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
||||||
|
|
||||||
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<>();
|
||||||
|
|
||||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||||
|
|
||||||
|
|
@ -1081,10 +891,10 @@ public class SlidingWindow {
|
||||||
final byte base = read.getReadBases()[readPosOfHet];
|
final byte base = read.getReadBases()[readPosOfHet];
|
||||||
|
|
||||||
// check which allele this read represents
|
// check which allele this read represents
|
||||||
final Integer allele = alleleHeaderMap.get(base);
|
final int allele = alleleHeaderMap.get(base);
|
||||||
|
|
||||||
// ignore the read if it represents a base that's not part of the consensus
|
// ignore the read if it represents a base that's not part of the consensus
|
||||||
if ( allele != null ) {
|
if ( allele != -1 ) {
|
||||||
// add to the appropriate polyploid header
|
// add to the appropriate polyploid header
|
||||||
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
||||||
header.reads.add(read);
|
header.reads.add(read);
|
||||||
|
|
@ -1096,7 +906,7 @@ public class SlidingWindow {
|
||||||
readsInWindow.remove(read);
|
readsInWindow.remove(read);
|
||||||
|
|
||||||
// create the polyploid synthetic reads if we can
|
// create the polyploid synthetic reads if we can
|
||||||
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<>();
|
||||||
|
|
||||||
// sanity check that no new "variant region" exists on just a single consensus strand due to softclips
|
// sanity check that no new "variant region" exists on just a single consensus strand due to softclips
|
||||||
// or multi-allelic sites now that we've broken everything out into their component parts. if one does
|
// or multi-allelic sites now that we've broken everything out into their component parts. if one does
|
||||||
|
|
@ -1125,10 +935,12 @@ public class SlidingWindow {
|
||||||
* @param result list in which to store results
|
* @param result list in which to store results
|
||||||
*/
|
*/
|
||||||
protected void finalizeHetConsensus(final LinkedList<HeaderElement> header, final boolean isNegativeStrand, final ObjectList<GATKSAMRecord> result) {
|
protected void finalizeHetConsensus(final LinkedList<HeaderElement> header, final boolean isNegativeStrand, final ObjectList<GATKSAMRecord> result) {
|
||||||
if ( header.size() > 0 )
|
if ( header.size() > 0 ) {
|
||||||
result.addAll(addToSyntheticReads(header, 0, header.size(), isNegativeStrand ? SyntheticRead.StrandType.NEGATIVE : SyntheticRead.StrandType.POSITIVE));
|
if ( isNegativeStrand )
|
||||||
if ( runningConsensus != null )
|
result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.NEGATIVE_CONSENSUS));
|
||||||
result.add(finalizeRunningConsensus());
|
else
|
||||||
|
result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.POSITIVE_CONSENSUS));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
|
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
|
||||||
|
|
@ -1222,6 +1034,7 @@ public class SlidingWindow {
|
||||||
|
|
||||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex);
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex);
|
||||||
final byte mappingQuality = (byte) read.getMappingQuality();
|
final byte mappingQuality = (byte) read.getMappingQuality();
|
||||||
|
final boolean isNegativeStrand = read.getReadNegativeStrandFlag();
|
||||||
|
|
||||||
// iterator variables
|
// iterator variables
|
||||||
int locationIndex = startIndex;
|
int locationIndex = startIndex;
|
||||||
|
|
@ -1254,9 +1067,9 @@ public class SlidingWindow {
|
||||||
for ( int i = 0; i < nDeletionBases; i++ ) {
|
for ( int i = 0; i < nDeletionBases; i++ ) {
|
||||||
headerElement = headerElementIterator.next();
|
headerElement = headerElementIterator.next();
|
||||||
if (removeRead)
|
if (removeRead)
|
||||||
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
|
||||||
else
|
else
|
||||||
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
|
||||||
}
|
}
|
||||||
locationIndex += nDeletionBases;
|
locationIndex += nDeletionBases;
|
||||||
break;
|
break;
|
||||||
|
|
@ -1279,9 +1092,9 @@ public class SlidingWindow {
|
||||||
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
||||||
|
|
||||||
if ( removeRead )
|
if ( removeRead )
|
||||||
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
|
||||||
else
|
else
|
||||||
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
|
||||||
|
|
||||||
readBaseIndex++;
|
readBaseIndex++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -133,6 +132,11 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
||||||
return reportLocus ? ref.getLocus() : null;
|
return reportLocus ? ref.getLocus() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the quals separated by version and strand
|
||||||
|
* @param readPileup the pileup
|
||||||
|
* @return 2x2 array with sum of quals separated by version in 1st dimension and strand in the 2nd
|
||||||
|
*/
|
||||||
private int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
private int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||||
|
|
||||||
final int[] quals = new int[2];
|
final int[] quals = new int[2];
|
||||||
|
|
|
||||||
|
|
@ -97,15 +97,15 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
HeaderElement headerElement = new HeaderElement(1000, 0);
|
HeaderElement headerElement = new HeaderElement(1000, 0);
|
||||||
|
|
||||||
// first test that if we add and then remove it, we have no data
|
// first test that if we add and then remove it, we have no data
|
||||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||||
headerElement.addInsertionToTheRight();
|
headerElement.addInsertionToTheRight();
|
||||||
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||||
headerElement.removeInsertionToTheRight();
|
headerElement.removeInsertionToTheRight();
|
||||||
testHeaderIsEmpty(headerElement);
|
testHeaderIsEmpty(headerElement);
|
||||||
|
|
||||||
// now, test that the data was added as expected
|
// now, test that the data was added as expected
|
||||||
for ( int i = 0; i < 10; i++ )
|
for ( int i = 0; i < 10; i++ )
|
||||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||||
testHeaderData(headerElement, test);
|
testHeaderData(headerElement, test);
|
||||||
|
|
||||||
// test the insertion adding functionality
|
// test the insertion adding functionality
|
||||||
|
|
@ -115,8 +115,8 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testHeaderIsEmpty(final HeaderElement headerElement) {
|
private void testHeaderIsEmpty(final HeaderElement headerElement) {
|
||||||
Assert.assertFalse(headerElement.hasConsensusData());
|
Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS));
|
||||||
Assert.assertFalse(headerElement.hasFilteredData());
|
Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED));
|
||||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||||
Assert.assertTrue(headerElement.isEmpty());
|
Assert.assertTrue(headerElement.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
@ -125,9 +125,9 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
|
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
|
||||||
Assert.assertFalse(headerElement.isEmpty());
|
Assert.assertFalse(headerElement.isEmpty());
|
||||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||||
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
|
Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS), test.MQ >= minMappingQual);
|
||||||
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
|
Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED), test.MQ < minMappingQual);
|
||||||
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
|
Assert.assertEquals(headerElement.getBaseCounts(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ? SlidingWindow.ConsensusType.POSITIVE_CONSENSUS : SlidingWindow.ConsensusType.FILTERED).getRMS(), (double)test.MQ);
|
||||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
|
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
|
||||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
|
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
|
||||||
}
|
}
|
||||||
|
|
@ -145,7 +145,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
|
|
||||||
@DataProvider(name = "alleles")
|
@DataProvider(name = "alleles")
|
||||||
public Object[][] createAllelesData() {
|
public Object[][] createAllelesData() {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<>();
|
||||||
|
|
||||||
final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
|
final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
|
||||||
final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 };
|
final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 };
|
||||||
|
|
@ -174,7 +174,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
||||||
for ( int i = 0; i < test.counts.length; i++ ) {
|
for ( int i = 0; i < test.counts.length; i++ ) {
|
||||||
final BaseIndex base = BaseIndex.values()[i];
|
final BaseIndex base = BaseIndex.values()[i];
|
||||||
for ( int j = 0; j < test.counts[i]; j++ )
|
for ( int j = 0; j < test.counts[i]; j++ )
|
||||||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
|
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
|
||||||
|
|
|
||||||
|
|
@ -158,44 +158,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testDefaultCompression() {
|
public void testDefaultCompression() {
|
||||||
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
|
RRTest("testDefaultCompression ", L, "0e503f7b79ace4c89d74f0943a0de1c0", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testDefaultCompressionWithKnowns() {
|
public void testDefaultCompressionWithKnowns() {
|
||||||
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
|
RRTest("testDefaultCompressionWithKnowns ", L, "6db7ce2733d006f8bd61c42a40d23728", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMultipleIntervals() {
|
public void testMultipleIntervals() {
|
||||||
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
|
RRTest("testMultipleIntervals ", intervals, "207f2c6d3db956e19412a45a231ca367", false, "043b2838c27d8f9580379b54c18ff40a");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMultipleIntervalsWithKnowns() {
|
public void testMultipleIntervalsWithKnowns() {
|
||||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
|
RRTest("testMultipleIntervalsWithKnowns ", intervals, "f3b11a8a7673b301e27137936fafc6b6", true, "043b2838c27d8f9580379b54c18ff40a");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHighCompression() {
|
public void testHighCompression() {
|
||||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
|
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "dcc3716b3665aa1c2dbe6b22d6534aef", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHighCompressionWithKnowns() {
|
public void testHighCompressionWithKnowns() {
|
||||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
|
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "97ae655bf0e483ea227b1aac67ced024", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testLowCompression() {
|
public void testLowCompression() {
|
||||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
|
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a1377eb922e0b09a03a280b691b0b3ff", false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testLowCompressionWithKnowns() {
|
public void testLowCompressionWithKnowns() {
|
||||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
|
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "bd7c5b0b210694f364ca6a41f5b89870", true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
|
|
@ -207,7 +207,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testIndelCompression() {
|
public void testIndelCompression() {
|
||||||
final String md5 = "d20e6012300898a0315c795cab7583d8";
|
final String md5 = "9c9305eda5e4e7f22246ec8a4b242c97";
|
||||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
||||||
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
||||||
}
|
}
|
||||||
|
|
@ -215,27 +215,25 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testFilteredDeletionCompression() {
|
public void testFilteredDeletionCompression() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1bda512143be1016dfaca1f7020b6398")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
|
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReduction() {
|
public void testCoReduction() {
|
||||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
|
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testCoReductionWithKnowns() {
|
public void testCoReductionWithKnowns() {
|
||||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
|
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testInsertionsAtEdgeOfConsensus() {
|
public void testInsertionsAtEdgeOfConsensus() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
||||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
|
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c10653a8c21fb32b5cf580d3704b0edd")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -249,7 +247,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testAddingReadAfterTailingTheStash() {
|
public void testAddingReadAfterTailingTheStash() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fddbec29d0945afbbb34b42994614c15")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -260,7 +258,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
public void testDivideByZero() {
|
public void testDivideByZero() {
|
||||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377")));
|
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -270,7 +268,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testReadOffContig() {
|
public void testReadOffContig() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
|
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("595e5812c37189930cae93e45765def4")));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -280,7 +278,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
public void testPairedReadsInVariantRegion() {
|
public void testPairedReadsInVariantRegion() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
||||||
" -o %s --downsample_coverage 250 -dcov 50 ";
|
" -o %s --downsample_coverage 250 -dcov 50 ";
|
||||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b005727119eee27995705959a637085e")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -318,7 +318,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||||
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||||
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
this.description = String.format("%d %d %d %b %b", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage, readsShouldBeLowQuality, variantBaseShouldBeLowQuality);
|
||||||
|
|
||||||
// first, add the basic reads to the collection
|
// first, add the basic reads to the collection
|
||||||
myReads.addAll(basicReads);
|
myReads.addAll(basicReads);
|
||||||
|
|
@ -390,40 +390,40 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
// test high quality reads and bases
|
// test high quality reads and bases
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 11, 8, 7 + DEEP_COVERAGE_ITERATIONS)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 13, 12, 6 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
|
|
||||||
// test low quality reads
|
// test low quality reads
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 3, 3, 3)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 3, 3, 3)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 3, 3, 3)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 3, 3, 3)});
|
||||||
|
|
||||||
// test low quality bases
|
// test low quality bases
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 2, 2, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 2, 2, 2)});
|
||||||
|
|
||||||
// test mixture
|
// test mixture
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 3, 3, 3)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 2, 2, 2)});
|
||||||
|
|
||||||
// test I/D operators
|
// test I/D operators
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
@ -517,6 +517,39 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testConsensusCreationForInsertions() {
|
||||||
|
|
||||||
|
final int totalNumReads = 7;
|
||||||
|
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(totalNumReads);
|
||||||
|
|
||||||
|
// add reads, one with a SNP and one with a SNP and insertion
|
||||||
|
for ( int i = 0; i < totalNumReads; i++ ) {
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead" + i, 0, globalStartPosition, readLength);
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
|
||||||
|
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||||
|
if ( i < 2 )
|
||||||
|
bases[20] = 'C';
|
||||||
|
if ( i == 0 )
|
||||||
|
bases[80] = 'C';
|
||||||
|
read.setReadBases(bases);
|
||||||
|
|
||||||
|
if ( i == 0 )
|
||||||
|
read.setCigarString("80M1I19M");
|
||||||
|
|
||||||
|
myReads.add(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
|
for ( final GATKSAMRecord read : myReads )
|
||||||
|
slidingWindow.addRead(read);
|
||||||
|
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
|
||||||
|
Assert.assertEquals(result.getFirst().size(), 3); // no compression at all for SNPs
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAddingReadPairWithSameCoordinates() {
|
public void testAddingReadPairWithSameCoordinates() {
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
|
||||||
|
|
@ -739,21 +772,22 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||||
read.setMappingQuality(30);
|
read.setMappingQuality(30);
|
||||||
|
read.setReadNegativeStrandFlag(false);
|
||||||
|
|
||||||
// add the read
|
// add the read
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
||||||
for ( int i = 0; i < start; i++ )
|
for ( int i = 0; i < start; i++ )
|
||||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
for ( int i = 0; i < readLength; i++ )
|
for ( int i = 0; i < readLength; i++ )
|
||||||
Assert.assertEquals(windowHeader.get(start + i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 1);
|
Assert.assertEquals(windowHeader.get(start + i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 1);
|
||||||
for ( int i = start + readLength; i < currentHeaderLength; i++ )
|
for ( int i = start + readLength; i < currentHeaderLength; i++ )
|
||||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
|
|
||||||
// now remove the read
|
// now remove the read
|
||||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start);
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start);
|
||||||
for ( int i = 0; i < currentHeaderLength; i++ )
|
for ( int i = 0; i < currentHeaderLength; i++ )
|
||||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue