Fully stranded implementation of RR (plus bug fix for insertions and het compression).

Now only filtered reads are unstranded.  All consensus reads have strand, so that we
emit 2 consensus reads in general now: one for each strand.

This involved some refactoring of the sliding window which cleaned it up a lot.

Also included is a bug fix:
insertions downstream of a variant region weren't triggering a stop to the compression.
This commit is contained in:
Eric Banks 2013-07-21 23:35:44 -04:00
parent e5aab22680
commit 5dfa863caa
6 changed files with 292 additions and 399 deletions

View File

@ -59,7 +59,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
* out due to mapping or base quality. * out due to mapping or base quality.
*/ */
public class HeaderElement { public class HeaderElement {
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site. private BaseAndQualsCounts positiveConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
private BaseAndQualsCounts negativeConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site. private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site.
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
private int location; // Genome location of this site (the sliding window knows which contig we're at private int location; // Genome location of this site (the sliding window knows which contig we're at
@ -70,14 +71,20 @@ public class HeaderElement {
return location; return location;
} }
public BaseAndQualsCounts getFilteredBaseCounts() { /**
* Get the base counts object for the consensus type
*
* @param consensusType the type to use
* @return non-null base counts
*/
public BaseAndQualsCounts getBaseCounts(final SlidingWindow.ConsensusType consensusType) {
if ( consensusType == SlidingWindow.ConsensusType.POSITIVE_CONSENSUS )
return positiveConsensusBaseCounts;
if ( consensusType == SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS )
return negativeConsensusBaseCounts;
return filteredBaseCounts; return filteredBaseCounts;
} }
public BaseAndQualsCounts getConsensusBaseCounts() {
return consensusBaseCounts;
}
/** /**
* Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty * Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty
* filteredBaseCounts - 0 insertions to the right - empty mappingQuality list * filteredBaseCounts - 0 insertions to the right - empty mappingQuality list
@ -85,7 +92,7 @@ public class HeaderElement {
* @param location the reference location for the new element * @param location the reference location for the new element
*/ */
public HeaderElement(final int location) { public HeaderElement(final int location) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location); this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
} }
/** /**
@ -95,20 +102,22 @@ public class HeaderElement {
* @param location the reference location for the new element * @param location the reference location for the new element
*/ */
public HeaderElement(final int location, final int insertionsToTheRight) { public HeaderElement(final int location, final int insertionsToTheRight) {
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location); this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
} }
/** /**
* Creates a new HeaderElement with all given parameters * Creates a new HeaderElement with all given parameters
* *
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read * @param positiveConsensusBaseCounts the BaseCounts object for the running positive consensus synthetic read
* @param negativeConsensusBaseCounts the BaseCounts object for the running negative consensus synthetic read
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
* @param insertionsToTheRight number of insertions to the right of this HeaderElement * @param insertionsToTheRight number of insertions to the right of this HeaderElement
* @param location the reference location of this reference element * @param location the reference location of this reference element
* HeaderElement * HeaderElement
*/ */
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) { public HeaderElement(final BaseAndQualsCounts positiveConsensusBaseCounts, final BaseAndQualsCounts negativeConsensusBaseCounts, final BaseAndQualsCounts filteredBaseCounts, final int insertionsToTheRight, final int location) {
this.consensusBaseCounts = consensusBaseCounts; this.positiveConsensusBaseCounts = positiveConsensusBaseCounts;
this.negativeConsensusBaseCounts = negativeConsensusBaseCounts;
this.filteredBaseCounts = filteredBaseCounts; this.filteredBaseCounts = filteredBaseCounts;
this.insertionsToTheRight = insertionsToTheRight; this.insertionsToTheRight = insertionsToTheRight;
this.location = location; this.location = location;
@ -124,7 +133,8 @@ public class HeaderElement {
* @return true if site is variant by any definition. False otherwise. * @return true if site is variant by any definition. False otherwise.
*/ */
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) { public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); return ( hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) )
&& (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
} }
/** /**
@ -138,13 +148,18 @@ public class HeaderElement {
* @param minBaseQual the minimum base qual allowed to be a good base * @param minBaseQual the minimum base qual allowed to be a good base
* @param minMappingQual the minimum mapping qual allowed to be a good read * @param minMappingQual the minimum mapping qual allowed to be a good read
* @param isSoftClipped true if the base is soft-clipped in the original read * @param isSoftClipped true if the base is soft-clipped in the original read
* @param isNegativeStrand true if the base comes from a read on the negative strand
*/ */
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { public void addBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual ) if ( baseMappingQuality >= minMappingQual ) {
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); if ( isNegativeStrand )
else negativeConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else
positiveConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
} else {
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
}
} }
/** /**
@ -158,14 +173,20 @@ public class HeaderElement {
* @param minBaseQual the minimum base qual allowed to be a good base * @param minBaseQual the minimum base qual allowed to be a good base
* @param minMappingQual the minimum mapping qual allowed to be a good read * @param minMappingQual the minimum mapping qual allowed to be a good read
* @param isSoftClipped true if the base is soft-clipped in the original read * @param isSoftClipped true if the base is soft-clipped in the original read
* @param isNegativeStrand true if the base comes from a read on the negative strand
*/ */
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { public void removeBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
if ( baseMappingQuality >= minMappingQual ) if ( baseMappingQuality >= minMappingQual ) {
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); if ( isNegativeStrand )
else negativeConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
else
positiveConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
} else {
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
}
} }
/** /**
* Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions * Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions
* should be added to the right of the element. * should be added to the right of the element.
@ -177,19 +198,11 @@ public class HeaderElement {
/** /**
* Does this HeaderElement contain consensus data? * Does this HeaderElement contain consensus data?
* *
* @param consensusType the type to use
* @return whether or not this HeaderElement contains consensus data * @return whether or not this HeaderElement contains consensus data
*/ */
public boolean hasConsensusData() { public boolean hasConsensusData(final SlidingWindow.ConsensusType consensusType) {
return consensusBaseCounts.totalCount() > 0; return getBaseCounts(consensusType).totalCount() > 0;
}
/**
* Does this HeaderElement contain filtered data?
*
* @return whether or not this HeaderElement contains filtered data
*/
public boolean hasFilteredData() {
return filteredBaseCounts.totalCount() > 0;
} }
/** /**
@ -198,7 +211,7 @@ public class HeaderElement {
* @return whether or not this HeaderElement has no data * @return whether or not this HeaderElement has no data
*/ */
public boolean isEmpty() { public boolean isEmpty() {
return (!hasFilteredData() && !hasConsensusData()); return !hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.FILTERED);
} }
/** /**
@ -224,7 +237,7 @@ public class HeaderElement {
* @return whether or not the HeaderElement is variant due to excess insertions * @return whether or not the HeaderElement is variant due to excess insertions
*/ */
private boolean isVariantFromInsertions(double minIndelProportion) { private boolean isVariantFromInsertions(double minIndelProportion) {
final int numberOfBases = consensusBaseCounts.totalCount(); final int numberOfBases = totalCountForBothStrands();
if (numberOfBases == 0) if (numberOfBases == 0)
return (insertionsToTheRight > 0); // do we only have insertions? return (insertionsToTheRight > 0); // do we only have insertions?
@ -232,13 +245,18 @@ public class HeaderElement {
return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion; return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion;
} }
private int totalCountForBothStrands() {
return positiveConsensusBaseCounts.totalCount() + negativeConsensusBaseCounts.totalCount();
}
/** /**
* Whether or not the HeaderElement is variant due to excess deletions * Whether or not the HeaderElement is variant due to excess deletions
* *
* @return whether or not the HeaderElement is variant due to excess deletions * @return whether or not the HeaderElement is variant due to excess deletions
*/ */
private boolean isVariantFromDeletions(double minIndelProportion) { private boolean isVariantFromDeletions(double minIndelProportion) {
return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion; return positiveConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || positiveConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion
|| negativeConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || negativeConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
} }
/** /**
@ -249,9 +267,23 @@ public class HeaderElement {
* @return whether or not the HeaderElement is variant due to excess mismatches * @return whether or not the HeaderElement is variant due to excess mismatches
*/ */
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) { protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
final int totalCount = consensusBaseCounts.totalCountWithoutIndels(); return isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ||
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon); }
/**
* Whether or not the HeaderElement is variant due to excess mismatches
*
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
* @param consensusType the consensus type to use
* @return whether or not the HeaderElement is variant due to excess mismatches
*/
private boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion, final SlidingWindow.ConsensusType consensusType) {
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
final int totalCount = baseAndQualsCounts.totalCountWithoutIndels();
final BaseIndex mostCommon = baseAndQualsCounts.baseIndexWithMostProbabilityWithoutIndels();
final int countOfOtherBases = totalCount - baseAndQualsCounts.countOfBase(mostCommon);
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion); return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
} }
@ -262,8 +294,20 @@ public class HeaderElement {
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches. * @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
*/ */
protected boolean isVariantFromSoftClips() { protected boolean isVariantFromSoftClips() {
final int nSoftClippedBases = consensusBaseCounts.nSoftclips(); return isVariantFromSoftClips(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || isVariantFromSoftClips(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases); }
/**
* This handles the special case where we have more bases that came from soft clips than bases that came from
* normal bases by forcing it to become a variant region. We don't want a consensus based on too little information.
*
* @param consensusType the consensus type to use
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
*/
private boolean isVariantFromSoftClips(final SlidingWindow.ConsensusType consensusType) {
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
final int nSoftClippedBases = baseAndQualsCounts.nSoftclips();
return nSoftClippedBases > 0 && nSoftClippedBases >= (baseAndQualsCounts.totalCount() - nSoftClippedBases);
} }
/** /**
@ -287,9 +331,9 @@ public class HeaderElement {
*/ */
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) { public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
// make sure we have bases at all // make sure we have bases at all
final int totalBaseCount = consensusBaseCounts.totalCount(); final int totalBaseCount = totalCountForBothStrands();
if ( totalBaseCount == 0 ) if ( totalBaseCount == 0 )
return new ObjectArrayList<BaseIndex>(0); return new ObjectArrayList<>(0);
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount // next, check for insertions; technically, the insertion count can be greater than totalBaseCount
// (because of the way insertions are counted), so we need to account for that // (because of the way insertions are counted), so we need to account for that
@ -297,9 +341,9 @@ public class HeaderElement {
return null; return null;
// finally, check for the bases themselves (including deletions) // finally, check for the bases themselves (including deletions)
final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<BaseIndex>(4); final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<>(4);
for ( final BaseIndex base : BaseIndex.values() ) { for ( final BaseIndex base : BaseIndex.values() ) {
final int baseCount = consensusBaseCounts.countOfBase(base); final int baseCount = positiveConsensusBaseCounts.countOfBase(base) + negativeConsensusBaseCounts.countOfBase(base);
if ( baseCount == 0 ) if ( baseCount == 0 )
continue; continue;
@ -320,7 +364,7 @@ public class HeaderElement {
* @return true if there are significant softclips, false otherwise * @return true if there are significant softclips, false otherwise
*/ */
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) { public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion); return hasSignificantCount(positiveConsensusBaseCounts.nSoftclips() + negativeConsensusBaseCounts.nSoftclips(), totalCountForBothStrands(), minVariantPvalue, minVariantProportion);
} }
/* /*

View File

@ -59,7 +59,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -87,12 +86,10 @@ public class SlidingWindow {
protected int downsampleCoverage; protected int downsampleCoverage;
// Running consensus data // Running consensus data
protected SyntheticRead runningConsensus;
protected int consensusCounter; protected int consensusCounter;
protected String consensusReadName; protected String consensusReadName;
// Filtered Data Consensus data // Filtered Data Consensus data
protected SyntheticRead filteredDataConsensus;
protected int filteredDataConsensusCounter; protected int filteredDataConsensusCounter;
protected String filteredDataReadName; protected String filteredDataReadName;
@ -109,12 +106,12 @@ public class SlidingWindow {
private static CompressionStash emptyRegions = new CompressionStash(); private static CompressionStash emptyRegions = new CompressionStash();
/** /**
* The types of synthetic reads to use in the finalizeAndAdd method * The types of synthetic reads
*/ */
private enum ConsensusType { protected enum ConsensusType {
CONSENSUS, POSITIVE_CONSENSUS,
FILTERED, NEGATIVE_CONSENSUS,
BOTH FILTERED
} }
public int getStopLocation() { public int getStopLocation() {
@ -144,9 +141,9 @@ public class SlidingWindow {
contextSize = 10; contextSize = 10;
this.windowHeader = new LinkedList<HeaderElement>(); this.windowHeader = new LinkedList<>();
windowHeader.addFirst(new HeaderElement(startLocation)); windowHeader.addFirst(new HeaderElement(startLocation));
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() { this.readsInWindow = new PriorityQueue<>(100, new Comparator<GATKSAMRecord>() {
@Override @Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
return read1.getSoftEnd() - read2.getSoftEnd(); return read1.getSoftEnd() - read2.getSoftEnd();
@ -168,8 +165,8 @@ public class SlidingWindow {
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual; this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
this.MIN_MAPPING_QUALITY = minMappingQuality; this.MIN_MAPPING_QUALITY = minMappingQuality;
this.windowHeader = new LinkedList<HeaderElement>(); this.windowHeader = new LinkedList<>();
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() { this.readsInWindow = new PriorityQueue<>(1000, new Comparator<GATKSAMRecord>() {
@Override @Override
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
return read1.getSoftEnd() - read2.getSoftEnd(); return read1.getSoftEnd() - read2.getSoftEnd();
@ -187,9 +184,6 @@ public class SlidingWindow {
this.filteredDataConsensusCounter = 0; this.filteredDataConsensusCounter = 0;
this.filteredDataReadName = "Filtered-" + windowNumber + "-"; this.filteredDataReadName = "Filtered-" + windowNumber + "-";
this.runningConsensus = null;
this.filteredDataConsensus = null;
this.downsampleStrategy = downsampleStrategy; this.downsampleStrategy = downsampleStrategy;
this.hasIndelQualities = hasIndelQualities; this.hasIndelQualities = hasIndelQualities;
} }
@ -209,7 +203,9 @@ public class SlidingWindow {
@Ensures("result != null") @Ensures("result != null")
public CompressionStash addRead(GATKSAMRecord read) { public CompressionStash addRead(GATKSAMRecord read) {
addToHeader(windowHeader, read); // update the window header counts addToHeader(windowHeader, read); // update the window header counts
readsInWindow.add(read); // add read to sliding reads // no need to track low mapping quality reads
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY )
readsInWindow.add(read); // add read to sliding reads
return slideWindow(read.getUnclippedStart()); return slideWindow(read.getUnclippedStart());
} }
@ -296,7 +292,7 @@ public class SlidingWindow {
} }
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) { while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
readsInWindow.poll(); readsInWindow.poll();
} }
return regions; return regions;
@ -413,280 +409,83 @@ public class SlidingWindow {
* *
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
* *
* @param header the window header * @param header the header to use
* @param start the first header index to add to consensus * @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus * @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having * @param consensusType the consensus type to use
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated. * @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
*/ */
@Requires({"start >= 0 && (end >= start || end == 0)"}) @Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null") @Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) { protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final ConsensusType consensusType) {
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>(); final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
if ( start < end ) { SyntheticRead consensus = null;
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start); final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
boolean wasInConsensus = false;
if (!headerElementIterator.hasNext()) for ( int currentPosition = start; currentPosition < end; currentPosition++ ) {
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
HeaderElement headerElement = headerElementIterator.next(); if ( ! headerElementIterator.hasNext() )
throw new IllegalStateException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, windowHeader.size(), end));
final HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData()) { if ( headerElement.hasConsensusData(consensusType) ) {
wasInConsensus = true;
// find the end of the consecutive consensus data in the window // add to running consensus
final int endOfConsensus = findNextNonConsensusElement(header, start, end); if ( consensus == null )
if (endOfConsensus <= start) consensus = createNewConsensus(consensusType, headerElement.getLocation());
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
// add to running consensus and recurse genericAddBaseToConsensus(consensus, headerElement.getBaseCounts(consensusType));
addToRunningConsensus(header, start, endOfConsensus, strandType);
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
} else { } else {
// add any outstanding consensus data // add any outstanding consensus data
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); if ( wasInConsensus ) {
reads.addAll(finalizeAndAdd(consensus, consensusType));
consensus = null;
}
// find the end of the consecutive empty data in the window wasInConsensus = false;
final int endOfEmptyData = findNextConsensusElement(header, start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
} }
} }
// add any outstanding consensus data
reads.addAll(finalizeAndAdd(consensus, consensusType));
return reads; return reads;
} }
private SyntheticRead createNewConsensus(final ConsensusType consensusType, final int start) {
if ( consensusType == ConsensusType.FILTERED )
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, start, hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, start, hasIndelQualities, consensusType == ConsensusType.POSITIVE_CONSENSUS ? SyntheticRead.StrandType.POSITIVE : SyntheticRead.StrandType.NEGATIVE);
}
/** /**
* Finalizes one or more synthetic reads. * Finalizes a synthetic read.
* *
* @param consensus the consensus to finalize
* @param type the synthetic reads you want to close * @param type the synthetic reads you want to close
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads * @return a possibly empty list of GATKSAMRecords generated by finalizing the synthetic reads
*/ */
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) { private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final SyntheticRead consensus, final ConsensusType type) {
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>(); final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<>();
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) { final GATKSAMRecord read;
final GATKSAMRecord read = finalizeRunningConsensus(); if ( type == ConsensusType.FILTERED )
if ( read != null ) read = finalizeFilteredDataConsensus(consensus);
list.add(read); else
} read = finalizeRunningConsensus(consensus);
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) { if ( read != null )
final GATKSAMRecord read = finalizeFilteredDataConsensus(); list.add(read);
if ( read != null )
list.add(read);
}
return list; return list;
} }
/**
* Looks for the next position without consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (!headerElementIterator.next().hasConsensusData())
break;
index++;
}
return index;
}
/**
* Looks for the next position witho consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (headerElementIterator.next().hasConsensusData())
break;
index++;
}
return index;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
* bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @param strandType the strandedness that the synthetic read should be represented as having
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
if (runningConsensus == null)
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData())
throw new ReviewedStingException("No CONSENSUS data in " + index);
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
}
}
/**
* Adds bases to the running filtered data accordingly
*
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
if ( start < end ) {
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasFilteredData()) {
// find the end of the consecutive filtered data in the window
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
if (endOfFiltered <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
// add to running filtered consensus and recurse
addToFilteredData(header, start, endOfFiltered);
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
} else {
// add any outstanding filtered data
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
// find the end of the consecutive empty data in the window
final int endOfEmptyData = findNextFilteredElement(header, start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
// recurse out of the empty region
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
}
}
return reads;
}
/**
* Looks for the next position without consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (!headerElementIterator.next().hasFilteredData())
break;
index++;
}
return index;
}
/**
* Looks for the next position witho consensus data
*
* @param header the header to check
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
if (headerElementIterator.next().hasFilteredData())
break;
index++;
}
return index;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
*
* @param header the window header
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
*/
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
if (filteredDataConsensus == null)
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
final HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasFilteredData())
throw new ReviewedStingException("No filtered data in " + index);
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
}
}
/** /**
* Generic accessor to add base and qualities to a synthetic read * Generic accessor to add base and qualities to a synthetic read
* *
@ -734,7 +533,7 @@ public class SlidingWindow {
final int refStart = windowHeader.get(start).getLocation(); final int refStart = windowHeader.get(start).getLocation();
final int refStop = windowHeader.get(stop).getLocation(); final int refStop = windowHeader.get(stop).getLocation();
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
for ( final GATKSAMRecord read : readsInWindow ) { for ( final GATKSAMRecord read : readsInWindow ) {
if ( read.getSoftStart() <= refStop ) { if ( read.getSoftStart() <= refStop ) {
if ( read.getAlignmentEnd() >= refStart ) { if ( read.getAlignmentEnd() >= refStart ) {
@ -814,7 +613,7 @@ public class SlidingWindow {
continue; continue;
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) || if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 ) headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) != 1 )
return true; return true;
} }
@ -836,13 +635,26 @@ public class SlidingWindow {
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS)); result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
return result; // finalized reads will be downsampled if necessary return result; // finalized reads will be downsampled if necessary
} }
/**
* Adds reads for all possible strands (positive, negative, filtered) from the global windowHeader object
*
* @param start the start position (inclusive)
* @param end the end position (exclusive)
* @return non-null but possibly empty array list with reduced reads
*/
private ObjectArrayList<GATKSAMRecord> addAllSyntheticReadTypes(final int start, final int end) {
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.POSITIVE_CONSENSUS));
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.NEGATIVE_CONSENSUS));
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.FILTERED));
return reads;
}
/* /*
* @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false * @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false
*/ */
@ -851,7 +663,7 @@ public class SlidingWindow {
} }
private static final class CloseVariantRegionResult { private static final class CloseVariantRegionResult {
final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>(); final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
private int stopPerformed; private int stopPerformed;
public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; } public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; }
@ -866,7 +678,7 @@ public class SlidingWindow {
* @return a non-null set of reduced reads representing the finalized regions * @return a non-null set of reduced reads representing the finalized regions
*/ */
public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) { public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) {
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator()); final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
if ( !regions.isEmpty() ) { if ( !regions.isEmpty() ) {
int windowHeaderStart = getStartLocation(windowHeader); int windowHeaderStart = getStartLocation(windowHeader);
@ -945,9 +757,9 @@ public class SlidingWindow {
if (downsampleCoverage >= nReads) if (downsampleCoverage >= nReads)
return allReads; return allReads;
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<GATKSAMRecord>(downsampleCoverage); ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<>(downsampleCoverage);
downsampler.submit(allReads); downsampler.submit(allReads);
return new ObjectArrayList<GATKSAMRecord>(downsampler.consumeFinalizedItems()); return new ObjectArrayList<>(downsampler.consumeFinalizedItems());
} }
@ -962,7 +774,7 @@ public class SlidingWindow {
@Ensures("result != null") @Ensures("result != null")
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close(final ObjectSortedSet<GenomeLoc> knownSnpPositions) { public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close(final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
// mark variant regions // mark variant regions
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator()); ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
CompressionStash regions = new CompressionStash(); CompressionStash regions = new CompressionStash();
if (!windowHeader.isEmpty()) { if (!windowHeader.isEmpty()) {
@ -970,48 +782,45 @@ public class SlidingWindow {
regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true); regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true);
finalizedReads = closeVariantRegions(regions, knownSnpPositions, true); finalizedReads = closeVariantRegions(regions, knownSnpPositions, true);
if (!windowHeader.isEmpty()) { if (!windowHeader.isEmpty())
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS)); finalizedReads.addAll(addAllSyntheticReadTypes(0, windowHeader.size()));
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
}
} }
return new Pair<ObjectSet<GATKSAMRecord>, CompressionStash>(finalizedReads, regions); return new Pair<>(finalizedReads, regions);
} }
/** /**
* generates the SAM record for the running consensus read and resets it (to null) * generates the SAM record for the running consensus read and resets it (to null)
* *
* @param runningConsensus the consensus to finalize
* @return the read contained in the running consensus or null * @return the read contained in the running consensus or null
*/ */
protected GATKSAMRecord finalizeRunningConsensus() { protected GATKSAMRecord finalizeRunningConsensus(final SyntheticRead runningConsensus) {
GATKSAMRecord finalizedRead = null; GATKSAMRecord finalizedRead = null;
if (runningConsensus != null) {
if (runningConsensus.size() > 0) if ( runningConsensus != null ) {
if ( runningConsensus.size() > 0 )
finalizedRead = runningConsensus.close(); finalizedRead = runningConsensus.close();
else else
consensusCounter--; consensusCounter--;
runningConsensus = null;
} }
return finalizedRead; return finalizedRead;
} }
/** /**
* generates the SAM record for the filtered data consensus and resets it (to null) * generates the SAM record for the filtered data consensus and resets it (to null)
* *
* @param filteredDataConsensus the consensus to finalize
* @return the read contained in the running consensus or null * @return the read contained in the running consensus or null
*/ */
protected GATKSAMRecord finalizeFilteredDataConsensus() { protected GATKSAMRecord finalizeFilteredDataConsensus(final SyntheticRead filteredDataConsensus) {
GATKSAMRecord finalizedRead = null; GATKSAMRecord finalizedRead = null;
if (filteredDataConsensus != null) { if (filteredDataConsensus != null) {
if (filteredDataConsensus.size() > 0) if (filteredDataConsensus.size() > 0)
finalizedRead = filteredDataConsensus.close(); finalizedRead = filteredDataConsensus.close();
else else
filteredDataConsensusCounter--; filteredDataConsensusCounter--;
filteredDataConsensus = null;
} }
return finalizedRead; return finalizedRead;
} }
@ -1021,7 +830,7 @@ public class SlidingWindow {
private final static class SingleStrandConsensusData { private final static class SingleStrandConsensusData {
final HeaderElementList consensus = new HeaderElementList(); final HeaderElementList consensus = new HeaderElementList();
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
} }
/** /**
@ -1042,6 +851,7 @@ public class SlidingWindow {
// initialize the mapping from base (allele) to header // initialize the mapping from base (allele) to header
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2); final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
alleleHeaderMap.defaultReturnValue(-1);
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) { for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
final int currentIndex = alleleHeaderMap.size(); final int currentIndex = alleleHeaderMap.size();
if ( currentIndex > 1 ) if ( currentIndex > 1 )
@ -1056,7 +866,7 @@ public class SlidingWindow {
if ( alleleHeaderMap.size() != 2 ) if ( alleleHeaderMap.size() != 2 )
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size()); throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<>();
for ( final GATKSAMRecord read : readsInWindow ) { for ( final GATKSAMRecord read : readsInWindow ) {
@ -1081,10 +891,10 @@ public class SlidingWindow {
final byte base = read.getReadBases()[readPosOfHet]; final byte base = read.getReadBases()[readPosOfHet];
// check which allele this read represents // check which allele this read represents
final Integer allele = alleleHeaderMap.get(base); final int allele = alleleHeaderMap.get(base);
// ignore the read if it represents a base that's not part of the consensus // ignore the read if it represents a base that's not part of the consensus
if ( allele != null ) { if ( allele != -1 ) {
// add to the appropriate polyploid header // add to the appropriate polyploid header
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele]; final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
header.reads.add(read); header.reads.add(read);
@ -1096,7 +906,7 @@ public class SlidingWindow {
readsInWindow.remove(read); readsInWindow.remove(read);
// create the polyploid synthetic reads if we can // create the polyploid synthetic reads if we can
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>(); final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<>();
// sanity check that no new "variant region" exists on just a single consensus strand due to softclips // sanity check that no new "variant region" exists on just a single consensus strand due to softclips
// or multi-allelic sites now that we've broken everything out into their component parts. if one does // or multi-allelic sites now that we've broken everything out into their component parts. if one does
@ -1125,10 +935,12 @@ public class SlidingWindow {
* @param result list in which to store results * @param result list in which to store results
*/ */
protected void finalizeHetConsensus(final LinkedList<HeaderElement> header, final boolean isNegativeStrand, final ObjectList<GATKSAMRecord> result) { protected void finalizeHetConsensus(final LinkedList<HeaderElement> header, final boolean isNegativeStrand, final ObjectList<GATKSAMRecord> result) {
if ( header.size() > 0 ) if ( header.size() > 0 ) {
result.addAll(addToSyntheticReads(header, 0, header.size(), isNegativeStrand ? SyntheticRead.StrandType.NEGATIVE : SyntheticRead.StrandType.POSITIVE)); if ( isNegativeStrand )
if ( runningConsensus != null ) result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.NEGATIVE_CONSENSUS));
result.add(finalizeRunningConsensus()); else
result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.POSITIVE_CONSENSUS));
}
} }
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) { private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
@ -1222,6 +1034,7 @@ public class SlidingWindow {
final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex); final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex);
final byte mappingQuality = (byte) read.getMappingQuality(); final byte mappingQuality = (byte) read.getMappingQuality();
final boolean isNegativeStrand = read.getReadNegativeStrandFlag();
// iterator variables // iterator variables
int locationIndex = startIndex; int locationIndex = startIndex;
@ -1254,9 +1067,9 @@ public class SlidingWindow {
for ( int i = 0; i < nDeletionBases; i++ ) { for ( int i = 0; i < nDeletionBases; i++ ) {
headerElement = headerElementIterator.next(); headerElement = headerElementIterator.next();
if (removeRead) if (removeRead)
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
else else
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
} }
locationIndex += nDeletionBases; locationIndex += nDeletionBases;
break; break;
@ -1279,9 +1092,9 @@ public class SlidingWindow {
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1; final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
if ( removeRead ) if ( removeRead )
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
else else
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
readBaseIndex++; readBaseIndex++;
} }

View File

@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -133,6 +132,11 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
return reportLocus ? ref.getLocus() : null; return reportLocus ? ref.getLocus() : null;
} }
/**
* Get the quals separated by version and strand
* @param readPileup the pileup
* @return 2x2 array with sum of quals separated by version in 1st dimension and strand in the 2nd
*/
private int[] getPileupQuals(final ReadBackedPileup readPileup) { private int[] getPileupQuals(final ReadBackedPileup readPileup) {
final int[] quals = new int[2]; final int[] quals = new int[2];

View File

@ -97,15 +97,15 @@ public class HeaderElementUnitTest extends BaseTest {
HeaderElement headerElement = new HeaderElement(1000, 0); HeaderElement headerElement = new HeaderElement(1000, 0);
// first test that if we add and then remove it, we have no data // first test that if we add and then remove it, we have no data
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
headerElement.addInsertionToTheRight(); headerElement.addInsertionToTheRight();
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
headerElement.removeInsertionToTheRight(); headerElement.removeInsertionToTheRight();
testHeaderIsEmpty(headerElement); testHeaderIsEmpty(headerElement);
// now, test that the data was added as expected // now, test that the data was added as expected
for ( int i = 0; i < 10; i++ ) for ( int i = 0; i < 10; i++ )
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
testHeaderData(headerElement, test); testHeaderData(headerElement, test);
// test the insertion adding functionality // test the insertion adding functionality
@ -115,8 +115,8 @@ public class HeaderElementUnitTest extends BaseTest {
} }
private void testHeaderIsEmpty(final HeaderElement headerElement) { private void testHeaderIsEmpty(final HeaderElement headerElement) {
Assert.assertFalse(headerElement.hasConsensusData()); Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS));
Assert.assertFalse(headerElement.hasFilteredData()); Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED));
Assert.assertFalse(headerElement.hasInsertionToTheRight()); Assert.assertFalse(headerElement.hasInsertionToTheRight());
Assert.assertTrue(headerElement.isEmpty()); Assert.assertTrue(headerElement.isEmpty());
} }
@ -125,9 +125,9 @@ public class HeaderElementUnitTest extends BaseTest {
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip); Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
Assert.assertFalse(headerElement.isEmpty()); Assert.assertFalse(headerElement.isEmpty());
Assert.assertFalse(headerElement.hasInsertionToTheRight()); Assert.assertFalse(headerElement.hasInsertionToTheRight());
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual); Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS), test.MQ >= minMappingQual);
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual); Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED), test.MQ < minMappingQual);
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ); Assert.assertEquals(headerElement.getBaseCounts(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ? SlidingWindow.ConsensusType.POSITIVE_CONSENSUS : SlidingWindow.ConsensusType.FILTERED).getRMS(), (double)test.MQ);
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05)); Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip); Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
} }
@ -145,7 +145,7 @@ public class HeaderElementUnitTest extends BaseTest {
@DataProvider(name = "alleles") @DataProvider(name = "alleles")
public Object[][] createAllelesData() { public Object[][] createAllelesData() {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<>();
final int[] counts = new int[]{ 0, 5, 10, 15, 20 }; final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 }; final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 };
@ -174,7 +174,7 @@ public class HeaderElementUnitTest extends BaseTest {
for ( int i = 0; i < test.counts.length; i++ ) { for ( int i = 0; i < test.counts.length; i++ ) {
final BaseIndex base = BaseIndex.values()[i]; final BaseIndex base = BaseIndex.values()[i];
for ( int j = 0; j < test.counts[i]; j++ ) for ( int j = 0; j < test.counts[i]; j++ )
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false); headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false, false);
} }
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue); final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);

View File

@ -158,44 +158,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testDefaultCompression() { public void testDefaultCompression() {
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false); RRTest("testDefaultCompression ", L, "0e503f7b79ace4c89d74f0943a0de1c0", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testDefaultCompressionWithKnowns() { public void testDefaultCompressionWithKnowns() {
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true); RRTest("testDefaultCompressionWithKnowns ", L, "6db7ce2733d006f8bd61c42a40d23728", true);
} }
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
@Test(enabled = true) @Test(enabled = true)
public void testMultipleIntervals() { public void testMultipleIntervals() {
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false); RRTest("testMultipleIntervals ", intervals, "207f2c6d3db956e19412a45a231ca367", false, "043b2838c27d8f9580379b54c18ff40a");
} }
@Test(enabled = true) @Test(enabled = true)
public void testMultipleIntervalsWithKnowns() { public void testMultipleIntervalsWithKnowns() {
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true); RRTest("testMultipleIntervalsWithKnowns ", intervals, "f3b11a8a7673b301e27137936fafc6b6", true, "043b2838c27d8f9580379b54c18ff40a");
} }
@Test(enabled = true) @Test(enabled = true)
public void testHighCompression() { public void testHighCompression() {
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false); RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "dcc3716b3665aa1c2dbe6b22d6534aef", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testHighCompressionWithKnowns() { public void testHighCompressionWithKnowns() {
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true); RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "97ae655bf0e483ea227b1aac67ced024", true);
} }
@Test(enabled = true) @Test(enabled = true)
public void testLowCompression() { public void testLowCompression() {
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false); RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a1377eb922e0b09a03a280b691b0b3ff", false);
} }
@Test(enabled = true) @Test(enabled = true)
public void testLowCompressionWithKnowns() { public void testLowCompressionWithKnowns() {
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true); RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "bd7c5b0b210694f364ca6a41f5b89870", true);
} }
@Test(enabled = true) @Test(enabled = true)
@ -207,7 +207,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testIndelCompression() { public void testIndelCompression() {
final String md5 = "d20e6012300898a0315c795cab7583d8"; final String md5 = "9c9305eda5e4e7f22246ec8a4b242c97";
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false); RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true); RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
} }
@ -215,27 +215,25 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testFilteredDeletionCompression() { public void testFilteredDeletionCompression() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6"); executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1bda512143be1016dfaca1f7020b6398")), "4f916da29d91852077f0a2fdbdd2c7f6");
} }
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
@Test(enabled = true) @Test(enabled = true)
public void testCoReduction() { public void testCoReduction() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5); executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
} }
@Test(enabled = true) @Test(enabled = true)
public void testCoReductionWithKnowns() { public void testCoReductionWithKnowns() {
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5); executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
} }
@Test(enabled = true) @Test(enabled = true)
public void testInsertionsAtEdgeOfConsensus() { public void testInsertionsAtEdgeOfConsensus() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804"))); executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c10653a8c21fb32b5cf580d3704b0edd")));
} }
/** /**
@ -249,7 +247,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testAddingReadAfterTailingTheStash() { public void testAddingReadAfterTailingTheStash() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fddbec29d0945afbbb34b42994614c15")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
} }
/** /**
@ -260,7 +258,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() { public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests // we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377"))); executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
} }
/** /**
@ -270,7 +268,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true) @Test(enabled = true)
public void testReadOffContig() { public void testReadOffContig() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248"))); executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("595e5812c37189930cae93e45765def4")));
} }
/** /**
@ -280,7 +278,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testPairedReadsInVariantRegion() { public void testPairedReadsInVariantRegion() {
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) + String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
" -o %s --downsample_coverage 250 -dcov 50 "; " -o %s --downsample_coverage 250 -dcov 50 ";
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9"); executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b005727119eee27995705959a637085e")), "2af063d1bd3c322b03405dbb3ecf59a9");
} }
/** /**

View File

@ -318,7 +318,7 @@ public class SlidingWindowUnitTest extends BaseTest {
this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReads = expectedNumberOfReads;
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); this.description = String.format("%d %d %d %b %b", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage, readsShouldBeLowQuality, variantBaseShouldBeLowQuality);
// first, add the basic reads to the collection // first, add the basic reads to the collection
myReads.addAll(basicReads); myReads.addAll(basicReads);
@ -390,40 +390,40 @@ public class SlidingWindowUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>(); List<Object[]> tests = new ArrayList<Object[]>();
// test high quality reads and bases // test high quality reads and bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 11, 8, 7 + DEEP_COVERAGE_ITERATIONS)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 13, 12, 6 + (6 * DEEP_COVERAGE_ITERATIONS))});
// test low quality reads // test low quality reads
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 3, 3, 3)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 3, 3, 3)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 3, 3, 3)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 3, 3, 3)});
// test low quality bases // test low quality bases
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 2, 2, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 2, 2, 2)});
// test mixture // test mixture
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 3, 3, 3)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 2, 2, 2)});
// test I/D operators // test I/D operators
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -517,6 +517,39 @@ public class SlidingWindowUnitTest extends BaseTest {
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
} }
@Test
public void testConsensusCreationForInsertions() {
final int totalNumReads = 7;
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(totalNumReads);
// add reads, one with a SNP and one with a SNP and insertion
for ( int i = 0; i < totalNumReads; i++ ) {
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead" + i, 0, globalStartPosition, readLength);
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
if ( i < 2 )
bases[20] = 'C';
if ( i == 0 )
bases[80] = 'C';
read.setReadBases(bases);
if ( i == 0 )
read.setCigarString("80M1I19M");
myReads.add(read);
}
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
for ( final GATKSAMRecord read : myReads )
slidingWindow.addRead(read);
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
Assert.assertEquals(result.getFirst().size(), 3); // no compression at all for SNPs
}
@Test @Test
public void testAddingReadPairWithSameCoordinates() { public void testAddingReadPairWithSameCoordinates() {
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
@ -739,21 +772,22 @@ public class SlidingWindowUnitTest extends BaseTest {
read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
read.setMappingQuality(30); read.setMappingQuality(30);
read.setReadNegativeStrandFlag(false);
// add the read // add the read
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
for ( int i = 0; i < start; i++ ) for ( int i = 0; i < start; i++ )
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
for ( int i = 0; i < readLength; i++ ) for ( int i = 0; i < readLength; i++ )
Assert.assertEquals(windowHeader.get(start + i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 1); Assert.assertEquals(windowHeader.get(start + i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 1);
for ( int i = start + readLength; i < currentHeaderLength; i++ ) for ( int i = start + readLength; i < currentHeaderLength; i++ )
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
// now remove the read // now remove the read
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start);
for ( int i = 0; i < currentHeaderLength; i++ ) for ( int i = 0; i < currentHeaderLength; i++ )
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
} }
////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////