Fully stranded implementation of RR (plus bug fix for insertions and het compression).
Now only filtered reads are unstranded. All consensus reads have a strand, so in general we now emit two consensus reads: one for each strand. This involved some refactoring of the sliding window, which cleaned it up a lot. Also included is a bug fix: insertions downstream of a variant region weren't triggering a stop to the compression.
This commit is contained in:
parent
e5aab22680
commit
5dfa863caa
|
|
@ -59,7 +59,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
* out due to mapping or base quality.
|
||||
*/
|
||||
public class HeaderElement {
|
||||
private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site.
|
||||
private BaseAndQualsCounts positiveConsensusBaseCounts; // How many A,C,G,T (and D's) from positive-strand reads that pass the mapping quality filter are in this site.
|
||||
private BaseAndQualsCounts negativeConsensusBaseCounts; // How many A,C,G,T (and D's) from negative-strand reads that pass the mapping quality filter are in this site.
|
||||
private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site (mapping quality below the minimum), regardless of strand.
|
||||
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right
|
||||
private int location; // Genome location of this site (the sliding window knows which contig we're at)
|
||||
|
|
@ -70,14 +71,20 @@ public class HeaderElement {
|
|||
return location;
|
||||
}
|
||||
|
||||
public BaseAndQualsCounts getFilteredBaseCounts() {
|
||||
/**
|
||||
* Get the base counts object for the consensus type
|
||||
*
|
||||
* @param consensusType the type to use
|
||||
* @return non-null base counts
|
||||
*/
|
||||
public BaseAndQualsCounts getBaseCounts(final SlidingWindow.ConsensusType consensusType) {
|
||||
// Stranded consensus types each have their own counts object.
if ( consensusType == SlidingWindow.ConsensusType.POSITIVE_CONSENSUS )
|
||||
return positiveConsensusBaseCounts;
|
||||
if ( consensusType == SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS )
|
||||
return negativeConsensusBaseCounts;
|
||||
// Any other value (FILTERED is the only remaining enum constant) falls through to the filtered counts.
return filteredBaseCounts;
|
||||
}
|
||||
|
||||
public BaseAndQualsCounts getConsensusBaseCounts() {
|
||||
return consensusBaseCounts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new HeaderElement with the following default values: - empty positive and negative consensus base counts - empty
|
||||
* filteredBaseCounts - 0 insertions to the right - empty mappingQuality list
|
||||
|
|
@ -85,7 +92,7 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -95,20 +102,22 @@ public class HeaderElement {
|
|||
* @param location the reference location for the new element
|
||||
*/
|
||||
public HeaderElement(final int location, final int insertionsToTheRight) {
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
||||
this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new HeaderElement with all given parameters
|
||||
*
|
||||
* @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read
|
||||
* @param positiveConsensusBaseCounts the BaseCounts object for the running positive consensus synthetic read
|
||||
* @param negativeConsensusBaseCounts the BaseCounts object for the running negative consensus synthetic read
|
||||
* @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read
|
||||
* @param insertionsToTheRight number of insertions to the right of this HeaderElement
|
||||
* @param location the reference location of this reference element
|
||||
* HeaderElement
|
||||
*/
|
||||
public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) {
|
||||
this.consensusBaseCounts = consensusBaseCounts;
|
||||
public HeaderElement(final BaseAndQualsCounts positiveConsensusBaseCounts, final BaseAndQualsCounts negativeConsensusBaseCounts, final BaseAndQualsCounts filteredBaseCounts, final int insertionsToTheRight, final int location) {
|
||||
this.positiveConsensusBaseCounts = positiveConsensusBaseCounts;
|
||||
this.negativeConsensusBaseCounts = negativeConsensusBaseCounts;
|
||||
this.filteredBaseCounts = filteredBaseCounts;
|
||||
this.insertionsToTheRight = insertionsToTheRight;
|
||||
this.location = location;
|
||||
|
|
@ -124,7 +133,8 @@ public class HeaderElement {
|
|||
* @return true if site is variant by any definition. False otherwise.
|
||||
*/
|
||||
public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) {
|
||||
return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||
return ( hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) )
|
||||
&& (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -138,13 +148,18 @@ public class HeaderElement {
|
|||
* @param minBaseQual the minimum base qual allowed to be a good base
|
||||
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
||||
* @param isSoftClipped true if the base is soft-clipped in the original read
|
||||
* @param isNegativeStrand true if the base comes from a read on the negative strand
|
||||
*/
|
||||
public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||
public void addBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
|
||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||
if ( baseMappingQuality >= minMappingQual )
|
||||
consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
if ( baseMappingQuality >= minMappingQual ) {
|
||||
if ( isNegativeStrand )
|
||||
negativeConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
positiveConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
} else {
|
||||
filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -158,14 +173,20 @@ public class HeaderElement {
|
|||
* @param minBaseQual the minimum base qual allowed to be a good base
|
||||
* @param minMappingQual the minimum mapping qual allowed to be a good read
|
||||
* @param isSoftClipped true if the base is soft-clipped in the original read
|
||||
* @param isNegativeStrand true if the base comes from a read on the negative strand
|
||||
*/
|
||||
public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
|
||||
public void removeBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) {
|
||||
// If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts
|
||||
if ( baseMappingQuality >= minMappingQual )
|
||||
consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
if ( baseMappingQuality >= minMappingQual ) {
|
||||
if ( isNegativeStrand )
|
||||
negativeConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
else
|
||||
positiveConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped);
|
||||
} else {
|
||||
filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an insertion to the right of the HeaderElement and updates all counts accordingly. All insertions
|
||||
* should be added to the right of the element.
|
||||
|
|
@ -177,19 +198,11 @@ public class HeaderElement {
|
|||
/**
|
||||
* Does this HeaderElement contain consensus data?
|
||||
*
|
||||
* @param consensusType the type to use
|
||||
* @return whether or not this HeaderElement contains consensus data
|
||||
*/
|
||||
public boolean hasConsensusData() {
|
||||
return consensusBaseCounts.totalCount() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this HeaderElement contain filtered data?
|
||||
*
|
||||
* @return whether or not this HeaderElement contains filtered data
|
||||
*/
|
||||
public boolean hasFilteredData() {
|
||||
return filteredBaseCounts.totalCount() > 0;
|
||||
public boolean hasConsensusData(final SlidingWindow.ConsensusType consensusType) {
|
||||
// True when the counts object selected by getBaseCounts for this type reports a total count above zero.
// Note that passing FILTERED therefore tests the filtered counts, not consensus counts.
return getBaseCounts(consensusType).totalCount() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -198,7 +211,7 @@ public class HeaderElement {
|
|||
* @return whether or not this HeaderElement has no data
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return (!hasFilteredData() && !hasConsensusData());
|
||||
return !hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.FILTERED);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -224,7 +237,7 @@ public class HeaderElement {
|
|||
* @return whether or not the HeaderElement is variant due to excess insertions
|
||||
*/
|
||||
private boolean isVariantFromInsertions(double minIndelProportion) {
|
||||
final int numberOfBases = consensusBaseCounts.totalCount();
|
||||
final int numberOfBases = totalCountForBothStrands();
|
||||
if (numberOfBases == 0)
|
||||
return (insertionsToTheRight > 0); // do we only have insertions?
|
||||
|
||||
|
|
@ -232,13 +245,18 @@ public class HeaderElement {
|
|||
return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion;
|
||||
}
|
||||
|
||||
/**
 * Sums the total counts of the positive-strand and negative-strand consensus base counts.
 * The filtered base counts are not included.
 *
 * @return positiveConsensusBaseCounts.totalCount() + negativeConsensusBaseCounts.totalCount()
 */
private int totalCountForBothStrands() {
|
||||
return positiveConsensusBaseCounts.totalCount() + negativeConsensusBaseCounts.totalCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether or not the HeaderElement is variant due to excess deletions
|
||||
*
|
||||
* @return whether or not the HeaderElement is variant due to excess deletions
|
||||
*/
|
||||
private boolean isVariantFromDeletions(double minIndelProportion) {
|
||||
return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
|
||||
return positiveConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || positiveConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion
|
||||
|| negativeConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || negativeConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -249,9 +267,23 @@ public class HeaderElement {
|
|||
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||
*/
|
||||
protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) {
|
||||
final int totalCount = consensusBaseCounts.totalCountWithoutIndels();
|
||||
final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||
final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon);
|
||||
return isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ||
|
||||
isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether or not the HeaderElement is variant due to excess mismatches
|
||||
*
|
||||
* @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage).
|
||||
* @param minVariantProportion the minimum proportion to call a site variant (used with high coverage).
|
||||
* @param consensusType the consensus type to use
|
||||
* @return whether or not the HeaderElement is variant due to excess mismatches
|
||||
*/
|
||||
private boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion, final SlidingWindow.ConsensusType consensusType) {
|
||||
// Restrict the test to the counts object backing the requested consensus type.
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
|
||||
// Indels are excluded here; they are evaluated by the separate insertion/deletion checks.
final int totalCount = baseAndQualsCounts.totalCountWithoutIndels();
|
||||
final BaseIndex mostCommon = baseAndQualsCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||
// Every non-indel base other than the most common one is treated as a mismatch candidate.
final int countOfOtherBases = totalCount - baseAndQualsCounts.countOfBase(mostCommon);
|
||||
// The significance decision itself is delegated to hasSignificantCount with both thresholds.
return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion);
|
||||
}
|
||||
|
||||
|
|
@ -262,8 +294,20 @@ public class HeaderElement {
|
|||
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||
*/
|
||||
protected boolean isVariantFromSoftClips() {
|
||||
final int nSoftClippedBases = consensusBaseCounts.nSoftclips();
|
||||
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
||||
return isVariantFromSoftClips(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || isVariantFromSoftClips(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS);
|
||||
}
|
||||
|
||||
/**
|
||||
* This handles the special case where, for the given consensus type, at least as many bases came from soft clips as
|
||||
* came from normal bases, by forcing it to become a variant region. We don't want a consensus based on too little information.
|
||||
*
|
||||
* @param consensusType the consensus type to use
|
||||
* @return true if there is at least one soft clipped base and the soft clipped bases are at least as numerous as the remaining bases at this site.
|
||||
*/
|
||||
private boolean isVariantFromSoftClips(final SlidingWindow.ConsensusType consensusType) {
|
||||
final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType);
|
||||
final int nSoftClippedBases = baseAndQualsCounts.nSoftclips();
|
||||
// Require at least one soft clip so an empty site is never variant; ties (>=) count as variant.
return nSoftClippedBases > 0 && nSoftClippedBases >= (baseAndQualsCounts.totalCount() - nSoftClippedBases);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -287,9 +331,9 @@ public class HeaderElement {
|
|||
*/
|
||||
public ObjectArrayList<BaseIndex> getAlleles(final double minVariantPvalue, final double minVariantProportion) {
|
||||
// make sure we have bases at all
|
||||
final int totalBaseCount = consensusBaseCounts.totalCount();
|
||||
final int totalBaseCount = totalCountForBothStrands();
|
||||
if ( totalBaseCount == 0 )
|
||||
return new ObjectArrayList<BaseIndex>(0);
|
||||
return new ObjectArrayList<>(0);
|
||||
|
||||
// next, check for insertions; technically, the insertion count can be greater than totalBaseCount
|
||||
// (because of the way insertions are counted), so we need to account for that
|
||||
|
|
@ -297,9 +341,9 @@ public class HeaderElement {
|
|||
return null;
|
||||
|
||||
// finally, check for the bases themselves (including deletions)
|
||||
final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<BaseIndex>(4);
|
||||
final ObjectArrayList<BaseIndex> alleles = new ObjectArrayList<>(4);
|
||||
for ( final BaseIndex base : BaseIndex.values() ) {
|
||||
final int baseCount = consensusBaseCounts.countOfBase(base);
|
||||
final int baseCount = positiveConsensusBaseCounts.countOfBase(base) + negativeConsensusBaseCounts.countOfBase(base);
|
||||
if ( baseCount == 0 )
|
||||
continue;
|
||||
|
||||
|
|
@ -320,7 +364,7 @@ public class HeaderElement {
|
|||
* @return true if there are significant softclips, false otherwise
|
||||
*/
|
||||
public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) {
|
||||
return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion);
|
||||
return hasSignificantCount(positiveConsensusBaseCounts.nSoftclips() + negativeConsensusBaseCounts.nSoftclips(), totalCountForBothStrands(), minVariantPvalue, minVariantProportion);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -59,7 +59,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -87,12 +86,10 @@ public class SlidingWindow {
|
|||
protected int downsampleCoverage;
|
||||
|
||||
// Running consensus data
|
||||
protected SyntheticRead runningConsensus;
|
||||
protected int consensusCounter;
|
||||
protected String consensusReadName;
|
||||
|
||||
// Filtered Data Consensus data
|
||||
protected SyntheticRead filteredDataConsensus;
|
||||
protected int filteredDataConsensusCounter;
|
||||
protected String filteredDataReadName;
|
||||
|
||||
|
|
@ -109,12 +106,12 @@ public class SlidingWindow {
|
|||
private static CompressionStash emptyRegions = new CompressionStash();
|
||||
|
||||
/**
|
||||
* The types of synthetic reads to use in the finalizeAndAdd method
|
||||
* The types of synthetic reads
|
||||
*/
|
||||
private enum ConsensusType {
|
||||
CONSENSUS,
|
||||
FILTERED,
|
||||
BOTH
|
||||
protected enum ConsensusType {
|
||||
POSITIVE_CONSENSUS,
|
||||
NEGATIVE_CONSENSUS,
|
||||
FILTERED
|
||||
}
|
||||
|
||||
public int getStopLocation() {
|
||||
|
|
@ -144,9 +141,9 @@ public class SlidingWindow {
|
|||
|
||||
contextSize = 10;
|
||||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
this.windowHeader = new LinkedList<>();
|
||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(100, new Comparator<GATKSAMRecord>() {
|
||||
this.readsInWindow = new PriorityQueue<>(100, new Comparator<GATKSAMRecord>() {
|
||||
@Override
|
||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||
|
|
@ -168,8 +165,8 @@ public class SlidingWindow {
|
|||
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
|
||||
this.MIN_MAPPING_QUALITY = minMappingQuality;
|
||||
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
this.readsInWindow = new PriorityQueue<GATKSAMRecord>(1000, new Comparator<GATKSAMRecord>() {
|
||||
this.windowHeader = new LinkedList<>();
|
||||
this.readsInWindow = new PriorityQueue<>(1000, new Comparator<GATKSAMRecord>() {
|
||||
@Override
|
||||
public int compare(GATKSAMRecord read1, GATKSAMRecord read2) {
|
||||
return read1.getSoftEnd() - read2.getSoftEnd();
|
||||
|
|
@ -187,9 +184,6 @@ public class SlidingWindow {
|
|||
this.filteredDataConsensusCounter = 0;
|
||||
this.filteredDataReadName = "Filtered-" + windowNumber + "-";
|
||||
|
||||
this.runningConsensus = null;
|
||||
this.filteredDataConsensus = null;
|
||||
|
||||
this.downsampleStrategy = downsampleStrategy;
|
||||
this.hasIndelQualities = hasIndelQualities;
|
||||
}
|
||||
|
|
@ -209,7 +203,9 @@ public class SlidingWindow {
|
|||
@Ensures("result != null")
|
||||
public CompressionStash addRead(GATKSAMRecord read) {
|
||||
addToHeader(windowHeader, read); // update the window header counts
|
||||
readsInWindow.add(read); // add read to sliding reads
|
||||
// no need to track low mapping quality reads
|
||||
if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY )
|
||||
readsInWindow.add(read); // add read to sliding reads
|
||||
return slideWindow(read.getUnclippedStart());
|
||||
}
|
||||
|
||||
|
|
@ -296,7 +292,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) {
|
||||
readsInWindow.poll();
|
||||
readsInWindow.poll();
|
||||
}
|
||||
|
||||
return regions;
|
||||
|
|
@ -413,280 +409,83 @@ public class SlidingWindow {
|
|||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
* @param header the window header
|
||||
* @param header the header to use
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||
* @param consensusType the consensus type to use
|
||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
protected ObjectArrayList<GATKSAMRecord> addToSyntheticReads(final LinkedList<HeaderElement> header, final int start, final int end, final ConsensusType consensusType) {
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||
|
||||
if ( start < end ) {
|
||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
SyntheticRead consensus = null;
|
||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
boolean wasInConsensus = false;
|
||||
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||
for ( int currentPosition = start; currentPosition < end; currentPosition++ ) {
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
if ( ! headerElementIterator.hasNext() )
|
||||
throw new IllegalStateException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, windowHeader.size(), end));
|
||||
final HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (headerElement.hasConsensusData()) {
|
||||
if ( headerElement.hasConsensusData(consensusType) ) {
|
||||
wasInConsensus = true;
|
||||
|
||||
// find the end of the consecutive consensus data in the window
|
||||
final int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
||||
if (endOfConsensus <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
||||
// add to running consensus
|
||||
if ( consensus == null )
|
||||
consensus = createNewConsensus(consensusType, headerElement.getLocation());
|
||||
|
||||
// add to running consensus and recurse
|
||||
addToRunningConsensus(header, start, endOfConsensus, strandType);
|
||||
reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType));
|
||||
genericAddBaseToConsensus(consensus, headerElement.getBaseCounts(consensusType));
|
||||
|
||||
} else {
|
||||
|
||||
// add any outstanding consensus data
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
||||
if ( wasInConsensus ) {
|
||||
reads.addAll(finalizeAndAdd(consensus, consensusType));
|
||||
consensus = null;
|
||||
}
|
||||
|
||||
// find the end of the consecutive empty data in the window
|
||||
final int endOfEmptyData = findNextConsensusElement(header, start, end);
|
||||
if (endOfEmptyData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||
|
||||
// recurse out of the empty region
|
||||
reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType));
|
||||
wasInConsensus = false;
|
||||
}
|
||||
}
|
||||
|
||||
// add any outstanding consensus data
|
||||
reads.addAll(finalizeAndAdd(consensus, consensusType));
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
 * Creates a new, empty synthetic read for the requested consensus type.
 *
 * FILTERED reads are created STRANDLESS and named filteredDataReadName followed by the current value of
 * filteredDataConsensusCounter. Any other type is named consensusReadName followed by the current value of
 * consensusCounter, and is POSITIVE for POSITIVE_CONSENSUS and NEGATIVE otherwise. The counter that was used
 * is incremented after the name is built, so positive and negative consensus reads share one counter.
 *
 * @param consensusType the type of synthetic read to create
 * @param start         value passed to the SyntheticRead constructor as its start; the visible caller passes
 *                      the location of the first header element added to the read
 * @return a newly constructed SyntheticRead
 */
private SyntheticRead createNewConsensus(final ConsensusType consensusType, final int start) {
|
||||
if ( consensusType == ConsensusType.FILTERED )
|
||||
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, start, hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
||||
return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, start, hasIndelQualities, consensusType == ConsensusType.POSITIVE_CONSENSUS ? SyntheticRead.StrandType.POSITIVE : SyntheticRead.StrandType.NEGATIVE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalizes one or more synthetic reads.
|
||||
* Finalizes a synthetic read.
|
||||
*
|
||||
* @param consensus the consensus to finalize
|
||||
* @param type the synthetic reads you want to close
|
||||
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||
* @return a possibly empty list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||
*/
|
||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final ConsensusType type) {
|
||||
private ObjectArrayList<GATKSAMRecord> finalizeAndAdd(final SyntheticRead consensus, final ConsensusType type) {
|
||||
|
||||
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectArrayList<GATKSAMRecord> list = new ObjectArrayList<>();
|
||||
|
||||
if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) {
|
||||
final GATKSAMRecord read = finalizeRunningConsensus();
|
||||
if ( read != null )
|
||||
list.add(read);
|
||||
}
|
||||
final GATKSAMRecord read;
|
||||
if ( type == ConsensusType.FILTERED )
|
||||
read = finalizeFilteredDataConsensus(consensus);
|
||||
else
|
||||
read = finalizeRunningConsensus(consensus);
|
||||
|
||||
if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) {
|
||||
final GATKSAMRecord read = finalizeFilteredDataConsensus();
|
||||
if ( read != null )
|
||||
list.add(read);
|
||||
}
|
||||
if ( read != null )
|
||||
list.add(read);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position without consensus data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (!headerElementIterator.next().hasConsensusData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position with consensus data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextConsensusElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (headerElementIterator.next().hasConsensusData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param strandType the strandedness that the synthetic read should be represented as having
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
private void addToRunningConsensus(final LinkedList<HeaderElement> header, final int start, final int end, final SyntheticRead.StrandType strandType) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType);
|
||||
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
||||
final HeaderElement headerElement = headerElementIterator.next();
|
||||
if (!headerElement.hasConsensusData())
|
||||
throw new ReviewedStingException("No CONSENSUS data in " + index);
|
||||
|
||||
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds bases to the running filtered data accordingly
|
||||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected ObjectArrayList<GATKSAMRecord> addToFilteredReads(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
|
||||
if ( start < end ) {
|
||||
final ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end));
|
||||
|
||||
HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (headerElement.hasFilteredData()) {
|
||||
|
||||
// find the end of the consecutive filtered data in the window
|
||||
final int endOfFiltered = findNextNonFilteredElement(header, start, end);
|
||||
if (endOfFiltered <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start));
|
||||
|
||||
// add to running filtered consensus and recurse
|
||||
addToFilteredData(header, start, endOfFiltered);
|
||||
reads.addAll(addToFilteredReads(header, endOfFiltered, end));
|
||||
|
||||
} else {
|
||||
|
||||
// add any outstanding filtered data
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
||||
|
||||
// find the end of the consecutive empty data in the window
|
||||
final int endOfEmptyData = findNextFilteredElement(header, start, end);
|
||||
if (endOfEmptyData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||
|
||||
// recurse out of the empty region
|
||||
reads.addAll(addToFilteredReads(header, endOfEmptyData, end));
|
||||
}
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position without consensus data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (!headerElementIterator.next().hasFilteredData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next position witho consensus data
|
||||
*
|
||||
* @param header the header to check
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextFilteredElement(final LinkedList<HeaderElement> header, final int start, final int upTo) {
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("There are no more header elements in this window");
|
||||
|
||||
if (headerElementIterator.next().hasFilteredData())
|
||||
break;
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
private void addToFilteredData(final LinkedList<HeaderElement> header, final int start, final int end) {
|
||||
|
||||
if (filteredDataConsensus == null)
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS);
|
||||
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
||||
final HeaderElement headerElement = headerElementIterator.next();
|
||||
|
||||
if (!headerElement.hasFilteredData())
|
||||
throw new ReviewedStingException("No filtered data in " + index);
|
||||
|
||||
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic accessor to add base and qualities to a synthetic read
|
||||
*
|
||||
|
|
@ -734,7 +533,7 @@ public class SlidingWindow {
|
|||
final int refStart = windowHeader.get(start).getLocation();
|
||||
final int refStop = windowHeader.get(stop).getLocation();
|
||||
|
||||
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<>();
|
||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||
if ( read.getSoftStart() <= refStop ) {
|
||||
if ( read.getAlignmentEnd() >= refStart ) {
|
||||
|
|
@ -814,7 +613,7 @@ public class SlidingWindow {
|
|||
continue;
|
||||
|
||||
if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ||
|
||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 1 )
|
||||
headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) != 1 )
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -836,13 +635,26 @@ public class SlidingWindow {
|
|||
|
||||
final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed);
|
||||
result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads);
|
||||
result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS));
|
||||
result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1));
|
||||
result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1));
|
||||
|
||||
return result; // finalized reads will be downsampled if necessary
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds reads for all possible strands (positive, negative, filtered) from the global windowHeader object
|
||||
*
|
||||
* @param start the start position (inclusive)
|
||||
* @param end the end position (exclusive)
|
||||
* @return non-null but possibly empty array list with reduced reads
|
||||
*/
|
||||
private ObjectArrayList<GATKSAMRecord> addAllSyntheticReadTypes(final int start, final int end) {
|
||||
final ObjectArrayList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.POSITIVE_CONSENSUS));
|
||||
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.NEGATIVE_CONSENSUS));
|
||||
reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.FILTERED));
|
||||
return reads;
|
||||
}
|
||||
|
||||
/*
|
||||
* @see #closeVariantRegions(CompressionStash, ObjectSortedSet<GenomeLoc>, boolean) with forceCloseFullRegions set to false
|
||||
*/
|
||||
|
|
@ -851,7 +663,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
private static final class CloseVariantRegionResult {
|
||||
final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
final private ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||
private int stopPerformed;
|
||||
|
||||
public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = stopPerformed; }
|
||||
|
|
@ -866,7 +678,7 @@ public class SlidingWindow {
|
|||
* @return a non-null set of reduced reads representing the finalized regions
|
||||
*/
|
||||
public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions, final ObjectSortedSet<GenomeLoc> knownSnpPositions, final boolean forceCloseFullRegions) {
|
||||
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
|
||||
if ( !regions.isEmpty() ) {
|
||||
|
||||
int windowHeaderStart = getStartLocation(windowHeader);
|
||||
|
|
@ -945,9 +757,9 @@ public class SlidingWindow {
|
|||
if (downsampleCoverage >= nReads)
|
||||
return allReads;
|
||||
|
||||
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<GATKSAMRecord>(downsampleCoverage);
|
||||
ReservoirDownsampler <GATKSAMRecord> downsampler = new ReservoirDownsampler<>(downsampleCoverage);
|
||||
downsampler.submit(allReads);
|
||||
return new ObjectArrayList<GATKSAMRecord>(downsampler.consumeFinalizedItems());
|
||||
return new ObjectArrayList<>(downsampler.consumeFinalizedItems());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -962,7 +774,7 @@ public class SlidingWindow {
|
|||
@Ensures("result != null")
|
||||
public Pair<ObjectSet<GATKSAMRecord>, CompressionStash> close(final ObjectSortedSet<GenomeLoc> knownSnpPositions) {
|
||||
// mark variant regions
|
||||
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
ObjectSet<GATKSAMRecord> finalizedReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator());
|
||||
CompressionStash regions = new CompressionStash();
|
||||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
|
|
@ -970,48 +782,45 @@ public class SlidingWindow {
|
|||
regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true);
|
||||
finalizedReads = closeVariantRegions(regions, knownSnpPositions, true);
|
||||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS));
|
||||
finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size()));
|
||||
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
|
||||
}
|
||||
if (!windowHeader.isEmpty())
|
||||
finalizedReads.addAll(addAllSyntheticReadTypes(0, windowHeader.size()));
|
||||
}
|
||||
|
||||
return new Pair<ObjectSet<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
|
||||
return new Pair<>(finalizedReads, regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* generates the SAM record for the running consensus read and resets it (to null)
|
||||
*
|
||||
* @param runningConsensus the consensus to finalize
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeRunningConsensus() {
|
||||
protected GATKSAMRecord finalizeRunningConsensus(final SyntheticRead runningConsensus) {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
if (runningConsensus != null) {
|
||||
if (runningConsensus.size() > 0)
|
||||
|
||||
if ( runningConsensus != null ) {
|
||||
if ( runningConsensus.size() > 0 )
|
||||
finalizedRead = runningConsensus.close();
|
||||
else
|
||||
consensusCounter--;
|
||||
|
||||
runningConsensus = null;
|
||||
}
|
||||
|
||||
return finalizedRead;
|
||||
}
|
||||
|
||||
/**
|
||||
* generates the SAM record for the filtered data consensus and resets it (to null)
|
||||
*
|
||||
* @param filteredDataConsensus the consensus to finalize
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
||||
protected GATKSAMRecord finalizeFilteredDataConsensus(final SyntheticRead filteredDataConsensus) {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
if (filteredDataConsensus != null) {
|
||||
if (filteredDataConsensus.size() > 0)
|
||||
finalizedRead = filteredDataConsensus.close();
|
||||
else
|
||||
filteredDataConsensusCounter--;
|
||||
|
||||
filteredDataConsensus = null;
|
||||
}
|
||||
return finalizedRead;
|
||||
}
|
||||
|
|
@ -1021,7 +830,7 @@ public class SlidingWindow {
|
|||
|
||||
private final static class SingleStrandConsensusData {
|
||||
final HeaderElementList consensus = new HeaderElementList();
|
||||
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectList<GATKSAMRecord> reads = new ObjectArrayList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1042,6 +851,7 @@ public class SlidingWindow {
|
|||
|
||||
// initialize the mapping from base (allele) to header
|
||||
final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2);
|
||||
alleleHeaderMap.defaultReturnValue(-1);
|
||||
for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) {
|
||||
final int currentIndex = alleleHeaderMap.size();
|
||||
if ( currentIndex > 1 )
|
||||
|
|
@ -1056,7 +866,7 @@ public class SlidingWindow {
|
|||
if ( alleleHeaderMap.size() != 2 )
|
||||
throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size());
|
||||
|
||||
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectList<GATKSAMRecord> readsToRemove = new ObjectArrayList<>();
|
||||
|
||||
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||
|
||||
|
|
@ -1081,10 +891,10 @@ public class SlidingWindow {
|
|||
final byte base = read.getReadBases()[readPosOfHet];
|
||||
|
||||
// check which allele this read represents
|
||||
final Integer allele = alleleHeaderMap.get(base);
|
||||
final int allele = alleleHeaderMap.get(base);
|
||||
|
||||
// ignore the read if it represents a base that's not part of the consensus
|
||||
if ( allele != null ) {
|
||||
if ( allele != -1 ) {
|
||||
// add to the appropriate polyploid header
|
||||
final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele];
|
||||
header.reads.add(read);
|
||||
|
|
@ -1096,7 +906,7 @@ public class SlidingWindow {
|
|||
readsInWindow.remove(read);
|
||||
|
||||
// create the polyploid synthetic reads if we can
|
||||
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<GATKSAMRecord>();
|
||||
final ObjectList<GATKSAMRecord> hetReads = new ObjectArrayList<>();
|
||||
|
||||
// sanity check that no new "variant region" exists on just a single consensus strand due to softclips
|
||||
// or multi-allelic sites now that we've broken everything out into their component parts. if one does
|
||||
|
|
@ -1125,10 +935,12 @@ public class SlidingWindow {
|
|||
* @param result list in which to store results
|
||||
*/
|
||||
protected void finalizeHetConsensus(final LinkedList<HeaderElement> header, final boolean isNegativeStrand, final ObjectList<GATKSAMRecord> result) {
|
||||
if ( header.size() > 0 )
|
||||
result.addAll(addToSyntheticReads(header, 0, header.size(), isNegativeStrand ? SyntheticRead.StrandType.NEGATIVE : SyntheticRead.StrandType.POSITIVE));
|
||||
if ( runningConsensus != null )
|
||||
result.add(finalizeRunningConsensus());
|
||||
if ( header.size() > 0 ) {
|
||||
if ( isNegativeStrand )
|
||||
result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.NEGATIVE_CONSENSUS));
|
||||
else
|
||||
result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.POSITIVE_CONSENSUS));
|
||||
}
|
||||
}
|
||||
|
||||
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
|
||||
|
|
@ -1222,6 +1034,7 @@ public class SlidingWindow {
|
|||
|
||||
final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex);
|
||||
final byte mappingQuality = (byte) read.getMappingQuality();
|
||||
final boolean isNegativeStrand = read.getReadNegativeStrandFlag();
|
||||
|
||||
// iterator variables
|
||||
int locationIndex = startIndex;
|
||||
|
|
@ -1254,9 +1067,9 @@ public class SlidingWindow {
|
|||
for ( int i = 0; i < nDeletionBases; i++ ) {
|
||||
headerElement = headerElementIterator.next();
|
||||
if (removeRead)
|
||||
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
|
||||
else
|
||||
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand);
|
||||
}
|
||||
locationIndex += nDeletionBases;
|
||||
break;
|
||||
|
|
@ -1279,9 +1092,9 @@ public class SlidingWindow {
|
|||
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
||||
|
||||
if ( removeRead )
|
||||
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
|
||||
else
|
||||
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||
headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand);
|
||||
|
||||
readBaseIndex++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -133,6 +132,11 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
|||
return reportLocus ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the quals separated by version and strand
|
||||
* @param readPileup the pileup
|
||||
* @return 2x2 array with sum of quals separated by version in 1st dimension and strand in the 2nd
|
||||
*/
|
||||
private int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||
|
||||
final int[] quals = new int[2];
|
||||
|
|
|
|||
|
|
@ -97,15 +97,15 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
HeaderElement headerElement = new HeaderElement(1000, 0);
|
||||
|
||||
// first test that if we add and then remove it, we have no data
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||
headerElement.addInsertionToTheRight();
|
||||
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||
headerElement.removeInsertionToTheRight();
|
||||
testHeaderIsEmpty(headerElement);
|
||||
|
||||
// now, test that the data was added as expected
|
||||
for ( int i = 0; i < 10; i++ )
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false);
|
||||
testHeaderData(headerElement, test);
|
||||
|
||||
// test the insertion adding functionality
|
||||
|
|
@ -115,8 +115,8 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private void testHeaderIsEmpty(final HeaderElement headerElement) {
|
||||
Assert.assertFalse(headerElement.hasConsensusData());
|
||||
Assert.assertFalse(headerElement.hasFilteredData());
|
||||
Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS));
|
||||
Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED));
|
||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||
Assert.assertTrue(headerElement.isEmpty());
|
||||
}
|
||||
|
|
@ -125,9 +125,9 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
|
||||
Assert.assertFalse(headerElement.isEmpty());
|
||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||
Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual);
|
||||
Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual);
|
||||
Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ);
|
||||
Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS), test.MQ >= minMappingQual);
|
||||
Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED), test.MQ < minMappingQual);
|
||||
Assert.assertEquals(headerElement.getBaseCounts(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ? SlidingWindow.ConsensusType.POSITIVE_CONSENSUS : SlidingWindow.ConsensusType.FILTERED).getRMS(), (double)test.MQ);
|
||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05));
|
||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip);
|
||||
}
|
||||
|
|
@ -145,7 +145,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "alleles")
|
||||
public Object[][] createAllelesData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
final int[] counts = new int[]{ 0, 5, 10, 15, 20 };
|
||||
final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 };
|
||||
|
|
@ -174,7 +174,7 @@ public class HeaderElementUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < test.counts.length; i++ ) {
|
||||
final BaseIndex base = BaseIndex.values()[i];
|
||||
for ( int j = 0; j < test.counts[i]; j++ )
|
||||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false);
|
||||
headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false, false);
|
||||
}
|
||||
|
||||
final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue);
|
||||
|
|
|
|||
|
|
@ -158,44 +158,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompression() {
|
||||
RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false);
|
||||
RRTest("testDefaultCompression ", L, "0e503f7b79ace4c89d74f0943a0de1c0", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDefaultCompressionWithKnowns() {
|
||||
RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true);
|
||||
RRTest("testDefaultCompressionWithKnowns ", L, "6db7ce2733d006f8bd61c42a40d23728", true);
|
||||
}
|
||||
|
||||
private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervals() {
|
||||
RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false);
|
||||
RRTest("testMultipleIntervals ", intervals, "207f2c6d3db956e19412a45a231ca367", false, "043b2838c27d8f9580379b54c18ff40a");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMultipleIntervalsWithKnowns() {
|
||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true);
|
||||
RRTest("testMultipleIntervalsWithKnowns ", intervals, "f3b11a8a7673b301e27137936fafc6b6", true, "043b2838c27d8f9580379b54c18ff40a");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompression() {
|
||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false);
|
||||
RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "dcc3716b3665aa1c2dbe6b22d6534aef", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHighCompressionWithKnowns() {
|
||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true);
|
||||
RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "97ae655bf0e483ea227b1aac67ced024", true);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompression() {
|
||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false);
|
||||
RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a1377eb922e0b09a03a280b691b0b3ff", false);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testLowCompressionWithKnowns() {
|
||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true);
|
||||
RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "bd7c5b0b210694f364ca6a41f5b89870", true);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -207,7 +207,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testIndelCompression() {
|
||||
final String md5 = "d20e6012300898a0315c795cab7583d8";
|
||||
final String md5 = "9c9305eda5e4e7f22246ec8a4b242c97";
|
||||
RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false);
|
||||
RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true);
|
||||
}
|
||||
|
|
@ -215,27 +215,25 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testFilteredDeletionCompression() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
||||
executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1bda512143be1016dfaca1f7020b6398")), "4f916da29d91852077f0a2fdbdd2c7f6");
|
||||
}
|
||||
|
||||
private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReduction() {
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5);
|
||||
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testCoReductionWithKnowns() {
|
||||
String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s ";
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5);
|
||||
executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0")));
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testInsertionsAtEdgeOfConsensus() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804")));
|
||||
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c10653a8c21fb32b5cf580d3704b0edd")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -249,7 +247,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testAddingReadAfterTailingTheStash() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||
executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fddbec29d0945afbbb34b42994614c15")), "3eab32c215ba68e75efd5ab7e9f7a2e7");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -260,7 +258,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377")));
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -270,7 +268,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testReadOffContig() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248")));
|
||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("595e5812c37189930cae93e45765def4")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -280,7 +278,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testPairedReadsInVariantRegion() {
|
||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) +
|
||||
" -o %s --downsample_coverage 250 -dcov 50 ";
|
||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||
executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b005727119eee27995705959a637085e")), "2af063d1bd3c322b03405dbb3ecf59a9");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -318,7 +318,7 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression;
|
||||
this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage;
|
||||
this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage);
|
||||
this.description = String.format("%d %d %d %b %b", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage, readsShouldBeLowQuality, variantBaseShouldBeLowQuality);
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
|
@ -390,40 +390,40 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
// test high quality reads and bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, false, 11, 8, 7 + DEEP_COVERAGE_ITERATIONS)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, false, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc320), false, false, 13, 12, 6 + (6 * DEEP_COVERAGE_ITERATIONS))});
|
||||
|
||||
// test low quality reads
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), true, false, 3, 3, 3)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), true, false, 3, 3, 3)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), true, false, 3, 3, 3)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), true, false, 3, 3, 3)});
|
||||
|
||||
// test low quality bases
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(), false, true, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), false, true, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), false, true, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), false, true, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), false, true, 2, 2, 2)});
|
||||
|
||||
// test mixture
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2, 2, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 1, 1, 1)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 3, 3, 3)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 2, 2, 2)});
|
||||
|
||||
// test I/D operators
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
@ -517,6 +517,39 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all
|
||||
}
|
||||
|
||||
/**
 * Builds seven forward-strand reads of all-'A' bases (base quality 30, mapping quality 30),
 * feeds them into a SlidingWindow, closes it, and asserts that the first element of the
 * returned pair holds exactly 3 reads.
 *
 * Reads 0 and 1 carry a 'C' at read index 20; read 0 additionally carries a 'C' at read
 * index 80 together with a cigar containing a one-base insertion at that offset.
 *
 * Relies on the fixtures header, globalStartPosition and readLength declared elsewhere in
 * this class (assumes readLength is 100 so that it matches the 80M1I19M cigar -- TODO confirm).
 *
 * NOTE(review): presumably the regression test for insertions downstream of a variant
 * region not stopping compression -- confirm against the change description.
 */
@Test
|
||||
public void testConsensusCreationForInsertions() {
|
||||
|
||||
final int totalNumReads = 7;
|
||||
final ObjectList<GATKSAMRecord> myReads = new ObjectArrayList<>(totalNumReads);
|
||||
|
||||
// build the reads: read 0 has a SNP and an insertion, read 1 has only the SNP, the remaining reads are all 'A'
|
||||
for ( int i = 0; i < totalNumReads; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead" + i, 0, globalStartPosition, readLength);
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
if ( i < 2 )
|
||||
bases[20] = 'C';
|
||||
if ( i == 0 )
|
||||
bases[80] = 'C';
|
||||
read.setReadBases(bases);
|
||||
|
||||
// 80M1I19M = 80 aligned bases, 1 inserted base, 19 aligned bases, so the 'C' written at index 80 above is the inserted base
if ( i == 0 )
|
||||
read.setCigarString("80M1I19M");
|
||||
|
||||
myReads.add(read);
|
||||
}
|
||||
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
for ( final GATKSAMRecord read : myReads )
|
||||
slidingWindow.addRead(read);
|
||||
// close(null) returns a pair; only its first element (the set of emitted reads) is checked here
final Pair<ObjectSet<GATKSAMRecord>, CompressionStash> result = slidingWindow.close(null);
|
||||
Assert.assertEquals(result.getFirst().size(), 3); // expect 3 output reads from the 7 inputs; original note: "no compression at all for SNPs" (NOTE(review): confirm how the 3 reads break down)
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddingReadPairWithSameCoordinates() {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10);
|
||||
|
|
@ -739,21 +772,22 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||
read.setMappingQuality(30);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
|
||||
// add the read
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false);
|
||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
||||
for ( int i = 0; i < start; i++ )
|
||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
Assert.assertEquals(windowHeader.get(start + i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 1);
|
||||
Assert.assertEquals(windowHeader.get(start + i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 1);
|
||||
for ( int i = start + readLength; i < currentHeaderLength; i++ )
|
||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||
|
||||
// now remove the read
|
||||
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start);
|
||||
for ( int i = 0; i < currentHeaderLength; i++ )
|
||||
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||
Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
Loading…
Reference in New Issue