Refactoring of SlidingWindow class in RR to reduce complexity and fix important bug.
* Allow RR to write its BAM to stdout by setting required=true for @Output.
* Fixed bug in sliding window where a break in coverage after a long stretch without
a variant region was causing a doubling of all the reads before the break.
* Refactored SlidingWindow.updateHeaderCounts() into 3 separate tested methods.
* Refactored polyploid consensus code out of SlidingWindow.compressVariantRegion().
This commit is contained in:
parent
7dce4f8630
commit
05e69b6294
|
|
@ -112,7 +112,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
|
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
|
||||||
public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, ReduceReadsStash> {
|
public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, ReduceReadsStash> {
|
||||||
|
|
||||||
@Output
|
@Output(required=true)
|
||||||
private StingSAMFileWriter out = null;
|
private StingSAMFileWriter out = null;
|
||||||
private SAMFileWriter writerToUse = null;
|
private SAMFileWriter writerToUse = null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,13 +50,12 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import it.unimi.dsi.fastutil.bytes.Byte2IntArrayMap;
|
import it.unimi.dsi.fastutil.bytes.Byte2IntArrayMap;
|
||||||
import it.unimi.dsi.fastutil.bytes.Byte2IntMap;
|
import it.unimi.dsi.fastutil.bytes.Byte2IntMap;
|
||||||
import it.unimi.dsi.fastutil.bytes.Byte2IntOpenHashMap;
|
|
||||||
import it.unimi.dsi.fastutil.objects.*;
|
import it.unimi.dsi.fastutil.objects.*;
|
||||||
import net.sf.samtools.Cigar;
|
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -127,8 +126,8 @@ public class SlidingWindow {
|
||||||
return getStopLocation(windowHeader);
|
return getStopLocation(windowHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getStopLocation(LinkedList<HeaderElement> header) {
|
private int getStopLocation(final LinkedList<HeaderElement> header) {
|
||||||
return getStartLocation(header) + header.size() - 1;
|
return header.isEmpty() ? -1 : header.peekLast().getLocation();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getContig() {
|
public String getContig() {
|
||||||
|
|
@ -139,7 +138,7 @@ public class SlidingWindow {
|
||||||
return contigIndex;
|
return contigIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getStartLocation(LinkedList<HeaderElement> header) {
|
public int getStartLocation(final LinkedList<HeaderElement> header) {
|
||||||
return header.isEmpty() ? -1 : header.peek().getLocation();
|
return header.isEmpty() ? -1 : header.peek().getLocation();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -652,51 +651,27 @@ public class SlidingWindow {
|
||||||
ObjectList<GATKSAMRecord> allReads = new ObjectArrayList<GATKSAMRecord>();
|
ObjectList<GATKSAMRecord> allReads = new ObjectArrayList<GATKSAMRecord>();
|
||||||
|
|
||||||
// Try to compress into a polyploid consensus
|
// Try to compress into a polyploid consensus
|
||||||
int nVariantPositions = 0;
|
|
||||||
int hetRefPosition = -1;
|
int hetRefPosition = -1;
|
||||||
boolean canCompress = true;
|
final Object[] header = windowHeader.toArray();
|
||||||
Object[] header = windowHeader.toArray();
|
|
||||||
|
|
||||||
// foundEvent will remain false if we don't allow polyploid reduction
|
if ( allowPolyploidReductionInGeneral && !disallowPolyploidReductionAtThisPosition )
|
||||||
if ( allowPolyploidReductionInGeneral && !disallowPolyploidReductionAtThisPosition ) {
|
hetRefPosition = findSinglePolyploidCompressiblePosition(header, start, stop);
|
||||||
for (int i = start; i<=stop; i++) {
|
|
||||||
|
|
||||||
int nAlleles = ((HeaderElement) header[i]).getNumberOfAlleles(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT);
|
|
||||||
|
|
||||||
// we will only work on diploid cases because we just don't want to handle/test other scenarios
|
|
||||||
if ( nAlleles > 2 ) {
|
|
||||||
canCompress = false;
|
|
||||||
break;
|
|
||||||
} else if ( nAlleles == 2 ) {
|
|
||||||
nVariantPositions++;
|
|
||||||
|
|
||||||
// make sure that there is only 1 site in the variant region that contains more than one allele
|
|
||||||
if ( nVariantPositions == 1 ) {
|
|
||||||
hetRefPosition = i;
|
|
||||||
} else if ( nVariantPositions > 1 ) {
|
|
||||||
canCompress = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to compress the variant region; note that using the hetRefPosition protects us from trying to compress
|
// Try to compress the variant region; note that using the hetRefPosition protects us from trying to compress
|
||||||
// variant regions that are created by insertions (since we can't confirm here that they represent the same allele)
|
// variant regions that are created by insertions (since we can't confirm here that they represent the same allele)
|
||||||
if ( canCompress && hetRefPosition != -1 ) {
|
if ( hetRefPosition != -1 ) {
|
||||||
allReads = createPolyploidConsensus(start, stop, ((HeaderElement) header[hetRefPosition]).getLocation());
|
allReads = createPolyploidConsensus(start, stop, ((HeaderElement) header[hetRefPosition]).getLocation());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return all reads that overlap the variant region and remove them from the window header entirely
|
// Return all reads that overlap the variant region and remove them from the window header entirely
|
||||||
// also remove all reads preceding the variant region (since they will be output as consensus right after compression
|
// also remove all reads preceding the variant region (since they will be output as consensus right after compression
|
||||||
else {
|
else {
|
||||||
final int refStart = windowHeader.get(start).getLocation();
|
final int refStart = windowHeader.get(start).getLocation();
|
||||||
final int refStop = windowHeader.get(stop).getLocation();
|
final int refStop = windowHeader.get(stop).getLocation();
|
||||||
|
|
||||||
ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
final ObjectList<GATKSAMRecord> toRemove = new ObjectArrayList<GATKSAMRecord>();
|
||||||
for (GATKSAMRecord read : readsInWindow) {
|
for ( final GATKSAMRecord read : readsInWindow ) {
|
||||||
if (read.getSoftStart() <= refStop) {
|
if ( read.getSoftStart() <= refStop ) {
|
||||||
if (read.getAlignmentEnd() >= refStart) {
|
if ( read.getAlignmentEnd() >= refStart ) {
|
||||||
allReads.add(read);
|
allReads.add(read);
|
||||||
removeFromHeader(windowHeader, read);
|
removeFromHeader(windowHeader, read);
|
||||||
}
|
}
|
||||||
|
|
@ -708,6 +683,39 @@ public class SlidingWindow {
|
||||||
return allReads;
|
return allReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finds the het variant position located within start and stop (inclusive) if one exists.
|
||||||
|
*
|
||||||
|
* @param header the header element array
|
||||||
|
* @param start the first header index in the region to check (inclusive)
|
||||||
|
* @param stop the last header index of the region to check (inclusive)
|
||||||
|
* @return the window header index of the single het position or -1 if either none or more than one exists
|
||||||
|
*/
|
||||||
|
@Requires("header != null && start >= 0 && (stop >= start || stop == 0)")
|
||||||
|
protected int findSinglePolyploidCompressiblePosition(final Object[] header, final int start, final int stop) {
|
||||||
|
int hetRefPosition = -1;
|
||||||
|
|
||||||
|
for ( int i = start; i <= stop; i++ ) {
|
||||||
|
|
||||||
|
final int nAlleles = ((HeaderElement) header[i]).getNumberOfAlleles(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT);
|
||||||
|
|
||||||
|
// we will only work on diploid cases because we just don't want to handle/test other scenarios
|
||||||
|
if ( nAlleles > 2 )
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
if ( nAlleles == 2 ) {
|
||||||
|
|
||||||
|
// make sure that there is only 1 site in the region that contains more than one allele
|
||||||
|
if ( hetRefPosition >= 0 )
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
hetRefPosition = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return hetRefPosition;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finalizes a variant region, any adjacent synthetic reads.
|
* Finalizes a variant region, any adjacent synthetic reads.
|
||||||
*
|
*
|
||||||
|
|
@ -728,31 +736,41 @@ public class SlidingWindow {
|
||||||
return result; // finalized reads will be downsampled if necessary
|
return result; // finalized reads will be downsampled if necessary
|
||||||
}
|
}
|
||||||
|
|
||||||
public ObjectSet<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
|
/*
|
||||||
ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
* Finalizes the list of regions requested (and any regions preceding them)
|
||||||
if (!regions.isEmpty()) {
|
*
|
||||||
int lastStop = -1;
|
* @param regions the list of regions to finalize
|
||||||
int windowHeaderStart = getStartLocation(windowHeader);
|
* @return a non-null set of reduced reads representing the finalized regions
|
||||||
|
*/
|
||||||
|
public ObjectSet<GATKSAMRecord> closeVariantRegions(final CompressionStash regions) {
|
||||||
|
final ObjectAVLTreeSet<GATKSAMRecord> allReads = new ObjectAVLTreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||||
|
if ( !regions.isEmpty() ) {
|
||||||
|
|
||||||
for (GenomeLoc region : regions) {
|
int windowHeaderStart = getStartLocation(windowHeader);
|
||||||
|
HeaderElement lastCleanedElement = null;
|
||||||
|
|
||||||
|
for ( final GenomeLoc region : regions ) {
|
||||||
if (((FinishedGenomeLoc)region).isFinished() && region.getContig().equals(contig) && region.getStart() >= windowHeaderStart && region.getStop() < windowHeaderStart + windowHeader.size()) {
|
if (((FinishedGenomeLoc)region).isFinished() && region.getContig().equals(contig) && region.getStart() >= windowHeaderStart && region.getStop() < windowHeaderStart + windowHeader.size()) {
|
||||||
int start = region.getStart() - windowHeaderStart;
|
final int start = region.getStart() - windowHeaderStart;
|
||||||
int stop = region.getStop() - windowHeaderStart;
|
final int stop = region.getStop() - windowHeaderStart;
|
||||||
|
|
||||||
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1)); // todo -- add condition here dependent on dbSNP track
|
allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1)); // todo -- add condition here dependent on dbSNP track
|
||||||
lastStop = stop;
|
|
||||||
|
// We need to clean up the window header elements up until the end of the requested region so that they don't get used for future regions.
|
||||||
|
// Note that this cleanup used to happen outside the above for-loop, but that was causing an occasional doubling of the reduced reads
|
||||||
|
// (in the case where there are multiple regions to close we'd reuse the reads for each region).
|
||||||
|
if ( stop >= 0 ) {
|
||||||
|
for ( int i = 0; i < stop; i++ )
|
||||||
|
windowHeader.remove();
|
||||||
|
lastCleanedElement = windowHeader.remove();
|
||||||
|
windowHeaderStart = getStartLocation(windowHeader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// clean up the window header elements up until the end of the variant region.
|
// we need to keep the last element of the last cleaned region in the event that the following element has a read that starts with an insertion.
|
||||||
// note that we keep the last element of the region in the event that the following element has a read that starts with insertion.
|
if ( lastCleanedElement != null && lastCleanedElement.hasInsertionToTheRight() )
|
||||||
if ( lastStop >= 0 ) {
|
windowHeader.addFirst(new HeaderElement(lastCleanedElement.getLocation(), lastCleanedElement.numInsertionsToTheRight()));
|
||||||
for (int i = 0; i < lastStop; i++)
|
|
||||||
windowHeader.remove();
|
|
||||||
final HeaderElement lastOfRegion = windowHeader.remove();
|
|
||||||
if ( lastOfRegion.hasInsertionToTheRight() )
|
|
||||||
windowHeader.addFirst(new HeaderElement(lastOfRegion.getLocation(), lastOfRegion.numInsertionsToTheRight()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return allReads;
|
return allReads;
|
||||||
}
|
}
|
||||||
|
|
@ -925,7 +943,6 @@ public class SlidingWindow {
|
||||||
return hetReads;
|
return hetReads;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
|
private void addToHeader(LinkedList<HeaderElement> header, GATKSAMRecord read) {
|
||||||
updateHeaderCounts(header, read, false);
|
updateHeaderCounts(header, read, false);
|
||||||
}
|
}
|
||||||
|
|
@ -934,84 +951,123 @@ public class SlidingWindow {
|
||||||
updateHeaderCounts(header, read, true);
|
updateHeaderCounts(header, read, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the sliding window's header counts with the incoming read bases, insertions
|
* Updates the sliding window's header counts with the incoming read bases, insertions
|
||||||
* and deletions.
|
* and deletions.
|
||||||
*
|
*
|
||||||
* @param header the sliding window header to use
|
* @param header the sliding window header to use
|
||||||
* @param read the incoming read to be added to the sliding window
|
* @param read the incoming read to be added to the sliding window
|
||||||
* @param removeRead if we are removing the read from the header or adding
|
* @param removeRead if we are removing the read from the header or adding
|
||||||
*/
|
*/
|
||||||
private void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
|
protected void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
|
||||||
byte[] bases = read.getReadBases();
|
final int readStart = read.getSoftStart();
|
||||||
byte[] quals = read.getBaseQualities();
|
final int headerStart = getStartLocation(header);
|
||||||
byte[] insQuals = read.getExistingBaseInsertionQualities();
|
int locationIndex = headerStart < 0 ? 0 : readStart - headerStart;
|
||||||
byte[] delQuals = read.getExistingBaseDeletionQualities();
|
|
||||||
int readStart = read.getSoftStart();
|
|
||||||
int readEnd = read.getSoftEnd();
|
|
||||||
Cigar cigar = read.getCigar();
|
|
||||||
|
|
||||||
int readBaseIndex = 0;
|
if ( removeRead && locationIndex < 0 )
|
||||||
int startLocation = getStartLocation(header);
|
throw new ReviewedStingException("Provided read is behind the Sliding Window! Read = " + read + ", readStart = " + readStart + ", cigar = " + read.getCigarString() + ", window = " + headerStart + "-" + getStopLocation(header));
|
||||||
int locationIndex = startLocation < 0 ? 0 : readStart - startLocation;
|
|
||||||
int stopLocation = getStopLocation(header);
|
|
||||||
|
|
||||||
if (removeRead && locationIndex < 0)
|
// we only need to create new header elements if we are adding the read, not when we're removing it
|
||||||
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
|
if ( !removeRead )
|
||||||
|
locationIndex = createNewHeaderElements(header, read, locationIndex);
|
||||||
|
|
||||||
if (!removeRead) { // we only need to create new header elements if we are adding the read, not when we're removing it
|
actuallyUpdateHeaderForRead(header, read, removeRead, locationIndex);
|
||||||
if (locationIndex < 0) { // Do we need to add extra elements before the start of the header? -- this may happen if the previous read was clipped and this alignment starts before the beginning of the window
|
}
|
||||||
for (int i = 1; i <= -locationIndex; i++)
|
|
||||||
header.addFirst(new HeaderElement(startLocation - i));
|
|
||||||
|
|
||||||
startLocation = readStart; // update start location accordingly
|
/*
|
||||||
locationIndex = 0;
|
* Creates new header elements if needed for the given read.
|
||||||
}
|
*
|
||||||
|
* @param header the sliding window header to use
|
||||||
|
* @param read the incoming read to be added to the sliding window
|
||||||
|
* @param startIndex the start location index into the header for this read
|
||||||
|
*
|
||||||
|
* @return an updated index into the modified header
|
||||||
|
*/
|
||||||
|
@Requires("header != null && read != null")
|
||||||
|
protected int createNewHeaderElements(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final int startIndex) {
|
||||||
|
|
||||||
if (stopLocation < readEnd) { // Do we need to add extra elements to the header?
|
int headerStart = getStartLocation(header);
|
||||||
int elementsToAdd = (stopLocation < 0) ? readEnd - readStart + 1 : readEnd - stopLocation;
|
int locationIndex = startIndex;
|
||||||
while (elementsToAdd-- > 0)
|
|
||||||
header.addLast(new HeaderElement(readEnd - elementsToAdd));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Special case for leading insertions before the beginning of the sliding read
|
// Do we need to add extra elements before the start of the header? This could happen if the previous read was
|
||||||
if (ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == startLocation || startLocation < 0)) {
|
// clipped and this alignment starts before the beginning of the window
|
||||||
header.addFirst(new HeaderElement(readStart - 1)); // create a new first element to the window header with no bases added
|
final int readStart = read.getSoftStart();
|
||||||
locationIndex = 1; // This allows the first element (I) to look at locationIndex - 1 in the subsequent switch and do the right thing.
|
if ( startIndex < 0 ) {
|
||||||
}
|
for ( int i = 1; i <= -startIndex; i++ )
|
||||||
|
header.addFirst(new HeaderElement(headerStart - i));
|
||||||
|
|
||||||
|
// update the start location accordingly
|
||||||
|
headerStart = readStart;
|
||||||
|
locationIndex = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(locationIndex);
|
// Do we need to add extra elements to the end of the header?
|
||||||
|
final int headerStop = getStopLocation(header);
|
||||||
|
final int readEnd = read.getSoftEnd();
|
||||||
|
if ( headerStop < readEnd ) {
|
||||||
|
final int elementsToAdd = (headerStop < 0) ? readEnd - readStart + 1 : readEnd - headerStop;
|
||||||
|
for ( int i = elementsToAdd - 1; i >= 0; i-- )
|
||||||
|
header.addLast(new HeaderElement(readEnd - i));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special case for leading insertions before the beginning of the sliding read
|
||||||
|
if ( ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == headerStart || headerStart < 0) ) {
|
||||||
|
// create a new first element to the window header with no bases added
|
||||||
|
header.addFirst(new HeaderElement(readStart - 1));
|
||||||
|
// this allows the first element (I) to look at locationIndex - 1 when we update the header and do the right thing
|
||||||
|
locationIndex = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return locationIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Actually updates the sliding window's header counts with the incoming read bases and quals (including insertion and deletion quals).
|
||||||
|
*
|
||||||
|
* @param header the sliding window header to use
|
||||||
|
* @param read the incoming read to be added to the sliding window
|
||||||
|
* @param removeRead if we are removing the read from the header or adding
|
||||||
|
* @param startIndex the start location index into the header for this read
|
||||||
|
*/
|
||||||
|
@Requires("header != null && read != null && startIndex >= 0")
|
||||||
|
protected void actuallyUpdateHeaderForRead(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead, final int startIndex) {
|
||||||
|
|
||||||
|
final Iterator<HeaderElement> headerElementIterator = header.listIterator(startIndex);
|
||||||
|
final byte mappingQuality = (byte) read.getMappingQuality();
|
||||||
|
|
||||||
|
// iterator variables
|
||||||
|
int locationIndex = startIndex;
|
||||||
|
int readBaseIndex = 0;
|
||||||
HeaderElement headerElement;
|
HeaderElement headerElement;
|
||||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
|
||||||
switch (cigarElement.getOperator()) {
|
for ( CigarElement cigarElement : read.getCigar().getCigarElements() ) {
|
||||||
|
switch ( cigarElement.getOperator() ) {
|
||||||
case H:
|
case H:
|
||||||
break;
|
break;
|
||||||
case I:
|
case I:
|
||||||
if (removeRead && locationIndex == 0) { // special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
|
// special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
|
||||||
|
if ( removeRead && locationIndex == 0 )
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
headerElement = header.get(locationIndex - 1); // insertions are added to the base to the left (previous element)
|
// insertions are added to the base to the left (previous element)
|
||||||
|
headerElement = header.get(locationIndex - 1);
|
||||||
|
|
||||||
if (removeRead) {
|
if ( removeRead )
|
||||||
headerElement.removeInsertionToTheRight();
|
headerElement.removeInsertionToTheRight();
|
||||||
}
|
else
|
||||||
else {
|
|
||||||
headerElement.addInsertionToTheRight();
|
headerElement.addInsertionToTheRight();
|
||||||
}
|
|
||||||
readBaseIndex += cigarElement.getLength();
|
readBaseIndex += cigarElement.getLength();
|
||||||
break; // just ignore the insertions at the beginning of the read
|
break;
|
||||||
case D:
|
case D:
|
||||||
int nDeletions = cigarElement.getLength();
|
// deletions are added to the baseCounts with the read mapping quality as it's quality score
|
||||||
while (nDeletions-- > 0) { // deletions are added to the baseCounts with the read mapping quality as it's quality score
|
final int nDeletionBases = cigarElement.getLength();
|
||||||
|
for ( int i = 0; i < nDeletionBases; i++ ) {
|
||||||
headerElement = headerElementIterator.next();
|
headerElement = headerElementIterator.next();
|
||||||
byte mq = (byte) read.getMappingQuality();
|
|
||||||
if (removeRead)
|
if (removeRead)
|
||||||
headerElement.removeBase((byte) 'D', mq, mq, mq, mq, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||||
else
|
else
|
||||||
headerElement.addBase((byte) 'D', mq, mq, mq, mq, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
|
||||||
|
|
||||||
locationIndex++;
|
locationIndex++;
|
||||||
}
|
}
|
||||||
|
|
@ -1021,26 +1077,33 @@ public class SlidingWindow {
|
||||||
case P:
|
case P:
|
||||||
case EQ:
|
case EQ:
|
||||||
case X:
|
case X:
|
||||||
int nBasesToAdd = cigarElement.getLength();
|
final int nBasesToAdd = cigarElement.getLength();
|
||||||
while (nBasesToAdd-- > 0) {
|
final boolean isSoftClip = cigarElement.getOperator() == CigarOperator.S;
|
||||||
|
final boolean readHasIndelQuals = read.hasBaseIndelQualities();
|
||||||
|
final byte[] insertionQuals = readHasIndelQuals ? read.getBaseInsertionQualities() : null;
|
||||||
|
final byte[] deletionQuals = readHasIndelQuals ? read.getBaseDeletionQualities() : null;
|
||||||
|
|
||||||
|
for ( int i = 0; i < nBasesToAdd; i++ ) {
|
||||||
headerElement = headerElementIterator.next();
|
headerElement = headerElementIterator.next();
|
||||||
byte insertionQuality = insQuals == null ? -1 : insQuals[readBaseIndex]; // if the read doesn't have indel qualities, use -1 (doesn't matter the value because it won't be used for anything)
|
final byte insertionQuality = readHasIndelQuals ? insertionQuals[readBaseIndex] : -1;
|
||||||
byte deletionQuality = delQuals == null ? -1 : delQuals[readBaseIndex];
|
final byte deletionQuality = readHasIndelQuals ? deletionQuals[readBaseIndex] : -1;
|
||||||
if (removeRead)
|
if ( removeRead )
|
||||||
headerElement.removeBase(bases[readBaseIndex], quals[readBaseIndex], insertionQuality, deletionQuality, read.getMappingQuality(), MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, cigarElement.getOperator() == CigarOperator.S);
|
headerElement.removeBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||||
else
|
else
|
||||||
headerElement.addBase(bases[readBaseIndex], quals[readBaseIndex], insertionQuality, deletionQuality, read.getMappingQuality(), MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, cigarElement.getOperator() == CigarOperator.S);
|
headerElement.addBase(read.getReadBases()[readBaseIndex], read.getBaseQualities()[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip);
|
||||||
|
|
||||||
readBaseIndex++;
|
readBaseIndex++;
|
||||||
locationIndex++;
|
locationIndex++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void removeReadsFromWindow (ObjectList<GATKSAMRecord> readsToRemove) {
|
private void removeReadsFromWindow (final ObjectList<GATKSAMRecord> readsToRemove) {
|
||||||
for (GATKSAMRecord read : readsToRemove) {
|
for (final GATKSAMRecord read : readsToRemove) {
|
||||||
readsInWindow.remove(read);
|
readsInWindow.remove(read);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -147,7 +147,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testReadOffContig() {
|
public void testReadOffContig() {
|
||||||
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
|
||||||
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("922be8b1151dd0d92602af93b77f7a51")));
|
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("c57cd191dc391983131be43f6cc2e381")));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,7 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
@ -72,8 +73,8 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
public class SlidingWindowUnitTest extends BaseTest {
|
public class SlidingWindowUnitTest extends BaseTest {
|
||||||
|
|
||||||
|
|
@ -385,30 +386,22 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
//// This section tests the downsampling functionality ////
|
//// This section tests the downsampling functionality ////
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
|
|
||||||
private class DSTest {
|
|
||||||
public final int dcov;
|
|
||||||
|
|
||||||
private DSTest(final int dcov) {
|
|
||||||
this.dcov = dcov;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@DataProvider(name = "Downsampling")
|
@DataProvider(name = "Downsampling")
|
||||||
public Object[][] createDownsamplingTestData() {
|
public Object[][] createDownsamplingTestData() {
|
||||||
List<Object[]> tests = new ArrayList<Object[]>();
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
for ( int i = 1; i < basicReads.size() + 10; i++ )
|
for ( int i = 1; i < basicReads.size() + 10; i++ )
|
||||||
tests.add(new Object[]{new DSTest(i)});
|
tests.add(new Object[]{i});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "Downsampling", enabled = true)
|
@Test(dataProvider = "Downsampling", enabled = true)
|
||||||
public void testDownsamplingTest(DSTest test) {
|
public void testDownsamplingTest(final int dcov) {
|
||||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, test.dcov, ReduceReads.DownsampleStrategy.Normal, false, false);
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||||
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
final ObjectList<GATKSAMRecord> result = slidingWindow.downsampleVariantRegion(basicReads);
|
||||||
|
|
||||||
Assert.assertEquals(result.size(), Math.min(test.dcov, basicReads.size()));
|
Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -487,5 +480,97 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
//// This section tests the new header creation ////
|
||||||
|
////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "CreateNewHeader")
|
||||||
|
public Object[][] CreateNewHeaderTestData() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
for ( final int start : Arrays.asList(-10, -1, 0, 1, 10) ) {
|
||||||
|
for ( final int stop : Arrays.asList(-10, -1, 0, 1, 10) ) {
|
||||||
|
tests.add(new Object[]{start, stop});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "CreateNewHeader", enabled = true)
|
||||||
|
public void createNewHeaderTest(final int start, final int stop) {
|
||||||
|
|
||||||
|
// set up the window header
|
||||||
|
final int currentHeaderStart = 100;
|
||||||
|
final int currentHeaderLength = 50;
|
||||||
|
final LinkedList<HeaderElement> windowHeader = new LinkedList<HeaderElement>();
|
||||||
|
for ( int i = 0; i < currentHeaderLength; i++ )
|
||||||
|
windowHeader.add(new HeaderElement(currentHeaderStart + i));
|
||||||
|
|
||||||
|
// set up the read
|
||||||
|
final int readStart = currentHeaderStart + start;
|
||||||
|
final int readLength = currentHeaderLength + stop - start;
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||||
|
int newIndex = slidingWindow.createNewHeaderElements(windowHeader, read, start);
|
||||||
|
|
||||||
|
Assert.assertEquals(newIndex, start > 0 ? start : 0);
|
||||||
|
|
||||||
|
final int expectedNewLength = currentHeaderLength + (start < 0 ? -start : 0) + (stop > 0 ? stop : 0);
|
||||||
|
Assert.assertEquals(windowHeader.size(), expectedNewLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
//// This section tests updating the header from a read ////
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
@DataProvider(name = "UpdateHeaderForRead")
|
||||||
|
public Object[][] UpdateHeaderForReadTestData() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
for ( final int start : Arrays.asList(0, 1, 10) ) {
|
||||||
|
for ( final int readLength : Arrays.asList(1, 5, 10) ) {
|
||||||
|
tests.add(new Object[]{start, readLength});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "UpdateHeaderForRead", enabled = true)
|
||||||
|
public void updateHeaderForReadTest(final int start, final int readLength) {
|
||||||
|
|
||||||
|
// set up the window header
|
||||||
|
final int currentHeaderStart = 100;
|
||||||
|
final int currentHeaderLength = 50;
|
||||||
|
final LinkedList<HeaderElement> windowHeader = new LinkedList<HeaderElement>();
|
||||||
|
for ( int i = 0; i < currentHeaderLength; i++ )
|
||||||
|
windowHeader.add(new HeaderElement(currentHeaderStart + i));
|
||||||
|
|
||||||
|
// set up the read
|
||||||
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, currentHeaderStart + start, readLength);
|
||||||
|
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||||
|
read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
|
||||||
|
read.setMappingQuality(30);
|
||||||
|
|
||||||
|
// add the read
|
||||||
|
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false, false);
|
||||||
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start);
|
||||||
|
for ( int i = 0; i < start; i++ )
|
||||||
|
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
|
for ( int i = 0; i < readLength; i++ )
|
||||||
|
Assert.assertEquals(windowHeader.get(start + i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 1);
|
||||||
|
for ( int i = start + readLength; i < currentHeaderLength; i++ )
|
||||||
|
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
|
|
||||||
|
// now remove the read
|
||||||
|
slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start);
|
||||||
|
for ( int i = 0; i < currentHeaderLength; i++ )
|
||||||
|
Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue