diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 8a74044d8..0adea416e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -27,10 +27,9 @@ public class SlidingWindow { final private LinkedList readsInWindow; final private LinkedList windowHeader; protected int contextSize; // the largest context size (between mismatches and indels) - protected int stopLocation; protected String contig; protected int contigIndex; - protected SAMFileHeader header; + protected SAMFileHeader samHeader; protected GATKSAMReadGroupRecord readGroupAttribute; protected int downsampleCoverage; @@ -66,7 +65,11 @@ public class SlidingWindow { } public int getStopLocation() { - return stopLocation; + return getStopLocation(windowHeader); + } + + private int getStopLocation(LinkedList header) { + return getStartLocation(header) + header.size() - 1; } public String getContig() { @@ -82,8 +85,7 @@ public class SlidingWindow { } - public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs) { - this.stopLocation = -1; + public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs) { this.contextSize = contextSize; this.downsampleCoverage = downsampleCoverage; @@ -97,7 +99,7 @@ public class SlidingWindow { this.contig = contig; this.contigIndex = contigIndex; - this.header = header; + this.samHeader = samHeader; this.readGroupAttribute = readGroupAttribute; this.consensusCounter = 0; @@ -202,7 +204,7 @@ public class SlidingWindow { List readsToRemove = new LinkedList(); for (GATKSAMRecord read : readsInWindow) { // todo -- unnecessarily going through all reads in the window !! Optimize this (But remember reads are not sorted by alignment end!) - if (read.getAlignmentEnd() < getStartLocation(windowHeader)) { + if (read.getSoftEnd() < getStartLocation(windowHeader)) { readsToRemove.add(read); } } @@ -261,7 +263,7 @@ public class SlidingWindow { * @param end the first header index NOT TO add to consensus * @return a list of consensus reads generated by this call. Empty list if no consensus was generated. */ - protected List addToSyntheticReads(List header, int start, int end) { + protected List addToSyntheticReads(LinkedList header, int start, int end) { LinkedList reads = new LinkedList(); if (start < end) { ListIterator headerElementIterator = header.listIterator(start); @@ -274,8 +276,8 @@ public class SlidingWindow { if (headerElement.hasConsensusData()) { reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); - int endOfConsensus = findNextNonConsensusElement(start, end); - addToRunningConsensus(start, endOfConsensus); + int endOfConsensus = findNextNonConsensusElement(header, start, end); + addToRunningConsensus(header, start, endOfConsensus); if (endOfConsensus <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); @@ -284,8 +286,8 @@ public class SlidingWindow { } else if (headerElement.hasFilteredData()) { reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); - int endOfFilteredData = findNextNonFilteredDataElement(start, end); - addToFilteredData(start, endOfFilteredData); + int endOfFilteredData = findNextNonFilteredDataElement(header, start, end); + addToFilteredData(header, start, endOfFilteredData); if (endOfFilteredData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start)); @@ -294,7 +296,7 @@ public class SlidingWindow { } else if (headerElement.isEmpty()) { reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); - int endOfEmptyData = findNextNonEmptyElement(start, end); + int endOfEmptyData = findNextNonEmptyElement(header, start, end); if (endOfEmptyData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); @@ -343,8 +345,8 @@ public class SlidingWindow { * @param upTo limit to search for another consensus element * @return next position with consensus data or empty */ - private int findNextNonConsensusElement(int start, int upTo) { - Iterator headerElementIterator = windowHeader.listIterator(start); + private int findNextNonConsensusElement(LinkedList header, int start, int upTo) { + Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) @@ -365,8 +367,8 @@ public class SlidingWindow { * @param upTo limit to search for * @return next position with no filtered data */ - private int findNextNonFilteredDataElement(int start, int upTo) { - Iterator headerElementIterator = windowHeader.listIterator(start); + private int findNextNonFilteredDataElement(LinkedList header, int start, int upTo) { + Iterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) @@ -387,8 +389,8 @@ public class SlidingWindow { * @param upTo limit to search for * @return next position with non-empty element */ - private int findNextNonEmptyElement(int start, int upTo) { - ListIterator headerElementIterator = windowHeader.listIterator(start); + private int findNextNonEmptyElement(LinkedList header, int start, int upTo) { + ListIterator headerElementIterator = header.listIterator(start); int index = start; while (index < upTo) { if (!headerElementIterator.hasNext()) @@ -412,11 +414,11 @@ public class SlidingWindow { * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus */ - private void addToFilteredData(int start, int end) { + private void addToFilteredData(LinkedList header, int start, int end) { if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); - ListIterator headerElementIterator = windowHeader.listIterator(start); + ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist"); @@ -441,11 +443,11 @@ public class SlidingWindow { * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus */ - private void addToRunningConsensus(int start, int end) { + private void addToRunningConsensus(LinkedList header, int start, int end) { if (runningConsensus == null) - runningConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); - Iterator headerElementIterator = windowHeader.listIterator(start); + Iterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { if (!headerElementIterator.hasNext()) throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); @@ -508,19 +510,25 @@ public class SlidingWindow { // Try to compress the variant region if (canCompress) { - allReads = createPolyploidConsensus(start, stop, nHaplotypes, hetRefPosition); + allReads = createPolyploidConsensus(start, stop, nHaplotypes, ((HeaderElement) header[hetRefPosition]).getLocation()); } - // Return all reads that overlap the variant region and remove them read from the window header entirely + // Return all reads that overlap the variant region and remove them from the window header entirely + // also remove all reads preceding the variant region (since they will be output as consensus right after compression else { + LinkedList toRemove = new LinkedList(); for (GATKSAMRecord read : readsInWindow) { - if (read.getSoftStart() <= refStop && read.getAlignmentEnd() >= refStart) { - allReads.add(read); - removeFromHeader(windowHeader, read); + if (read.getSoftStart() <= refStop) { + if (read.getAlignmentEnd() >= refStart) { + allReads.add(read); + removeFromHeader(windowHeader, read); + } + toRemove.add(read); } } - for (GATKSAMRecord read : allReads) + for (GATKSAMRecord read : toRemove) { readsInWindow.remove(read); + } } return allReads; } @@ -598,7 +606,7 @@ public class SlidingWindow { List finalizedReads = new LinkedList(); if (!windowHeader.isEmpty()) { - boolean[] variantSite = markSites(stopLocation + 1); + boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1); List> regions = getAllVariantRegions(0, windowHeader.size(), variantSite); finalizedReads = closeVariantRegions(regions, true); @@ -653,6 +661,7 @@ public class SlidingWindow { // we will create two (positive strand, negative strand) headers for each contig List> headersPosStrand = new ArrayList>(); List> headersNegStrand = new ArrayList>(); + List hetReads = new LinkedList(); Map haplotypeHeaderMap = new HashMap(nHaplotypes); int currentHaplotype = 0; int refStart = windowHeader.get(start).getLocation(); @@ -661,40 +670,51 @@ public class SlidingWindow { for (GATKSAMRecord read : readsInWindow) { int haplotype = -1; - // check if the read is inside the variant region - if ( read.getMappingQuality() > MIN_MAPPING_QUALITY && (read.getSoftStart() <= refStop && read.getSoftEnd() >= refStart)) { + // check if the read is either before or inside the variant region + if (read.getSoftStart() <= refStop) { + // check if the read is inside the variant region + if (read.getMappingQuality() > MIN_MAPPING_QUALITY && read.getSoftEnd() >= refStart) { + // check if the read contains the het site + if (read.getSoftStart() <= hetRefPosition && read.getSoftEnd() >= hetRefPosition) { + int readPos = ReadUtils.getReadCoordinateForReferenceCoordinate(read, hetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); + byte base = read.getReadBases()[readPos]; + byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPos]; - // check if the read contains the het site - if (read.getSoftStart() <= hetRefPosition && read.getSoftEnd() >= hetRefPosition) { - int readPos = ReadUtils.getReadCoordinateForReferenceCoordinate(read, hetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL); - byte base = read.getReadBases()[readPos]; - byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPos]; - - // check if base passes the filters! - if (qual > MIN_BASE_QUAL_TO_COUNT) { - // check which haplotype this read represents and take the index of it from the list of headers - if (haplotypeHeaderMap.containsKey(base)) { - haplotype = haplotypeHeaderMap.get(base); - } - // create new lists if this haplotype has not been seen yet - else { - haplotype = ++currentHaplotype; - haplotypeHeaderMap.put(base, currentHaplotype); - headersPosStrand.add(new LinkedList()); - headersNegStrand.add(new LinkedList()); + // check if base passes the filters! + if (qual > MIN_BASE_QUAL_TO_COUNT) { + // check which haplotype this read represents and take the index of it from the list of headers + if (haplotypeHeaderMap.containsKey(base)) { + haplotype = haplotypeHeaderMap.get(base); + } + // create new lists if this haplotype has not been seen yet + else { + haplotype = currentHaplotype; + haplotypeHeaderMap.put(base, currentHaplotype); + headersPosStrand.add(new LinkedList()); + headersNegStrand.add(new LinkedList()); + currentHaplotype++; + } + LinkedList header = read.getReadNegativeStrandFlag() ? headersNegStrand.get(haplotype) : headersPosStrand.get(haplotype); + addToHeader(header, read); } } - LinkedList header = read.getReadNegativeStrandFlag() ? headersNegStrand.get(haplotype) : headersPosStrand.get(haplotype); - addToHeader(header, read); - toRemove.add(read); } + // we remove all reads before and inside the variant region from the window + toRemove.add(read); } } - List hetReads = new LinkedList(); - for (List header : headersPosStrand) { - hetReads.addAll(addToSyntheticReads(header, 0, header.size())); - hetReads.add(finalizeRunningConsensus()); + for (LinkedList header : headersPosStrand) { + if (header.size() > 0) + hetReads.addAll(addToSyntheticReads(header, 0, header.size())); + if (runningConsensus != null) + hetReads.add(finalizeRunningConsensus()); + } + for (LinkedList header : headersNegStrand) { + if (header.size() > 0) + hetReads.addAll(addToSyntheticReads(header, 0, header.size())); + if (runningConsensus != null) + hetReads.add(finalizeRunningConsensus()); } for (GATKSAMRecord read : toRemove) { @@ -733,6 +753,7 @@ public class SlidingWindow { int readBaseIndex = 0; int startLocation = getStartLocation(header); int locationIndex = startLocation < 0 ? 0 : readStart - startLocation; + int stopLocation = getStopLocation(header); if (removeRead && locationIndex < 0) throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation); @@ -750,8 +771,6 @@ public class SlidingWindow { int elementsToAdd = (stopLocation < 0) ? readEnd - readStart + 1 : readEnd - stopLocation; while (elementsToAdd-- > 0) header.addLast(new HeaderElement(readEnd - elementsToAdd)); - - stopLocation = readEnd; // update stopLocation accordingly } // Special case for leading insertions before the beginning of the sliding read