Program runs, but the consensus reads are all out of place and need more tags
This commit is contained in:
parent
3494a52ddc
commit
97874b92d1
|
|
@ -27,10 +27,9 @@ public class SlidingWindow {
|
|||
final private LinkedList<GATKSAMRecord> readsInWindow;
|
||||
final private LinkedList<HeaderElement> windowHeader;
|
||||
protected int contextSize; // the largest context size (between mismatches and indels)
|
||||
protected int stopLocation;
|
||||
protected String contig;
|
||||
protected int contigIndex;
|
||||
protected SAMFileHeader header;
|
||||
protected SAMFileHeader samHeader;
|
||||
protected GATKSAMReadGroupRecord readGroupAttribute;
|
||||
protected int downsampleCoverage;
|
||||
|
||||
|
|
@ -66,7 +65,11 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
public int getStopLocation() {
|
||||
return stopLocation;
|
||||
return getStopLocation(windowHeader);
|
||||
}
|
||||
|
||||
private int getStopLocation(LinkedList<HeaderElement> header) {
|
||||
return getStartLocation(header) + header.size() - 1;
|
||||
}
|
||||
|
||||
public String getContig() {
|
||||
|
|
@ -82,8 +85,7 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
|
||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs) {
|
||||
this.stopLocation = -1;
|
||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs) {
|
||||
this.contextSize = contextSize;
|
||||
this.downsampleCoverage = downsampleCoverage;
|
||||
|
||||
|
|
@ -97,7 +99,7 @@ public class SlidingWindow {
|
|||
|
||||
this.contig = contig;
|
||||
this.contigIndex = contigIndex;
|
||||
this.header = header;
|
||||
this.samHeader = samHeader;
|
||||
this.readGroupAttribute = readGroupAttribute;
|
||||
|
||||
this.consensusCounter = 0;
|
||||
|
|
@ -202,7 +204,7 @@ public class SlidingWindow {
|
|||
|
||||
List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
|
||||
for (GATKSAMRecord read : readsInWindow) { // todo -- unnecessarily going through all reads in the window !! Optimize this (But remember reads are not sorted by alignment end!)
|
||||
if (read.getAlignmentEnd() < getStartLocation(windowHeader)) {
|
||||
if (read.getSoftEnd() < getStartLocation(windowHeader)) {
|
||||
readsToRemove.add(read);
|
||||
}
|
||||
}
|
||||
|
|
@ -261,7 +263,7 @@ public class SlidingWindow {
|
|||
* @param end the first header index NOT TO add to consensus
|
||||
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
protected List<GATKSAMRecord> addToSyntheticReads(List<HeaderElement> header, int start, int end) {
|
||||
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end) {
|
||||
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
if (start < end) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -274,8 +276,8 @@ public class SlidingWindow {
|
|||
if (headerElement.hasConsensusData()) {
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
|
||||
|
||||
int endOfConsensus = findNextNonConsensusElement(start, end);
|
||||
addToRunningConsensus(start, endOfConsensus);
|
||||
int endOfConsensus = findNextNonConsensusElement(header, start, end);
|
||||
addToRunningConsensus(header, start, endOfConsensus);
|
||||
|
||||
if (endOfConsensus <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
|
||||
|
|
@ -284,8 +286,8 @@ public class SlidingWindow {
|
|||
} else if (headerElement.hasFilteredData()) {
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
|
||||
|
||||
int endOfFilteredData = findNextNonFilteredDataElement(start, end);
|
||||
addToFilteredData(start, endOfFilteredData);
|
||||
int endOfFilteredData = findNextNonFilteredDataElement(header, start, end);
|
||||
addToFilteredData(header, start, endOfFilteredData);
|
||||
|
||||
if (endOfFilteredData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
|
||||
|
|
@ -294,7 +296,7 @@ public class SlidingWindow {
|
|||
} else if (headerElement.isEmpty()) {
|
||||
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
|
||||
|
||||
int endOfEmptyData = findNextNonEmptyElement(start, end);
|
||||
int endOfEmptyData = findNextNonEmptyElement(header, start, end);
|
||||
|
||||
if (endOfEmptyData <= start)
|
||||
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
|
||||
|
|
@ -343,8 +345,8 @@ public class SlidingWindow {
|
|||
* @param upTo limit to search for another consensus element
|
||||
* @return next position with consensus data or empty
|
||||
*/
|
||||
private int findNextNonConsensusElement(int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
|
||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
|
|
@ -365,8 +367,8 @@ public class SlidingWindow {
|
|||
* @param upTo limit to search for
|
||||
* @return next position with no filtered data
|
||||
*/
|
||||
private int findNextNonFilteredDataElement(int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
|
||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
|
|
@ -387,8 +389,8 @@ public class SlidingWindow {
|
|||
* @param upTo limit to search for
|
||||
* @return next position with non-empty element
|
||||
*/
|
||||
private int findNextNonEmptyElement(int start, int upTo) {
|
||||
ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
|
||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
int index = start;
|
||||
while (index < upTo) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
|
|
@ -412,11 +414,11 @@ public class SlidingWindow {
|
|||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
*/
|
||||
private void addToFilteredData(int start, int end) {
|
||||
private void addToFilteredData(LinkedList<HeaderElement> header, int start, int end) {
|
||||
if (filteredDataConsensus == null)
|
||||
filteredDataConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
|
||||
filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
|
||||
|
||||
ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
|
@ -441,11 +443,11 @@ public class SlidingWindow {
|
|||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
*/
|
||||
private void addToRunningConsensus(int start, int end) {
|
||||
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
|
||||
|
||||
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
for (int index = start; index < end; index++) {
|
||||
if (!headerElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
|
||||
|
|
@ -508,19 +510,25 @@ public class SlidingWindow {
|
|||
|
||||
// Try to compress the variant region
|
||||
if (canCompress) {
|
||||
allReads = createPolyploidConsensus(start, stop, nHaplotypes, hetRefPosition);
|
||||
allReads = createPolyploidConsensus(start, stop, nHaplotypes, ((HeaderElement) header[hetRefPosition]).getLocation());
|
||||
}
|
||||
|
||||
// Return all reads that overlap the variant region and remove them read from the window header entirely
|
||||
// Return all reads that overlap the variant region and remove them from the window header entirely
|
||||
// also remove all reads preceding the variant region (since they will be output as consensus right after compression
|
||||
else {
|
||||
LinkedList<GATKSAMRecord> toRemove = new LinkedList<GATKSAMRecord>();
|
||||
for (GATKSAMRecord read : readsInWindow) {
|
||||
if (read.getSoftStart() <= refStop && read.getAlignmentEnd() >= refStart) {
|
||||
allReads.add(read);
|
||||
removeFromHeader(windowHeader, read);
|
||||
if (read.getSoftStart() <= refStop) {
|
||||
if (read.getAlignmentEnd() >= refStart) {
|
||||
allReads.add(read);
|
||||
removeFromHeader(windowHeader, read);
|
||||
}
|
||||
toRemove.add(read);
|
||||
}
|
||||
}
|
||||
for (GATKSAMRecord read : allReads)
|
||||
for (GATKSAMRecord read : toRemove) {
|
||||
readsInWindow.remove(read);
|
||||
}
|
||||
}
|
||||
return allReads;
|
||||
}
|
||||
|
|
@ -598,7 +606,7 @@ public class SlidingWindow {
|
|||
List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
if (!windowHeader.isEmpty()) {
|
||||
boolean[] variantSite = markSites(stopLocation + 1);
|
||||
boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1);
|
||||
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), variantSite);
|
||||
finalizedReads = closeVariantRegions(regions, true);
|
||||
|
||||
|
|
@ -653,6 +661,7 @@ public class SlidingWindow {
|
|||
// we will create two (positive strand, negative strand) headers for each contig
|
||||
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
List<GATKSAMRecord> hetReads = new LinkedList<GATKSAMRecord>();
|
||||
Map<Byte, Integer> haplotypeHeaderMap = new HashMap<Byte, Integer>(nHaplotypes);
|
||||
int currentHaplotype = 0;
|
||||
int refStart = windowHeader.get(start).getLocation();
|
||||
|
|
@ -661,40 +670,51 @@ public class SlidingWindow {
|
|||
for (GATKSAMRecord read : readsInWindow) {
|
||||
int haplotype = -1;
|
||||
|
||||
// check if the read is inside the variant region
|
||||
if ( read.getMappingQuality() > MIN_MAPPING_QUALITY && (read.getSoftStart() <= refStop && read.getSoftEnd() >= refStart)) {
|
||||
// check if the read is either before or inside the variant region
|
||||
if (read.getSoftStart() <= refStop) {
|
||||
// check if the read is inside the variant region
|
||||
if (read.getMappingQuality() > MIN_MAPPING_QUALITY && read.getSoftEnd() >= refStart) {
|
||||
// check if the read contains the het site
|
||||
if (read.getSoftStart() <= hetRefPosition && read.getSoftEnd() >= hetRefPosition) {
|
||||
int readPos = ReadUtils.getReadCoordinateForReferenceCoordinate(read, hetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
byte base = read.getReadBases()[readPos];
|
||||
byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPos];
|
||||
|
||||
// check if the read contains the het site
|
||||
if (read.getSoftStart() <= hetRefPosition && read.getSoftEnd() >= hetRefPosition) {
|
||||
int readPos = ReadUtils.getReadCoordinateForReferenceCoordinate(read, hetRefPosition, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
byte base = read.getReadBases()[readPos];
|
||||
byte qual = read.getBaseQualities(EventType.BASE_SUBSTITUTION)[readPos];
|
||||
|
||||
// check if base passes the filters!
|
||||
if (qual > MIN_BASE_QUAL_TO_COUNT) {
|
||||
// check which haplotype this read represents and take the index of it from the list of headers
|
||||
if (haplotypeHeaderMap.containsKey(base)) {
|
||||
haplotype = haplotypeHeaderMap.get(base);
|
||||
}
|
||||
// create new lists if this haplotype has not been seen yet
|
||||
else {
|
||||
haplotype = ++currentHaplotype;
|
||||
haplotypeHeaderMap.put(base, currentHaplotype);
|
||||
headersPosStrand.add(new LinkedList<HeaderElement>());
|
||||
headersNegStrand.add(new LinkedList<HeaderElement>());
|
||||
// check if base passes the filters!
|
||||
if (qual > MIN_BASE_QUAL_TO_COUNT) {
|
||||
// check which haplotype this read represents and take the index of it from the list of headers
|
||||
if (haplotypeHeaderMap.containsKey(base)) {
|
||||
haplotype = haplotypeHeaderMap.get(base);
|
||||
}
|
||||
// create new lists if this haplotype has not been seen yet
|
||||
else {
|
||||
haplotype = currentHaplotype;
|
||||
haplotypeHeaderMap.put(base, currentHaplotype);
|
||||
headersPosStrand.add(new LinkedList<HeaderElement>());
|
||||
headersNegStrand.add(new LinkedList<HeaderElement>());
|
||||
currentHaplotype++;
|
||||
}
|
||||
LinkedList<HeaderElement> header = read.getReadNegativeStrandFlag() ? headersNegStrand.get(haplotype) : headersPosStrand.get(haplotype);
|
||||
addToHeader(header, read);
|
||||
}
|
||||
}
|
||||
LinkedList<HeaderElement> header = read.getReadNegativeStrandFlag() ? headersNegStrand.get(haplotype) : headersPosStrand.get(haplotype);
|
||||
addToHeader(header, read);
|
||||
toRemove.add(read);
|
||||
}
|
||||
// we remove all reads before and inside the variant region from the window
|
||||
toRemove.add(read);
|
||||
}
|
||||
}
|
||||
|
||||
List<GATKSAMRecord> hetReads = new LinkedList<GATKSAMRecord>();
|
||||
for (List<HeaderElement> header : headersPosStrand) {
|
||||
hetReads.addAll(addToSyntheticReads(header, 0, header.size()));
|
||||
hetReads.add(finalizeRunningConsensus());
|
||||
for (LinkedList<HeaderElement> header : headersPosStrand) {
|
||||
if (header.size() > 0)
|
||||
hetReads.addAll(addToSyntheticReads(header, 0, header.size()));
|
||||
if (runningConsensus != null)
|
||||
hetReads.add(finalizeRunningConsensus());
|
||||
}
|
||||
for (LinkedList<HeaderElement> header : headersNegStrand) {
|
||||
if (header.size() > 0)
|
||||
hetReads.addAll(addToSyntheticReads(header, 0, header.size()));
|
||||
if (runningConsensus != null)
|
||||
hetReads.add(finalizeRunningConsensus());
|
||||
}
|
||||
|
||||
for (GATKSAMRecord read : toRemove) {
|
||||
|
|
@ -733,6 +753,7 @@ public class SlidingWindow {
|
|||
int readBaseIndex = 0;
|
||||
int startLocation = getStartLocation(header);
|
||||
int locationIndex = startLocation < 0 ? 0 : readStart - startLocation;
|
||||
int stopLocation = getStopLocation(header);
|
||||
|
||||
if (removeRead && locationIndex < 0)
|
||||
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " start " + read.getUnclippedStart() + "," + read.getUnclippedEnd() + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
|
||||
|
|
@ -750,8 +771,6 @@ public class SlidingWindow {
|
|||
int elementsToAdd = (stopLocation < 0) ? readEnd - readStart + 1 : readEnd - stopLocation;
|
||||
while (elementsToAdd-- > 0)
|
||||
header.addLast(new HeaderElement(readEnd - elementsToAdd));
|
||||
|
||||
stopLocation = readEnd; // update stopLocation accordingly
|
||||
}
|
||||
|
||||
// Special case for leading insertions before the beginning of the sliding read
|
||||
|
|
|
|||
Loading…
Reference in New Issue