From 2c3dc291c0da00371d7ddb7c0de300c1e8f0961e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 15 Aug 2012 15:29:55 -0400 Subject: [PATCH] Added positive/negative strand to the synthetic reads --- .../reducereads/SlidingWindow.java | 33 ++++++++++--------- .../reducereads/SyntheticRead.java | 10 ++++-- .../reducereads/SyntheticReadUnitTest.java | 2 +- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 50dd2e810..5820dc5f5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -263,7 +263,7 @@ public class SlidingWindow { * @param end the first header index NOT TO add to consensus * @return a list of consensus reads generated by this call. Empty list if no consensus was generated. */ - protected List addToSyntheticReads(LinkedList header, int start, int end) { + protected List addToSyntheticReads(LinkedList header, int start, int end, boolean isNegativeStrand) { LinkedList reads = new LinkedList(); if (start < end) { ListIterator headerElementIterator = header.listIterator(start); @@ -277,22 +277,22 @@ public class SlidingWindow { reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); int endOfConsensus = findNextNonConsensusElement(header, start, end); - addToRunningConsensus(header, start, endOfConsensus); + addToRunningConsensus(header, start, endOfConsensus, isNegativeStrand); if (endOfConsensus <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); - reads.addAll(addToSyntheticReads(header, endOfConsensus, end)); + reads.addAll(addToSyntheticReads(header, endOfConsensus, end, isNegativeStrand)); } else if (headerElement.hasFilteredData()) { reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); int endOfFilteredData = findNextNonFilteredDataElement(header, start, end); - addToFilteredData(header, start, endOfFilteredData); + addToFilteredData(header, start, endOfFilteredData, isNegativeStrand); if (endOfFilteredData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start)); - reads.addAll(addToSyntheticReads(header, endOfFilteredData, end)); + reads.addAll(addToSyntheticReads(header, endOfFilteredData, end, isNegativeStrand)); } else if (headerElement.isEmpty()) { reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); @@ -301,7 +301,7 @@ public class SlidingWindow { if (endOfEmptyData <= start) throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); - reads.addAll(addToSyntheticReads(header, endOfEmptyData, end)); + reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, isNegativeStrand)); } else throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start)); @@ -414,9 +414,9 @@ public class SlidingWindow { * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus */ - private void addToFilteredData(LinkedList header, int start, int end) { + private void addToFilteredData(LinkedList header, int start, int end, boolean isNegativeStrand) { if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); + filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); ListIterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { @@ -443,9 +443,9 @@ public class SlidingWindow { * @param start the first header index to add to consensus * @param end the first header index NOT TO add to consensus */ - private void addToRunningConsensus(LinkedList header, int start, int end) { + private void addToRunningConsensus(LinkedList header, int start, int end, boolean isNegativeStrand) { if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities); + runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); Iterator headerElementIterator = header.listIterator(start); for (int index = start; index < end; index++) { @@ -509,7 +509,8 @@ public class SlidingWindow { int refStop = windowHeader.get(stop).getLocation(); // Try to compress the variant region - if (canCompress) { + // the "foundEvent" protects us from trying to compress variant regions that are created by insertions + if (canCompress && foundEvent) { allReads = createPolyploidConsensus(start, stop, nHaplotypes, ((HeaderElement) header[hetRefPosition]).getLocation()); } @@ -545,7 +546,7 @@ public class SlidingWindow { List allReads = compressVariantRegion(start, stop); List result = (downsampleCoverage > 0) ? downsampleVariantRegion(allReads) : allReads; - result.addAll(addToSyntheticReads(windowHeader, 0, start)); + result.addAll(addToSyntheticReads(windowHeader, 0, start, false)); result.addAll(finalizeAndAdd(ConsensusType.BOTH)); return result; // finalized reads will be downsampled if necessary @@ -611,7 +612,7 @@ public class SlidingWindow { finalizedReads = closeVariantRegions(regions, true); if (!windowHeader.isEmpty()) { - finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size() - 1)); + finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size() - 1, false)); finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up } @@ -668,7 +669,7 @@ public class SlidingWindow { int refStop = windowHeader.get(stop).getLocation(); List toRemove = new LinkedList(); for (GATKSAMRecord read : readsInWindow) { - int haplotype = -1; + int haplotype; // check if the read is either before or inside the variant region if (read.getSoftStart() <= refStop) { @@ -706,13 +707,13 @@ public class SlidingWindow { for (LinkedList header : headersPosStrand) { if (header.size() > 0) - hetReads.addAll(addToSyntheticReads(header, 0, header.size())); + hetReads.addAll(addToSyntheticReads(header, 0, header.size(), false)); if (runningConsensus != null) hetReads.add(finalizeRunningConsensus()); } for (LinkedList header : headersNegStrand) { if (header.size() > 0) - hetReads.addAll(addToSyntheticReads(header, 0, header.size())); + hetReads.addAll(addToSyntheticReads(header, 0, header.size(), true)); if (runningConsensus != null) hetReads.add(finalizeRunningConsensus()); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java index 6134101d9..ab65020c3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticRead.java @@ -5,9 +5,9 @@ import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -46,6 +46,7 @@ public class SyntheticRead { private String readName; private Integer refStart; private boolean hasIndelQualities = false; + private boolean isNegativeStrand = false; /** * Full initialization of the running consensus if you have all the information and are ready to @@ -59,7 +60,7 @@ public class SyntheticRead { * @param refStart the alignment start (reference based) * @param readTag the reduce reads tag for the synthetic read */ - public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, String readTag, boolean hasIndelQualities) { + public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) { final int initialCapacity = 10000; bases = new ArrayList(initialCapacity); counts = new ArrayList(initialCapacity); @@ -76,9 +77,10 @@ public class SyntheticRead { this.readName = readName; this.refStart = refStart; this.hasIndelQualities = hasIndelQualities; + this.isNegativeStrand = isNegativeRead; } - public SyntheticRead(List bases, List counts, List quals, List insertionQuals, List deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, boolean hasIndelQualities) { + public SyntheticRead(List bases, List counts, List quals, List insertionQuals, List deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, boolean hasIndelQualities, boolean isNegativeRead) { this.bases = bases; this.counts = counts; this.quals = quals; @@ -93,6 +95,7 @@ public class SyntheticRead { this.readName = readName; this.refStart = refStart; this.hasIndelQualities = hasIndelQualities; + this.isNegativeStrand = isNegativeRead; } /** @@ -133,6 +136,7 @@ public class SyntheticRead { read.setReferenceIndex(contigIndex); read.setReadPairedFlag(false); read.setReadUnmappedFlag(false); + read.setReadNegativeStrandFlag(isNegativeStrand); read.setCigar(buildCigar()); // the alignment start may change while building the cigar (leading deletions) read.setAlignmentStart(refStart); read.setReadName(readName); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java index e651c018c..738fe4a2e 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SyntheticReadUnitTest.java @@ -35,7 +35,7 @@ public void testBaseCounts() { new TestRead(bases, quals, new Byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})}; for (TestRead testRead : testReads) { - SyntheticRead syntheticRead = new SyntheticRead(Arrays.asList(testRead.getBases()), Arrays.asList(testRead.getCounts()), Arrays.asList(testRead.getQuals()), Arrays.asList(testRead.getInsQuals()), Arrays.asList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false); + SyntheticRead syntheticRead = new SyntheticRead(Arrays.asList(testRead.getBases()), Arrays.asList(testRead.getCounts()), Arrays.asList(testRead.getQuals()), Arrays.asList(testRead.getInsQuals()), Arrays.asList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false, false); Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts()); } }