diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java index cfbce58ee..43ea46002 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Iterator; import java.util.List; @@ -37,7 +38,7 @@ public class WindowMaker implements Iterable, I /** * The data source for reads. Will probably come directly from the BAM file. */ - private final Iterator sourceIterator; + private final PeekableIterator sourceIterator; /** * Stores the sequence of intervals that the windowmaker should be tracking. @@ -69,7 +70,7 @@ public class WindowMaker implements Iterable, I this.sourceInfo = shard.getReadProperties(); this.readIterator = iterator; - this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData); + this.sourceIterator = new PeekableIterator(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData)); this.intervalIterator = intervals.size()>0 ? new PeekableIterator(intervals.iterator()) : null; } @@ -100,11 +101,6 @@ public class WindowMaker implements Iterable, I */ private final GenomeLoc locus; - /** - * Signal not to advance the iterator because we're currently sitting at the next element. - */ - private boolean atNextElement = false; - public WindowMakerIterator(GenomeLoc locus) { this.locus = locus; advance(); @@ -124,58 +120,45 @@ public class WindowMaker implements Iterable, I public boolean hasNext() { advance(); - return atNextElement; + return currentAlignmentContext != null; } public AlignmentContext next() { - advance(); - if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval."); + if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval."); - // Prepare object state for no next element. + // Consume this alignment context. AlignmentContext toReturn = currentAlignmentContext; currentAlignmentContext = null; - atNextElement = false; // Return the current element. return toReturn; } private void advance() { - // No shard boundaries specified. If currentAlignmentContext has been consumed, grab the next one. - if(locus == null) { - if(!atNextElement && sourceIterator.hasNext()) { - currentAlignmentContext = sourceIterator.next(); - atNextElement = true; - } - return; - } - - // Can't possibly find another element. Skip out early. - if(currentAlignmentContext == null && !sourceIterator.hasNext()) - return; - // Need to find the next element that is not past shard boundaries. If we travel past the edge of // shard boundaries, stop and let the next interval pick it up. - while(sourceIterator.hasNext()) { - // Seed the current alignment context first time through the loop. - if(currentAlignmentContext == null) - currentAlignmentContext = sourceIterator.next(); + while(currentAlignmentContext == null && sourceIterator.hasNext()) { + // Advance the iterator and try again. + AlignmentContext candidateAlignmentContext = sourceIterator.peek(); - // Found a match. - if(locus.containsP(currentAlignmentContext.getLocation())) { - atNextElement = true; + if(locus == null) { + // No filter present. Return everything that LocusIteratorByState provides us. + currentAlignmentContext = sourceIterator.next(); + } + else if(locus.isPast(candidateAlignmentContext.getLocation())) + // Found a locus before the current window; claim this alignment context and throw it away. + sourceIterator.next(); + else if(locus.containsP(candidateAlignmentContext.getLocation())) { + // Found a locus within the current window; claim this alignment context and call it the next entry. + currentAlignmentContext = sourceIterator.next(); + } + else if(locus.isBefore(candidateAlignmentContext.getLocation())) { + // Whoops. Skipped passed the end of the region. Iteration for this window is complete. Do + // not claim this alignment context in case it is part of the next shard. break; } - // Whoops. Skipped passed the end of the region. Iteration for this window is complete. - if(locus.isBefore(currentAlignmentContext.getLocation())) - break; - - // No more elements to examine. Iteration is complete. - if(!sourceIterator.hasNext()) - break; - - // Advance the iterator and try again. - currentAlignmentContext = sourceIterator.next(); + else + throw new ReviewedStingException("BUG: filtering locus does not contain, is not before, and is not past the given alignment context"); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 275f476dc..11a59de10 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -68,7 +68,7 @@ public class ReadClipper { return result; } - private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { + protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); @@ -94,6 +94,12 @@ public class ReadClipper { if (left == right) return new SAMRecord(read.getHeader()); SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); + + // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions + // make the left cut index no longer part of the read. In that case, clip the read entirely. + if (left > leftTailRead.getAlignmentEnd()) + return new SAMRecord(read.getHeader()); + ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index e0a3a5a53..3c389fd4c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -52,6 +52,7 @@ public class ReadUtils { // ---------------------------------------------------------------------------------------------------- public static final String REDUCED_READ_QUALITY_TAG = "RQ"; + public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC"; public final static Integer getReducedReadQualityTagValue(final SAMRecord read) { return read.getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); @@ -965,4 +966,5 @@ public class ReadUtils { AlignmentStartComparator comp = new AlignmentStartComparator(); return comp.compare(read1, read2); } + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index acfefd627..e5cf80826 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -118,6 +118,20 @@ public abstract class LocusViewTemplate extends BaseTest { testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); } + @Test + public void readAndLocusOverlapAtLastBase() { + SAMRecord read = buildSAMRecord("chr1", 1, 5); + SAMRecordIterator iterator = new SAMRecordIterator(read); + + Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5))); + WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource()); + WindowMaker.WindowMakerIterator window = windowMaker.next(); + LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null); + LocusView view = createView(dataProvider); + + testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read)); + } + @Test public void readOverlappingStartTest() { SAMRecord read = buildSAMRecord("chr1", 1, 10); diff --git a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java new file mode 100644 index 000000000..d8695cf38 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.clipreads; + +import net.sf.samtools.*; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.LinkedList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: roger + * Date: 9/28/11 + * Time: 9:54 PM + * To change this template use File | Settings | File Templates. + */ +public class ReadClipperUnitTest extends BaseTest { + + // TODO: Add error messages on failed tests + + SAMRecord read, expected; + ReadClipper readClipper; + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63 + + @BeforeClass + public void init() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); + read.setReadUnmappedFlag(true); + read.setReadBases(new String(BASES).getBytes()); + read.setBaseQualityString(new String(QUALS)); + + readClipper = new ReadClipper(read); + } + + @Test ( enabled = false ) + public void testHardClipBothEndsByReferenceCoordinates() { + logger.warn("Executing testHardClipBothEndsByReferenceCoordinates"); + + //Clip whole read + Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader())); + //clip 1 base + expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3); + Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3)); + Assert.assertEquals(expected.getCigarString(), "1H2M1H"); + + } + + @Test ( enabled = false ) + public void testHardClipByReadCoordinates() { + logger.warn("Executing testHardClipByReadCoordinates"); + + //Clip whole read + Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader())); + + //clip 1 base at start + expected = readClipper.hardClipByReadCoordinates(0,0); + Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4)); + Assert.assertEquals(expected.getCigarString(), "1H3M"); + + //clip 1 base at end + expected = readClipper.hardClipByReadCoordinates(3,3); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3)); + Assert.assertEquals(expected.getCigarString(), "3M1H"); + + //clip 2 bases at start + expected = readClipper.hardClipByReadCoordinates(0,1); + Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4)); + Assert.assertEquals(expected.getCigarString(), "2H2M"); + + //clip 2 bases at end + expected = readClipper.hardClipByReadCoordinates(2,3); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2)); + Assert.assertEquals(expected.getCigarString(), "2M2H"); + + } + + @Test ( enabled = false ) + public void testHardClipByReferenceCoordinates() { + logger.warn("Executing testHardClipByReferenceCoordinates"); + + //Clip whole read + Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader())); + + //clip 1 base at start + expected = readClipper.hardClipByReferenceCoordinates(-1,1); + Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4)); + Assert.assertEquals(expected.getCigarString(), "1H3M"); + + //clip 1 base at end + expected = readClipper.hardClipByReferenceCoordinates(3,-1); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3)); + Assert.assertEquals(expected.getCigarString(), "3M1H"); + + //clip 2 bases at start + expected = readClipper.hardClipByReferenceCoordinates(-1,2); + Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4)); + Assert.assertEquals(expected.getCigarString(), "2H2M"); + + //clip 2 bases at end + expected = readClipper.hardClipByReferenceCoordinates(2,-1); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2)); + Assert.assertEquals(expected.getCigarString(), "2M2H"); + + } + + @Test ( enabled = false ) + public void testHardClipByReferenceCoordinatesLeftTail() { + logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail"); + + //Clip whole read + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader())); + + //clip 1 base at start + expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1); + Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4)); + Assert.assertEquals(expected.getCigarString(), "1H3M"); + + //clip 2 bases at start + expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2); + Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4)); + Assert.assertEquals(expected.getCigarString(), "2H2M"); + + } + + @Test ( enabled = false ) + public void testHardClipByReferenceCoordinatesRightTail() { + logger.warn("Executing testHardClipByReferenceCoordinatesRightTail"); + + //Clip whole read + Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader())); + + //clip 1 base at end + expected = readClipper.hardClipByReferenceCoordinatesRightTail(3); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3)); + Assert.assertEquals(expected.getCigarString(), "3M1H"); + + //clip 2 bases at end + expected = readClipper.hardClipByReferenceCoordinatesRightTail(2); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2)); + Assert.assertEquals(expected.getCigarString(), "2M2H"); + + } + + @Test ( enabled = false ) + public void testHardClipLowQualEnds() { + logger.warn("Executing testHardClipByReferenceCoordinates"); + + + //Clip whole read + Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader())); + + //clip 1 base at start + expected = readClipper.hardClipLowQualEnds((byte)34); + Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4)); + Assert.assertEquals(expected.getCigarString(), "1H3M"); + + //clip 2 bases at start + expected = readClipper.hardClipLowQualEnds((byte)44); + Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4)); + Assert.assertEquals(expected.getCigarString(), "2H2M"); + + // Reverse Quals sequence + readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33 + + //clip 1 base at end + expected = readClipper.hardClipLowQualEnds((byte)34); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3)); + Assert.assertEquals(expected.getCigarString(), "3M1H"); + + //clip 2 bases at end + expected = readClipper.hardClipLowQualEnds((byte)44); + Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes()); + Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2)); + Assert.assertEquals(expected.getCigarString(), "2M2H"); + + // revert Qual sequence + readClipper.getRead().setBaseQualityString(QUALS); + } +}