Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-09-30 10:35:26 -04:00
commit f75573dd54
5 changed files with 273 additions and 43 deletions

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Iterator;
import java.util.List;
@ -37,7 +38,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
/**
* The data source for reads. Will probably come directly from the BAM file.
*/
private final Iterator<AlignmentContext> sourceIterator;
private final PeekableIterator<AlignmentContext> sourceIterator;
/**
* Stores the sequence of intervals that the windowmaker should be tracking.
@ -69,7 +70,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData);
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
@ -100,11 +101,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
*/
private final GenomeLoc locus;
/**
* Signal not to advance the iterator because we're currently sitting at the next element.
*/
private boolean atNextElement = false;
public WindowMakerIterator(GenomeLoc locus) {
this.locus = locus;
advance();
@ -124,58 +120,45 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
public boolean hasNext() {
advance();
return atNextElement;
return currentAlignmentContext != null;
}
public AlignmentContext next() {
advance();
if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
// Prepare object state for no next element.
// Consume this alignment context.
AlignmentContext toReturn = currentAlignmentContext;
currentAlignmentContext = null;
atNextElement = false;
// Return the current element.
return toReturn;
}
private void advance() {
// No shard boundaries specified. If currentAlignmentContext has been consumed, grab the next one.
if(locus == null) {
if(!atNextElement && sourceIterator.hasNext()) {
currentAlignmentContext = sourceIterator.next();
atNextElement = true;
}
return;
}
// Can't possibly find another element. Skip out early.
if(currentAlignmentContext == null && !sourceIterator.hasNext())
return;
// Need to find the next element that is not past shard boundaries. If we travel past the edge of
// shard boundaries, stop and let the next interval pick it up.
while(sourceIterator.hasNext()) {
// Seed the current alignment context first time through the loop.
if(currentAlignmentContext == null)
currentAlignmentContext = sourceIterator.next();
while(currentAlignmentContext == null && sourceIterator.hasNext()) {
// Advance the iterator and try again.
AlignmentContext candidateAlignmentContext = sourceIterator.peek();
// Found a match.
if(locus.containsP(currentAlignmentContext.getLocation())) {
atNextElement = true;
if(locus == null) {
// No filter present. Return everything that LocusIteratorByState provides us.
currentAlignmentContext = sourceIterator.next();
}
else if(locus.isPast(candidateAlignmentContext.getLocation()))
// Found a locus before the current window; claim this alignment context and throw it away.
sourceIterator.next();
else if(locus.containsP(candidateAlignmentContext.getLocation())) {
// Found a locus within the current window; claim this alignment context and call it the next entry.
currentAlignmentContext = sourceIterator.next();
}
else if(locus.isBefore(candidateAlignmentContext.getLocation())) {
// Whoops. Skipped passed the end of the region. Iteration for this window is complete. Do
// not claim this alignment context in case it is part of the next shard.
break;
}
// Whoops. Skipped passed the end of the region. Iteration for this window is complete.
if(locus.isBefore(currentAlignmentContext.getLocation()))
break;
// No more elements to examine. Iteration is complete.
if(!sourceIterator.hasNext())
break;
// Advance the iterator and try again.
currentAlignmentContext = sourceIterator.next();
else
throw new ReviewedStingException("BUG: filtering locus does not contain, is not before, and is not past the given alignment context");
}
}
}

View File

@ -68,7 +68,7 @@ public class ReadClipper {
return result;
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
@ -94,6 +94,12 @@ public class ReadClipper {
if (left == right)
return new SAMRecord(read.getHeader());
SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
// make the left cut index no longer part of the read. In that case, clip the read entirely.
if (left > leftTailRead.getAlignmentEnd())
return new SAMRecord(read.getHeader());
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
}

View File

@ -52,6 +52,7 @@ public class ReadUtils {
// ----------------------------------------------------------------------------------------------------
public static final String REDUCED_READ_QUALITY_TAG = "RQ";
public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC";
public final static Integer getReducedReadQualityTagValue(final SAMRecord read) {
return read.getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
@ -965,4 +966,5 @@ public class ReadUtils {
AlignmentStartComparator comp = new AlignmentStartComparator();
return comp.compare(read1, read2);
}
}

View File

@ -118,6 +118,20 @@ public abstract class LocusViewTemplate extends BaseTest {
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
public void readAndLocusOverlapAtLastBase() {
SAMRecord read = buildSAMRecord("chr1", 1, 5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
}
@Test
public void readOverlappingStartTest() {
SAMRecord read = buildSAMRecord("chr1", 1, 10);

View File

@ -0,0 +1,225 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 9/28/11
* Time: 9:54 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
SAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
@BeforeClass
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
}
@Test ( enabled = false )
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
}
@Test ( enabled = false )
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReadCoordinates(0,0);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
//clip 1 base at end
expected = readClipper.hardClipByReadCoordinates(3,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at start
expected = readClipper.hardClipByReadCoordinates(0,1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
//clip 2 bases at end
expected = readClipper.hardClipByReadCoordinates(2,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
}
@Test ( enabled = false )
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
//clip 1 base at end
expected = readClipper.hardClipByReferenceCoordinates(3,-1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at start
expected = readClipper.hardClipByReferenceCoordinates(-1,2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
//clip 2 bases at end
expected = readClipper.hardClipByReferenceCoordinates(2,-1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
}
@Test ( enabled = false )
public void testHardClipByReferenceCoordinatesLeftTail() {
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
//clip 2 bases at start
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
}
@Test ( enabled = false )
public void testHardClipByReferenceCoordinatesRightTail() {
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader()));
//clip 1 base at end
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at end
expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
}
@Test ( enabled = false )
public void testHardClipLowQualEnds() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipLowQualEnds((byte)34);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
//clip 2 bases at start
expected = readClipper.hardClipLowQualEnds((byte)44);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//clip 1 base at end
expected = readClipper.hardClipLowQualEnds((byte)34);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at end
expected = readClipper.hardClipLowQualEnds((byte)44);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
// revert Qual sequence
readClipper.getRead().setBaseQualityString(QUALS);
}
}