Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
f75573dd54
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
|||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
|
@ -37,7 +38,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
/**
|
||||
* The data source for reads. Will probably come directly from the BAM file.
|
||||
*/
|
||||
private final Iterator<AlignmentContext> sourceIterator;
|
||||
private final PeekableIterator<AlignmentContext> sourceIterator;
|
||||
|
||||
/**
|
||||
* Stores the sequence of intervals that the windowmaker should be tracking.
|
||||
|
|
@ -69,7 +70,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = iterator;
|
||||
|
||||
this.sourceIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData);
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
}
|
||||
|
||||
|
|
@ -100,11 +101,6 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
*/
|
||||
private final GenomeLoc locus;
|
||||
|
||||
/**
|
||||
* Signal not to advance the iterator because we're currently sitting at the next element.
|
||||
*/
|
||||
private boolean atNextElement = false;
|
||||
|
||||
public WindowMakerIterator(GenomeLoc locus) {
|
||||
this.locus = locus;
|
||||
advance();
|
||||
|
|
@ -124,58 +120,45 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
|
||||
public boolean hasNext() {
|
||||
advance();
|
||||
return atNextElement;
|
||||
return currentAlignmentContext != null;
|
||||
}
|
||||
|
||||
public AlignmentContext next() {
|
||||
advance();
|
||||
if(!atNextElement) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
|
||||
if(!hasNext()) throw new NoSuchElementException("WindowMakerIterator is out of elements for this interval.");
|
||||
|
||||
// Prepare object state for no next element.
|
||||
// Consume this alignment context.
|
||||
AlignmentContext toReturn = currentAlignmentContext;
|
||||
currentAlignmentContext = null;
|
||||
atNextElement = false;
|
||||
|
||||
// Return the current element.
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
// No shard boundaries specified. If currentAlignmentContext has been consumed, grab the next one.
|
||||
if(locus == null) {
|
||||
if(!atNextElement && sourceIterator.hasNext()) {
|
||||
currentAlignmentContext = sourceIterator.next();
|
||||
atNextElement = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Can't possibly find another element. Skip out early.
|
||||
if(currentAlignmentContext == null && !sourceIterator.hasNext())
|
||||
return;
|
||||
|
||||
// Need to find the next element that is not past shard boundaries. If we travel past the edge of
|
||||
// shard boundaries, stop and let the next interval pick it up.
|
||||
while(sourceIterator.hasNext()) {
|
||||
// Seed the current alignment context first time through the loop.
|
||||
if(currentAlignmentContext == null)
|
||||
currentAlignmentContext = sourceIterator.next();
|
||||
while(currentAlignmentContext == null && sourceIterator.hasNext()) {
|
||||
// Advance the iterator and try again.
|
||||
AlignmentContext candidateAlignmentContext = sourceIterator.peek();
|
||||
|
||||
// Found a match.
|
||||
if(locus.containsP(currentAlignmentContext.getLocation())) {
|
||||
atNextElement = true;
|
||||
if(locus == null) {
|
||||
// No filter present. Return everything that LocusIteratorByState provides us.
|
||||
currentAlignmentContext = sourceIterator.next();
|
||||
}
|
||||
else if(locus.isPast(candidateAlignmentContext.getLocation()))
|
||||
// Found a locus before the current window; claim this alignment context and throw it away.
|
||||
sourceIterator.next();
|
||||
else if(locus.containsP(candidateAlignmentContext.getLocation())) {
|
||||
// Found a locus within the current window; claim this alignment context and call it the next entry.
|
||||
currentAlignmentContext = sourceIterator.next();
|
||||
}
|
||||
else if(locus.isBefore(candidateAlignmentContext.getLocation())) {
|
||||
// Whoops. Skipped passed the end of the region. Iteration for this window is complete. Do
|
||||
// not claim this alignment context in case it is part of the next shard.
|
||||
break;
|
||||
}
|
||||
// Whoops. Skipped passed the end of the region. Iteration for this window is complete.
|
||||
if(locus.isBefore(currentAlignmentContext.getLocation()))
|
||||
break;
|
||||
|
||||
// No more elements to examine. Iteration is complete.
|
||||
if(!sourceIterator.hasNext())
|
||||
break;
|
||||
|
||||
// Advance the iterator and try again.
|
||||
currentAlignmentContext = sourceIterator.next();
|
||||
else
|
||||
throw new ReviewedStingException("BUG: filtering locus does not contain, is not before, and is not past the given alignment context");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class ReadClipper {
|
|||
return result;
|
||||
}
|
||||
|
||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
protected SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
|
|
@ -94,6 +94,12 @@ public class ReadClipper {
|
|||
if (left == right)
|
||||
return new SAMRecord(read.getHeader());
|
||||
SAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
||||
|
||||
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
|
||||
// make the left cut index no longer part of the read. In that case, clip the read entirely.
|
||||
if (left > leftTailRead.getAlignmentEnd())
|
||||
return new SAMRecord(read.getHeader());
|
||||
|
||||
ReadClipper clipper = new ReadClipper(leftTailRead);
|
||||
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ public class ReadUtils {
|
|||
// ----------------------------------------------------------------------------------------------------
|
||||
|
||||
public static final String REDUCED_READ_QUALITY_TAG = "RQ";
|
||||
public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC";
|
||||
|
||||
public final static Integer getReducedReadQualityTagValue(final SAMRecord read) {
|
||||
return read.getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
||||
|
|
@ -965,4 +966,5 @@ public class ReadUtils {
|
|||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||
return comp.compare(read1, read2);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,6 +118,20 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readAndLocusOverlapAtLastBase() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
|
||||
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
|
||||
WindowMaker.WindowMakerIterator window = windowMaker.next();
|
||||
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
|
||||
LocusView view = createView(dataProvider);
|
||||
|
||||
testReadsInContext(view, shard.getGenomeLocs(), Collections.singletonList(read));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void readOverlappingStartTest() {
|
||||
SAMRecord read = buildSAMRecord("chr1", 1, 10);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 9/28/11
|
||||
* Time: 9:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipperUnitTest extends BaseTest {
|
||||
|
||||
// TODO: Add error messages on failed tests
|
||||
|
||||
SAMRecord read, expected;
|
||||
ReadClipper readClipper;
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReadBases(new String(BASES).getBytes());
|
||||
read.setBaseQualityString(new String(QUALS));
|
||||
|
||||
readClipper = new ReadClipper(read);
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new SAMRecord(read.getHeader()));
|
||||
//clip 1 base
|
||||
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipByReadCoordinates() {
|
||||
logger.warn("Executing testHardClipByReadCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new SAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReadCoordinates(0,0);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReadCoordinates(3,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReadCoordinates(0,1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReadCoordinates(2,3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new SAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinates(-1,1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReferenceCoordinates(3,-1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReferenceCoordinates(-1,2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReferenceCoordinates(2,-1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new SAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(1);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipByReferenceCoordinatesLeftTail(2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new SAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipByReferenceCoordinatesRightTail(3);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipByReferenceCoordinatesRightTail(2);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
|
||||
}
|
||||
|
||||
@Test ( enabled = false )
|
||||
public void testHardClipLowQualEnds() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new SAMRecord(read.getHeader()));
|
||||
|
||||
//clip 1 base at start
|
||||
expected = readClipper.hardClipLowQualEnds((byte)34);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "1H3M");
|
||||
|
||||
//clip 2 bases at start
|
||||
expected = readClipper.hardClipLowQualEnds((byte)44);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
|
||||
Assert.assertEquals(expected.getCigarString(), "2H2M");
|
||||
|
||||
// Reverse Quals sequence
|
||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
||||
|
||||
//clip 1 base at end
|
||||
expected = readClipper.hardClipLowQualEnds((byte)34);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
|
||||
Assert.assertEquals(expected.getCigarString(), "3M1H");
|
||||
|
||||
//clip 2 bases at end
|
||||
expected = readClipper.hardClipLowQualEnds((byte)44);
|
||||
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
|
||||
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
|
||||
Assert.assertEquals(expected.getCigarString(), "2M2H");
|
||||
|
||||
// revert Qual sequence
|
||||
readClipper.getRead().setBaseQualityString(QUALS);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue