The GATK -L unmapped is for GenomeLocs with SAMRecord.NO_ALIGNMENT_REFERENCE_NAME, not SAMRecord.getReadUnmappedFlag() Previously unmapped flag reads in the last bin were being printed while also seeking for the reads without a reference contig.
This commit is contained in:
parent
005cdcad5b
commit
9801dd114f
|
|
@ -28,6 +28,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
|
@ -154,8 +155,8 @@ class IntervalOverlapFilteringIterator implements CloseableIterator<SAMRecord> {
|
|||
}
|
||||
}
|
||||
else {
|
||||
// Found an unmapped read. We're done.
|
||||
if(candidateRead.getReadUnmappedFlag()) {
|
||||
// Found a -L UNMAPPED read. NOTE: this is different than just being flagged as unmapped! We're done.
|
||||
if(AlignmentUtils.isReadGenomeLocUnmapped(candidateRead)) {
|
||||
nextRead = candidateRead;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -525,6 +525,17 @@ public class AlignmentUtils {
|
|||
return alignment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the read does not belong to a contig, i.e. it's location is GenomeLoc.UNMAPPED.
|
||||
* NOTE: A read can have a mapped GenomeLoc and still have an unmapped flag!
|
||||
*
|
||||
* @param r record
|
||||
* @return true if read is unmapped to a genome loc
|
||||
*/
|
||||
public static boolean isReadGenomeLocUnmapped(final SAMRecord r) {
|
||||
return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(r.getReferenceName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
|
||||
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
|
||||
|
|
|
|||
|
|
@ -334,6 +334,33 @@ public class ArtificialSAMUtils {
|
|||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an iterator containing the specified reads
|
||||
*
|
||||
* @param reads the reads
|
||||
* @return iterator for the reads
|
||||
*/
|
||||
public static StingSAMIterator createReadIterator(SAMRecord... reads) {
|
||||
return createReadIterator(Arrays.asList(reads));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an iterator containing the specified reads
|
||||
*
|
||||
* @param reads the reads
|
||||
* @return iterator for the reads
|
||||
*/
|
||||
public static StingSAMIterator createReadIterator(List<SAMRecord> reads) {
|
||||
final Iterator<SAMRecord> iter = reads.iterator();
|
||||
return new StingSAMIterator() {
|
||||
@Override public void close() {}
|
||||
@Override public Iterator<SAMRecord> iterator() { return iter; }
|
||||
@Override public boolean hasNext() { return iter.hasNext(); }
|
||||
@Override public SAMRecord next() { return iter.next(); }
|
||||
@Override public void remove() { iter.remove(); }
|
||||
};
|
||||
}
|
||||
|
||||
private final static int ranIntInclusive(Random ran, int start, int stop) {
|
||||
final int range = stop - start;
|
||||
return ran.nextInt(range) + start;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,149 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class IntervalOverlapFilteringIteratorUnitTest {
|
||||
|
||||
private SAMFileHeader header;
|
||||
private GenomeLoc firstContig;
|
||||
private GenomeLoc secondContig;
|
||||
|
||||
/** Basic aligned and mapped read. */
|
||||
private SAMRecord readMapped;
|
||||
|
||||
/** Read with no contig specified in the read, -L UNMAPPED */
|
||||
private SAMRecord readNoReference;
|
||||
|
||||
/** This read has a start position, but is flagged that it's not mapped. */
|
||||
private SAMRecord readUnmappedFlag;
|
||||
|
||||
/** This read is from the second contig. */
|
||||
private SAMRecord readSecondContig;
|
||||
|
||||
/** This read says it's aligned, but actually has an unknown start. */
|
||||
private SAMRecord readUnknownStart;
|
||||
|
||||
/** The above reads in the order one would expect to find them in a sorted BAM. */
|
||||
private List<SAMRecord> testReads;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, ArtificialSAMUtils.DEFAULT_READ_LENGTH * 2);
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
SAMSequenceRecord record;
|
||||
|
||||
record = header.getSequence(0);
|
||||
firstContig = genomeLocParser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength());
|
||||
record = header.getSequence(1);
|
||||
secondContig = genomeLocParser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength());
|
||||
|
||||
readMapped = createMappedRead("mapped", 1);
|
||||
|
||||
readUnmappedFlag = createMappedRead("unmappedFlagged", 2);
|
||||
readUnmappedFlag.setReadUnmappedFlag(true);
|
||||
|
||||
readSecondContig = createMappedRead("secondContig", 3);
|
||||
readSecondContig.setReferenceName(secondContig.getContig());
|
||||
|
||||
/* This read says it's aligned, but to a contig not in the header. */
|
||||
SAMRecord readUnknownContig = createMappedRead("unknownContig", 4);
|
||||
readUnknownContig.setReferenceName("unknownContig");
|
||||
|
||||
readUnknownStart = createMappedRead("unknownStart", 1);
|
||||
readUnknownStart.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||
|
||||
readNoReference = createUnmappedRead("unmappedNoReference");
|
||||
|
||||
testReads = new ArrayList<SAMRecord>();
|
||||
testReads.add(readMapped);
|
||||
testReads.add(readUnmappedFlag);
|
||||
testReads.add(readUnknownStart);
|
||||
testReads.add(readSecondContig);
|
||||
testReads.add(readUnknownContig);
|
||||
testReads.add(readNoReference);
|
||||
}
|
||||
|
||||
@DataProvider(name = "filteringIteratorTestData")
|
||||
public Object[][] getFilteringIteratorTestData() {
|
||||
return new Object[][] {
|
||||
new Object[] {Arrays.asList(firstContig), Arrays.asList(readMapped, readUnmappedFlag, readUnknownStart)},
|
||||
new Object[] {Arrays.asList(GenomeLoc.UNMAPPED), Arrays.asList(readNoReference)},
|
||||
new Object[] {Arrays.asList(firstContig, secondContig), Arrays.asList(readMapped, readUnmappedFlag, readUnknownStart, readSecondContig)}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "filteringIteratorTestData")
|
||||
public void testFilteringIterator(List<GenomeLoc> locs, List<SAMRecord> expected) {
|
||||
IntervalOverlapFilteringIterator filterIter = new IntervalOverlapFilteringIterator(
|
||||
ArtificialSAMUtils.createReadIterator(testReads), locs);
|
||||
|
||||
List<SAMRecord> actual = new ArrayList<SAMRecord>();
|
||||
while (filterIter.hasNext()) {
|
||||
actual.add(filterIter.next());
|
||||
}
|
||||
Assert.assertEquals(actual, expected);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testMappedAndUnmapped() {
|
||||
new IntervalOverlapFilteringIterator(
|
||||
ArtificialSAMUtils.createReadIterator(testReads),
|
||||
Arrays.asList(firstContig, GenomeLoc.UNMAPPED));
|
||||
}
|
||||
|
||||
private SAMRecord createUnmappedRead(String name) {
|
||||
return ArtificialSAMUtils.createArtificialRead(
|
||||
header,
|
||||
name,
|
||||
SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
|
||||
SAMRecord.NO_ALIGNMENT_START,
|
||||
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
}
|
||||
|
||||
private SAMRecord createMappedRead(String name, int start) {
|
||||
return ArtificialSAMUtils.createArtificialRead(
|
||||
header,
|
||||
name,
|
||||
0,
|
||||
start,
|
||||
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
}
|
||||
}
|
||||
|
|
@ -5,49 +5,52 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class PrintReadsIntegrationTest extends WalkerTest {
|
||||
private static class PRTest {
|
||||
final static String REF = hg18Reference;
|
||||
final static String BAM = validationDataLocation + "HiSeq.1mb.bam";
|
||||
String args;
|
||||
String md5;
|
||||
final String reference;
|
||||
final String bam;
|
||||
final String args;
|
||||
final String md5;
|
||||
|
||||
private PRTest(String args, String md5) {
|
||||
private PRTest(String reference, String bam, String args, String md5) {
|
||||
this.reference = reference;
|
||||
this.bam = bam;
|
||||
this.args = args;
|
||||
this.md5 = md5;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("PRTest(bam='%s', args='%s')", bam, args);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PRTest")
|
||||
public Object[][] createData1() {
|
||||
public Object[][] createPrintReadsTestData() {
|
||||
return new Object[][]{
|
||||
{new PRTest("", "dc8e5451dd29757c336013146010f73a")},
|
||||
{new PRTest(" -compress 0", "fde82269c78c9e91e57286433531b4af")},
|
||||
{new PRTest(" -simplifyBAM", "0531717b32a7e21c0de70b1526b0751f")},
|
||||
{new PRTest(" -n 10", "cdc4ddf9ee1d2ecf37168da8ef23c270")} };
|
||||
{new PRTest(hg18Reference, "HiSeq.1mb.bam", "", "dc8e5451dd29757c336013146010f73a")},
|
||||
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -compress 0", "fde82269c78c9e91e57286433531b4af")},
|
||||
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -simplifyBAM", "0531717b32a7e21c0de70b1526b0751f")},
|
||||
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -n 10", "cdc4ddf9ee1d2ecf37168da8ef23c270")},
|
||||
// See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance()
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", "", "0a9ce949d07a84cb33a1a8e3358bf679")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
|
||||
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
|
||||
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
|
||||
};
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PRTest")
|
||||
public void testPrintReads(PRTest params) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads -R " + params.REF +
|
||||
" -I " + params.BAM +
|
||||
"-T PrintReads" +
|
||||
" -R " + params.reference +
|
||||
" -I " + validationDataLocation + params.bam +
|
||||
params.args +
|
||||
" -o %s",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testPrintReads-"+params.args, spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPrintReadsReadAllInsertion() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintReads -R " + b37KGReference +
|
||||
" -I " + validationDataLocation + "oneReadAllInsertion.bam" +
|
||||
" -o %s",
|
||||
Arrays.asList("6caec4f8a25befb6aba562955401af93"));
|
||||
executeTest("testPrintReads-oneReadAllInsertion", spec);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
public class AlignmentUtilsUnitTest {
|
||||
private SAMFileHeader header;
|
||||
|
||||
/** Basic aligned and mapped read. */
|
||||
private SAMRecord readMapped;
|
||||
|
||||
/** Read with no contig specified in the read, -L UNMAPPED */
|
||||
private SAMRecord readNoReference;
|
||||
|
||||
/** This read has a start position, but is flagged that it's not mapped. */
|
||||
private SAMRecord readUnmappedFlag;
|
||||
|
||||
/** This read says it's aligned, but to a contig not in the header. */
|
||||
private SAMRecord readUnknownContig;
|
||||
|
||||
/** This read says it's aligned, but actually has an unknown start. */
|
||||
private SAMRecord readUnknownStart;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, ArtificialSAMUtils.DEFAULT_READ_LENGTH * 2);
|
||||
|
||||
readMapped = createMappedRead("mapped", 1);
|
||||
|
||||
readNoReference = createUnmappedRead("unmappedNoReference");
|
||||
|
||||
readUnmappedFlag = createMappedRead("unmappedFlagged", 2);
|
||||
readUnmappedFlag.setReadUnmappedFlag(true);
|
||||
|
||||
readUnknownContig = createMappedRead("unknownContig", 3);
|
||||
readUnknownContig.setReferenceName("unknownContig");
|
||||
|
||||
readUnknownStart = createMappedRead("unknownStart", 1);
|
||||
readUnknownStart.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for -L UNMAPPED
|
||||
*/
|
||||
@DataProvider(name = "genomeLocUnmappedReadTests")
|
||||
public Object[][] getGenomeLocUnmappedReadTests() {
|
||||
return new Object[][] {
|
||||
new Object[] {readNoReference, true},
|
||||
new Object[] {readMapped, false},
|
||||
new Object[] {readUnmappedFlag, false},
|
||||
new Object[] {readUnknownContig, false},
|
||||
new Object[] {readUnknownStart, false}
|
||||
};
|
||||
}
|
||||
@Test(dataProvider = "genomeLocUnmappedReadTests")
|
||||
public void testIsReadGenomeLocUnmapped(SAMRecord read, boolean expected) {
|
||||
Assert.assertEquals(AlignmentUtils.isReadGenomeLocUnmapped(read), expected);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for read being truly unmapped
|
||||
*/
|
||||
@DataProvider(name = "unmappedReadTests")
|
||||
public Object[][] getUnmappedReadTests() {
|
||||
return new Object[][] {
|
||||
new Object[] {readNoReference, true},
|
||||
new Object[] {readMapped, false},
|
||||
new Object[] {readUnmappedFlag, true},
|
||||
new Object[] {readUnknownContig, false},
|
||||
new Object[] {readUnknownStart, true}
|
||||
};
|
||||
}
|
||||
@Test(dataProvider = "unmappedReadTests")
|
||||
public void testIsReadUnmapped(SAMRecord read, boolean expected) {
|
||||
Assert.assertEquals(AlignmentUtils.isReadUnmapped(read), expected);
|
||||
}
|
||||
|
||||
private SAMRecord createUnmappedRead(String name) {
|
||||
return ArtificialSAMUtils.createArtificialRead(
|
||||
header,
|
||||
name,
|
||||
SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
|
||||
SAMRecord.NO_ALIGNMENT_START,
|
||||
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
}
|
||||
|
||||
private SAMRecord createMappedRead(String name, int start) {
|
||||
return ArtificialSAMUtils.createArtificialRead(
|
||||
header,
|
||||
name,
|
||||
0,
|
||||
start,
|
||||
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue