The GATK -L unmapped is for GenomeLocs with SAMRecord.NO_ALIGNMENT_REFERENCE_NAME, not SAMRecord.getReadUnmappedFlag()
Previously unmapped flag reads in the last bin were being printed while also seeking for the reads without a reference contig.
This commit is contained in:
Khalid Shakir 2012-04-27 09:58:38 -04:00
parent 005cdcad5b
commit 9801dd114f
6 changed files with 340 additions and 26 deletions

View File

@ -28,6 +28,7 @@ import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import java.util.List;
import java.util.NoSuchElementException;
@ -154,8 +155,8 @@ class IntervalOverlapFilteringIterator implements CloseableIterator<SAMRecord> {
}
}
else {
// Found an unmapped read. We're done.
if(candidateRead.getReadUnmappedFlag()) {
// Found a -L UNMAPPED read. NOTE: this is different than just being flagged as unmapped! We're done.
if(AlignmentUtils.isReadGenomeLocUnmapped(candidateRead)) {
nextRead = candidateRead;
break;
}

View File

@ -525,6 +525,17 @@ public class AlignmentUtils {
return alignment;
}
/**
* Returns true if the read does not belong to a contig, i.e. it's location is GenomeLoc.UNMAPPED.
* NOTE: A read can have a mapped GenomeLoc and still have an unmapped flag!
*
* @param r record
* @return true if read is unmapped to a genome loc
*/
public static boolean isReadGenomeLocUnmapped(final SAMRecord r) {
return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(r.getReferenceName());
}
/**
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and

View File

@ -334,6 +334,33 @@ public class ArtificialSAMUtils {
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
/**
* Create an iterator containing the specified reads
*
* @param reads the reads
* @return iterator for the reads
*/
public static StingSAMIterator createReadIterator(SAMRecord... reads) {
return createReadIterator(Arrays.asList(reads));
}
/**
* Create an iterator containing the specified reads
*
* @param reads the reads
* @return iterator for the reads
*/
public static StingSAMIterator createReadIterator(List<SAMRecord> reads) {
final Iterator<SAMRecord> iter = reads.iterator();
return new StingSAMIterator() {
@Override public void close() {}
@Override public Iterator<SAMRecord> iterator() { return iter; }
@Override public boolean hasNext() { return iter.hasNext(); }
@Override public SAMRecord next() { return iter.next(); }
@Override public void remove() { iter.remove(); }
};
}
private final static int ranIntInclusive(Random ran, int start, int stop) {
final int range = stop - start;
return ran.nextInt(range) + start;

View File

@ -0,0 +1,149 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class IntervalOverlapFilteringIteratorUnitTest {
private SAMFileHeader header;
private GenomeLoc firstContig;
private GenomeLoc secondContig;
/** Basic aligned and mapped read. */
private SAMRecord readMapped;
/** Read with no contig specified in the read, -L UNMAPPED */
private SAMRecord readNoReference;
/** This read has a start position, but is flagged that it's not mapped. */
private SAMRecord readUnmappedFlag;
/** This read is from the second contig. */
private SAMRecord readSecondContig;
/** This read says it's aligned, but actually has an unknown start. */
private SAMRecord readUnknownStart;
/** The above reads in the order one would expect to find them in a sorted BAM. */
private List<SAMRecord> testReads;
@BeforeClass
public void init() {
header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, ArtificialSAMUtils.DEFAULT_READ_LENGTH * 2);
GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
SAMSequenceRecord record;
record = header.getSequence(0);
firstContig = genomeLocParser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength());
record = header.getSequence(1);
secondContig = genomeLocParser.createGenomeLoc(record.getSequenceName(), 1, record.getSequenceLength());
readMapped = createMappedRead("mapped", 1);
readUnmappedFlag = createMappedRead("unmappedFlagged", 2);
readUnmappedFlag.setReadUnmappedFlag(true);
readSecondContig = createMappedRead("secondContig", 3);
readSecondContig.setReferenceName(secondContig.getContig());
/* This read says it's aligned, but to a contig not in the header. */
SAMRecord readUnknownContig = createMappedRead("unknownContig", 4);
readUnknownContig.setReferenceName("unknownContig");
readUnknownStart = createMappedRead("unknownStart", 1);
readUnknownStart.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
readNoReference = createUnmappedRead("unmappedNoReference");
testReads = new ArrayList<SAMRecord>();
testReads.add(readMapped);
testReads.add(readUnmappedFlag);
testReads.add(readUnknownStart);
testReads.add(readSecondContig);
testReads.add(readUnknownContig);
testReads.add(readNoReference);
}
@DataProvider(name = "filteringIteratorTestData")
public Object[][] getFilteringIteratorTestData() {
return new Object[][] {
new Object[] {Arrays.asList(firstContig), Arrays.asList(readMapped, readUnmappedFlag, readUnknownStart)},
new Object[] {Arrays.asList(GenomeLoc.UNMAPPED), Arrays.asList(readNoReference)},
new Object[] {Arrays.asList(firstContig, secondContig), Arrays.asList(readMapped, readUnmappedFlag, readUnknownStart, readSecondContig)}
};
}
@Test(dataProvider = "filteringIteratorTestData")
public void testFilteringIterator(List<GenomeLoc> locs, List<SAMRecord> expected) {
IntervalOverlapFilteringIterator filterIter = new IntervalOverlapFilteringIterator(
ArtificialSAMUtils.createReadIterator(testReads), locs);
List<SAMRecord> actual = new ArrayList<SAMRecord>();
while (filterIter.hasNext()) {
actual.add(filterIter.next());
}
Assert.assertEquals(actual, expected);
}
@Test(expectedExceptions = ReviewedStingException.class)
public void testMappedAndUnmapped() {
new IntervalOverlapFilteringIterator(
ArtificialSAMUtils.createReadIterator(testReads),
Arrays.asList(firstContig, GenomeLoc.UNMAPPED));
}
private SAMRecord createUnmappedRead(String name) {
return ArtificialSAMUtils.createArtificialRead(
header,
name,
SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
SAMRecord.NO_ALIGNMENT_START,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
private SAMRecord createMappedRead(String name, int start) {
return ArtificialSAMUtils.createArtificialRead(
header,
name,
0,
start,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
}

View File

@ -5,49 +5,52 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.HashMap;
public class PrintReadsIntegrationTest extends WalkerTest {
private static class PRTest {
final static String REF = hg18Reference;
final static String BAM = validationDataLocation + "HiSeq.1mb.bam";
String args;
String md5;
final String reference;
final String bam;
final String args;
final String md5;
private PRTest(String args, String md5) {
private PRTest(String reference, String bam, String args, String md5) {
this.reference = reference;
this.bam = bam;
this.args = args;
this.md5 = md5;
}
@Override
public String toString() {
return String.format("PRTest(bam='%s', args='%s')", bam, args);
}
}
@DataProvider(name = "PRTest")
public Object[][] createData1() {
public Object[][] createPrintReadsTestData() {
return new Object[][]{
{new PRTest("", "dc8e5451dd29757c336013146010f73a")},
{new PRTest(" -compress 0", "fde82269c78c9e91e57286433531b4af")},
{new PRTest(" -simplifyBAM", "0531717b32a7e21c0de70b1526b0751f")},
{new PRTest(" -n 10", "cdc4ddf9ee1d2ecf37168da8ef23c270")} };
{new PRTest(hg18Reference, "HiSeq.1mb.bam", "", "dc8e5451dd29757c336013146010f73a")},
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -compress 0", "fde82269c78c9e91e57286433531b4af")},
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -simplifyBAM", "0531717b32a7e21c0de70b1526b0751f")},
{new PRTest(hg18Reference, "HiSeq.1mb.bam", " -n 10", "cdc4ddf9ee1d2ecf37168da8ef23c270")},
// See: GATKBAMIndex.getStartOfLastLinearBin(), BAMScheduler.advance(), IntervalOverlapFilteringIterator.advance()
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", "", "0a9ce949d07a84cb33a1a8e3358bf679")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L unmapped", "13bb9a91b1d4dd2425f73302b8a1ac1c")},
{new PRTest(b37KGReference, "unmappedFlagReadsInLastLinearBin.bam", " -L 1 -L unmapped", "6e920b8505e7e95d67634b0905237dbc")},
{new PRTest(b37KGReference, "oneReadAllInsertion.bam", "", "6caec4f8a25befb6aba562955401af93")}
};
}
@Test(dataProvider = "PRTest")
public void testPrintReads(PRTest params) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads -R " + params.REF +
" -I " + params.BAM +
"-T PrintReads" +
" -R " + params.reference +
" -I " + validationDataLocation + params.bam +
params.args +
" -o %s",
Arrays.asList(params.md5));
executeTest("testPrintReads-"+params.args, spec).getFirst();
}
@Test
public void testPrintReadsReadAllInsertion() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T PrintReads -R " + b37KGReference +
" -I " + validationDataLocation + "oneReadAllInsertion.bam" +
" -o %s",
Arrays.asList("6caec4f8a25befb6aba562955401af93"));
executeTest("testPrintReads-oneReadAllInsertion", spec);
}
}

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import junit.framework.Assert;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class AlignmentUtilsUnitTest {
private SAMFileHeader header;
/** Basic aligned and mapped read. */
private SAMRecord readMapped;
/** Read with no contig specified in the read, -L UNMAPPED */
private SAMRecord readNoReference;
/** This read has a start position, but is flagged that it's not mapped. */
private SAMRecord readUnmappedFlag;
/** This read says it's aligned, but to a contig not in the header. */
private SAMRecord readUnknownContig;
/** This read says it's aligned, but actually has an unknown start. */
private SAMRecord readUnknownStart;
@BeforeClass
public void init() {
header = ArtificialSAMUtils.createArtificialSamHeader(3, 1, ArtificialSAMUtils.DEFAULT_READ_LENGTH * 2);
readMapped = createMappedRead("mapped", 1);
readNoReference = createUnmappedRead("unmappedNoReference");
readUnmappedFlag = createMappedRead("unmappedFlagged", 2);
readUnmappedFlag.setReadUnmappedFlag(true);
readUnknownContig = createMappedRead("unknownContig", 3);
readUnknownContig.setReferenceName("unknownContig");
readUnknownStart = createMappedRead("unknownStart", 1);
readUnknownStart.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
}
/**
* Test for -L UNMAPPED
*/
@DataProvider(name = "genomeLocUnmappedReadTests")
public Object[][] getGenomeLocUnmappedReadTests() {
return new Object[][] {
new Object[] {readNoReference, true},
new Object[] {readMapped, false},
new Object[] {readUnmappedFlag, false},
new Object[] {readUnknownContig, false},
new Object[] {readUnknownStart, false}
};
}
@Test(dataProvider = "genomeLocUnmappedReadTests")
public void testIsReadGenomeLocUnmapped(SAMRecord read, boolean expected) {
Assert.assertEquals(AlignmentUtils.isReadGenomeLocUnmapped(read), expected);
}
/**
* Test for read being truly unmapped
*/
@DataProvider(name = "unmappedReadTests")
public Object[][] getUnmappedReadTests() {
return new Object[][] {
new Object[] {readNoReference, true},
new Object[] {readMapped, false},
new Object[] {readUnmappedFlag, true},
new Object[] {readUnknownContig, false},
new Object[] {readUnknownStart, true}
};
}
@Test(dataProvider = "unmappedReadTests")
public void testIsReadUnmapped(SAMRecord read, boolean expected) {
Assert.assertEquals(AlignmentUtils.isReadUnmapped(read), expected);
}
private SAMRecord createUnmappedRead(String name) {
return ArtificialSAMUtils.createArtificialRead(
header,
name,
SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
SAMRecord.NO_ALIGNMENT_START,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
private SAMRecord createMappedRead(String name, int start) {
return ArtificialSAMUtils.createArtificialRead(
header,
name,
0,
start,
ArtificialSAMUtils.DEFAULT_READ_LENGTH);
}
}