More descriptive error when VerifyingSamIterator hits an inconsistent alignment. Also updated

case UserException.MalformedBAM to match case of UserExceptio.MissortedBAM for consistency and
ease-of-use.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5364 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-03-03 03:55:24 +00:00
parent a0309e7fb0
commit 7a22f19366
14 changed files with 175 additions and 37 deletions

View File

@ -29,7 +29,7 @@ public class PlatformUnitFilter implements SamRecordFilter {
if ( pu_attr == null ) {
// no platform unit in the record, go get from read group
SAMReadGroupRecord rgr = samRecord.getReadGroup();
if ( rgr == null ) throw new UserException.MalformedBam(samRecord, "Read " + samRecord.getReadName() +" has NO associated read group record");
if ( rgr == null ) throw new UserException.MalformedBAM(samRecord, "Read " + samRecord.getReadName() +" has NO associated read group record");
pu_attr = rgr.getAttribute("PU") ;
}
if ( pu_attr == null ) return false; // could not get PU, forget about the filtering...

View File

@ -210,7 +210,7 @@ public class LocusIteratorByState extends LocusIterator {
if ( generateExtendedEvents ) {
// we see insertions only once, when we step right onto them; the position on the read is scrolled
// past the insertion right after that
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBam(read, "Adjacent I/D events in read "+read.getReadName());
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName());
insertedBases = Arrays.copyOfRange(read.getReadBases(),readOffset+1,readOffset+1+curElement.getLength());
eventLength = curElement.getLength() ;
eventStart = readOffset;
@ -226,7 +226,7 @@ public class LocusIteratorByState extends LocusIterator {
if ( cigarElementCounter == 1) {
// generate an extended event only if we just stepped into the deletion (i.e. don't
// generate the event at every deleted position on the ref, that's what cigarElementCounter==1 is for!)
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBam(read, "Adjacent I/D events in read "+read.getReadName());
if ( eventDelayedFlag > 1 ) throw new UserException.MalformedBAM(read, "Adjacent I/D events in read "+read.getReadName());
eventLength = curElement.getLength();
eventDelayedFlag = 2; // deletion on the ref causes an immediate return, so we have to delay by 1 only
eventStart = readOffset;

View File

@ -43,6 +43,11 @@ public class VerifyingSamIterator implements StingSAMIterator {
if ( last == null || cur.getReadUnmappedFlag() )
return false;
else {
if(last.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || last.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",last.format()));
if(cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",cur.format()));
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;

View File

@ -128,7 +128,7 @@ public class CoverageUtils {
SAMReadGroupRecord rg = r.getReadGroup();
if ( rg == null ) {
String msg = "Read "+r.getReadName()+" lacks read group information; Please associate all reads with read groups";
throw new UserException.MalformedBam(r, msg);
throw new UserException.MalformedBAM(r, msg);
}
return rg;

View File

@ -289,7 +289,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
byte qual = p.getQual();
if ( qual > SAMUtils.MAX_PHRED_SCORE )
throw new UserException.MalformedBam(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
if ( capBaseQualsAtMappingQual )
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());

View File

@ -100,8 +100,8 @@ public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
String lane = read.getReadGroup().getPlatformUnit();
String library = read.getReadGroup().getLibrary();
if ( lane == null ) throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has no platform unit information");
if ( library == null ) throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has no library information");
if ( lane == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no platform unit information");
if ( library == null ) throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has no library information");
int end = 0;
@ -109,11 +109,11 @@ public class CycleQualityWalker extends ReadWalker<Integer,Integer> {
if ( read.getFirstOfPairFlag() ) {
if ( read.getSecondOfPairFlag() )
throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
end = 1;
} else {
if ( ! read.getSecondOfPairFlag() )
throw new UserException.MalformedBam(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
throw new UserException.MalformedBAM(read, "Read "+read.getReadName()+" has conflicting first/second in pair attributes");
end = 2;
}
}

View File

@ -235,7 +235,7 @@ public class RecalDataManager {
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
throw new UserException.MalformedBam(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() +
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() +
" Users must set both the default read group using the --default_read_group <String> argument and the default platform using the --default_platform <String> argument." );
}
}
@ -261,7 +261,7 @@ public class RecalDataManager {
}
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
} else {
throw new UserException.MalformedBam(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() +
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() +
" Users must set the default platform using the --default_platform <String> argument." );
}
}
@ -282,7 +282,7 @@ public class RecalDataManager {
if( attr instanceof String ) {
colorSpace = ((String)attr).getBytes();
} else {
throw new UserException.MalformedBam(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
@ -301,7 +301,7 @@ public class RecalDataManager {
read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency );
} else {
throw new UserException.MalformedBam(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
@ -358,7 +358,7 @@ public class RecalDataManager {
}
} else {
throw new UserException.MalformedBam(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
@ -383,7 +383,7 @@ public class RecalDataManager {
}
} else {
throw new UserException.MalformedBam(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
@ -493,7 +493,7 @@ public class RecalDataManager {
}
read.setReadBases( readBases );
} else { // No color space quality tag in file
throw new UserException.MalformedBam(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
}
}
@ -514,7 +514,7 @@ public class RecalDataManager {
case '3':
return performColorThree( prevBase );
default:
throw new UserException.MalformedBam(read, "Unrecognized color space in SOLID read, color = " + (char)color +
throw new UserException.MalformedBAM(read, "Unrecognized color space in SOLID read, color = " + (char)color +
" Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
}
}

View File

@ -47,7 +47,7 @@ public class TileCovariate implements ExperimentalCovariate {
Integer tile = IlluminaUtil.getTileFromReadName(read.getReadName());
if (tile == null) {
if( exceptionWhenNoTile ) {
throw new UserException.MalformedBam(read, "Tile covariate specified but tile number not defined for read: " + read.getReadName() );
throw new UserException.MalformedBAM(read, "Tile covariate specified but tile number not defined for read: " + read.getReadName() );
} else {
return -1;
}

View File

@ -392,7 +392,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
} else if ( controlReadGroups.contains( read.getReadGroup().getReadGroupId() )) {
addControl(read);
} else {
throw new UserException.MalformedBam(read, "Read "+read + " belongs to unrecognized read group");
throw new UserException.MalformedBAM(read, "Read "+read + " belongs to unrecognized read group");
}
return 1;
}

View File

@ -250,7 +250,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
// get mapping from read's contig onto a "global" contig (as a list of intervals on the latter):
Collection<GenomeLoc> segments = getContigMapping(r.getReferenceName());
if ( segments == null ) throw new UserException.MalformedBam(r, "Can not remap a record: unknown custom contig name "+r.getReferenceName());
if ( segments == null ) throw new UserException.MalformedBAM(r, "Can not remap a record: unknown custom contig name "+r.getReferenceName());
// scroll the list of intervals until we find the interval that the alignment start falls into:
Pair<? extends Iterator<GenomeLoc>, Integer> p = seekForward(segments,customStart);
@ -324,7 +324,7 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
if ( discardCrossContig ) {
// System.out.println("WARNING: ALIGNMENT DISCARDED: "+message);
return null;
} else throw new UserException.MalformedBam(r, message);
} else throw new UserException.MalformedBAM(r, message);
}
gl = iter.next(); // advance to next segment
@ -332,11 +332,11 @@ public class GenomicMap implements Iterable<Map.Entry<String, Collection<GenomeL
refPos = (int)gl.getStart(); // we jump to the start of next segment on the master ref
if ( gl.getContigIndex() != r.getReferenceIndex() )
throw new UserException.MalformedBam(r, "Contig "+oldRefName+
throw new UserException.MalformedBAM(r, "Contig "+oldRefName+
" has segments on different master contigs: currently unsupported");
if ( refPos < currStop + 1 )
throw new UserException.MalformedBam(r, "Contig "+oldRefName+
throw new UserException.MalformedBAM(r, "Contig "+oldRefName+
" has segments that are out of order or strictly adjacent: currently unsupported");
if ( len > 0 && refPos > currStop + 1 ) {
// add "panning" N's w/respect to the master ref over the region between adjacent segments

View File

@ -427,7 +427,7 @@ public class BAQ {
int baq_delta = (int)baq[i] - 64;
int newval = rawQual - baq_delta;
if ( newval < 0 )
throw new UserException.MalformedBam(read, "BAQ tag error: the BAQ value is larger than the base quality");
throw new UserException.MalformedBAM(read, "BAQ tag error: the BAQ value is larger than the base quality");
newQuals[i] = (byte)newval;
}
} else if ( ! useRawQualsIfNoBAQTag ) {

View File

@ -116,18 +116,6 @@ public class UserException extends ReviewedStingException {
}
}
public static class MalformedBam extends UserException {
public MalformedBam(SAMRecord read, String message) {
super(String.format("SAM/BAM file %s is malformed: %s", read.getFileSource().getReader(), message));
}
}
public static class ReadMissingReadGroup extends MalformedBam {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
}
}
public static class MissortedBAM extends UserException {
public MissortedBAM(SAMFileHeader.SortOrder order, File file, SAMFileHeader header) {
super(String.format("Missorted Input SAM/BAM files: %s is must be sorted in %s order but order was: %s", file, order, header.getSortOrder()));
@ -147,6 +135,19 @@ public class UserException extends ReviewedStingException {
}
}
public static class MalformedBAM extends UserException {
public MalformedBAM(SAMRecord read, String message) {
super(String.format("SAM/BAM file %s is malformed: %s", read.getFileSource().getReader(), message));
}
}
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
}
}
public static class MissortedFile extends UserException {
public MissortedFile(File file, String message, Exception e) {
super(String.format("Missorted Input file: %s is must be sorted in coordinate order. %s and got error %s", file, message, e.getMessage()));

View File

@ -88,7 +88,7 @@ public class GATKSAMRecord extends SAMRecord {
// sanity check that the lengths of the base and quality strings are equal
if ( getBaseQualities().length != getReadLength() )
throw new UserException.MalformedBam(this, String.format("Error: the number of base qualities does not match the number of bases in %s. Use -DBQ x to set a default base quality score to all reads so GATK can proceed.", mRecord.getReadName()));
throw new UserException.MalformedBAM(this, String.format("Error: the number of base qualities does not match the number of bases in %s. Use -DBQ x to set a default base quality score to all reads so GATK can proceed.", mRecord.getReadName()));
}
public void setGoodBases(GATKSAMRecordFilter filter, boolean abortIfAlreadySet) {

View File

@ -0,0 +1,132 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: mhanna
* Date: Mar 2, 2011
* Time: 9:48:10 PM
* To change this template use File | Settings | File Templates.
*/
public class VerifyingSamIteratorUnitTest {
private SAMFileHeader samFileHeader;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void init() {
SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary();
sequenceDictionary.addSequence(new SAMSequenceRecord("1",500));
sequenceDictionary.addSequence(new SAMSequenceRecord("2",500));
samFileHeader = new SAMFileHeader();
samFileHeader.setSequenceDictionary(sequenceDictionary);
genomeLocParser = new GenomeLocParser(sequenceDictionary);
}
@Test
public void testSortedReadsBasic() {
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),1,10);
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(0).getSequenceIndex(),2,10);
List<SAMRecord> reads = Arrays.asList(read1,read2);
VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read2,"Incorrect read in read 2 position");
Assert.assertFalse(iterator.hasNext(),"Too many reads in iterator");
}
@Test
public void testSortedReadsAcrossContigs() {
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),2,10);
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(1).getSequenceIndex(),1,10);
List<SAMRecord> reads = Arrays.asList(read1,read2);
VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read2,"Incorrect read in read 2 position");
Assert.assertFalse(iterator.hasNext(),"Too many reads in iterator");
}
@Test(expectedExceptions=UserException.MissortedBAM.class)
public void testImproperlySortedReads() {
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),2,10);
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(0).getSequenceIndex(),1,10);
List<SAMRecord> reads = Arrays.asList(read1,read2);
VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
// Should trigger MissortedBAM exception.
iterator.next();
}
@Test(expectedExceptions=UserException.MalformedBAM.class)
public void testInvalidAlignment() {
// Create an invalid alignment state.
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),1,10);
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read1",getContig(0).getSequenceIndex(),2,10);
read1.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
List<SAMRecord> reads = Arrays.asList(read1,read2);
VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
// Should trigger MalformedBAM exception.
iterator.next();
}
private SAMSequenceRecord getContig(final int contigIndex) {
return samFileHeader.getSequence(contigIndex);
}
}