diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java
index 366e927dc..f7d1d0297 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java
@@ -45,6 +45,9 @@ public class MalformedReadFilter extends ReadFilter {
     @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
     boolean filterMismatchingBaseAndQuals = false;
 
+    @Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "if a read has no stored bases (i.e. a '*'), filter out the read instead of blowing up.", required = false)
+    boolean filterBasesNotStored = false;
+
     @Override
     public void initialize(GenomeAnalysisEngine engine) {
         this.header = engine.getSAMFileHeader();
@@ -57,7 +60,8 @@ public class MalformedReadFilter extends ReadFilter {
                 !checkAlignmentDisagreesWithHeader(this.header,read) ||
                 !checkHasReadGroup(read) ||
                 !checkMismatchingBasesAndQuals(read, filterMismatchingBaseAndQuals) ||
-                !checkCigarDisagreesWithAlignment(read);
+                !checkCigarDisagreesWithAlignment(read) ||
+                !checkSeqStored(read, filterBasesNotStored);
     }
 
     private static boolean checkHasReadGroup(final SAMRecord read) {
@@ -146,4 +150,27 @@ public class MalformedReadFilter extends ReadFilter {
 
         return result;
     }
+
+    /**
+     * Check if the read has its base sequence stored
+     *
+     * @param read                 the read to validate
+     * @param filterBasesNotStored if true, a read without stored bases fails the check quietly (false is returned);
+     *                             if false, such a read is reported to the user through an exception
+     * @return true if the sequence is stored and false otherwise ("*" in the SEQ field).
+     * @throws UserException.MalformedBAM if the sequence is not stored and filterBasesNotStored is false
+     */
+    protected static boolean checkSeqStored(final SAMRecord read, final boolean filterBasesNotStored) {
+
+        // look at the length instead of comparing references with SAMRecord.NULL_SEQUENCE: an empty (or missing)
+        // base array that is not that exact sentinel instance leaves us with no bases to work with all the same
+        final byte[] bases = read.getReadBases();
+        if ( bases != null && bases.length > 0 )
+            return true;
+
+        if ( filterBasesNotStored )
+            return false;
+
+        throw new UserException.MalformedBAM(read, String.format("the BAM file has a read with no stored bases (i.e. it uses '*') which is not supported in the GATK; see the --filter_bases_not_stored argument. Offender: %s", read.getReadName()));
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java
new file mode 100644
index 000000000..981d54d54
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/filters/MalformedReadFilterUnitTest.java
@@ -0,0 +1,62 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.sting.gatk.filters;
+
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for the MalformedReadFilter
+ *
+ * @author Eric Banks
+ * @since 3/14/13
+ */
+public class MalformedReadFilterUnitTest {
+
+    //////////////////////////////////////
+    // Test the checkSeqStored() method //
+    //////////////////////////////////////
+
+    @Test(enabled = true)
+    public void testcheckSeqStored() {
+        final GATKSAMRecord goodRead = ArtificialSAMUtils.createArtificialRead(new byte[]{(byte)'A'}, new byte[]{(byte)'A'}, "1M");
+        final GATKSAMRecord badRead = ArtificialSAMUtils.createArtificialRead(new byte[]{}, new byte[]{}, "1M");
+        badRead.setReadString("*");
+
+        // with filtering requested, the check only reports whether the bases are stored
+        Assert.assertTrue(MalformedReadFilter.checkSeqStored(goodRead, true));
+        Assert.assertFalse(MalformedReadFilter.checkSeqStored(badRead, true));
+
+        // without filtering, the very same read has to be rejected with a malformed-BAM user error
+        try {
+            MalformedReadFilter.checkSeqStored(badRead, false);
+            Assert.fail("We should have exceptioned out in the previous line");
+        } catch (final UserException.MalformedBAM expected) { /* this is the outcome the test is after */ }
+    }
+}