From 102dafdcbc84ccd1cddd671639ddddb79896186e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 21 Oct 2011 17:40:43 -0400 Subject: [PATCH] Validation of GATKSAMRecord in read filters Moved the validation of the GATKSAMRecord to the MalformedReadFilter with the intent to make the read filter the ultimate validation location for SAM records. This way we can opt to filter out malformed reads if we know what we are doing or blow up otherwise. --- .../gatk/filters/MalformedReadFilter.java | 26 +++++++++++++++++-- .../sting/utils/sam/GATKSAMRecord.java | 4 --- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java index 74deace9a..11bbf9e4c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/MalformedReadFilter.java @@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.UserException; /** * Filter out malformed reads. 
@@ -37,14 +39,25 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; */ public class MalformedReadFilter extends ReadFilter { private SAMFileHeader header; - + + @Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false) + boolean filterMismatchingBaseAndQuals = false; + @Override public void initialize(GenomeAnalysisEngine engine) { this.header = engine.getSAMFileHeader(); } public boolean filterOut(SAMRecord read) { - return !checkInvalidAlignmentStart(read) || + // slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided + if (!checkMismatchingBasesAndQuals(read)) { + if (!filterMismatchingBaseAndQuals) + throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals"); + else + return true; + } + + return !checkInvalidAlignmentStart(read) || !checkInvalidAlignmentEnd(read) || !checkAlignmentDisagreesWithHeader(this.header,read) || !checkCigarDisagreesWithAlignment(read); @@ -108,4 +121,13 @@ public class MalformedReadFilter extends ReadFilter { return false; return true; } + + /** + * Check if the read has the same number of bases and base qualities + * @param read the read to validate + * @return true if they have the same number. False otherwise. 
+ */ + private static boolean checkMismatchingBasesAndQuals(SAMRecord read) { + return (read.getReadLength() == read.getBaseQualities().length); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index c55a462f1..e7c235cf7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -86,10 +86,6 @@ public class GATKSAMRecord extends SAMRecord { if ( originalQuals != null ) mRecord.setBaseQualities(originalQuals); } - - // sanity check that the lengths of the base and quality strings are equal - if ( getBaseQualities().length != getReadLength() ) - throw new UserException.MalformedBAM(this, String.format("Error: the number of base qualities does not match the number of bases in %s.", mRecord.getReadName())); } ///////////////////////////////////////////////////////////////////////////////