diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 4992e9d7a..2c86b835e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -263,28 +263,32 @@ public class CovariateCounterWalker extends LocusWalker { offset = p.getOffset(); RecalDataManager.parseSAMRecord( read, RAC ); - RecalDataManager.parseColorSpace( read ); - // Skip if base quality is zero - if( read.getBaseQualities()[offset] > 0 ) { + // Skip over reads with no calls in the color space if the user requested it + if( !RAC.IGNORE_NOCALL_COLORSPACE || !RecalDataManager.checkNoCallColorSpace( read ) ) { + RecalDataManager.parseColorSpace( read ); - bases = read.getReadBases(); - refBase = (byte)ref.getBase(); + // Skip if base quality is zero + if( read.getBaseQualities()[offset] > 0 ) { - // Skip if this base is an 'N' or etc. - if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) { + bases = read.getReadBases(); + refBase = (byte)ref.getBase(); - // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { + // Skip if this base is an 'N' or etc. + if( BaseUtils.isRegularBase( (char)(bases[offset]) ) ) { - // This base finally passed all the checks for a good base, so add it to the big data hashmap - updateDataFromRead( read, offset, refBase ); + // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it + if( !read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( read, offset ) ) { - } else { // calculate SOLID reference insertion rate - if( ref.getBase() == (char)bases[offset] ) { - solidInsertedReferenceBases++; - } else { - otherColorSpaceInconsistency++; + // This base finally passed all the checks for a good base, so add it to the big data hashmap + updateDataFromRead( read, offset, refBase ); + + } else { // calculate SOLID reference insertion rate + if( ref.getBase() == (char)bases[offset] ) { + solidInsertedReferenceBases++; + } else { + otherColorSpaceInconsistency++; + } } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 519115a73..23e200ed2 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -170,7 +170,8 @@ public class RecalDataManager { final Object val = data.get(comp); if( val instanceof RecalDatum ) { // We are at the end of the nested hash maps if( data.keySet().size() == 1) { - data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done in a previous sequential step + data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ... + // in a previous step of the sequential calculation model } } else { // Another layer in the nested hash map checkForSingletons( (Map) val ); @@ -358,6 +359,32 @@ public class RecalDataManager { return originalQualScores; } + public static boolean checkNoCallColorSpace( final SAMRecord read ) { + if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) { + final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + if( attr != null ) { + char[] colorSpace; + if( attr instanceof String ) { + colorSpace = ((String)attr).substring(1).toCharArray(); // trim off the Sentinel + } else { + throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); + } + + for( char color : colorSpace ) { + if( color != '0' && color != '1' && color != '2' && color != '3' ) { + return true; // There is a bad color in this SOLiD read and the user wants to skip over it + } + } + + } else { + throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); + } + } + + return false; // There aren't any color no calls in this SOLiD read + } + /** * Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero * @param read The SAMRecord to recalibrate @@ -516,7 +543,7 @@ public class RecalDataManager { // } //} - } else { + } else { // No inconsistency array, so nothing is inconsistent return false; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 2a18c559b..198fb7ed0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -61,6 +61,9 @@ public class RecalibrationArgumentCollection { public int HOMOPOLYMER_NBACK = 7; @Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false) public boolean EXCEPTION_IF_NO_TILE = false; + @Argument(fullName = "ignore_nocall_colorspace", shortName="ignore_nocall_colorspace", doc="If provided, the recalibrator will skip over reads with no calls in the color space instead of halting with an exception", required=false) + public boolean IGNORE_NOCALL_COLORSPACE = false; + public final boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index ebd454581..6e4788037 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -98,6 +98,7 @@ public class TableRecalibrationWalker extends ReadWalker