diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 3449baafd..9eebf9223 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -103,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.2.2"; // Major version, minor version, and build number + private static final String versionString = "v2.2.3"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -311,7 +311,7 @@ public class CovariateCounterWalker extends LocusWalker { * @param context The AlignmentContext which holds the reads covered by this locus * @param ref The reference base */ - private static void updateMismatchCounts(Pair counts, AlignmentContext context, char ref) { + private static void updateMismatchCounts(final Pair counts, final AlignmentContext context, final char ref) { for( PileupElement p : context.getPileup() ) { final char readChar = (char)(p.getBase()); final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar); @@ -377,7 +377,7 @@ public class CovariateCounterWalker extends LocusWalker { final long curMismatches = datum.getNumMismatches(); // Add one to the number of observations and potentially one to the number of mismatches - datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char than the bytes default to the (long, long) version of the increment method which is really bad + datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char then the bytes default to the (long, long) version of the increment method which is really bad countedBases++; novel_counts.second++; novel_counts.first += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 7e1f60c2a..07cd9a175 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -260,11 +260,11 @@ public class RecalDataManager { if( read.getReadNegativeStrandFlag() ) { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); } - byte[] inconsistency = new byte[readBases.length]; + final byte[] inconsistency = new byte[readBases.length]; int iii; byte prevBase = (byte) colorSpace[0]; // The sentinel for( iii = 0; iii < readBases.length; iii++ ) { - byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); + final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); inconsistency[iii] = (byte)( thisBase == readBases[iii] ? 0 : 1 ); prevBase = readBases[iii]; } @@ -289,9 +289,9 @@ public class RecalDataManager { * @param refBases The reference for this read * @return A new array of quality scores that have been ref bias corrected */ - public static byte[] calcColorSpace( SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) { + public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) { - Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); + final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { char[] colorSpace; if( attr instanceof String ) { @@ -308,16 +308,16 @@ public class RecalDataManager { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); refBasesDirRead = BaseUtils.simpleReverseComplement( refBases ); } - int[] inconsistency = new int[readBases.length]; + final int[] inconsistency = new int[readBases.length]; byte prevBase = (byte) colorSpace[0]; // The sentinel for( int iii = 0; iii < readBases.length; iii++ ) { - byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); + final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); colorImpliedBases[iii] = thisBase; inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 ); prevBase = readBases[iii]; } - boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length); + final boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length); // Now that we have the inconsistency array apply the desired correction to the inconsistent bases if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0 @@ -328,7 +328,7 @@ public class RecalDataManager { originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, isMappedToReference, coinFlip); - } + } } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag Utils.warnUser("Unable to find color space information in SOLID read. First observed at read with name = " + read.getReadName()); @@ -350,27 +350,26 @@ public class RecalDataManager { * @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar * @return The byte array of original quality scores some of which might have been set to zero */ - private static byte[] solidRecalSetToQZero( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] originalQualScores, + private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final char[] refBases, final boolean isMappedToRef, final boolean setBaseN ) { + final boolean negStrand = read.getReadNegativeStrandFlag(); for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) { if( inconsistency[iii] == 1 ) { if( !isMappedToRef || (char)readBases[iii] == refBases[iii] ) { - originalQualScores[iii] = (byte)0; + if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; } + else { originalQualScores[iii] = (byte)0; } if( setBaseN ) { readBases[iii] = (byte)'N'; } } // Set the prev base to Q0 as well if( !isMappedToRef || (char)readBases[iii-1] == refBases[iii-1] ) { - originalQualScores[iii-1] = (byte)0; + if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; } + else { originalQualScores[iii-1] = (byte)0; } if( setBaseN ) { readBases[iii-1] = (byte)'N'; } } - //if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { - // originalQualScores[iii+1] = (byte)0; - // if( setBaseN ) { readBases[iii+1] = (byte)'N'; } - //} } } - if( read.getReadNegativeStrandFlag() ) { + if( negStrand ) { readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read } read.setReadBases( readBases ); @@ -387,10 +386,10 @@ public class RecalDataManager { * @param isMappedToRef Is this read mapped fully to a reference * @param coinFlip A random number generator */ - private static void solidRecalRemoveRefBias( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] colorImpliedBases, - final char[] refBases, final boolean isMappedToRef, final Random coinFlip ) { + private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, + final char[] refBases, final boolean isMappedToRef, final Random coinFlip) { - Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); + final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); if( attr != null ) { byte[] colorSpaceQuals; if( attr instanceof String ) { @@ -402,27 +401,32 @@ public class RecalDataManager { for( int iii = 1; iii < inconsistency.length - 1; iii++ ) { if( inconsistency[iii] == 1 ) { for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read - if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { - if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin - int rand = coinFlip.nextInt( 2 ); - if( rand == 0 ) { // The color implied base won the coin flip - readBases[jjj] = colorImpliedBases[jjj]; - } - } else { - int maxQuality = Math.max((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); - int minQuality = Math.min((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); - int diffInQuality = maxQuality - minQuality; - int numLow = minQuality; - if( numLow == 0 ) { numLow = 1; } - int numHigh = Math.round( numLow * (float)Math.pow(10.0f, (float) diffInQuality / 10.0f) ); // The color with higher quality is exponentially more likely - int rand = coinFlip.nextInt( numLow + numHigh ); - if( rand >= numLow ) { - if( maxQuality == (int)colorSpaceQuals[jjj] ) { + if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step + if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { + if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin + final int rand = coinFlip.nextInt( 2 ); + if( rand == 0 ) { // The color implied base won the coin flip readBases[jjj] = colorImpliedBases[jjj]; } } else { - if( minQuality == (int)colorSpaceQuals[jjj] ) { - readBases[jjj] = colorImpliedBases[jjj]; + final int maxQuality = Math.max((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); + final int minQuality = Math.min((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); + int diffInQuality = maxQuality - minQuality; + int numLow = minQuality; + if( numLow == 0 ) { + numLow++; + diffInQuality++; + } + final int numHigh = Math.round( numLow * (float)Math.pow(10.0f, (float) diffInQuality / 10.0f) ); // The color with higher quality is exponentially more likely + final int rand = coinFlip.nextInt( numLow + numHigh ); + if( rand >= numLow ) { // higher q score won + if( maxQuality == (int)colorSpaceQuals[jjj] ) { + readBases[jjj] = colorImpliedBases[jjj]; + } // else ref color had higher q score, and won out, so nothing to do here + } else { // lower q score won + if( minQuality == (int)colorSpaceQuals[jjj] ) { + readBases[jjj] = colorImpliedBases[jjj]; + } // else ref color had lower q score, and won out, so nothing to do here } } } @@ -457,7 +461,7 @@ public class RecalDataManager { case '3': // simple complement return BaseUtils.simpleComplement( prevBase ); default: - throw new StingException( "Unrecognized color space in SOLID bam, color = " + color ); + throw new StingException( "Unrecognized color space in SOLID read, color = " + color ); } } @@ -468,9 +472,9 @@ public class RecalDataManager { * @return Returns true if the base is inconsistent with the color space */ public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) { - Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); + final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG); if( attr != null ) { - byte[] colorSpace = (byte[])attr; + final byte[] colorSpace = (byte[])attr; return colorSpace[offset] != 0; } else { return false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index bd709e2fe..eb1d649fc 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -94,7 +94,7 @@ public class TableRecalibrationWalker extends ReadWalker e = new HashMap(); e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "6c59d291c37d053e0f188b762f3060a5" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "27b3eaf3c02ffc5fb3d7815468d9958e"); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "0f4a882d78747380af2822317c7b6045"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "7ebdce416b72679e1cf88cc9886a5edc" ); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "a39afc94ed74f8137c9d43285997bd90" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41b0313bfcaf5afb3cb85f9830e79769" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorMaxQ70() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "bf8d62beda7d3cf38d660d7939901192" ); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "8120b648a43b293b84a7cb6374513fb8" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -165,7 +165,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ad345fcfb2faaf97eb0291ffa61b3228" ); + e.put( validationDataLocation + "NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "d9d29f38f8ea31ac267f9d937cad9291" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey();