From 70df30fc1bbf620b7a10586e8d93f3cece778c47 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Fri, 15 Jan 2010 15:36:59 +0000 Subject: [PATCH] Added method to AlignmentUtils which takes a read's cigar and the refBases char array given to a ReadWalker and returns the aligned reference char array. Bug fix in solid_recal_modes to use this aligned reference array. Recalibrator version number is no longer separate for each of the two walkers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2589 348d0f76-0448-11de-a6fe-93d51630548a --- .../recalibration/CovariateCounterWalker.java | 3 +- .../recalibration/RecalDataManager.java | 27 +++++++------- .../RecalibrationArgumentCollection.java | 2 +- .../TableRecalibrationWalker.java | 5 ++- .../sting/utils/AlignmentUtils.java | 35 +++++++++++++++++++ 5 files changed, 51 insertions(+), 21 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index e9bb510fa..cbb542b1a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -102,7 +102,6 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.2.5"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -121,7 +120,7 @@ public class CovariateCounterWalker extends LocusWalker { */ public void initialize() { - logger.info( "CovariateCounterWalker version: " + versionString ); + logger.info( "Recalibrator version: " + RecalDataManager.versionString ); if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 586b30050..41ed957b7 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -57,6 +57,8 @@ public class RecalDataManager { private static boolean warnUserNullReadGroup = false; private static boolean warnUserNoColorSpace = false; + public static final String versionString = "v2.2.10"; // Major version, minor version, and build number + RecalDataManager() { data = new NestedHashMap(); dataCollapsedReadGroup = null; @@ -261,7 +263,6 @@ public class RecalDataManager { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); } - // Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); if( read.getReadNegativeStrandFlag() ) { @@ -310,10 +311,10 @@ public class RecalDataManager { // Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); final byte[] colorImpliedBases = readBases.clone(); - char[] refBasesDirRead = refBases; + char[] refBasesDirRead = AlignmentUtils.alignmentToCharArray( read.getCigar(), read.getReadString().toCharArray(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases if( read.getReadNegativeStrandFlag() ) { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); - refBasesDirRead = BaseUtils.simpleReverseComplement( refBases ); + refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() ); } final int[] inconsistency = new int[readBases.length]; byte prevBase = (byte) colorSpace[0]; // The sentinel @@ -324,17 +325,15 @@ public class RecalDataManager { prevBase = readBases[iii]; } - final boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length); - // Now that we have the inconsistency array apply the desired correction to the inconsistent bases if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0 final boolean setBaseN = false; - originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); + originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) { final boolean setBaseN = true; - originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); + originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases - solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, isMappedToReference, coinFlip); + solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip); } } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag @@ -353,23 +352,22 @@ public class RecalDataManager { * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color * @param originalQualScores The array of original quality scores to set to zero if needed * @param refBases The reference which has been RC'd if necessary - * @param isMappedToRef Is this read mapped fully to a reference * @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar * @return The byte array of original quality scores some of which might have been set to zero */ private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, - final char[] refBases, final boolean isMappedToRef, final boolean setBaseN ) { + final char[] refBases, final boolean setBaseN ) { final boolean negStrand = read.getReadNegativeStrandFlag(); for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) { if( inconsistency[iii] == 1 ) { - if( !isMappedToRef || (char)readBases[iii] == refBases[iii] ) { + if( (char)readBases[iii] == refBases[iii] ) { if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; } else { originalQualScores[iii] = (byte)0; } if( setBaseN ) { readBases[iii] = (byte)'N'; } } // Set the prev base to Q0 as well - if( !isMappedToRef || (char)readBases[iii-1] == refBases[iii-1] ) { + if( (char)readBases[iii-1] == refBases[iii-1] ) { if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; } else { originalQualScores[iii-1] = (byte)0; } if( setBaseN ) { readBases[iii-1] = (byte)'N'; } @@ -390,11 +388,10 @@ public class RecalDataManager { * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color * @param colorImpliedBases The bases implied by the color space, RC'd if necessary * @param refBases The reference which has been RC'd if necessary - * @param isMappedToRef Is this read mapped fully to a reference * @param coinFlip A random number generator */ private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, - final char[] refBases, final boolean isMappedToRef, final Random coinFlip) { + final char[] refBases, final Random coinFlip) { final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); if( attr != null ) { @@ -409,7 +406,7 @@ public class RecalDataManager { if( inconsistency[iii] == 1 ) { for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step - if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { + if( (char)readBases[jjj] == refBases[jjj] ) { if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin final int rand = coinFlip.nextInt( 2 ); if( rand == 0 ) { // The color implied base won the coin flip diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index ecc07afec..dbb600a9b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -61,10 +61,10 @@ public class RecalibrationArgumentCollection { public int HOMOPOLYMER_NBACK = 7; @Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false) public boolean EXCEPTION_IF_NO_TILE = false; - public final boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ); } + } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 540d5d6cf..51e66d0af 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -95,7 +95,6 @@ public class TableRecalibrationWalker extends ReadWalker