From 6fbf77be9502f1311fbe28862b8c7a3a8c8940dc Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 17 Dec 2009 20:07:26 +0000 Subject: [PATCH] Updating the two solid_recal_mode options to also change the previous base since solid aligner prefers single color mismatch alignments over true SNP alignments. COUNT_AS_MISMATCH mode has been removed completely. The default mode is now SET_Q_ZERO. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2394 348d0f76-0448-11de-a6fe-93d51630548a --- .../recalibration/CovariateCounterWalker.java | 10 +- .../recalibration/RecalDataManager.java | 223 ++++++++++++------ .../RecalibrationArgumentCollection.java | 6 +- .../TableRecalibrationWalker.java | 19 +- .../broadinstitute/sting/utils/BaseUtils.java | 48 ++++ .../RecalibrationWalkersIntegrationTest.java | 24 +- 6 files changed, 222 insertions(+), 108 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 4a00a8b08..15186f166 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -103,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. BUGBUG: I don't understand what is going on in this case private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.1.1"; // Major version, minor version, and build number + private static final String versionString = "v2.1.2"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -127,7 +127,7 @@ public class CovariateCounterWalker extends LocusWalker { if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; if( !RAC.checkSolidRecalMode() ) { - throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, COUNT_AS_MISMATCH, or REMOVE_REF_BIAS"); + throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS"); } // Get a list of all available covariates @@ -300,10 +300,6 @@ public class CovariateCounterWalker extends LocusWalker { } else { otherColorSpaceInconsistency++; } - if( RAC.SOLID_RECAL_MODE.equalsIgnoreCase("COUNT_AS_MISMATCH") ) { - refBase = (byte)'X'; // This will always mismatch since we are already filtering out reads that don't pass BaseUtils.isRegularBase - updateDataFromRead( read, offset, refBase ); - } } } } @@ -313,7 +309,7 @@ public class CovariateCounterWalker extends LocusWalker { } else { // We skipped over the dbSNP site, and we are only processing every Nth locus skippedSites++; if( isSNP ) { - updateMismatchCounts(dbSNP_counts, context, ref.getBase());// For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable + updateMismatchCounts(dbSNP_counts, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 86f652efb..8092f8c1a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -42,6 +42,7 @@ import net.sf.samtools.SAMReadGroupRecord; * Date: Nov 6, 2009 * * This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions. + * It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias. */ public class RecalDataManager { @@ -58,7 +59,7 @@ public class RecalDataManager { public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams - public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which if this base is inconsistent with its color + public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullReadGroup = false; private static boolean warnUserNoColorSpace = false; @@ -88,21 +89,27 @@ public class RecalDataManager { * Add the given mapping to all of the collapsed hash tables * @param key The list of comparables that is the key for this mapping * @param fullDatum The RecalDatum which is the data for this mapping + * @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table */ - public final void addToAllTables( final List key, final RecalDatum fullDatum ) { + public final void addToAllTables( final List key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN ) { // The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around //data.put(key, thisDatum); // add the mapping to the main table + int qualityScore = Integer.parseInt( key.get(1).toString() ); + ArrayList newKey; + RecalDatum collapsedDatum; + // Create dataCollapsedReadGroup, the table where everything except read group has been collapsed - //BUGBUG: Bases with Reported Quality less than -pQ argument shouldn't be included in this collapsed table - ArrayList newKey = new ArrayList(); - newKey.add( key.get(0) ); // Make a new key with just the read group - RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey ); - if( collapsedDatum == null ) { - dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) ); - } else { - collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported + if( qualityScore >= PRESERVE_QSCORES_LESS_THAN ) { + newKey = new ArrayList(); + newKey.add( key.get(0) ); // Make a new key with just the read group + collapsedDatum = dataCollapsedReadGroup.get( newKey ); + if( collapsedDatum == null ) { + dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) ); + } else { + collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported + } } newKey = new ArrayList(); @@ -248,6 +255,10 @@ public class RecalDataManager { } } + /** + * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space + * @param read The SAMRecord to parse + */ public static void parseColorSpace( final SAMRecord read ) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) { @@ -265,19 +276,9 @@ public class RecalDataManager { for( iii = 0; iii < readBases.length; iii++ ) { byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 ); - prevBase = ( thisBase == readBases[iii] ? thisBase : readBases[iii] ); - //System.out.print((char)thisBase); + prevBase = readBases[iii]; } - //System.out.println(); read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency ); - //for( iii = 0; iii < readBases.length; iii++ ) { - // System.out.print((char)readBases[iii]); - //} - //System.out.println(); - //for( iii = 0; iii < readBases.length; iii++ ) { - // System.out.print(inconsistency[iii]); - //} - //System.out.println("\n"); } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag Utils.warnUser("Unable to find color space information in SOLID read. First observed at read with name = " + read.getReadName()); @@ -288,77 +289,48 @@ public class RecalDataManager { } } - public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, String SOLID_RECAL_MODE, Random coinFlip ) { + /** + * Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases + * This method doesn't add the inconsistent tag to the read like parseColorSpace does + * @param read The SAMRecord to parse + * @param originalQualScores The array of original quality scores to modify during the correction + * @param SOLID_RECAL_MODE Which mode of solid recalibration to apply + * @param coinFlip A random number generator + * @param refBases The reference for this read + * @return A new array of quality scores that have been ref bias corrected + */ + public static byte[] calcColorSpace( SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) { - int[] inconsistency = null; if( read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG) != null ) { char[] colorSpace = ((String)read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG)).toCharArray(); // Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); byte[] colorImpliedBases = readBases.clone(); + char[] refBasesDirRead = refBases; if( read.getReadNegativeStrandFlag() ) { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); + refBasesDirRead = BaseUtils.simpleReverseComplement( refBases ); } - inconsistency = new int[readBases.length]; - int iii; + int[] inconsistency = new int[readBases.length]; byte prevBase = (byte) colorSpace[0]; // The sentinel - for( iii = 0; iii < readBases.length; iii++ ) { + for( int iii = 0; iii < readBases.length; iii++ ) { byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); colorImpliedBases[iii] = thisBase; inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 ); - prevBase = ( thisBase == readBases[iii] ? thisBase : readBases[iii] ); - //System.out.print((char)thisBase); + prevBase = readBases[iii]; } + boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length); - //System.out.println(); - //for( iii = 0; iii < readBases.length; iii++ ) { - // System.out.print((char)readBases[iii]); - //} - //System.out.println(); - //for( iii = 0; iii < readBases.length; iii++ ) { - // System.out.print(inconsistency[iii]); - //} - //System.out.println("\n"); - - if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { - for( iii = 0; iii < originalQualScores.length; iii++ ) { - if( inconsistency[iii] == 1 ) { - originalQualScores[iii] = (byte)0; - } - } + // Now that we have the inconsistency array apply the desired correction to the inconsistent bases + if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0 + boolean setBaseN = false; + originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); } else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) { - byte[] tmpBases = read.getReadBases(); - for( iii = 0; iii < originalQualScores.length; iii++ ) { - if( inconsistency[iii] == 1 ) { - originalQualScores[iii] = (byte)0; - tmpBases[iii] = (byte)'N'; - } - } - read.setReadBases( tmpBases ); - - } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { - if( read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG) != null ) { // BUGBUG: Warn user if using this mode but there are no color space quality scores in the bam - byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); - byte[] tmpBases = read.getReadBases(); - if( read.getReadNegativeStrandFlag() ) { - colorImpliedBases = BaseUtils.simpleComplement(colorImpliedBases.clone()); - } - for( iii = 0; iii < inconsistency.length - 1; iii++ ) { - if( inconsistency[iii] == 1 ) { - if( colorSpaceQuals[iii] > colorSpaceQuals[iii+1] ) { // reference was inserted here, but there is more evidence for the color implied base - tmpBases[iii] = colorImpliedBases[iii]; - } else if( colorSpaceQuals[iii] == colorSpaceQuals[iii+1] ) { // equal evidence for the color implied base and the reference base, so flip a coin - int rand = coinFlip.nextInt( 2 ); - if( rand == 0 ) { // the color implied base won the coin flip - tmpBases[iii] = colorImpliedBases[iii]; - } - } - } - } - - read.setReadBases( tmpBases ); - } + boolean setBaseN = true; + originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN); + } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases + solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBases, isMappedToReference, coinFlip); } } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag @@ -369,6 +341,107 @@ public class RecalDataManager { return originalQualScores; } + + /** + * Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero + * @param read The SAMRecord to recalibrate + * @param readBases The bases in the read RC'd if necessary + * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color + * @param originalQualScores The array of original quality scores to set to zero if needed + * @param refBases The reference which has been RC'd if necessary + * @param isMappedToRef Is this read mapped fully to a reference + * @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar + * @return The byte array of original quality scores some of which might have been set to zero + */ + private static byte[] solidRecalSetToQZero( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] originalQualScores, + final char[] refBases, final boolean isMappedToRef, final boolean setBaseN ) { + + for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) { // BUGBUG: This relies on the fact that in SOLID bams the first and last base are set to Q0 and aren't recalibrated anyway + if( inconsistency[iii] == 1 ) { + if( !isMappedToRef || (char)readBases[iii] == refBases[iii] ) { + originalQualScores[iii] = (byte)0; + if( setBaseN ) { readBases[iii] = (byte)'N'; } + } + // Set the prev base to Q0 as well + if( !isMappedToRef || (char)readBases[iii-1] == refBases[iii-1] ) { + originalQualScores[iii-1] = (byte)0; + if( setBaseN ) { readBases[iii-1] = (byte)'N'; } + } + if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) { + originalQualScores[iii+1] = (byte)0; + if( setBaseN ) { readBases[iii+1] = (byte)'N'; } + } + } + } + if( read.getReadNegativeStrandFlag() ) { + readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read + } + read.setReadBases( readBases ); + return originalQualScores; + } + + /** + * Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference + * @param read The SAMRecord to recalibrate + * @param readBases The bases in the read RC'd if necessary + * @param inconsistency The array of 1/0 that says if this base is inconsistent with its color + * @param colorImpliedBases The bases implied by the color space, RC'd if necessary + * @param refBases The reference which has been RC'd if necessary + * @param isMappedToRef Is this read mapped fully to a reference + * @param coinFlip A random number generator + */ + private static void solidRecalRemoveRefBias( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] colorImpliedBases, + final char[] refBases, final boolean isMappedToRef, final Random coinFlip ) { + if( read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG) != null ) { + byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG)); + + for( int iii = 1; iii < inconsistency.length - 2; iii++ ) { + if( inconsistency[iii] == 1 ) { + for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read + if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) { + if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin + int rand = coinFlip.nextInt( 2 ); + if( rand == 0 ) { // The color implied base won the coin flip + readBases[jjj] = colorImpliedBases[jjj]; + } + } else { + int maxQuality = Math.max((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); + int minQuality = Math.min((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]); + int diffInQuality = maxQuality - minQuality; + int numLow = minQuality; + if(numLow == 0) { numLow = 1; } + int numHigh = Math.round(numLow * (float)Math.pow(10.0f, (float) diffInQuality / 10.0f) ); // The color with higher quality is exponentially more likely + int rand = coinFlip.nextInt( numLow + numHigh ); + if( rand >= numLow ) { + if( maxQuality == (int)colorSpaceQuals[jjj] ) { + readBases[jjj] = colorImpliedBases[jjj]; + } + } else { + if( minQuality == (int)colorSpaceQuals[jjj] ) { + readBases[jjj] = colorImpliedBases[jjj]; + } + } + } + } + } + } + } + + if( read.getReadNegativeStrandFlag() ) { + readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read + } + read.setReadBases( readBases ); + } else { // No color space quality tag in file + throw new StingException("REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName()); + } + } + + /** + * Given the base and the color calculate the next base in the sequence + * @param prevBase The base + * @param color The color + * @return The next base in the sequence + */ private static char getNextBaseFromColor( final char prevBase, final char color ) { switch(color) { case '0': // same base diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 9d904e5fc..780330210 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -53,8 +53,8 @@ public class RecalibrationArgumentCollection { public String FORCE_READ_GROUP = null; @Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; - @Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? EXPERIMENTAL OPTION. USE AT OWN RISK. Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, COUNT_AS_MISMATCH, or REMOVE_REF_BIAS") - public String SOLID_RECAL_MODE = "DO_NOTHING"; + @Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") + public String SOLID_RECAL_MODE = "SET_Q_ZERO"; @Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false) public int WINDOW_SIZE = 5; @Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false) @@ -62,6 +62,6 @@ public class RecalibrationArgumentCollection { public boolean checkSolidRecalMode() { return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || - SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("COUNT_AS_MISMATCH") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ); + SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0d279315a..021d5b18e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -58,8 +58,7 @@ import java.lang.reflect.Field; */ @WalkerName("TableRecalibration") -@Requires({ DataSource.READS, DataSource.REFERENCE }) // This walker requires -I input.bam, it also requires -R reference.fasta, but by not saying @requires REFERENCE_BASES I'm telling the - // GATK to not actually spend time giving me the refBase array since I don't use it +@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta public class TableRecalibrationWalker extends ReadWalker { ///////////////////////////// @@ -94,9 +93,10 @@ public class TableRecalibrationWalker extends ReadWalker e = new HashMap(); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "16f87c9644b27c3c3fe7a963eed45d2d" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "add8196af03d5a674aa3de702995cbbf"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5a4873064e602e1f945f3881c6b4a3e5"); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b039f405580ad844cd1dfd4d8ba25913" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "c730bff43ffb89dcb0774926286cac6f" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "dbbb9c49f2e0b1f117b88eb386fb6d0f" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -37,7 +37,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -cov CycleCovariate" + " -cov DinucCovariate" + " --sorted_output" + - " --solid_recal_mode DO_NOTHING" + + " --solid_recal_mode SET_Q_ZERO" + " -recalFile %s", 1, // just one output file Arrays.asList(md5)); @@ -50,9 +50,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { public void testTableRecalibrator1() { HashMap e = new HashMap(); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "d7f3e0db5ed9fefc917144a0f937d50d" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "fb82046d5acfb07625d5459526d5f6c8"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "1f1770363f381d06e8db9fead03ad7bb"); e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "3fe6ee95cfa6155f8cc8e3e0a6330ad2" ); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e4eae955f7d408b2af34c9224616f564" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "732267e9cd02a72b8c85238f47487509" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -68,7 +68,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { ? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) + " -outputBam %s" + " --no_pg_tag" + - " --solid_recal_mode DO_NOTHING" + + " --solid_recal_mode SET_Q_ZERO" + " -recalFile " + paramsFile, 1, // just one output file Arrays.asList(md5)); @@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesVCF() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d0bdd1934f003406e93f935b63ab6edd"); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "bc0e62dc366f2f6ca70847a832b4aad4"); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -cov CycleCovariate" + " -cov DinucCovariate" + " --sorted_output" + - " --solid_recal_mode DO_NOTHING" + + " --solid_recal_mode SET_Q_ZERO" + " -recalFile %s", 1, // just one output file Arrays.asList(md5)); @@ -108,7 +108,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "fadfa35415a694781ed2b4d1b1d9535b" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "9d83a1653e076e028fecf4c2575f56e9" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -126,7 +126,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -cov DinucCovariate" + " --default_platform illumina" + " --sorted_output" + - " --solid_recal_mode DO_NOTHING" + + " --solid_recal_mode SET_Q_ZERO" + " -recalFile %s", 1, // just one output file Arrays.asList(md5)); @@ -138,7 +138,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testTableRecalibratorNoReadGroups() { HashMap e = new HashMap(); - e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "4879036295b567514f637d21030bf9c8" ); + e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "8853add9cfd0a8171ca917b1c6452e0a" ); for ( Map.Entry entry : e.entrySet() ) { String bam = entry.getKey(); @@ -153,7 +153,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -L 1:10,100,000-10,300,000" + " -outputBam %s" + " --no_pg_tag" + - " --solid_recal_mode DO_NOTHING" + + " --solid_recal_mode SET_Q_ZERO" + " --default_platform illumina" + " -recalFile " + paramsFile, 1, // just one output file