From a1054efe8a17eaa9a23b66f81ea7f0723b2312e1 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 26 Jan 2010 18:47:40 +0000 Subject: [PATCH] Default platform and default read group are no longer set to values by default. The recalibrator throws an exception if needed values are empty in the bam file and the args weren't set by the user. This is done to make it more obvious to the user when the bam file is malformed. Similarly, the recalibrator now refuses to recalibrate any solid reads in which it can't find the color space information with an exception message explaining this. The recalibrator no longer maintains its own version number and instead uses the new global GATK version number. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2690 348d0f76-0448-11de-a6fe-93d51630548a --- .../recalibration/CovariateCounterWalker.java | 1 - .../walkers/recalibration/CycleCovariate.java | 32 +++++----- .../recalibration/RecalDataManager.java | 58 ++++++++++--------- .../RecalibrationArgumentCollection.java | 4 +- .../TableRecalibrationWalker.java | 9 +-- .../RecalibrationWalkersIntegrationTest.java | 2 + 6 files changed, 57 insertions(+), 49 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 6530a7acb..fb9865e27 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -120,7 +120,6 @@ public class CovariateCounterWalker extends LocusWalker { */ public void initialize() { - logger.info( "Recalibrator version: " + RecalDataManager.versionString ); if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 1d3cd5c38..7b0859390 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -46,15 +46,17 @@ import net.sf.samtools.SAMRecord; public class CycleCovariate implements StandardCovariate { private static boolean warnedUserBadPlatform = false; - private static String defaultPlatform; + private static String defaultPlatform = null; // Initialize any member variables using the command-line arguments passed to the walkers public void initialize( final RecalibrationArgumentCollection RAC ) { - if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) || - RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) { - defaultPlatform = RAC.DEFAULT_PLATFORM; - } else { - throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid"); + if( RAC.DEFAULT_PLATFORM != null ) { + if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) || + RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) { + defaultPlatform = RAC.DEFAULT_PLATFORM; + } else { + throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid"); + } } } @@ -128,19 +130,19 @@ public class CycleCovariate implements StandardCovariate { //----------------------------- else { // Platform is unrecognized so revert to the default platform but warn the user first - if( !warnedUserBadPlatform ) { - if( defaultPlatform != null) { // The user set a default platform + if( defaultPlatform != null) { // The user set a default platform + if( !warnedUserBadPlatform ) { Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform argument." ); - } else { // The user did not set a default platform - Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + - "Reverting to platform = Illumina. Users may set the default platform using the --default_platform argument." ); - defaultPlatform = "Illumina"; + "Defaulting to platform = " + defaultPlatform + "." ); } warnedUserBadPlatform = true; + + read.getReadGroup().setPlatform( defaultPlatform ); + return getValue( read, offset ); // A recursive call + } else { // The user did not set a default platform + throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + + "No default platform specified. Users must set the default platform using the --default_platform argument." ); } - read.getReadGroup().setPlatform( defaultPlatform ); - return getValue( read, offset ); // A recursive call } // Differentiate between first and second of pair. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index f021e3bff..519115a73 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -55,11 +55,8 @@ public class RecalDataManager { public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color private static boolean warnUserNullReadGroup = false; - private static boolean warnUserNoColorSpace = false; private static boolean warnUserNullPlatform = false; - public static final String versionString = "v2.2.17"; // Major version, minor version, and build number - RecalDataManager() { data = new NestedHashMap(); dataCollapsedReadGroup = null; @@ -219,16 +216,21 @@ public class RecalDataManager { // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments if( readGroup == null ) { - if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) { - Utils.warnUser("The input .bam file contains reads with no read group. " + - "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " + - "First observed at read with name = " + read.getReadName() ); - warnUserNullReadGroup = true; + if( RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) { + if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) { + Utils.warnUser("The input .bam file contains reads with no read group. " + + "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " + + "First observed at read with name = " + read.getReadName() ); + warnUserNullReadGroup = true; + } + // There is no readGroup so defaulting to these values + readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); + readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); + ((GATKSAMRecord)read).setReadGroup( readGroup ); + } else { + throw new StingException("The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() + + " Users must set both the default read group using the --default_read_group argument and the default platform using the --default_platform argument." ); } - // There is no readGroup so defaulting to these values - readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); - readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); - ((GATKSAMRecord)read).setReadGroup( readGroup ); } if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user @@ -243,14 +245,18 @@ public class RecalDataManager { } if ( readGroup.getPlatform() == null ) { - if( !warnUserNullPlatform ) { - Utils.warnUser("The input .bam file contains reads with no platform information. " + - "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " + - "First observed at read with name = " + read.getReadName() ); - Utils.warnUser("Users may set the default platform using the --default_platform argument."); - warnUserNullPlatform = true; + if( RAC.DEFAULT_PLATFORM != null ) { + if( !warnUserNullPlatform ) { + Utils.warnUser("The input .bam file contains reads with no platform information. " + + "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " + + "First observed at read with name = " + read.getReadName() ); + warnUserNullPlatform = true; + } + readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); + } else { + throw new StingException("The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() + + " Users must set the default platform using the --default_platform argument." ); } - readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); } } @@ -287,10 +293,9 @@ public class RecalDataManager { } read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency ); - } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag - Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName()); - Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?"); - warnUserNoColorSpace = true; + } else { + throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); } } } @@ -345,10 +350,9 @@ public class RecalDataManager { solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip); } - } else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag - Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName()); - Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?"); - warnUserNoColorSpace = true; + } else { + throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); } return originalQualScores; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index dbb600a9b..2a18c559b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -46,9 +46,9 @@ public class RecalibrationArgumentCollection { @Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false) public boolean USE_ORIGINAL_QUALS = false; @Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.") - public String DEFAULT_READ_GROUP = ReadGroupCovariate.defaultReadGroup; + public String DEFAULT_READ_GROUP = null; @Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.") - public String DEFAULT_PLATFORM = "Illumina"; + public String DEFAULT_PLATFORM = null; @Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.") public String FORCE_READ_GROUP = null; @Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index c61557a0d..ebd454581 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.*; import java.util.ArrayList; import java.util.List; import java.util.Random; +import java.util.ResourceBundle; import java.util.regex.Pattern; import java.io.File; import java.io.FileNotFoundException; @@ -76,9 +77,9 @@ public class TableRecalibrationWalker extends ReadWalker