The default platform and default read group no longer have built-in default values. The recalibrator now throws an exception if a required value is missing from the bam file and the user did not supply the corresponding argument. This is done to make it more obvious to the user when the bam file is malformed. Similarly, the recalibrator now refuses to recalibrate any SOLiD read for which it cannot find the color space information, and throws an exception whose message explains why. The recalibrator no longer maintains its own version number and instead uses the new global GATK version number.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2690 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-26 18:47:40 +00:00
parent 31d252c0a1
commit a1054efe8a
6 changed files with 57 additions and 49 deletions

View File

@ -120,7 +120,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/ */
public void initialize() { public void initialize() {
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;

View File

@ -46,10 +46,11 @@ import net.sf.samtools.SAMRecord;
public class CycleCovariate implements StandardCovariate { public class CycleCovariate implements StandardCovariate {
private static boolean warnedUserBadPlatform = false; private static boolean warnedUserBadPlatform = false;
private static String defaultPlatform; private static String defaultPlatform = null;
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM != null ) {
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) || if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) { RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
defaultPlatform = RAC.DEFAULT_PLATFORM; defaultPlatform = RAC.DEFAULT_PLATFORM;
@ -57,6 +58,7 @@ public class CycleCovariate implements StandardCovariate {
throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid"); throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
} }
} }
}
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { public final Comparable getValue( final SAMRecord read, final int offset ) {
@ -128,19 +130,19 @@ public class CycleCovariate implements StandardCovariate {
//----------------------------- //-----------------------------
else { // Platform is unrecognized so revert to the default platform but warn the user first else { // Platform is unrecognized so revert to the default platform but warn the user first
if( !warnedUserBadPlatform ) {
if( defaultPlatform != null) { // The user set a default platform if( defaultPlatform != null) { // The user set a default platform
if( !warnedUserBadPlatform ) {
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " + Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform <String> argument." ); "Defaulting to platform = " + defaultPlatform + "." );
} else { // The user did not set a default platform
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to platform = Illumina. Users may set the default platform using the --default_platform <String> argument." );
defaultPlatform = "Illumina";
} }
warnedUserBadPlatform = true; warnedUserBadPlatform = true;
}
read.getReadGroup().setPlatform( defaultPlatform ); read.getReadGroup().setPlatform( defaultPlatform );
return getValue( read, offset ); // A recursive call return getValue( read, offset ); // A recursive call
} else { // The user did not set a default platform
throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"No default platform specified. Users must set the default platform using the --default_platform <String> argument." );
}
} }
// Differentiate between first and second of pair. // Differentiate between first and second of pair.

View File

@ -55,11 +55,8 @@ public class RecalDataManager {
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullReadGroup = false; private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNoColorSpace = false;
private static boolean warnUserNullPlatform = false; private static boolean warnUserNullPlatform = false;
public static final String versionString = "v2.2.17"; // Major version, minor version, and build number
RecalDataManager() { RecalDataManager() {
data = new NestedHashMap(); data = new NestedHashMap();
dataCollapsedReadGroup = null; dataCollapsedReadGroup = null;
@ -219,6 +216,7 @@ public class RecalDataManager {
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments // If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
if( readGroup == null ) { if( readGroup == null ) {
if( RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) { if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) {
Utils.warnUser("The input .bam file contains reads with no read group. " + Utils.warnUser("The input .bam file contains reads with no read group. " +
"Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " + "Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
@ -229,6 +227,10 @@ public class RecalDataManager {
readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP ); readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup ); ((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
throw new StingException("The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() +
" Users must set both the default read group using the --default_read_group <String> argument and the default platform using the --default_platform <String> argument." );
}
} }
if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
@ -243,14 +245,18 @@ public class RecalDataManager {
} }
if ( readGroup.getPlatform() == null ) { if ( readGroup.getPlatform() == null ) {
if( RAC.DEFAULT_PLATFORM != null ) {
if( !warnUserNullPlatform ) { if( !warnUserNullPlatform ) {
Utils.warnUser("The input .bam file contains reads with no platform information. " + Utils.warnUser("The input .bam file contains reads with no platform information. " +
"Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " + "Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() ); "First observed at read with name = " + read.getReadName() );
Utils.warnUser("Users may set the default platform using the --default_platform <String> argument.");
warnUserNullPlatform = true; warnUserNullPlatform = true;
} }
readGroup.setPlatform( RAC.DEFAULT_PLATFORM ); readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
} else {
throw new StingException("The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() +
" Users must set the default platform using the --default_platform <String> argument." );
}
} }
} }
@ -287,10 +293,9 @@ public class RecalDataManager {
} }
read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency ); read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency );
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag } else {
Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName()); throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?"); " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
warnUserNoColorSpace = true;
} }
} }
} }
@ -345,10 +350,9 @@ public class RecalDataManager {
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip); solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip);
} }
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag } else {
Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName()); throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?"); " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
warnUserNoColorSpace = true;
} }
return originalQualScores; return originalQualScores;

View File

@ -46,9 +46,9 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false) @Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
public boolean USE_ORIGINAL_QUALS = false; public boolean USE_ORIGINAL_QUALS = false;
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.") @Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
public String DEFAULT_READ_GROUP = ReadGroupCovariate.defaultReadGroup; public String DEFAULT_READ_GROUP = null;
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.") @Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = "Illumina"; public String DEFAULT_PLATFORM = null;
@Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.") @Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
public String FORCE_READ_GROUP = null; public String FORCE_READ_GROUP = null;
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") @Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")

View File

@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import java.util.ResourceBundle;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -76,9 +77,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
@Argument(fullName="preserve_qscores_less_than", shortName="pQ", @Argument(fullName="preserve_qscores_less_than", shortName="pQ",
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false) doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
private int PRESERVE_QSCORES_LESS_THAN = 5; private int PRESERVE_QSCORES_LESS_THAN = 5;
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points") @Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1")
private int SMOOTHING = 1; private int SMOOTHING = 1;
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40") @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default=40")
private int MAX_QUALITY_SCORE = 40; private int MAX_QUALITY_SCORE = 40;
///////////////////////////// /////////////////////////////
@ -111,7 +112,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/ */
public void initialize() { public void initialize() {
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
if( !RAC.checkSolidRecalMode() ) { if( !RAC.checkSolidRecalMode() ) {
@ -221,7 +221,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone(); final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
if( !NO_PG_TAG ) { if( !NO_PG_TAG ) {
final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" ); final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
programRecord.setProgramVersion( RecalDataManager.versionString ); final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
programRecord.setProgramVersion( headerInfo.getString("org.broadinstitute.sting.gatk.version") );
String commandLineString = "Covariates=["; String commandLineString = "Covariates=[";
for( Covariate cov : requestedCovariates ) { for( Covariate cov : requestedCovariates ) {
commandLineString += cov.getClass().getSimpleName() + ", "; commandLineString += cov.getClass().getSimpleName() + ", ";

View File

@ -243,6 +243,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov QualityScoreCovariate" + " -cov QualityScoreCovariate" +
" -cov CycleCovariate" + " -cov CycleCovariate" +
" -cov DinucCovariate" + " -cov DinucCovariate" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" + " --default_platform illumina" +
" --solid_recal_mode SET_Q_ZERO" + " --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s", " -recalFile %s",
@ -272,6 +273,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -outputBam %s" + " -outputBam %s" +
" --no_pg_tag" + " --no_pg_tag" +
" --solid_recal_mode SET_Q_ZERO" + " --solid_recal_mode SET_Q_ZERO" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" + " --default_platform illumina" +
" -recalFile " + paramsFile, " -recalFile " + paramsFile,
1, // just one output file 1, // just one output file