The default platform and default read group no longer have built-in default values. The recalibrator now throws an exception if a required value is missing from the BAM file and the corresponding argument (--default_read_group or --default_platform) was not set by the user. This makes it more obvious to the user when the BAM file is malformed. Similarly, the recalibrator now refuses to recalibrate any SOLiD read for which it cannot find the color space information, and throws an exception explaining why. The recalibrator no longer maintains its own version number and instead uses the new global GATK version number.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2690 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-01-26 18:47:40 +00:00
parent 31d252c0a1
commit a1054efe8a
6 changed files with 57 additions and 49 deletions

View File

@ -120,7 +120,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
*/
public void initialize() {
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;

View File

@ -46,15 +46,17 @@ import net.sf.samtools.SAMRecord;
public class CycleCovariate implements StandardCovariate {
private static boolean warnedUserBadPlatform = false;
private static String defaultPlatform;
private static String defaultPlatform = null;
// Initialize any member variables using the command-line arguments passed to the walkers
public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
defaultPlatform = RAC.DEFAULT_PLATFORM;
} else {
throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
if( RAC.DEFAULT_PLATFORM != null ) {
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
RAC.DEFAULT_PLATFORM.contains( "454" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SOLID" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ABI_SOLID" ) ) {
defaultPlatform = RAC.DEFAULT_PLATFORM;
} else {
throw new StingException( "The requested default platform (" + RAC.DEFAULT_PLATFORM +") is not a recognized platform. Implemented options are illumina, 454, and solid");
}
}
}
@ -128,19 +130,19 @@ public class CycleCovariate implements StandardCovariate {
//-----------------------------
else { // Platform is unrecognized so revert to the default platform but warn the user first
if( !warnedUserBadPlatform ) {
if( defaultPlatform != null) { // The user set a default platform
if( defaultPlatform != null) { // The user set a default platform
if( !warnedUserBadPlatform ) {
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to platform = " + defaultPlatform + ". Users may set the default platform using the --default_platform <String> argument." );
} else { // The user did not set a default platform
Utils.warnUser( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"Reverting to platform = Illumina. Users may set the default platform using the --default_platform <String> argument." );
defaultPlatform = "Illumina";
"Defaulting to platform = " + defaultPlatform + "." );
}
warnedUserBadPlatform = true;
read.getReadGroup().setPlatform( defaultPlatform );
return getValue( read, offset ); // A recursive call
} else { // The user did not set a default platform
throw new StingException( "Platform string (" + read.getReadGroup().getPlatform() + ") unrecognized in CycleCovariate. " +
"No default platform specified. Users must set the default platform using the --default_platform <String> argument." );
}
read.getReadGroup().setPlatform( defaultPlatform );
return getValue( read, offset ); // A recursive call
}
// Differentiate between first and second of pair.

View File

@ -55,11 +55,8 @@ public class RecalDataManager {
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNoColorSpace = false;
private static boolean warnUserNullPlatform = false;
public static final String versionString = "v2.2.17"; // Major version, minor version, and build number
RecalDataManager() {
data = new NestedHashMap();
dataCollapsedReadGroup = null;
@ -219,16 +216,21 @@ public class RecalDataManager {
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
if( readGroup == null ) {
if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) {
Utils.warnUser("The input .bam file contains reads with no read group. " +
"Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() );
warnUserNullReadGroup = true;
if( RAC.DEFAULT_READ_GROUP != null && RAC.DEFAULT_PLATFORM != null) {
if( !warnUserNullReadGroup && RAC.FORCE_READ_GROUP == null ) {
Utils.warnUser("The input .bam file contains reads with no read group. " +
"Defaulting to read group ID = " + RAC.DEFAULT_READ_GROUP + " and platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() );
warnUserNullReadGroup = true;
}
// There is no readGroup so defaulting to these values
readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
} else {
throw new StingException("The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName() +
" Users must set both the default read group using the --default_read_group <String> argument and the default platform using the --default_platform <String> argument." );
}
// There is no readGroup so defaulting to these values
readGroup = new SAMReadGroupRecord( RAC.DEFAULT_READ_GROUP );
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
((GATKSAMRecord)read).setReadGroup( readGroup );
}
if( RAC.FORCE_READ_GROUP != null && !readGroup.getReadGroupId().equals(RAC.FORCE_READ_GROUP) ) { // Collapse all the read groups into a single common String provided by the user
@ -243,14 +245,18 @@ public class RecalDataManager {
}
if ( readGroup.getPlatform() == null ) {
if( !warnUserNullPlatform ) {
Utils.warnUser("The input .bam file contains reads with no platform information. " +
"Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() );
Utils.warnUser("Users may set the default platform using the --default_platform <String> argument.");
warnUserNullPlatform = true;
if( RAC.DEFAULT_PLATFORM != null ) {
if( !warnUserNullPlatform ) {
Utils.warnUser("The input .bam file contains reads with no platform information. " +
"Defaulting to platform = " + RAC.DEFAULT_PLATFORM + ". " +
"First observed at read with name = " + read.getReadName() );
warnUserNullPlatform = true;
}
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
} else {
throw new StingException("The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName() +
" Users must set the default platform using the --default_platform <String> argument." );
}
readGroup.setPlatform( RAC.DEFAULT_PLATFORM );
}
}
@ -287,10 +293,9 @@ public class RecalDataManager {
}
read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency );
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName());
Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?");
warnUserNoColorSpace = true;
} else {
throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
}
}
@ -345,10 +350,9 @@ public class RecalDataManager {
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip);
}
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
Utils.warnUser("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName());
Utils.warnUser("This calculation is critically dependent on being able to know when reference bases were inserted into SOLiD reads. Are you sure you want to proceed?");
warnUserNoColorSpace = true;
} else {
throw new StingException("Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
}
return originalQualScores;

View File

@ -46,9 +46,9 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "use_original_quals", shortName="OQ", doc="If provided, we will use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false)
public boolean USE_ORIGINAL_QUALS = false;
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
public String DEFAULT_READ_GROUP = ReadGroupCovariate.defaultReadGroup;
public String DEFAULT_READ_GROUP = null;
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = "Illumina";
public String DEFAULT_PLATFORM = null;
@Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
public String FORCE_READ_GROUP = null;
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")

View File

@ -14,6 +14,7 @@ import org.broadinstitute.sting.utils.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.ResourceBundle;
import java.util.regex.Pattern;
import java.io.File;
import java.io.FileNotFoundException;
@ -76,9 +77,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
@Argument(fullName="preserve_qscores_less_than", shortName="pQ",
doc="Bases with quality scores less than this threshold won't be recalibrated, default=5. In general it's unsafe to change qualities scores below < 5, since base callers use these values to indicate random or bad bases", required=false)
private int PRESERVE_QSCORES_LESS_THAN = 5;
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points, default=1")
private int SMOOTHING = 1;
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 40")
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default=40")
private int MAX_QUALITY_SCORE = 40;
/////////////////////////////
@ -111,7 +112,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/
public void initialize() {
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
if( !RAC.checkSolidRecalMode() ) {
@ -221,7 +221,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
if( !NO_PG_TAG ) {
final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
programRecord.setProgramVersion( RecalDataManager.versionString );
final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
programRecord.setProgramVersion( headerInfo.getString("org.broadinstitute.sting.gatk.version") );
String commandLineString = "Covariates=[";
for( Covariate cov : requestedCovariates ) {
commandLineString += cov.getClass().getSimpleName() + ", ";

View File

@ -243,6 +243,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov QualityScoreCovariate" +
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
@ -272,6 +273,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -outputBam %s" +
" --no_pg_tag" +
" --solid_recal_mode SET_Q_ZERO" +
" --default_read_group DefaultReadGroup" +
" --default_platform illumina" +
" -recalFile " + paramsFile,
1, // just one output file