Added arguments to the recalibration walkers so the user can specify the default read group id and platform to use when a read has no read group. There are also options to force every read group and every platform to be the specified values. Added integration tests that use a bam file with no read groups. Added comments to all the covariates to explain what each of the methods in the Covariate interface are used for.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2157 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cfbd9332b0
commit
6ff8526592
|
|
@ -36,7 +36,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|||
*/
|
||||
|
||||
public interface Covariate {
|
||||
public Comparable getValue(ReadHashDatum readDatum, int offset); // used to pick out the value from attributes of the read
|
||||
public Comparable getValue(String str); // used to get value from input file
|
||||
public int estimatedNumberOfBins(); // used to estimate the amount space required for the HashMap
|
||||
public Comparable getValue(ReadHashDatum readDatum, int offset); // Used to pick out the covariate's value from attributes of the read
|
||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public int estimatedNumberOfBins(); // Used to estimate the amount space required for the full data HashMap
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,22 +83,26 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName="process_nth_locus", shortName="pN", required=false, doc="Only process every Nth covered locus we see.")
|
||||
private int PROCESS_EVERY_NTH_LOCUS = 1;
|
||||
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
|
||||
private String DEFAULT_READ_GROUP = ReadGroupCovariate.defaultReadGroup;
|
||||
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
private String DEFAULT_PLATFORM = "Illumina";
|
||||
@Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String. This is useful to collapse all data into a single read group.")
|
||||
private String FORCE_READ_GROUP = null;
|
||||
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
private String FORCE_PLATFORM = null;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. For debugging purposes only.")
|
||||
@Argument(fullName="no_print_header", shortName="noHeader", required=false, doc="Don't print the usual header on the table recalibration file. FOR DEBUGGING PURPOSES ONLY.")
|
||||
private boolean NO_PRINT_HEADER = false;
|
||||
@Argument(fullName="validate_old_recalibrator", shortName="validate", required=false, doc="Depricated. Match the output of the old recalibrator exactly. FOR DEBUGGING PURPOSES ONLY.")
|
||||
@Argument(fullName="validate_old_recalibrator", shortName="validate", required=false, doc="!!!Depricated!!! Match the output of the old recalibrator exactly. FOR DEBUGGING PURPOSES ONLY.")
|
||||
private boolean VALIDATE_OLD_RECALIBRATOR = false;
|
||||
// BUGBUG: This validate option no longer does exactly what it says because now using read filter to filter out zero mapping quality reads. The old recalibrator counted those reads in the counted_sites variable but the new one doesn't.
|
||||
@Argument(fullName="use_slx_platform", shortName="useSLX", required=false, doc="Force the platform to be Illumina regardless of what it actually says. FOR DEBUGGING PURPOSES ONLY.")
|
||||
private boolean USE_SLX_PLATFORM = false;
|
||||
@Argument(fullName="sorted_output", shortName="sorted", required=false, doc="The outputted table recalibration file will be in sorted order at the cost of added overhead. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean SORTED_OUTPUT = false;
|
||||
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean IGNORE_READGROUP = false;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
|
|
@ -110,7 +114,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
private long countedBases = 0; // Number of bases used in the calculations, used for reporting in the output file
|
||||
private long skippedSites = 0; // Number of loci skipped because it was a dbSNP site, used for reporting in the output file
|
||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||
private final String versionString = "v2.0.5"; // Major version, minor version, and build number
|
||||
private final String versionString = "v2.0.6"; // Major version, minor version, and build number
|
||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||
|
|
@ -131,6 +135,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
public void initialize() {
|
||||
|
||||
logger.info( "CovariateCounterWalker version: " + versionString );
|
||||
if( FORCE_READ_GROUP != null ) { DEFAULT_READ_GROUP = FORCE_READ_GROUP; }
|
||||
if( FORCE_PLATFORM != null ) { DEFAULT_PLATFORM = FORCE_PLATFORM; }
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface( Covariate.class );
|
||||
|
|
@ -159,6 +165,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
|
||||
|
||||
// Initialize the requested covariates by parsing the -cov argument
|
||||
// BUGBUG: This is a mess because there are a lot of cases (validate, all, none, and supplied covList). Clean up needed.
|
||||
requestedCovariates = new ArrayList<Covariate>();
|
||||
int estimatedCapacity = 1; // Capacity is multiplicitive so this starts at one
|
||||
if( VALIDATE_OLD_RECALIBRATOR ) {
|
||||
|
|
@ -175,6 +182,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
// Some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate && DEFAULT_PLATFORM != null ) { covariate = new CycleCovariate( DEFAULT_PLATFORM ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
if( !( covariate instanceof ReadGroupCovariate || covariate instanceof QualityScoreCovariate ) ) { // These were already added so don't add them again
|
||||
requestedCovariates.add( covariate );
|
||||
|
|
@ -203,6 +211,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
// Some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate && DEFAULT_PLATFORM != null ) { covariate = new CycleCovariate( DEFAULT_PLATFORM ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
requestedCovariates.add( covariate );
|
||||
} catch ( InstantiationException e ) {
|
||||
|
|
@ -317,23 +326,26 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
isNegStrand = read.getReadNegativeStrandFlag();
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if( readGroup == null ) {
|
||||
if( !warnUserNullReadGroup ) {
|
||||
Utils.warnUser("The input .bam file contains reads with no read group. Defaulting to Illumina platform and empty read group name.");
|
||||
if( !warnUserNullReadGroup && FORCE_READ_GROUP == null ) {
|
||||
Utils.warnUser("The input .bam file contains reads with no read group. " +
|
||||
"Defaulting to read group ID = " + DEFAULT_READ_GROUP + " and platform = " + DEFAULT_PLATFORM + ". " +
|
||||
"First observed at read with name = " + read.getReadName() );
|
||||
warnUserNullReadGroup = true;
|
||||
}
|
||||
readGroupId = ReadGroupCovariate.collapsedReadGroupCode;
|
||||
platform = "ILLUMINA";
|
||||
// There is no readGroup so defaulting to these values
|
||||
readGroupId = DEFAULT_READ_GROUP;
|
||||
platform = DEFAULT_PLATFORM;
|
||||
} else {
|
||||
readGroupId = readGroup.getReadGroupId();
|
||||
platform = readGroup.getPlatform();
|
||||
}
|
||||
mappingQuality = read.getMappingQuality();
|
||||
length = bases.length;
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
if( FORCE_READ_GROUP != null ) { // Collapse all the read groups into a single common String provided by the user
|
||||
readGroupId = FORCE_READ_GROUP;
|
||||
}
|
||||
if( IGNORE_READGROUP ) {
|
||||
readGroupId = ReadGroupCovariate.collapsedReadGroupCode;
|
||||
if( FORCE_PLATFORM != null ) {
|
||||
platform = FORCE_PLATFORM;
|
||||
}
|
||||
|
||||
readDatum = new ReadHashDatum( readGroupId, platform, quals, bases, isNegStrand, mappingQuality, length );
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -42,45 +43,70 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
|
||||
public class CycleCovariate implements Covariate {
|
||||
|
||||
private static boolean warnedUserNoPlatform = false;
|
||||
private static boolean warnedUserBadPlatform = false;
|
||||
private static String defaultPlatform;
|
||||
|
||||
public CycleCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
public CycleCovariate() { // Empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
defaultPlatform = null;
|
||||
}
|
||||
|
||||
public CycleCovariate( final String _defaultPlatform ) {
|
||||
if( _defaultPlatform.equalsIgnoreCase( "ILLUMINA" ) || _defaultPlatform.contains( "454" ) || _defaultPlatform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
defaultPlatform = _defaultPlatform;
|
||||
} else {
|
||||
throw new StingException( "The requested default platform (" + _defaultPlatform +") is not a recognized platform. Implemented options are illumina, 454, and solid");
|
||||
}
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
if( readDatum.platform.equalsIgnoreCase( "ILLUMINA" ) ) {
|
||||
int cycle = offset;
|
||||
if( readDatum.isNegStrand ) {
|
||||
cycle = readDatum.bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
} else if( readDatum.platform.contains( "454" ) ) { // some bams have "LS454" and others have just "454"
|
||||
} else if( readDatum.platform.contains( "454" ) ) { // Some bams have "LS454" and others have just "454"
|
||||
int cycle = 0;
|
||||
//BUGBUG: should reverse directions on negative strand reads!
|
||||
byte prevBase = readDatum.bases[0];
|
||||
for( int iii = 1; iii <= offset; iii++ ) {
|
||||
if(readDatum.bases[iii] != prevBase) { // this base doesn't match the previous one so it is a new cycle
|
||||
if(readDatum.bases[iii] != prevBase) { // This base doesn't match the previous one so it is a new cycle
|
||||
cycle++;
|
||||
prevBase = readDatum.bases[iii];
|
||||
}
|
||||
}
|
||||
return cycle;
|
||||
} else if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
// the ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
// The ligation cycle according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
//BUGBUG: should reverse directions on negative strand reads!
|
||||
return offset / 5; // integer division
|
||||
} else { // platform is unrecognized so revert to Illumina definition of cycle but warn the user
|
||||
if( !warnedUserNoPlatform ) {
|
||||
Utils.warnUser( "Platform (" + readDatum.platform + ") unrecognized. Reverting to Illumina definition of machine cycle." );
|
||||
warnedUserNoPlatform = true;
|
||||
} else { // Platform is unrecognized so revert to the default platform but warn the user first
|
||||
if( !warnedUserBadPlatform ) {
|
||||
if( defaultPlatform != null) { // the user set a default platform
|
||||
Utils.warnUser( "Platform string (" + readDatum.platform + ") unrecognized in CycleCovariate. " +
|
||||
"Reverting to " + defaultPlatform + " definition of machine cycle." );
|
||||
} else { // the user did not set a default platform
|
||||
Utils.warnUser( "Platform string (" + readDatum.platform + ") unrecognized in CycleCovariate. " +
|
||||
"Reverting to Illumina definition of machine cycle. Users may set the default platform using the --default_platform <String> argument." );
|
||||
defaultPlatform = "Illumina";
|
||||
}
|
||||
warnedUserBadPlatform = true;
|
||||
}
|
||||
return PositionCovariate.revertToPositionAsCycle( readDatum, offset );
|
||||
ReadHashDatum correctedReadDatum = new ReadHashDatum( readDatum );
|
||||
correctedReadDatum.platform = defaultPlatform;
|
||||
return getValue( correctedReadDatum, offset ); // a recursive call
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
return (int)Integer.parseInt( str ); // Cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,9 +53,10 @@ public class DinucCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
|
||||
byte base;
|
||||
byte prevBase;
|
||||
// If this is a negative strand read then we need to reverse the direction for our previous base
|
||||
|
|
@ -71,12 +72,14 @@ public class DinucCovariate implements Covariate {
|
|||
return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) );
|
||||
//return String.format("%c%c", prevBase, base); // This return statement is too slow
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
//return str;
|
||||
return dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 16;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,15 +38,18 @@ public class MappingQualityCovariate implements Covariate {
|
|||
public MappingQualityCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
return readDatum.mappingQuality;
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
windowReach = windowSize / 2; // integer division
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
// Loop over the list of base quality scores in the window and find the minimum
|
||||
|
|
@ -59,11 +60,13 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
}
|
||||
return minQual;
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ public class PositionCovariate implements Covariate {
|
|||
public PositionCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
int cycle = offset;
|
||||
if( readDatum.isNegStrand ) {
|
||||
|
|
@ -47,18 +48,12 @@ public class PositionCovariate implements Covariate {
|
|||
return cycle;
|
||||
}
|
||||
|
||||
public static Comparable revertToPositionAsCycle( final ReadHashDatum readDatum, final int offset ) { // called from CycleCovariate if platform was unrecognized
|
||||
int cycle = offset;
|
||||
if( readDatum.isNegStrand ) {
|
||||
cycle = readDatum.bases.length - (offset + 1);
|
||||
}
|
||||
return cycle;
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 100;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ public class PrimerRoundCovariate implements Covariate {
|
|||
public PrimerRoundCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
if( readDatum.platform.equalsIgnoreCase( "SOLID" ) ) {
|
||||
return offset % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||
|
|
@ -49,10 +50,12 @@ public class PrimerRoundCovariate implements Covariate {
|
|||
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 5;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,15 +38,18 @@ public class QualityScoreCovariate implements Covariate {
|
|||
public QualityScoreCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
|
||||
return (int)(readDatum.quals[offset]);
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return (int)Integer.parseInt( str ); // cast to primitive int (as opposed to Integer Object) is required so that the return value from the two getValue methods hash to same thing
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 40;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,19 +35,22 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|||
|
||||
public class ReadGroupCovariate implements Covariate{
|
||||
|
||||
public static final String collapsedReadGroupCode = "Read Group Collapsed";
|
||||
public static final String defaultReadGroup = "DefaultReadGroup";
|
||||
|
||||
public ReadGroupCovariate() { // empty constructor is required to instantiate covariate in CovariateCounterWalker and TableRecalibrationWalker
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
public final Comparable getValue( final ReadHashDatum readDatum, final int offset ) {
|
||||
return readDatum.readGroup;
|
||||
}
|
||||
|
||||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// Used to estimate the amount space required for the full data HashMap
|
||||
public final int estimatedNumberOfBins() {
|
||||
return 300;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,4 +23,14 @@ public class ReadHashDatum {
|
|||
mappingQuality = _mappingQuality;
|
||||
length = _length;
|
||||
}
|
||||
|
||||
public ReadHashDatum(ReadHashDatum that) {
|
||||
this.readGroup = that.readGroup;
|
||||
this.platform = that.platform;
|
||||
this.quals = that.quals;
|
||||
this.bases = that.bases;
|
||||
this.isNegStrand = that.isNegStrand;
|
||||
this.mappingQuality = that.mappingQuality;
|
||||
this.length = that.length;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,6 +87,7 @@ public class RecalDataManager {
|
|||
//data.put(key, thisDatum); // add the mapping to the main table
|
||||
|
||||
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||
//BUGBUG: Bases with Reported Quality less than -pQ argument shouldn't be included in this collapsed table
|
||||
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // Make a new key with just the read group
|
||||
RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||
|
|
|
|||
|
|
@ -76,20 +76,23 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private int WINDOW_SIZE = 5;
|
||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
||||
private int SMOOTHING = 1;
|
||||
@Argument(fullName="default_read_group", shortName="dRG", required=false, doc="If a read has no read group then default to the provided String.")
|
||||
private String DEFAULT_READ_GROUP = ReadGroupCovariate.defaultReadGroup;
|
||||
@Argument(fullName="default_platform", shortName="dP", required=false, doc="If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
private String DEFAULT_PLATFORM = "Illumina";
|
||||
@Argument(fullName="force_read_group", shortName="fRG", required=false, doc="If provided, the read group ID of EVERY read will be forced to be the provided String.")
|
||||
private String FORCE_READ_GROUP = null;
|
||||
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
private String FORCE_PLATFORM = null;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
@Argument(fullName="validate_old_recalibrator", shortName="validateOldRecalibrator", required=false, doc="Match the output of the old recalibrator exactly. FOR DEBUGGING PURPOSES ONLY.")
|
||||
private boolean VALIDATE_OLD_RECALIBRATOR = false;
|
||||
@Argument(fullName="use_slx_platform", shortName="useSLXPlatform", required=false, doc="Force the platform to be Illumina regardless of what it actually says. FOR DEBUGGING PURPOSES ONLY.")
|
||||
private boolean USE_SLX_PLATFORM = false;
|
||||
@Argument(fullName="ignore_readgroup", shortName="noRG", required=false, doc="All read groups are combined together. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean IGNORE_READGROUP = false;
|
||||
@Argument(fullName="no_pg_tag", shortName="noPG", required=false, doc="Don't output the usual PG tag in the recalibrated bam file header. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.")
|
||||
private boolean NO_PG_TAG = false;
|
||||
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
|
|
@ -100,7 +103,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
private final String versionString = "v2.0.3"; // Major version, minor version, and build number
|
||||
private final String versionString = "v2.0.4"; // Major version, minor version, and build number
|
||||
private boolean warnUserNullReadGroup = false; // Has the walker warned the user about null read groups yet?
|
||||
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
|
||||
|
||||
|
|
@ -118,6 +121,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
public void initialize() {
|
||||
|
||||
logger.info( "TableRecalibrationWalker version: " + versionString );
|
||||
if( FORCE_READ_GROUP != null ) { DEFAULT_READ_GROUP = FORCE_READ_GROUP; }
|
||||
if( FORCE_PLATFORM != null ) { DEFAULT_PLATFORM = FORCE_PLATFORM; }
|
||||
|
||||
// Get a list of all available covariates
|
||||
List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
|
|
@ -164,6 +169,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
try {
|
||||
Covariate covariate = (Covariate)covClass.newInstance();
|
||||
// Some covariates need parameters (user supplied command line arguments) passed to them
|
||||
if( covariate instanceof CycleCovariate && DEFAULT_PLATFORM != null ) { covariate = new CycleCovariate( DEFAULT_PLATFORM ); }
|
||||
if( covariate instanceof MinimumNQSCovariate ) { covariate = new MinimumNQSCovariate( WINDOW_SIZE ); }
|
||||
requestedCovariates.add( covariate );
|
||||
estimatedCapacity *= covariate.estimatedNumberOfBins();
|
||||
|
|
@ -297,12 +303,15 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
String readGroupId;
|
||||
String platform;
|
||||
if( readGroup == null ) {
|
||||
if( !warnUserNullReadGroup ) {
|
||||
Utils.warnUser("The input .bam file contains reads with no read group. Defaulting to Illumina platform and empty read group name.");
|
||||
if( !warnUserNullReadGroup && FORCE_READ_GROUP == null) {
|
||||
Utils.warnUser("The input .bam file contains reads with no read group. " +
|
||||
"Defaulting to read group ID = " + DEFAULT_READ_GROUP + " and platform = " + DEFAULT_PLATFORM + ". " +
|
||||
"First observed at read with name = " + read.getReadName() );
|
||||
warnUserNullReadGroup = true;
|
||||
}
|
||||
readGroupId = ReadGroupCovariate.collapsedReadGroupCode;
|
||||
platform = "ILLUMINA";
|
||||
// There is no readGroup so defaulting to these values
|
||||
readGroupId = DEFAULT_READ_GROUP;
|
||||
platform = DEFAULT_PLATFORM;
|
||||
} else {
|
||||
readGroupId = readGroup.getReadGroupId();
|
||||
platform = readGroup.getPlatform();
|
||||
|
|
@ -316,11 +325,11 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
startPos = 0;
|
||||
stopPos = bases.length - 1;
|
||||
}
|
||||
if( USE_SLX_PLATFORM ) {
|
||||
platform = "ILLUMINA";
|
||||
if( FORCE_READ_GROUP != null ) { // Collapse all the read groups into a single common String provided by the user
|
||||
readGroupId = FORCE_READ_GROUP;
|
||||
}
|
||||
if( IGNORE_READGROUP ) {
|
||||
readGroupId = ReadGroupCovariate.collapsedReadGroupCode;
|
||||
if( FORCE_PLATFORM != null ) {
|
||||
platform = FORCE_PLATFORM;
|
||||
}
|
||||
|
||||
ReadHashDatum readDatum = new ReadHashDatum( readGroupId, platform, originalQuals, bases, isNegStrand, read.getMappingQuality(), bases.length );
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import java.io.File;
|
|||
|
||||
public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||
static HashMap<String, String> paramsFiles = new HashMap<String, String>();
|
||||
static HashMap<String, String> paramsFilesNoReadGroupTest = new HashMap<String, String>();
|
||||
|
||||
@Test
|
||||
public void testCountCovariates1() {
|
||||
|
|
@ -21,7 +22,6 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "cbe17568c5f03516a88ff0c5ce33a87b" );
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "8806fb7f0e669d1abab61dc4b3d10d8f" );
|
||||
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
|
@ -82,7 +82,6 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d5962c39a53a511d7ec710d5343e7a37");
|
||||
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
|
@ -104,4 +103,62 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
executeTest("testCountCovariatesVCF", spec);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testCountCovariatesNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "ca9e1312102190bbd58f82809eabf175" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" +
|
||||
" -T CountCovariates" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,000,000-10,200,000" +
|
||||
" -cov ReadGroupCovariate" +
|
||||
" -cov QualityScoreCovariate" +
|
||||
" -cov CycleCovariate" +
|
||||
" -cov DinucCovariate" +
|
||||
" --default_platform illumina" +
|
||||
" --sorted_output" +
|
||||
" -recalFile %s",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
List<File> result = executeTest("testCountCovariatesNoReadGroups", spec).getFirst();
|
||||
paramsFilesNoReadGroupTest.put(bam, result.get(0).getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTableRecalibratorNoReadGroups() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "9bbb281c4f55b0c094457e9f69e61962" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
String md5 = entry.getValue();
|
||||
String paramsFile = paramsFilesNoReadGroupTest.get(bam);
|
||||
System.out.printf("PARAMS FOR %s is %s%n", bam, paramsFile);
|
||||
if ( paramsFile != null ) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-R /broad/1KG/reference/human_b36_both.fasta" +
|
||||
" -T TableRecalibration" +
|
||||
" -I " + bam +
|
||||
" -L 1:10,100,000-10,300,000" +
|
||||
" -outputBam %s" +
|
||||
" --no_pg_tag" +
|
||||
" --default_platform illumina" +
|
||||
" -recalFile " + paramsFile,
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testTableRecalibratorNoReadGroups", spec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue