Cleaning up the covariate classes and removing unused code from the bqsr optimizations in 2009.

This commit is contained in:
Ryan Poplin 2012-02-06 08:54:42 -05:00
parent 5343f8ba67
commit b7ffd144e8
14 changed files with 61 additions and 85 deletions

View File

@ -215,8 +215,8 @@ public class GenomeAnalysisEngine {
resetRandomGenerator(System.currentTimeMillis()); resetRandomGenerator(System.currentTimeMillis());
// if the user specified an input BQSR recalibration table then enable on the fly recalibration // if the user specified an input BQSR recalibration table then enable on the fly recalibration
if (this.getArguments().RECAL_FILE != null) if (this.getArguments().BQSR_RECAL_FILE != null)
setBaseRecalibration(this.getArguments().RECAL_FILE); setBaseRecalibration(this.getArguments().BQSR_RECAL_FILE);
// Determine how the threads should be divided between CPU vs. IO. // Determine how the threads should be divided between CPU vs. IO.
determineThreadAllocation(); determineThreadAllocation();

View File

@ -192,7 +192,7 @@ public class GATKArgumentCollection {
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. * and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/ */
@Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration") @Input(fullName="BQSR", shortName="BQSR", required=false, doc="Filename for the input covariates table recalibration .csv file which enables on the fly base quality score recalibration")
public File RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously public File BQSR_RECAL_FILE = null; // BUGBUG: need a better argument name once we decide how BQSRs v1 and v2 will live in the code base simultaneously
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
public byte defaultBaseQualities = -1; public byte defaultBaseQualities = -1;

View File

@ -41,12 +41,14 @@ public class ContextCovariate implements Covariate {
String allN = ""; String allN = "";
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) { for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) {
allN += "N"; allN += "N";
} }
} }
@Override
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
for(int i = 0; i < read.getReadLength(); i++) { for(int i = 0; i < read.getReadLength(); i++) {
@ -55,8 +57,8 @@ public class ContextCovariate implements Covariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return str; return str;
} }
} }

View File

@ -51,6 +51,7 @@ public class CycleCovariate implements StandardCovariate {
private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); private final static EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
if( RAC.DEFAULT_PLATFORM != null ) { if( RAC.DEFAULT_PLATFORM != null ) {
if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) || if( RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "SLX" ) || RAC.DEFAULT_PLATFORM.equalsIgnoreCase( "ILLUMINA" ) ||
@ -63,6 +64,7 @@ public class CycleCovariate implements StandardCovariate {
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
@Override
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
//----------------------------- //-----------------------------
@ -164,6 +166,7 @@ public class CycleCovariate implements StandardCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return Integer.parseInt( str ); return Integer.parseInt( str );
} }

View File

@ -48,6 +48,7 @@ public class DinucCovariate implements StandardCovariate {
private HashMap<Integer, Dinuc> dinucHashMap; private HashMap<Integer, Dinuc> dinucHashMap;
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
final byte[] BASES = { (byte)'A', (byte)'C', (byte)'G', (byte)'T' }; final byte[] BASES = { (byte)'A', (byte)'C', (byte)'G', (byte)'T' };
dinucHashMap = new HashMap<Integer, Dinuc>(); dinucHashMap = new HashMap<Integer, Dinuc>();
@ -60,44 +61,10 @@ public class DinucCovariate implements StandardCovariate {
dinucHashMap.put( Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC ); dinucHashMap.put( Dinuc.hashBytes(NO_CALL, NO_CALL), NO_DINUC );
} }
/*
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
byte base;
byte prevBase;
final byte[] bases = read.getReadBases();
// If this is a negative strand read then we need to reverse the direction for our previous base
if( read.getReadNegativeStrandFlag() ) {
// No dinuc at the beginning of the read
if( offset == bases.length-1 ) {
return NO_DINUC;
}
base = (byte)BaseUtils.simpleComplement( (char)(bases[offset]) );
// Note: We are using the previous base in the read, not the previous base in the reference. This is done in part to be consistent with unmapped reads.
prevBase = (byte)BaseUtils.simpleComplement( (char)(bases[offset + 1]) );
} else {
// No dinuc at the beginning of the read
if( offset == 0 ) {
return NO_DINUC;
}
base = bases[offset];
// Note: We are using the previous base in the read, not the previous base in the reference. This is done in part to be consistent with unmapped reads.
prevBase = bases[offset - 1];
}
// Make sure the previous base is good
if( !BaseUtils.isRegularBase( prevBase ) ) {
return NO_DINUC;
}
return dinucHashMap.get( Dinuc.hashBytes( prevBase, base ) );
}
*/
/** /**
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read. * Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
*/ */
@Override
public void getValues( SAMRecord read, Comparable[] result ) { public void getValues( SAMRecord read, Comparable[] result ) {
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
final int readLength = read.getReadLength(); final int readLength = read.getReadLength();
@ -134,6 +101,7 @@ public class DinucCovariate implements StandardCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
byte[] bytes = str.getBytes(); byte[] bytes = str.getBytes();
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) ); final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) );
@ -143,7 +111,6 @@ public class DinucCovariate implements StandardCovariate {
return returnDinuc; return returnDinuc;
} }
/** /**
* Reverses the given array in place. * Reverses the given array in place.
* *

View File

@ -41,12 +41,13 @@ public class GCContentCovariate implements ExperimentalCovariate {
int numBack = 7; int numBack = 7;
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
numBack = RAC.HOMOPOLYMER_NBACK; numBack = RAC.HOMOPOLYMER_NBACK;
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
// ATTGCCCCGTAAAAAAAGAGAA // ATTGCCCCGTAAAAAAAGAGAA
// 0000123456654321001122 // 0000123456654321001122
@ -75,7 +76,8 @@ public class GCContentCovariate implements ExperimentalCovariate {
return -1; return -1;
} }
} }
@Override
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
@ -83,10 +85,8 @@ public class GCContentCovariate implements ExperimentalCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return Integer.parseInt( str ); return Integer.parseInt( str );
} }
} }

View File

@ -43,12 +43,13 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
int numBack = 7; int numBack = 7;
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
numBack = RAC.HOMOPOLYMER_NBACK; numBack = RAC.HOMOPOLYMER_NBACK;
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
// This block of code is for if you don't want to only count consecutive bases // This block of code is for if you don't want to only count consecutive bases
// ATTGCCCCGTAAAAAAAAATA // ATTGCCCCGTAAAAAAAAATA
@ -90,6 +91,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
return numAgree; return numAgree;
} }
@Override
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
@ -97,8 +99,8 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return Integer.parseInt( str ); return Integer.parseInt( str );
} }
} }

View File

@ -38,23 +38,25 @@ import net.sf.samtools.SAMRecord;
public class MappingQualityCovariate implements ExperimentalCovariate { public class MappingQualityCovariate implements ExperimentalCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
return read.getMappingQuality(); return read.getMappingQuality();
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return Integer.parseInt( str ); return Integer.parseInt( str );
} }
@Override
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
} }
} }
} }

View File

@ -41,12 +41,13 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
private int windowReach; // How far in each direction from the current base to look private int windowReach; // How far in each direction from the current base to look
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
windowReach = RAC.WINDOW_SIZE / 2; // integer division windowReach = RAC.WINDOW_SIZE / 2; // integer division
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
// Loop over the list of base quality scores in the window and find the minimum // Loop over the list of base quality scores in the window and find the minimum
final byte[] quals = read.getBaseQualities(); final byte[] quals = read.getBaseQualities();
@ -61,14 +62,16 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
return minQual; return minQual;
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker @Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
} }
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
} }

View File

@ -39,11 +39,12 @@ import net.sf.samtools.SAMRecord;
public class PositionCovariate implements ExperimentalCovariate { public class PositionCovariate implements ExperimentalCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
int cycle = offset; int cycle = offset;
if( read.getReadNegativeStrandFlag() ) { if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - (offset + 1); cycle = read.getReadLength() - (offset + 1);
@ -51,15 +52,16 @@ public class PositionCovariate implements ExperimentalCovariate {
return cycle; return cycle;
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker @Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
} }
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
} }

View File

@ -40,11 +40,12 @@ import net.sf.samtools.SAMRecord;
public class PrimerRoundCovariate implements ExperimentalCovariate { public class PrimerRoundCovariate implements ExperimentalCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
} }
// Used to pick out the covariate's value from attributes of the read // Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) { private final Comparable getValue( final SAMRecord read, final int offset ) {
if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" ) ) { if( read.getReadGroup().getPlatform().equalsIgnoreCase( "SOLID" ) || read.getReadGroup().getPlatform().equalsIgnoreCase( "ABI_SOLID" ) ) {
int pos = offset; int pos = offset;
if( read.getReadNegativeStrandFlag() ) { if( read.getReadNegativeStrandFlag() ) {
@ -57,14 +58,16 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker @Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
for(int iii = 0; iii < read.getReadLength(); iii++) { for(int iii = 0; iii < read.getReadLength(); iii++) {
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
} }
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) {
return Integer.parseInt( str );
}
} }

View File

@ -38,16 +38,11 @@ import net.sf.samtools.SAMRecord;
public class QualityScoreCovariate implements RequiredCovariate { public class QualityScoreCovariate implements RequiredCovariate {
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
} }
/* @Override
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
return (int)(read.getBaseQualities()[offset]);
}
*/
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
byte[] baseQualities = read.getBaseQualities(); byte[] baseQualities = read.getBaseQualities();
for(int i = 0; i < read.getReadLength(); i++) { for(int i = 0; i < read.getReadLength(); i++) {
@ -56,8 +51,8 @@ public class QualityScoreCovariate implements RequiredCovariate {
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return Integer.parseInt( str ); return Integer.parseInt( str );
} }
} }

View File

@ -40,16 +40,11 @@ public class ReadGroupCovariate implements RequiredCovariate{
public static final String defaultReadGroup = "DefaultReadGroup"; public static final String defaultReadGroup = "DefaultReadGroup";
// Initialize any member variables using the command-line arguments passed to the walkers // Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize( final RecalibrationArgumentCollection RAC ) { public void initialize( final RecalibrationArgumentCollection RAC ) {
} }
/* @Override
// Used to pick out the covariate's value from attributes of the read
public final Comparable getValue( final SAMRecord read, final int offset ) {
return read.getReadGroup().getReadGroupId();
}
*/
public void getValues(SAMRecord read, Comparable[] comparable) { public void getValues(SAMRecord read, Comparable[] comparable) {
final String readGroupId = read.getReadGroup().getReadGroupId(); final String readGroupId = read.getReadGroup().getReadGroupId();
for(int i = 0; i < read.getReadLength(); i++) { for(int i = 0; i < read.getReadLength(); i++) {
@ -58,10 +53,10 @@ public class ReadGroupCovariate implements RequiredCovariate{
} }
// Used to get the covariate's value from input csv file in TableRecalibrationWalker // Used to get the covariate's value from input csv file in TableRecalibrationWalker
@Override
public final Comparable getValue( final String str ) { public final Comparable getValue( final String str ) {
return str; return str;
} }
} }

View File

@ -162,9 +162,11 @@ public class GATKSAMRecord extends BAMRecord {
return super.equals(o); return super.equals(o);
} }
/*
@Override @Override
public byte[] getBaseQualities() { public byte[] getBaseQualities() {
return super.getBaseQualities();
/*
if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) { if( getAttribute( BQSR_BASES_HAVE_BEEN_RECALIBRATED_TAG ) != null ) {
return super.getBaseQualities(); return super.getBaseQualities();
} else { } else {
@ -178,8 +180,8 @@ public class GATKSAMRecord extends BAMRecord {
return super.getBaseQualities(); return super.getBaseQualities();
} }
} }
*/
} }
*/
/** /**
* Accessors for base insertion and base deletion quality scores * Accessors for base insertion and base deletion quality scores