Updating Covariate interface with Mauricio to include an errorModel parameter. On the fly recalibration of base insertion and base deletion quals is live for the HaplotypeCaller
This commit is contained in:
parent
b7ffd144e8
commit
dc05b71e39
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
|
@ -35,7 +36,7 @@ import java.util.Arrays;
|
|||
* Date: 9/26/11
|
||||
*/
|
||||
|
||||
public class ContextCovariate implements Covariate {
|
||||
public class ContextCovariate implements ExperimentalCovariate {
|
||||
|
||||
final int CONTEXT_SIZE = 8;
|
||||
String allN = "";
|
||||
|
|
@ -49,7 +50,7 @@ public class ContextCovariate implements Covariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
byte[] bases = read.getReadBases();
|
||||
for(int i = 0; i < read.getReadLength(); i++) {
|
||||
comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) );
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
|||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -374,7 +375,7 @@ public class CountCovariatesWalker extends LocusWalker<CountCovariatesWalker.Cou
|
|||
|
||||
RecalDataManager.parseColorSpace( gatkRead );
|
||||
gatkRead.setTemporaryAttribute( COVARS_ATTRIBUTE,
|
||||
RecalDataManager.computeCovariates( gatkRead, requestedCovariates ));
|
||||
RecalDataManager.computeCovariates( gatkRead, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ));
|
||||
}
|
||||
|
||||
// Skip this position if base quality is zero
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -32,7 +33,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read, offset, and corresponding reference bases
|
||||
* The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read.
|
||||
* In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code.
|
||||
* This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up.
|
||||
*/
|
||||
|
|
@ -40,9 +41,10 @@ import net.sf.samtools.SAMRecord;
|
|||
public interface Covariate {
|
||||
public void initialize( RecalibrationArgumentCollection RAC ); // Initialize any member variables using the command-line arguments passed to the walkers
|
||||
public Comparable getValue( String str ); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public void getValues( SAMRecord read, Comparable[] comparable ); //Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||
//read-specific calculations to be done just once rather than for each offset.
|
||||
public void getValues( SAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType );
|
||||
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||
//read-specific calculations to be done just once rather than for each offset.
|
||||
}
|
||||
|
||||
interface RequiredCovariate extends Covariate {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
|
@ -65,7 +66,7 @@ public class CycleCovariate implements StandardCovariate {
|
|||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
|
||||
//-----------------------------
|
||||
// Illumina, Solid, PacBio, and Complete Genomics
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
|
|
@ -65,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
|
|||
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
||||
*/
|
||||
@Override
|
||||
public void getValues( SAMRecord read, Comparable[] result ) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
||||
final int readLength = read.getReadLength();
|
||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||
|
|
@ -78,7 +79,7 @@ public class DinucCovariate implements StandardCovariate {
|
|||
if(negativeStrand) {
|
||||
bases = BaseUtils.simpleReverseComplement(bases); //this is NOT in-place
|
||||
}
|
||||
result[0] = NO_DINUC; // No dinuc at the beginning of the read
|
||||
comparable[0] = NO_DINUC; // No dinuc at the beginning of the read
|
||||
|
||||
prevBase = bases[0];
|
||||
offset++;
|
||||
|
|
@ -87,16 +88,16 @@ public class DinucCovariate implements StandardCovariate {
|
|||
// previous base in the reference. This is done in part to be consistent with unmapped reads.
|
||||
base = bases[offset];
|
||||
if( BaseUtils.isRegularBase( prevBase ) ) {
|
||||
result[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
|
||||
comparable[offset] = dinucHashMapRef.get( Dinuc.hashBytes( prevBase, base ) );
|
||||
} else {
|
||||
result[offset] = NO_DINUC;
|
||||
comparable[offset] = NO_DINUC;
|
||||
}
|
||||
|
||||
offset++;
|
||||
prevBase = base;
|
||||
}
|
||||
if(negativeStrand) {
|
||||
reverse( result );
|
||||
reverse( comparable );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
|
|
@ -78,7 +79,7 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -92,7 +93,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -54,7 +55,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -63,7 +64,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -53,7 +54,7 @@ public class PositionCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -59,7 +60,7 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
for(int iii = 0; iii < read.getReadLength(); iii++) {
|
||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -43,10 +46,15 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
for(int i = 0; i < read.getReadLength(); i++) {
|
||||
comparable[i] = (int) baseQualities[i];
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
if( modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION ) {
|
||||
byte[] baseQualities = read.getBaseQualities();
|
||||
for(int i = 0; i < read.getReadLength(); i++) {
|
||||
comparable[i] = (int) baseQualities[i];
|
||||
}
|
||||
} else { // model == BASE_INSERTION || model == BASE_DELETION
|
||||
Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -35,7 +36,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* The Read Group covariate.
|
||||
*/
|
||||
|
||||
public class ReadGroupCovariate implements RequiredCovariate{
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
public static final String defaultReadGroup = "DefaultReadGroup";
|
||||
|
||||
|
|
@ -45,7 +46,7 @@ public class ReadGroupCovariate implements RequiredCovariate{
|
|||
}
|
||||
|
||||
@Override
|
||||
public void getValues(SAMRecord read, Comparable[] comparable) {
|
||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||
for(int i = 0; i < read.getReadLength(); i++) {
|
||||
comparable[i] = readGroupId;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -571,7 +572,7 @@ public class RecalDataManager {
|
|||
* value for the ith position in the read and the jth covariate in
|
||||
* reqeustedCovariates list.
|
||||
*/
|
||||
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates) {
|
||||
public static Comparable[][] computeCovariates( final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
||||
//compute all covariates for this read
|
||||
final List<Covariate> requestedCovariatesRef = requestedCovariates;
|
||||
final int numRequestedCovariates = requestedCovariatesRef.size();
|
||||
|
|
@ -582,7 +583,7 @@ public class RecalDataManager {
|
|||
|
||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||
for( int i = 0; i < numRequestedCovariates; i++ ) {
|
||||
requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder );
|
||||
requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder, modelType );
|
||||
for(int j = 0; j < readLength; j++) {
|
||||
//copy values into a 2D array that allows all covar types to be extracted at once for
|
||||
//an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
|
|||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
@ -398,7 +399,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
//compute all covariate values for this read
|
||||
final Comparable[][] covariateValues_offset_x_covar =
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION);
|
||||
|
||||
// For each base in the read
|
||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||
|
|
|
|||
|
|
@ -177,13 +177,13 @@ public class BaseRecalibration {
|
|||
dataManager.addToAllTables( key, datum, QualityUtils.MIN_USABLE_Q_SCORE ); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
|
||||
}
|
||||
|
||||
public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals ) {
|
||||
public byte[] recalibrateRead( final GATKSAMRecord read, final byte[] originalQuals, final BaseRecalibrationType modelType ) {
|
||||
|
||||
final byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
//compute all covariate values for this read
|
||||
final Comparable[][] covariateValues_offset_x_covar =
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates);
|
||||
RecalDataManager.computeCovariates(read, requestedCovariates, modelType);
|
||||
|
||||
// For each base in the read
|
||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.sam;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -194,7 +195,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
||||
// else give default values which are flat Q45
|
||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_INSERTION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
||||
}
|
||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
||||
setAttribute( BQSR_BASE_INSERTION_QUALITIES, quals );
|
||||
|
|
@ -210,7 +211,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
// if the recal data was populated in the engine then recalibrate the quality scores on the fly
|
||||
// else give default values which are flat Q45
|
||||
if( GenomeAnalysisEngine.hasBaseRecalibration() ) {
|
||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
||||
quals = GenomeAnalysisEngine.getBaseRecalibration().recalibrateRead( this, quals, BaseRecalibration.BaseRecalibrationType.BASE_DELETION ); // the original quals here are the flat base insertion/deletion quals, NOT the original base qualities
|
||||
}
|
||||
// add the qual array to the read so that we don't have to do the recalibration work again
|
||||
setAttribute( BQSR_BASE_DELETION_QUALITIES, quals );
|
||||
|
|
|
|||
Loading…
Reference in New Issue