TableRecalibration is now much smarter about hashing calculations, taking advantage of the sequential recalibration formulation. Instead of hashing RecalDatums it hashes the empirical quality score itself. This cuts the runtime by 20 percent. TableRecalibration also now skips over reads with zero mapping quality (outputs them to the new bam but doesn't touch their base quality scores).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2069 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
be31d7f4cc
commit
f0a234ab29
|
|
@ -51,7 +51,7 @@ public class DinucCovariate implements Covariate {
|
|||
dinucHashMap = new HashMap<Integer, Dinuc>();
|
||||
for(byte byte1 : BASES) {
|
||||
for(byte byte2: BASES) {
|
||||
dinucHashMap.put( Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2) );
|
||||
dinucHashMap.put( Dinuc.hashBytes(byte1, byte2), new Dinuc(byte1, byte2) ); // This might seem silly, but Strings are too slow
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -43,98 +42,134 @@ public class RecalDataManager {
|
|||
private NHashMap<RecalDatum> dataCollapsedReadGroup; // table where everything except read group has been collapsed
|
||||
private NHashMap<RecalDatum> dataCollapsedQualityScore; // table where everything except read group and quality score has been collapsed
|
||||
private ArrayList<NHashMap<RecalDatum>> dataCollapsedByCovariate; // tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
private boolean collapsedTablesCreated;
|
||||
public NHashMap<Double> dataSumExpectedErrors; // table used to calculate the overall aggregate quality score in which everything except read group is collapsed
|
||||
|
||||
private NHashMap<Double> dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
|
||||
private NHashMap<Double> dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
|
||||
private ArrayList<NHashMap<Double>> dataCollapsedByCovariateDouble; // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
|
||||
|
||||
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // the tag that holds the original quality scores
|
||||
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // the tag that holds the color space quality scores for SOLID bams
|
||||
|
||||
RecalDataManager() {
|
||||
data = new NHashMap<RecalDatum>();
|
||||
collapsedTablesCreated = false;
|
||||
}
|
||||
|
||||
RecalDataManager( final int estimatedCapacity, final boolean createCollapsedTables, final int numCovariates ) {
|
||||
if( createCollapsedTables ) { // initialize all the collapsed tables
|
||||
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
|
||||
dataCollapsedQualityScore = new NHashMap<RecalDatum>();
|
||||
dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||
dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
|
||||
}
|
||||
dataSumExpectedErrors = new NHashMap<Double>();
|
||||
} else {
|
||||
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f);
|
||||
}
|
||||
}
|
||||
|
||||
RecalDataManager( int estimatedCapacity ) {
|
||||
RecalDataManager( final int estimatedCapacity ) {
|
||||
data = new NHashMap<RecalDatum>( estimatedCapacity, 0.8f ); // second arg is the 'loading factor',
|
||||
// a number to monkey around with when optimizing performance of the HashMap
|
||||
collapsedTablesCreated = false;
|
||||
}
|
||||
|
||||
// BUGBUG: A lot going on in this method, doing a lot of pre-calculations for use in the sequential mode calculation later in TableRecalibrationWalker
|
||||
/**
|
||||
* Create all the collapsed tables that will be used in the sequential calculation in TableRecalibrationWalker
|
||||
* @param numCovariates The number of covariates you have determines the number of tables to create
|
||||
* Add the given mapping to all of the collapsed hash tables
|
||||
* @param key The list of comparables that is the key for this mapping
|
||||
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||
*/
|
||||
public final void createCollapsedTables( final int numCovariates ) {
|
||||
dataCollapsedReadGroup = new NHashMap<RecalDatum>();
|
||||
dataCollapsedQualityScore = new NHashMap<RecalDatum>();
|
||||
dataCollapsedByCovariate = new ArrayList<NHashMap<RecalDatum>>();
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||
dataCollapsedByCovariate.add( new NHashMap<RecalDatum>() );
|
||||
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum ) {
|
||||
|
||||
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
|
||||
//data.put(key, thisDatum); // add the mapping to the main table
|
||||
|
||||
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
|
||||
} else {
|
||||
collapsedDatum.increment(fullDatum);
|
||||
}
|
||||
dataSumExpectedErrors = new NHashMap<Double>();
|
||||
|
||||
// preallocate for use in for loops below
|
||||
RecalDatum thisDatum;
|
||||
RecalDatum collapsedDatum;
|
||||
List<? extends Comparable> key;
|
||||
ArrayList<Comparable> newKey;
|
||||
Double sumExpectedErrors;
|
||||
// create dataSumExpectedErrors, the table used to calculate the overall aggregate quality score in which everything except read group is collapsed
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
Double sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
||||
if( sumExpectedErrors == null ) {
|
||||
dataSumExpectedErrors.put( newKey, 0.0 );
|
||||
} else {
|
||||
dataSumExpectedErrors.remove( newKey );
|
||||
sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * fullDatum.getNumObservations();
|
||||
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
|
||||
}
|
||||
|
||||
// for every data point in the map
|
||||
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : data.entrySet() ) {
|
||||
thisDatum = entry.getValue();
|
||||
key = entry.getKey();
|
||||
|
||||
// create dataCollapsedReadGroup, the table where everything except read group has been collapsed
|
||||
newKey = new ArrayList<Comparable>();
|
||||
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score
|
||||
collapsedDatum = dataCollapsedQualityScore.get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedQualityScore.put( newKey, new RecalDatum(fullDatum) );
|
||||
} else {
|
||||
collapsedDatum.increment(fullDatum);
|
||||
}
|
||||
|
||||
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
for( int iii = 0; iii < dataCollapsedByCovariate.size(); iii++ ) { // readGroup and QualityScore aren't counted
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
collapsedDatum = dataCollapsedReadGroup.get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedReadGroup.put( newKey, new RecalDatum( thisDatum ) );
|
||||
} else {
|
||||
collapsedDatum.increment( thisDatum );
|
||||
}
|
||||
|
||||
// create dataSumExpectedErrors, the table used to calculate the overall aggregate quality score in which everything except read group is collapsed
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
||||
if( sumExpectedErrors == null ) {
|
||||
dataSumExpectedErrors.put( newKey, 0.0 );
|
||||
} else {
|
||||
dataSumExpectedErrors.remove( newKey );
|
||||
sumExpectedErrors += QualityUtils.qualToErrorProb(Byte.parseByte(key.get(1).toString())) * thisDatum.getNumObservations();
|
||||
dataSumExpectedErrors.put( newKey, sumExpectedErrors );
|
||||
}
|
||||
|
||||
newKey = new ArrayList<Comparable>();
|
||||
// create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score
|
||||
collapsedDatum = dataCollapsedQualityScore.get( newKey );
|
||||
newKey.add( key.get(1) ); // and quality score ...
|
||||
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
|
||||
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum(fullDatum) );
|
||||
} else {
|
||||
collapsedDatum.increment( thisDatum );
|
||||
collapsedDatum.increment(fullDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score ...
|
||||
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );
|
||||
} else {
|
||||
collapsedDatum.increment( thisDatum );
|
||||
}
|
||||
/**
|
||||
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
|
||||
* that will be used in the sequential calculation in TableRecalibrationWalker
|
||||
* @param numCovariates The number of covariates you have determines the number of tables to create
|
||||
* @param smoothing The smoothing parameter that goes into the empirical quality score calculation
|
||||
*/
|
||||
public final void generateEmpiricalQualities( final int numCovariates, final int smoothing ) {
|
||||
|
||||
dataCollapsedReadGroupDouble = new NHashMap<Double>();
|
||||
dataCollapsedQualityScoreDouble = new NHashMap<Double>();
|
||||
dataCollapsedByCovariateDouble = new ArrayList<NHashMap<Double>>();
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||
dataCollapsedByCovariateDouble.add( new NHashMap<Double>() );
|
||||
}
|
||||
|
||||
// Hash the empirical quality scores so we don't have to call Math.log at every base for every read
|
||||
// Looping over the entrySet is really expensive but worth it
|
||||
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedReadGroup.entrySet() ) {
|
||||
dataCollapsedReadGroupDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
|
||||
}
|
||||
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedQualityScore.entrySet() ) {
|
||||
dataCollapsedQualityScoreDouble.put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
|
||||
}
|
||||
for( int iii = 0; iii < numCovariates - 2; iii++ ) {
|
||||
for( Map.Entry<List<? extends Comparable>,RecalDatum> entry : dataCollapsedByCovariate.get(iii).entrySet() ) {
|
||||
dataCollapsedByCovariateDouble.get(iii).put( entry.getKey(), entry.getValue().empiricalQualDouble( smoothing ));
|
||||
}
|
||||
}
|
||||
|
||||
collapsedTablesCreated = true;
|
||||
dataCollapsedQualityScore.clear();
|
||||
dataCollapsedByCovariate.clear();
|
||||
dataCollapsedQualityScore = null; // will never need this again
|
||||
dataCollapsedByCovariate = null; // will never need this again
|
||||
if( data!=null ) {
|
||||
data.clear();
|
||||
data = null; // will never need this again
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -143,10 +178,6 @@ public class RecalDataManager {
|
|||
* @return The desired collapsed HashMap
|
||||
*/
|
||||
public final NHashMap<RecalDatum> getCollapsedTable( final int covariate ) {
|
||||
if( !collapsedTablesCreated ) {
|
||||
throw new StingException("Trying to get collapsed tables before they have been populated. Null pointers abound.");
|
||||
}
|
||||
|
||||
if( covariate == 0) {
|
||||
return dataCollapsedReadGroup; // table where everything except read group has been collapsed
|
||||
} else if( covariate == 1 ) {
|
||||
|
|
@ -155,4 +186,19 @@ public class RecalDataManager {
|
|||
return dataCollapsedByCovariate.get( covariate - 2 ); // table where everything except read group, quality score, and given covariate has been collapsed
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the appropriate collapsed table of empirical quality out of the set of all the tables held by this Object
|
||||
* @param covariate Which covariate indexes the desired collapsed NHashMap<Double>
|
||||
* @return The desired collapsed NHashMap<Double>
|
||||
*/
|
||||
public final NHashMap<Double> getCollapsedDoubleTable( final int covariate ) {
|
||||
if( covariate == 0) {
|
||||
return dataCollapsedReadGroupDouble; // table of empirical qualities where everything except read group has been collapsed
|
||||
} else if( covariate == 1 ) {
|
||||
return dataCollapsedQualityScoreDouble; // table of empirical qualities where everything except read group and quality score has been collapsed
|
||||
} else {
|
||||
return dataCollapsedByCovariateDouble.get( covariate - 2 ); // table of empirical qualities where everything except read group, quality score, and given covariate has been collapsed
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,8 +86,8 @@ public class RecalDatum {
|
|||
}
|
||||
}
|
||||
|
||||
public final void increment( final char curBase, final char ref ) {
|
||||
increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(ref) ? 0 : 1 ); // increment takes num observations, then num mismatches
|
||||
public final void increment( final char curBase, final char refBase ) {
|
||||
increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -100,7 +100,7 @@ public class RecalDatum {
|
|||
double doubleMismatches = (double) ( numMismatches + smoothing );
|
||||
double doubleObservations = (double) ( numObservations + smoothing );
|
||||
double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
if (empiricalQual > QualityUtils.MAX_REASONABLE_Q_SCORE) empiricalQual = QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
if (empiricalQual > QualityUtils.MAX_REASONABLE_Q_SCORE) { empiricalQual = QualityUtils.MAX_REASONABLE_Q_SCORE; }
|
||||
return empiricalQual;
|
||||
}
|
||||
public final double empiricalQualDouble() { return empiricalQualDouble( 0 ); } // 'default' behavior is to use smoothing value of zero
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="How big of a window should the MinimumNQSCovariate use for its calculation", required=false)
|
||||
private int WINDOW_SIZE = 3;
|
||||
@Argument(fullName="smoothing", shortName="sm", required = false, doc="Number of imaginary counts to add to each bin in order to smooth out bins with few data points")
|
||||
public int SMOOTHING = 1;
|
||||
private int SMOOTHING = 1;
|
||||
|
||||
//public enum RecalibrationMode {
|
||||
// COMBINATORIAL,
|
||||
|
|
@ -83,11 +83,11 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
//}
|
||||
|
||||
//@Argument(fullName="recalibrationMode", shortName="mode", doc="Which calculation to use when recalibrating, default is SEQUENTIAL", required=false)
|
||||
public String MODE_STRING = "SEQUENTIAL";
|
||||
private String MODE_STRING = "SEQUENTIAL";
|
||||
//public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: do we need to support the other modes?
|
||||
|
||||
protected RecalDataManager dataManager;
|
||||
protected ArrayList<Covariate> requestedCovariates;
|
||||
private RecalDataManager dataManager;
|
||||
private ArrayList<Covariate> requestedCovariates;
|
||||
|
||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
|
|
@ -162,10 +162,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
} else { // found some data
|
||||
if( !foundAllCovariates ) {
|
||||
foundAllCovariates = true;
|
||||
logger.info( "The covariates being used here: " );
|
||||
logger.info( requestedCovariates );
|
||||
if(estimatedCapacity > 300 * 40 * 200 * 16) { estimatedCapacity = 300 * 40 * 200 * 16; } // Don't want to crash with out of heap space exception
|
||||
dataManager = new RecalDataManager( estimatedCapacity );
|
||||
final boolean createCollapsedTables = true;
|
||||
// Initialize the data hashMaps
|
||||
dataManager = new RecalDataManager( estimatedCapacity, createCollapsedTables, requestedCovariates.size() );
|
||||
|
||||
}
|
||||
addCSVData(line); // parse the line and add the data to the HashMap
|
||||
|
|
@ -179,10 +179,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
logger.info( "...done!" );
|
||||
|
||||
logger.info( "The covariates being used here: " );
|
||||
logger.info( requestedCovariates );
|
||||
|
||||
// Create the collapsed tables that are used in the sequential calculation
|
||||
if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
|
||||
logger.info( "Creating collapsed tables for use in sequential calculation..." );
|
||||
dataManager.createCollapsedTables( requestedCovariates.size() );
|
||||
logger.info( "Generating tables of empirical qualities for use in sequential calculation..." );
|
||||
dataManager.generateEmpiricalQualities( requestedCovariates.size(), SMOOTHING );
|
||||
logger.info( "...done!" );
|
||||
}
|
||||
}
|
||||
|
|
@ -201,7 +204,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
key.add( cov.getValue( vals[iii] ) );
|
||||
}
|
||||
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ) );
|
||||
dataManager.data.put( key, datum );
|
||||
dataManager.addToAllTables( key, datum );
|
||||
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -218,12 +222,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
public SAMRecord map( char[] refBases, SAMRecord read ) {
|
||||
|
||||
// WARNING: refBases is always null because this walker doesn't have @REQUIRES({DataSource.REFERENCE_BASES})
|
||||
// WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
|
||||
// This is done in order to speed up the code
|
||||
|
||||
// if( read.getMappingQuality() <= 0 ) {
|
||||
// return read; // early return here, unmapped reads and mapping quality zero reads should be left alone
|
||||
// }
|
||||
if( read.getMappingQuality() <= 0 ) {
|
||||
return read; // early return here, unmapped reads and mapping quality zero reads should be left alone
|
||||
}
|
||||
|
||||
byte[] originalQuals = read.getBaseQualities();
|
||||
// Check if we need to use the original quality scores instead
|
||||
|
|
@ -253,21 +257,19 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
key.add( covariate.getValue( read, iii, readGroup, originalQuals, bases ) ); // offset is zero based so passing iii is correct here
|
||||
}
|
||||
|
||||
if( MODE_STRING.equalsIgnoreCase("COMBINATORIAL") ) {
|
||||
RecalDatum datum = dataManager.data.get( key );
|
||||
if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||
recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||
}
|
||||
} else if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
} else {
|
||||
throw new StingException( "Specified RecalibrationMode is not supported: " + MODE_STRING );
|
||||
}
|
||||
recalQuals[iii] = performSequentialQualityCalculation( key );
|
||||
|
||||
// Do some error checking on the new quality score
|
||||
if ( recalQuals[iii] <= 0 || recalQuals[iii] > QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||
throw new StingException( "Assigning bad quality score " + key + " => " + recalQuals[iii] );
|
||||
}
|
||||
//if( MODE_STRING.equalsIgnoreCase("COMBINATORIAL") ) { // BUGBUG: This isn't supported. No need to keep the full data hashmap around so it was removed for major speed up
|
||||
// //RecalDatum datum = dataManager.data.get( key );
|
||||
// //if( datum != null ) { // if we have data for this combination of covariates then recalibrate the quality score otherwise do nothing
|
||||
// // recalQuals[iii] = datum.empiricalQualByte( SMOOTHING );
|
||||
// //}
|
||||
// throw new StingException("The Combinatorial mode isn't supported.");
|
||||
//} else if( MODE_STRING.equalsIgnoreCase("SEQUENTIAL") ) {
|
||||
//
|
||||
//} else {
|
||||
// throw new StingException( "Specified RecalibrationMode is not supported: " + MODE_STRING );
|
||||
//}
|
||||
}
|
||||
|
||||
preserveQScores( originalQuals, recalQuals ); // overwrite the work done if original quality score is too low
|
||||
|
|
@ -303,42 +305,40 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
private byte performSequentialQualityCalculation( List<? extends Comparable> key ) {
|
||||
|
||||
byte qualFromRead = Byte.parseByte(key.get(1).toString());
|
||||
ArrayList<Comparable> newKey;
|
||||
String readGroupKeyElement = key.get(0).toString();
|
||||
int qualityScoreKeyElement = Integer.parseInt(key.get(1).toString());
|
||||
byte qualFromRead = (byte)qualityScoreKeyElement;
|
||||
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
|
||||
|
||||
// The global quality shift (over the read group only)
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( readGroupKeyElement );
|
||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||
Double globalDeltaQEmpirical = dataManager.getCollapsedDoubleTable(0).get( newKey );
|
||||
double globalDeltaQ = 0.0;
|
||||
double aggregrateQreported = 0.0;
|
||||
if( globalDeltaQDatum != null ) {
|
||||
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - aggregrateQreported;
|
||||
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQreported;
|
||||
}
|
||||
|
||||
|
||||
// The shift in quality between reported and empirical
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( key.get(1) ); // quality score
|
||||
RecalDatum deltaQReportedDatum = dataManager.getCollapsedTable(1).get( newKey );
|
||||
newKey.add( qualityScoreKeyElement );
|
||||
Double deltaQReportedEmpirical = dataManager.getCollapsedDoubleTable(1).get( newKey );
|
||||
double deltaQReported = 0.0;
|
||||
if( deltaQReportedDatum != null ) {
|
||||
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
|
||||
if( deltaQReportedEmpirical != null ) {
|
||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
|
||||
// The shift in quality due to each covariate by itself in turn
|
||||
double deltaQCovariates = 0.0;
|
||||
RecalDatum deltaQCovariateDatum;
|
||||
Double deltaQCovariateEmpirical;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
newKey = new ArrayList<Comparable>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( key.get(1) ); // quality score
|
||||
newKey.add( key.get(iii) ); // given covariate
|
||||
deltaQCovariateDatum = dataManager.getCollapsedTable(iii).get( newKey );
|
||||
if( deltaQCovariateDatum != null ) {
|
||||
deltaQCovariates += ( deltaQCovariateDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
newKey.add( key.get(iii) ); // the given covariate
|
||||
deltaQCovariateEmpirical = dataManager.getCollapsedDoubleTable(iii).get( newKey );
|
||||
if( deltaQCovariateEmpirical != null ) {
|
||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
||||
}
|
||||
newKey.remove( 2 ); // this new covariate is always added in at position 2 in the newKey list
|
||||
}
|
||||
|
||||
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
|
|
|
|||
Loading…
Reference in New Issue