Sequential quality score calculation is now in place in the refactored recalibrator and matches the quality scores calculated by the old recalibrator exactly, at least on the small sets of data used so far. Validation, documentation, and optimization work is ongoing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1985 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-07 15:55:16 +00:00
parent bf1bc94060
commit 84ba604611
8 changed files with 25 additions and 23 deletions

View File

@ -126,7 +126,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
Comparable<?> keyElement; // preallocate for use in for loop below
boolean badKey = false;
for( Covariate covariate : requestedCovariates ) {

View File

@ -21,7 +21,7 @@ public class CycleCovariate implements Covariate {
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
//BUGBUG: assumes Solexia platform
int cycle = offset;
Integer cycle = offset;
if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - (offset + 1);
}

View File

@ -13,7 +13,7 @@ public class MappingQualityCovariate implements Covariate {
}
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
return read.getMappingQuality();
return (Integer)(read.getMappingQuality());
}
public Comparable<?> getValue(String str) {

View File

@ -32,7 +32,7 @@ public class MinimumNQSCovariate implements Covariate {
}
}
int minQual = quals[0];
Integer minQual = (int)(quals[0]);
for ( int qual : quals ) {
if( qual < minQual ) {
minQual = qual;

View File

@ -32,7 +32,7 @@ public class QualityScoreCovariate implements Covariate {
}
}
return quals[offset];
return ((Integer)((int)quals[offset]));
}
public Comparable<?> getValue(String str) {

View File

@ -56,7 +56,7 @@ public class RecalDataManager {
} else {
collapsedDatum.increment( thisDatum );
}
newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // make a new key with just the read group
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
@ -75,6 +75,7 @@ public class RecalDataManager {
newKey.add( key.get(1) ); // and quality score
collapsedDatum = dataCollapsedQualityScore.get( newKey );
if( collapsedDatum == null ) {
//System.out.println("Added: " + newKey + " " + newKey.hashCode());
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
} else {
collapsedDatum.increment( thisDatum );
@ -85,7 +86,7 @@ public class RecalDataManager {
newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score ...
newKey.add( key.get(iii) ); // and the given covariate
newKey.add( key.get(iii + 2) ); // and the given covariate
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
if( collapsedDatum == null ) {
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );

View File

@ -104,4 +104,8 @@ public class RecalDatum {
public Long getNumObservations() {
return numObservations;
}
public String toString() {
return String.format( "RecalDatum: %d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() );
}
}

View File

@ -43,8 +43,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString();
public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly
protected static RecalDataManager dataManager;
protected static ArrayList<Covariate> requestedCovariates;
protected RecalDataManager dataManager;
protected ArrayList<Covariate> requestedCovariates;
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
@ -117,11 +117,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
dataManager.createCollapsedTables( requestedCovariates.size() );
out.println( "...done!" );
}
//System.out.println(dataManager.getCollapsedTable(1));
}
private void addCSVData(String line) {
String[] vals = line.split(",");
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
Covariate cov; // preallocated for use in for loop below
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
@ -192,17 +194,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
byte qualFromRead = Byte.parseByte(key.get(1).toString());
ArrayList<Comparable<?>> newKey;
newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // read group
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
double globalDeltaQ = 0.0;
double aggregrateQreported = 0.0;
if( globalDeltaQDatum != null ) {
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - aggregrateQreported;
}
//System.out.printf("Global quality score shift is %.2f - %.2f = %.2f%n",
// globalDeltaQDatum.empiricalQualDouble( SMOOTHING ), ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations())), globalDeltaQ);
newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // read group
newKey.add( key.get(1) ); // quality score
@ -211,7 +214,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
if( deltaQReportedDatum != null ) {
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
}
double deltaQCovariates = 0.0;
RecalDatum deltaQCovariateDatum;
for( int iii = 2; iii < key.size(); iii++ ) {
@ -227,10 +231,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
//System.out.println( "base quality score calculated: " + key +
// String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
throw new StingException( "Illegal base quality score calculated: " + key +
@ -260,7 +260,4 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
return output;
}
public void onTraversalDone( SAMFileWriter reduceResult ) {
}
}