Sequential quality score calculation is now in place in the refactored recalibrator and matches the quality scores calculated by the old recalibrator exactly, at least on the small sets of data used so far. Validation, documentation, and optimization work is ongoing.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1985 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-11-07 15:55:16 +00:00
parent bf1bc94060
commit 84ba604611
8 changed files with 25 additions and 23 deletions

View File

@ -126,7 +126,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) { private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>(); List<Comparable<?>> key = new ArrayList<Comparable<?>>();
Comparable<?> keyElement; // preallocate for use in for loop below Comparable<?> keyElement; // preallocate for use in for loop below
boolean badKey = false; boolean badKey = false;
for( Covariate covariate : requestedCovariates ) { for( Covariate covariate : requestedCovariates ) {

View File

@ -21,7 +21,7 @@ public class CycleCovariate implements Covariate {
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) { public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
//BUGBUG: assumes Solexia platform //BUGBUG: assumes Solexia platform
int cycle = offset; Integer cycle = offset;
if( read.getReadNegativeStrandFlag() ) { if( read.getReadNegativeStrandFlag() ) {
cycle = read.getReadLength() - (offset + 1); cycle = read.getReadLength() - (offset + 1);
} }

View File

@ -13,7 +13,7 @@ public class MappingQualityCovariate implements Covariate {
} }
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) { public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
return read.getMappingQuality(); return (Integer)(read.getMappingQuality());
} }
public Comparable<?> getValue(String str) { public Comparable<?> getValue(String str) {

View File

@ -32,7 +32,7 @@ public class MinimumNQSCovariate implements Covariate {
} }
} }
int minQual = quals[0]; Integer minQual = (int)(quals[0]);
for ( int qual : quals ) { for ( int qual : quals ) {
if( qual < minQual ) { if( qual < minQual ) {
minQual = qual; minQual = qual;

View File

@ -32,7 +32,7 @@ public class QualityScoreCovariate implements Covariate {
} }
} }
return quals[offset]; return ((Integer)((int)quals[offset]));
} }
public Comparable<?> getValue(String str) { public Comparable<?> getValue(String str) {

View File

@ -56,7 +56,7 @@ public class RecalDataManager {
} else { } else {
collapsedDatum.increment( thisDatum ); collapsedDatum.increment( thisDatum );
} }
newKey = new ArrayList<Comparable<?>>(); newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // make a new key with just the read group newKey.add( key.get(0) ); // make a new key with just the read group
sumExpectedErrors = dataSumExpectedErrors.get( newKey ); sumExpectedErrors = dataSumExpectedErrors.get( newKey );
@ -75,6 +75,7 @@ public class RecalDataManager {
newKey.add( key.get(1) ); // and quality score newKey.add( key.get(1) ); // and quality score
collapsedDatum = dataCollapsedQualityScore.get( newKey ); collapsedDatum = dataCollapsedQualityScore.get( newKey );
if( collapsedDatum == null ) { if( collapsedDatum == null ) {
//System.out.println("Added: " + newKey + " " + newKey.hashCode());
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) ); dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
} else { } else {
collapsedDatum.increment( thisDatum ); collapsedDatum.increment( thisDatum );
@ -85,7 +86,7 @@ public class RecalDataManager {
newKey = new ArrayList<Comparable<?>>(); newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // make a new key with the read group ... newKey.add( key.get(0) ); // make a new key with the read group ...
newKey.add( key.get(1) ); // and quality score ... newKey.add( key.get(1) ); // and quality score ...
newKey.add( key.get(iii) ); // and the given covariate newKey.add( key.get(iii + 2) ); // and the given covariate
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey ); collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
if( collapsedDatum == null ) { if( collapsedDatum == null ) {
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) ); dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );

View File

@ -104,4 +104,8 @@ public class RecalDatum {
public Long getNumObservations() { public Long getNumObservations() {
return numObservations; return numObservations;
} }
/**
 * Returns a one-line text summary of this datum.
 *
 * @return the literal prefix "RecalDatum: " followed by numObservations,
 *         numMismatches and the value of empiricalQualByte() cast to int,
 *         separated by commas
 */
public String toString() {
// The cast to int lets the empirical quality be rendered with %d like the two counters.
return String.format( "RecalDatum: %d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() );
}
} }

View File

@ -43,8 +43,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString(); public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString();
public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly
protected static RecalDataManager dataManager; protected RecalDataManager dataManager;
protected static ArrayList<Covariate> requestedCovariates; protected ArrayList<Covariate> requestedCovariates;
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*"); private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
@ -117,11 +117,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
dataManager.createCollapsedTables( requestedCovariates.size() ); dataManager.createCollapsedTables( requestedCovariates.size() );
out.println( "...done!" ); out.println( "...done!" );
} }
//System.out.println(dataManager.getCollapsedTable(1));
} }
private void addCSVData(String line) { private void addCSVData(String line) {
String[] vals = line.split(","); String[] vals = line.split(",");
List<Comparable<?>> key = new ArrayList<Comparable<?>>(); ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
Covariate cov; // preallocated for use in for loop below Covariate cov; // preallocated for use in for loop below
int iii; int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) { for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
@ -192,17 +194,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
byte qualFromRead = Byte.parseByte(key.get(1).toString()); byte qualFromRead = Byte.parseByte(key.get(1).toString());
ArrayList<Comparable<?>> newKey; ArrayList<Comparable<?>> newKey;
newKey = new ArrayList<Comparable<?>>(); newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // read group newKey.add( key.get(0) ); // read group
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey ); RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
double globalDeltaQ = 0.0; double globalDeltaQ = 0.0;
double aggregrateQreported = 0.0;
if( globalDeltaQDatum != null ) { if( globalDeltaQDatum != null ) {
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) ); aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - aggregrateQreported;
} }
//System.out.printf("Global quality score shift is %.2f - %.2f = %.2f%n",
// globalDeltaQDatum.empiricalQualDouble( SMOOTHING ), ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations())), globalDeltaQ);
newKey = new ArrayList<Comparable<?>>(); newKey = new ArrayList<Comparable<?>>();
newKey.add( key.get(0) ); // read group newKey.add( key.get(0) ); // read group
newKey.add( key.get(1) ); // quality score newKey.add( key.get(1) ); // quality score
@ -211,7 +214,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
if( deltaQReportedDatum != null ) { if( deltaQReportedDatum != null ) {
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ; deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
} }
double deltaQCovariates = 0.0; double deltaQCovariates = 0.0;
RecalDatum deltaQCovariateDatum; RecalDatum deltaQCovariateDatum;
for( int iii = 2; iii < key.size(); iii++ ) { for( int iii = 2; iii < key.size(); iii++ ) {
@ -227,10 +231,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates; double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE ); byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
//System.out.println( "base quality score calculated: " + key +
// String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) { if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
throw new StingException( "Illegal base quality score calculated: " + key + throw new StingException( "Illegal base quality score calculated: " + key +
@ -260,7 +260,4 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
return output; return output;
} }
public void onTraversalDone( SAMFileWriter reduceResult ) {
}
} }