Sequential quality score calculation is now in place in the refactored recalibrator and matches the quality scores calculated by the old recalibrator exactly, at least on the small sets of data used so far. Validation, documentation, and optimization work is ongoing.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1985 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bf1bc94060
commit
84ba604611
|
|
@ -126,7 +126,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
|||
|
||||
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
||||
|
||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
Comparable<?> keyElement; // preallocate for use in for loop below
|
||||
boolean badKey = false;
|
||||
for( Covariate covariate : requestedCovariates ) {
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ public class CycleCovariate implements Covariate {
|
|||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
//BUGBUG: assumes Solexa platform
|
||||
int cycle = offset;
|
||||
Integer cycle = offset;
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
cycle = read.getReadLength() - (offset + 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ public class MappingQualityCovariate implements Covariate {
|
|||
}
|
||||
|
||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||
return read.getMappingQuality();
|
||||
return (Integer)(read.getMappingQuality());
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class MinimumNQSCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
|
||||
int minQual = quals[0];
|
||||
Integer minQual = (int)(quals[0]);
|
||||
for ( int qual : quals ) {
|
||||
if( qual < minQual ) {
|
||||
minQual = qual;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class QualityScoreCovariate implements Covariate {
|
|||
}
|
||||
}
|
||||
|
||||
return quals[offset];
|
||||
return ((Integer)((int)quals[offset]));
|
||||
}
|
||||
|
||||
public Comparable<?> getValue(String str) {
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class RecalDataManager {
|
|||
} else {
|
||||
collapsedDatum.increment( thisDatum );
|
||||
}
|
||||
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey.add( key.get(0) ); // make a new key with just the read group
|
||||
sumExpectedErrors = dataSumExpectedErrors.get( newKey );
|
||||
|
|
@ -75,6 +75,7 @@ public class RecalDataManager {
|
|||
newKey.add( key.get(1) ); // and quality score
|
||||
collapsedDatum = dataCollapsedQualityScore.get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
//System.out.println("Added: " + newKey + " " + newKey.hashCode());
|
||||
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
|
||||
} else {
|
||||
collapsedDatum.increment( thisDatum );
|
||||
|
|
@ -85,7 +86,7 @@ public class RecalDataManager {
|
|||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||
newKey.add( key.get(1) ); // and quality score ...
|
||||
newKey.add( key.get(iii) ); // and the given covariate
|
||||
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
||||
if( collapsedDatum == null ) {
|
||||
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );
|
||||
|
|
|
|||
|
|
@ -104,4 +104,8 @@ public class RecalDatum {
|
|||
public Long getNumObservations() {
|
||||
return numObservations;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format( "RecalDatum: %d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,8 +43,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString();
|
||||
public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly
|
||||
|
||||
protected static RecalDataManager dataManager;
|
||||
protected static ArrayList<Covariate> requestedCovariates;
|
||||
protected RecalDataManager dataManager;
|
||||
protected ArrayList<Covariate> requestedCovariates;
|
||||
|
||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||
|
|
@ -117,11 +117,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
dataManager.createCollapsedTables( requestedCovariates.size() );
|
||||
out.println( "...done!" );
|
||||
}
|
||||
|
||||
//System.out.println(dataManager.getCollapsedTable(1));
|
||||
}
|
||||
|
||||
private void addCSVData(String line) {
|
||||
String[] vals = line.split(",");
|
||||
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||
Covariate cov; // preallocated for use in for loop below
|
||||
int iii;
|
||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||
|
|
@ -192,17 +194,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
byte qualFromRead = Byte.parseByte(key.get(1).toString());
|
||||
ArrayList<Comparable<?>> newKey;
|
||||
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||
double globalDeltaQ = 0.0;
|
||||
double aggregrateQreported = 0.0;
|
||||
if( globalDeltaQDatum != null ) {
|
||||
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - aggregrateQreported;
|
||||
}
|
||||
//System.out.printf("Global quality score shift is %.2f - %.2f = %.2f%n",
|
||||
// globalDeltaQDatum.empiricalQualDouble( SMOOTHING ), ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations())), globalDeltaQ);
|
||||
|
||||
|
||||
|
||||
newKey = new ArrayList<Comparable<?>>();
|
||||
newKey.add( key.get(0) ); // read group
|
||||
newKey.add( key.get(1) ); // quality score
|
||||
|
|
@ -211,7 +214,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
if( deltaQReportedDatum != null ) {
|
||||
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double deltaQCovariates = 0.0;
|
||||
RecalDatum deltaQCovariateDatum;
|
||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||
|
|
@ -227,10 +231,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
|
||||
|
||||
|
||||
//System.out.println( "base quality score calculated: " + key +
|
||||
// String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
||||
|
||||
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||
throw new StingException( "Illegal base quality score calculated: " + key +
|
||||
|
|
@ -260,7 +260,4 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
return output;
|
||||
}
|
||||
|
||||
public void onTraversalDone( SAMFileWriter reduceResult ) {
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue