Sequential quality score calculation is now in place in the refactored recalibrator and matches the quality scores calculated by the old recalibrator exactly, at least on the small sets of data used so far. Validation, documentation, and optimization work is ongoing.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1985 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bf1bc94060
commit
84ba604611
|
|
@ -126,7 +126,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
|
|
||||||
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
private void updateDataFromRead(SAMRecord read, int offset, ReferenceContext ref) {
|
||||||
|
|
||||||
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||||
Comparable<?> keyElement; // preallocate for use in for loop below
|
Comparable<?> keyElement; // preallocate for use in for loop below
|
||||||
boolean badKey = false;
|
boolean badKey = false;
|
||||||
for( Covariate covariate : requestedCovariates ) {
|
for( Covariate covariate : requestedCovariates ) {
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ public class CycleCovariate implements Covariate {
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
//BUGBUG: assumes Solexia platform
|
//BUGBUG: assumes Solexia platform
|
||||||
int cycle = offset;
|
Integer cycle = offset;
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
cycle = read.getReadLength() - (offset + 1);
|
cycle = read.getReadLength() - (offset + 1);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ public class MappingQualityCovariate implements Covariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
public Comparable<?> getValue(SAMRecord read, int offset, char[] refBases) {
|
||||||
return read.getMappingQuality();
|
return (Integer)(read.getMappingQuality());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable<?> getValue(String str) {
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ public class MinimumNQSCovariate implements Covariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int minQual = quals[0];
|
Integer minQual = (int)(quals[0]);
|
||||||
for ( int qual : quals ) {
|
for ( int qual : quals ) {
|
||||||
if( qual < minQual ) {
|
if( qual < minQual ) {
|
||||||
minQual = qual;
|
minQual = qual;
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ public class QualityScoreCovariate implements Covariate {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return quals[offset];
|
return ((Integer)((int)quals[offset]));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Comparable<?> getValue(String str) {
|
public Comparable<?> getValue(String str) {
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,7 @@ public class RecalDataManager {
|
||||||
newKey.add( key.get(1) ); // and quality score
|
newKey.add( key.get(1) ); // and quality score
|
||||||
collapsedDatum = dataCollapsedQualityScore.get( newKey );
|
collapsedDatum = dataCollapsedQualityScore.get( newKey );
|
||||||
if( collapsedDatum == null ) {
|
if( collapsedDatum == null ) {
|
||||||
|
//System.out.println("Added: " + newKey + " " + newKey.hashCode());
|
||||||
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
|
dataCollapsedQualityScore.put( newKey, new RecalDatum( thisDatum ) );
|
||||||
} else {
|
} else {
|
||||||
collapsedDatum.increment( thisDatum );
|
collapsedDatum.increment( thisDatum );
|
||||||
|
|
@ -85,7 +86,7 @@ public class RecalDataManager {
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable<?>>();
|
||||||
newKey.add( key.get(0) ); // make a new key with the read group ...
|
newKey.add( key.get(0) ); // make a new key with the read group ...
|
||||||
newKey.add( key.get(1) ); // and quality score ...
|
newKey.add( key.get(1) ); // and quality score ...
|
||||||
newKey.add( key.get(iii) ); // and the given covariate
|
newKey.add( key.get(iii + 2) ); // and the given covariate
|
||||||
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
collapsedDatum = dataCollapsedByCovariate.get(iii).get( newKey );
|
||||||
if( collapsedDatum == null ) {
|
if( collapsedDatum == null ) {
|
||||||
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );
|
dataCollapsedByCovariate.get(iii).put( newKey, new RecalDatum( thisDatum ) );
|
||||||
|
|
|
||||||
|
|
@ -104,4 +104,8 @@ public class RecalDatum {
|
||||||
public Long getNumObservations() {
|
public Long getNumObservations() {
|
||||||
return numObservations;
|
return numObservations;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format( "RecalDatum: %d,%d,%d", numObservations, numMismatches, (int)empiricalQualByte() );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,8 +43,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString();
|
public String MODE_STRING = RecalibrationMode.SEQUENTIAL.toString();
|
||||||
public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly
|
public RecalibrationMode MODE = RecalibrationMode.SEQUENTIAL; //BUGBUG: need some code here to set this properly
|
||||||
|
|
||||||
protected static RecalDataManager dataManager;
|
protected RecalDataManager dataManager;
|
||||||
protected static ArrayList<Covariate> requestedCovariates;
|
protected ArrayList<Covariate> requestedCovariates;
|
||||||
|
|
||||||
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
private static Pattern COVARIATE_PATTERN = Pattern.compile("^@!.*");
|
||||||
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ";
|
||||||
|
|
@ -117,11 +117,13 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
dataManager.createCollapsedTables( requestedCovariates.size() );
|
dataManager.createCollapsedTables( requestedCovariates.size() );
|
||||||
out.println( "...done!" );
|
out.println( "...done!" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//System.out.println(dataManager.getCollapsedTable(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addCSVData(String line) {
|
private void addCSVData(String line) {
|
||||||
String[] vals = line.split(",");
|
String[] vals = line.split(",");
|
||||||
List<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
ArrayList<Comparable<?>> key = new ArrayList<Comparable<?>>();
|
||||||
Covariate cov; // preallocated for use in for loop below
|
Covariate cov; // preallocated for use in for loop below
|
||||||
int iii;
|
int iii;
|
||||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
||||||
|
|
@ -197,11 +199,12 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
newKey.add( key.get(0) ); // read group
|
newKey.add( key.get(0) ); // read group
|
||||||
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
RecalDatum globalDeltaQDatum = dataManager.getCollapsedTable(0).get( newKey );
|
||||||
double globalDeltaQ = 0.0;
|
double globalDeltaQ = 0.0;
|
||||||
|
double aggregrateQreported = 0.0;
|
||||||
if( globalDeltaQDatum != null ) {
|
if( globalDeltaQDatum != null ) {
|
||||||
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
aggregrateQreported = QualityUtils.phredScaleErrorRate( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations()) );
|
||||||
|
globalDeltaQ = globalDeltaQDatum.empiricalQualDouble( SMOOTHING ) - aggregrateQreported;
|
||||||
}
|
}
|
||||||
//System.out.printf("Global quality score shift is %.2f - %.2f = %.2f%n",
|
|
||||||
// globalDeltaQDatum.empiricalQualDouble( SMOOTHING ), ( dataManager.dataSumExpectedErrors.get( newKey ) / ((double) globalDeltaQDatum.getNumObservations())), globalDeltaQ);
|
|
||||||
|
|
||||||
newKey = new ArrayList<Comparable<?>>();
|
newKey = new ArrayList<Comparable<?>>();
|
||||||
newKey.add( key.get(0) ); // read group
|
newKey.add( key.get(0) ); // read group
|
||||||
|
|
@ -212,6 +215,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
|
deltaQReported = deltaQReportedDatum.empiricalQualDouble( SMOOTHING ) - qualFromRead - globalDeltaQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double deltaQCovariates = 0.0;
|
double deltaQCovariates = 0.0;
|
||||||
RecalDatum deltaQCovariateDatum;
|
RecalDatum deltaQCovariateDatum;
|
||||||
for( int iii = 2; iii < key.size(); iii++ ) {
|
for( int iii = 2; iii < key.size(); iii++ ) {
|
||||||
|
|
@ -228,10 +232,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||||
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
|
byte newQualityByte = QualityUtils.boundQual( (int)Math.round(newQuality), QualityUtils.MAX_REASONABLE_Q_SCORE );
|
||||||
|
|
||||||
|
|
||||||
//System.out.println( "base quality score calculated: " + key +
|
|
||||||
// String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
|
||||||
|
|
||||||
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
if( newQualityByte <= 0 && newQualityByte >= QualityUtils.MAX_REASONABLE_Q_SCORE ) {
|
||||||
throw new StingException( "Illegal base quality score calculated: " + key +
|
throw new StingException( "Illegal base quality score calculated: " + key +
|
||||||
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
String.format( " => %d + %.2f + %.2f + %.2f = %d", qualFromRead, globalDeltaQ, deltaQReported, deltaQCovariates, newQualityByte ) );
|
||||||
|
|
@ -260,7 +260,4 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
|
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void onTraversalDone( SAMFileWriter reduceResult ) {
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue