Encapsulating annotation decoding function in order to use same fixed random seed in both VariantOptimizer and ApplyVariantClusters

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-05-14 20:03:38 +00:00
parent 32389dc0a9
commit 6efd05831b
3 changed files with 28 additions and 45 deletions

View File

@ -180,7 +180,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
}
variantDatum.isKnown = isKnown;
final double pTrue = theModel.evaluateVariant( vc.getAttributes(), vc.getPhredScaledQual() );
final double pTrue = theModel.evaluateVariant( vc );
double recalQual = 400.0 * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) );
if( variantDatum.isKnown && KNOWN_VAR_QUAL_PRIOR > 0.1 ) { // only use the known prior if the value is specified (meaning not equal to zero)
@ -193,7 +193,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
vcf.addInfoField("OQ", ((Double)vc.getPhredScaledQual()).toString() );
vcf.setQual( variantDatum.qual );
vcf.setFilterString(VCFRecord.UNFILTERED);
vcf.setFilterString(VCFRecord.UNFILTERED); //BUGBUG: Set to passes filters
vcfWriter.addRecord( vcf );
} else { // not a SNP or is filtered so just dump it out to the VCF file

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.text.XReadLines;
@ -35,7 +36,6 @@ import Jama.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;
@ -52,8 +52,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
public final VariantDataManager dataManager;
private final int numGaussians;
private final int numIterations;
private final long RANDOM_SEED = 91801305;
private final Random rand = new Random( RANDOM_SEED );
private final static long RANDOM_SEED = 91801305;
private final static Random rand = new Random( RANDOM_SEED );
private final double MIN_PROB = 1E-7;
private final double MIN_SIGMA = 1E-5;
private final double MIN_DETERMINANT = 1E-5;
@ -428,31 +428,31 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
}
}
public final double evaluateVariant( final Map<String,Object> annotationMap, final double qualityScore ) {
public static double decodeAnnotation( final String annotationKey, final VariantContext vc ) {
double value = 0.0;
if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) {
value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance
} else if( annotationKey.equals("QUAL") ) {
value = vc.getPhredScaledQual();
} else {
try {
value = Double.parseDouble( (String)vc.getAttribute( annotationKey, "0.0" ) );
if( Double.isInfinite(value) ) {
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
}
} catch( NumberFormatException e ) {
// do nothing, default value is 0.0
}
}
return value;
}
public final double evaluateVariant( final VariantContext vc ) {
final double[] pVarInCluster = new double[numGaussians];
final double[] annotations = new double[dataManager.numAnnotations];
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
double value = 0.0;
final String annotationKey = dataManager.annotationKeys.get(jjj);
if( annotationKey.equals("QUAL") ) {
value = qualityScore;
} else if( annotationKey.equals("AB") && !annotationMap.containsKey(annotationKey) ) {
value = (0.5 - 0.005) + (0.01 * Math.random()); // HomVar calls don't have an allele balance
} else {
try {
final Object stringValue = annotationMap.get( annotationKey );
if( stringValue != null ) {
value = Double.parseDouble( stringValue.toString() );
if( Double.isInfinite(value) ) {
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
}
}
} catch( NumberFormatException e ) {
// do nothing, default value is 0.0
}
}
final double value = decodeAnnotation( dataManager.annotationKeys.get(jjj), vc );
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
}

View File

@ -129,7 +129,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
return mapList;
}
double annotationValues[] = new double[annotationKeys.size()];
final double annotationValues[] = new double[annotationKeys.size()];
// todo -- do we really need to support multiple tracks -- logic is cleaner without this case -- what's the use case?
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
@ -137,23 +137,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
int iii = 0;
for( final String key : annotationKeys ) {
double value = 0.0;
if( key.equals("AB") && !vc.getAttributes().containsKey(key) ) {
value = (0.5 - 0.005) + (0.01 * Math.random()); // HomVar calls don't have an allele balance
} else if( key.equals("QUAL") ) {
value = vc.getPhredScaledQual();
} else {
try {
value = Double.parseDouble( (String)vc.getAttribute( key, "0.0" ) );
if( Double.isInfinite(value) ) {
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
}
} catch( NumberFormatException e ) {
// do nothing, default value is 0.0
}
}
annotationValues[iii++] = value;
annotationValues[iii++] = VariantGaussianMixtureModel.decodeAnnotation( key, vc );
}
final VariantDatum variantDatum = new VariantDatum();
@ -234,7 +218,6 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
final Process p = Runtime.getRuntime().exec( rScriptCommandLine );
p.waitFor();
} catch (InterruptedException e) {
e.printStackTrace();
throw new StingException(e.getMessage());
} catch ( IOException e ) {
throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine );