Encapsulating annotation decoding function in order to use same fixed random seed in both VariantOptimizer and ApplyVariantClusters
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
32389dc0a9
commit
6efd05831b
|
|
@ -180,7 +180,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
}
|
||||
variantDatum.isKnown = isKnown;
|
||||
|
||||
final double pTrue = theModel.evaluateVariant( vc.getAttributes(), vc.getPhredScaledQual() );
|
||||
final double pTrue = theModel.evaluateVariant( vc );
|
||||
double recalQual = 400.0 * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) );
|
||||
|
||||
if( variantDatum.isKnown && KNOWN_VAR_QUAL_PRIOR > 0.1 ) { // only use the known prior if the value is specified (meaning not equal to zero)
|
||||
|
|
@ -193,7 +193,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
|
||||
vcf.addInfoField("OQ", ((Double)vc.getPhredScaledQual()).toString() );
|
||||
vcf.setQual( variantDatum.qual );
|
||||
vcf.setFilterString(VCFRecord.UNFILTERED);
|
||||
vcf.setFilterString(VCFRecord.UNFILTERED); //BUGBUG: Set to passes filters
|
||||
vcfWriter.addRecord( vcf );
|
||||
|
||||
} else { // not a SNP or is filtered so just dump it out to the VCF file
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
@ -35,7 +36,6 @@ import Jama.*;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
|
@ -52,8 +52,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
public final VariantDataManager dataManager;
|
||||
private final int numGaussians;
|
||||
private final int numIterations;
|
||||
private final long RANDOM_SEED = 91801305;
|
||||
private final Random rand = new Random( RANDOM_SEED );
|
||||
private final static long RANDOM_SEED = 91801305;
|
||||
private final static Random rand = new Random( RANDOM_SEED );
|
||||
private final double MIN_PROB = 1E-7;
|
||||
private final double MIN_SIGMA = 1E-5;
|
||||
private final double MIN_DETERMINANT = 1E-5;
|
||||
|
|
@ -428,31 +428,31 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
}
|
||||
}
|
||||
|
||||
public final double evaluateVariant( final Map<String,Object> annotationMap, final double qualityScore ) {
|
||||
public static double decodeAnnotation( final String annotationKey, final VariantContext vc ) {
|
||||
double value = 0.0;
|
||||
if( annotationKey.equals("AB") && !vc.getAttributes().containsKey(annotationKey) ) {
|
||||
value = (0.5 - 0.005) + (0.01 * rand.nextDouble()); // HomVar calls don't have an allele balance
|
||||
} else if( annotationKey.equals("QUAL") ) {
|
||||
value = vc.getPhredScaledQual();
|
||||
} else {
|
||||
try {
|
||||
value = Double.parseDouble( (String)vc.getAttribute( annotationKey, "0.0" ) );
|
||||
if( Double.isInfinite(value) ) {
|
||||
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
|
||||
}
|
||||
} catch( NumberFormatException e ) {
|
||||
// do nothing, default value is 0.0
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
public final double evaluateVariant( final VariantContext vc ) {
|
||||
final double[] pVarInCluster = new double[numGaussians];
|
||||
final double[] annotations = new double[dataManager.numAnnotations];
|
||||
|
||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||
double value = 0.0;
|
||||
final String annotationKey = dataManager.annotationKeys.get(jjj);
|
||||
if( annotationKey.equals("QUAL") ) {
|
||||
value = qualityScore;
|
||||
} else if( annotationKey.equals("AB") && !annotationMap.containsKey(annotationKey) ) {
|
||||
value = (0.5 - 0.005) + (0.01 * Math.random()); // HomVar calls don't have an allele balance
|
||||
} else {
|
||||
try {
|
||||
final Object stringValue = annotationMap.get( annotationKey );
|
||||
if( stringValue != null ) {
|
||||
value = Double.parseDouble( stringValue.toString() );
|
||||
if( Double.isInfinite(value) ) {
|
||||
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
|
||||
}
|
||||
}
|
||||
} catch( NumberFormatException e ) {
|
||||
// do nothing, default value is 0.0
|
||||
}
|
||||
}
|
||||
|
||||
final double value = decodeAnnotation( dataManager.annotationKeys.get(jjj), vc );
|
||||
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
return mapList;
|
||||
}
|
||||
|
||||
double annotationValues[] = new double[annotationKeys.size()];
|
||||
final double annotationValues[] = new double[annotationKeys.size()];
|
||||
|
||||
// todo -- do we really need to support multiple tracks -- logic is cleaner without this case -- what's the use case?
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||
|
|
@ -137,23 +137,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
int iii = 0;
|
||||
for( final String key : annotationKeys ) {
|
||||
|
||||
double value = 0.0;
|
||||
if( key.equals("AB") && !vc.getAttributes().containsKey(key) ) {
|
||||
value = (0.5 - 0.005) + (0.01 * Math.random()); // HomVar calls don't have an allele balance
|
||||
} else if( key.equals("QUAL") ) {
|
||||
value = vc.getPhredScaledQual();
|
||||
} else {
|
||||
try {
|
||||
value = Double.parseDouble( (String)vc.getAttribute( key, "0.0" ) );
|
||||
if( Double.isInfinite(value) ) {
|
||||
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
|
||||
}
|
||||
} catch( NumberFormatException e ) {
|
||||
// do nothing, default value is 0.0
|
||||
}
|
||||
}
|
||||
annotationValues[iii++] = value;
|
||||
annotationValues[iii++] = VariantGaussianMixtureModel.decodeAnnotation( key, vc );
|
||||
}
|
||||
|
||||
final VariantDatum variantDatum = new VariantDatum();
|
||||
|
|
@ -234,7 +218,6 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
final Process p = Runtime.getRuntime().exec( rScriptCommandLine );
|
||||
p.waitFor();
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
throw new StingException(e.getMessage());
|
||||
} catch ( IOException e ) {
|
||||
throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine );
|
||||
|
|
|
|||
Loading…
Reference in New Issue