support for -singleton_fp_rate arguments to variant recalibrator instead of the pop.gen. AF prior. Worth experimenting with Ryan.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3913 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-07-31 21:17:47 +00:00
parent 6dcb63888d
commit 7fab5c0a8f
2 changed files with 22 additions and 3 deletions

View File

@ -50,6 +50,11 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
protected final static Logger logger = Logger.getLogger(VariantGaussianMixtureModel.class); protected final static Logger logger = Logger.getLogger(VariantGaussianMixtureModel.class);
/**
* Est. FP rate for singleton calls. Used to estimate FP rate as a function of AC
*/
private double singletonFPRate = -1;
public final VariantDataManager dataManager; public final VariantDataManager dataManager;
private final int maxGaussians; private final int maxGaussians;
private final int maxIterations; private final int maxIterations;
@ -152,7 +157,9 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
alleleCountFactorArray = new double[alleleCountLines.size() + 1]; alleleCountFactorArray = new double[alleleCountLines.size() + 1];
for( final String line : alleleCountLines ) { for( final String line : alleleCountLines ) {
final String[] vals = line.split(","); final String[] vals = line.split(",");
alleleCountFactorArray[Integer.parseInt(vals[1])] = Double.parseDouble(vals[2]); int i = Integer.parseInt(vals[1]);
alleleCountFactorArray[i] = Double.parseDouble(vals[2]);
double oldACPrior = alleleCountFactorArray[i];
} }
int kkk = 0; int kkk = 0;
@ -315,8 +322,16 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
} }
} }
public void setSingletonFPRate(double rate) {
this.singletonFPRate = rate;
}
public final double getAlleleCountPrior( final int alleleCount ) { public final double getAlleleCountPrior( final int alleleCount ) {
if ( this.singletonFPRate == -1 )
return alleleCountFactorArray[alleleCount]; return alleleCountFactorArray[alleleCount];
else {
return 1 - Math.pow(singletonFPRate, alleleCount);
}
} }
private void initializeUsingKMeans( final VariantDatum[] data ) { private void initializeUsingKMeans( final VariantDatum[] data ) {

View File

@ -89,6 +89,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private String PATH_TO_RESOURCES = "R/"; private String PATH_TO_RESOURCES = "R/";
@Argument(fullName="quality_step", shortName="qStep", doc="Resolution in QUAL units for optimization and tranche calculations", required=false) @Argument(fullName="quality_step", shortName="qStep", doc="Resolution in QUAL units for optimization and tranche calculations", required=false)
private double QUAL_STEP = 0.1; private double QUAL_STEP = 0.1;
@Argument(fullName="singleton_fp_rate", shortName="fp_rate", doc="Prior expectation that a singleton call would be a FP", required=false)
private double SINGLETON_FP_RATE = -1;
///////////////////////////// /////////////////////////////
// Private Member Variables // Private Member Variables
@ -113,6 +115,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
switch (OPTIMIZATION_MODEL) { switch (OPTIMIZATION_MODEL) {
case GAUSSIAN_MIXTURE_MODEL: case GAUSSIAN_MIXTURE_MODEL:
theModel = new VariantGaussianMixtureModel( TARGET_TITV, CLUSTER_FILENAME, BACKOFF_FACTOR ); theModel = new VariantGaussianMixtureModel( TARGET_TITV, CLUSTER_FILENAME, BACKOFF_FACTOR );
if ( SINGLETON_FP_RATE != -1 )
theModel.setSingletonFPRate(SINGLETON_FP_RATE);
break; break;
//case K_NEAREST_NEIGHBORS: //case K_NEAREST_NEIGHBORS:
// theModel = new VariantNearestNeighborsModel( dataManager, TARGET_TITV, NUM_KNN ); // theModel = new VariantNearestNeighborsModel( dataManager, TARGET_TITV, NUM_KNN );