diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java index 45214b47e..39998c05a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java @@ -35,8 +35,11 @@ import java.io.PrintStream; public final class VariantNearestNeighborsModel extends VariantOptimizationModel { - public VariantNearestNeighborsModel( VariantDataManager _dataManager, final double _targetTITV ) { - super( _dataManager, _targetTITV ); + private final int numKNN; + + public VariantNearestNeighborsModel( VariantDataManager _dataManager, final double _targetTITV, final int _numKNN ) { + super( _dataManager, _targetTITV ); + numKNN = _numKNN; } public void run( final String outputPrefix ) { @@ -45,7 +48,7 @@ public final class VariantNearestNeighborsModel extends VariantOptimizationModel final double[] pTrueVariant = new double[numVariants]; - final VariantTree vTree = new VariantTree( 2000 ); + final VariantTree vTree = new VariantTree( numKNN ); vTree.createTreeFromData( dataManager.data ); System.out.println("Finished creating the kd-tree."); @@ -54,7 +57,18 @@ public final class VariantNearestNeighborsModel extends VariantOptimizationModel pTrueVariant[iii] = calcTruePositiveRateFromTITV( vTree.calcNeighborhoodTITV( dataManager.data[iii] ) ); } - //BUGBUG: need to output pTrueVariant and other metrics in this method - //return pTrueVariant; + PrintStream outputFile = null; + try { + outputFile = new PrintStream( outputPrefix + ".knn.optimize" ); + } catch (Exception e) { + e.printStackTrace(); + System.exit(-1); + } + for(int iii = 0; iii < numVariants; iii++) { + outputFile.print(String.format("%.4f",pTrueVariant[iii]) + ","); + outputFile.println( (dataManager.data[iii].isTransition ? 1 : 0) + + "," + (dataManager.data[iii].isKnown? 1 : 0) + + "," + (dataManager.data[iii].isFiltered ? 1 : 0) ); + } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizationModel.java index aee28491e..6d85c2f52 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizationModel.java @@ -32,6 +32,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; */ public abstract class VariantOptimizationModel implements VariantOptimizationInterface { + + public enum Model { + GAUSSIAN_MIXTURE_MODEL, + K_NEAREST_NEIGHBORS + } + protected final VariantDataManager dataManager; protected final double targetTITV; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java index c4ce9adf2..5b578325c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ExpandingArrayList; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; /* @@ -61,10 +62,14 @@ public class VariantOptimizer extends RodWalker private String[] FORCED_ANNOTATIONS = null; @Argument(fullName="output", shortName="output", doc="The output file name", required=false) private String OUTPUT_FILE = "optimizer.data"; - @Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian mixture model", required=false) + @Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian Mixture model", required=false) private int NUM_GAUSSIANS = 32; - @Argument(fullName="numIterations", shortName="nI", doc="The number of iterations to be performed in the Gaussian mixture model", required=false) - private int NUM_ITERATIONS = 5; //BUGBUG: should automatically decided when to stop by looking at how entropy changes with each iteration + @Argument(fullName="numIterations", shortName="nI", doc="The number of iterations to be performed in the Gaussian Mixture model", required=false) + private int NUM_ITERATIONS = 10; + @Argument(fullName="knn", shortName="knn", doc="The number of nearest neighbors to be used in the k-Nearest Neighbors model", required=false) + private int NUM_KNN = 2000; + @Argument(fullName = "optimization_model", shortName = "om", doc = "Optimization calculation model to employ -- GAUSSIAN_MIXTURE_MODEL is currently the default, while K_NEAREST_NEIGHBORS is also available for small callsets.", required = false) + private VariantOptimizationModel.Model OPTIMIZATION_MODEL = VariantOptimizationModel.Model.GAUSSIAN_MIXTURE_MODEL; ///////////////////////////// @@ -179,8 +184,19 @@ public class VariantOptimizer extends RodWalker dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ] // Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it - final VariantOptimizationModel gmm = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS ); - gmm.run( OUTPUT_FILE ); + VariantOptimizationModel theModel; + switch (OPTIMIZATION_MODEL) { + case GAUSSIAN_MIXTURE_MODEL: + theModel = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS ); + break; + case K_NEAREST_NEIGHBORS: + theModel = new VariantNearestNeighborsModel( dataManager, TARGET_TITV, NUM_KNN ); + break; + default: + throw new StingException("Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS"); + } + + theModel.run( OUTPUT_FILE ); } }