From 933823c8bcf437446c1549bf1ca5fa60e2dfbf4f Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 16 Mar 2010 19:45:02 +0000 Subject: [PATCH] Removed the StingException when mkdir fails for Sendu in AnalyzeCovariates. Incremental updates to VariantOptimizer. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3013 348d0f76-0448-11de-a6fe-93d51630548a --- R/plot_OptimizationCurve.R | 33 ++++++ .../analyzecovariates/AnalyzeCovariates.java | 3 +- .../variantoptimizer/VariantDataManager.java | 12 +- .../variantoptimizer/VariantDatum.java | 1 - .../VariantGaussianMixtureModel.java | 109 +++++++++--------- .../VariantNearestNeighborsModel.java | 8 +- .../variantoptimizer/VariantOptimizer.java | 34 ++++-- 7 files changed, 132 insertions(+), 68 deletions(-) create mode 100755 R/plot_OptimizationCurve.R diff --git a/R/plot_OptimizationCurve.R b/R/plot_OptimizationCurve.R new file mode 100755 index 000000000..23705b79c --- /dev/null +++ b/R/plot_OptimizationCurve.R @@ -0,0 +1,33 @@ +#!/broad/tools/apps/R-2.6.0/bin/Rscript + +args <- commandArgs(TRUE) +verbose = TRUE + +input = args[1] +targetTITV = as.numeric(args[2]) + + +data = read.table(input,sep=",",head=T) +maxVars = max(data$numKnown, data$numNovel) +maxTITV = max(data$knownTITV[is.finite(data$knownTITV) & data$numKnown>2000], data$novelTITV[is.finite(data$novelTITV) & data$numNovel > 2000], targetTITV) +minTITV = min(data$knownTITV[length(data$knownTITV)], data$novelTITV[length(data$novelTITV)], targetTITV) + + +outfile = paste(input, ".optimizationCurve.pdf", sep="") +pdf(outfile, height=7, width=8) + +par(mar=c(4,4,1,4),cex=1.3) +plot(data$pCut, data$knownTITV, axes=F,xlab="Keep variants with p(true) > X",ylab="",ylim=c(minTITV,maxTITV),col="Blue",pch=20) +points(data$pCut, data$novelTITV,,col="DarkBlue",pch=20) +abline(h=targetTITV,lty=3,col="Blue") +axis(side=2,col="DarkBlue") +axis(side=1) +mtext("Ti/Tv Ratio", side=2, line=2, col="blue",cex=1.4) +legend("left", c("Known Ti/Tv","Novel Ti/Tv"), col=c("Blue","DarkBlue"), pch=c(20,20),cex=0.7) +par(new=T) +plot(data$pCut, data$numKnown, axes=F,xlab="",ylab="",ylim=c(0,maxVars),col="Green",pch=20) +points(data$pCut, data$numNovel,col="DarkGreen",pch=20) +axis(side=4,col="DarkGreen") +mtext("Number of Variants", side=4, line=2, col="DarkGreen",cex=1.4) +legend("topright", c("Known","Novel"), col=c("Green","DarkGreen"), pch=c(20,20),cex=0.7) +dev.off() \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java index 9158cdda4..8c437d285 100755 --- a/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java +++ b/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java @@ -84,7 +84,8 @@ class AnalyzeCovariatesCLP extends CommandLineProgram { try { Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR); } catch (IOException e) { - throw new RuntimeException("Couldn't create directory: " + OUTPUT_DIR); + System.out.println("Couldn't create directory: " + OUTPUT_DIR); + System.out.println("User is responsible for making sure the output directory exists."); } if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; } if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDataManager.java index 76da26164..492881ae8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDataManager.java @@ -3,6 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; import org.broadinstitute.sting.utils.ExpandingArrayList; import org.broadinstitute.sting.utils.StingException; +import java.io.PrintStream; + /* * Copyright (c) 2010 The Broad Institute * @@ -39,6 +41,7 @@ public class VariantDataManager { public final int numVariants; public final int numAnnotations; public final double[] varianceVector; + public final double[] meanVector; public boolean isNormalized; private final ExpandingArrayList annotationKeys; @@ -52,6 +55,7 @@ public class VariantDataManager { if( numAnnotations <= 0 ) { throw new StingException( "There are zero annotations! (or possibly a problem with integer overflow)" ); } + meanVector = new double[numAnnotations]; varianceVector = new double[numAnnotations]; isNormalized = false; annotationKeys = _annotationKeys; @@ -68,7 +72,7 @@ public class VariantDataManager { if( theSTD < 1E-8 ) { throw new StingException("Zero variance is a problem: standard deviation = " + theSTD); } - + meanVector[jjj] = theMean; varianceVector[jjj] = theSTD * theSTD; for( int iii = 0; iii < numVariants; iii++ ) { data[iii].annotations[jjj] = ( data[iii].annotations[jjj] - theMean ) / theSTD; @@ -95,4 +99,10 @@ public class VariantDataManager { return Math.sqrt( sum ); } +// public void printClusterFileHeader( PrintStream outputFile ) { +// for( int jjj = 0; jjj < numAnnotations; jjj++ ) { +// outputFile.println("@ANNOTATION," + (jjj == numAnnotations-1 ? "QUAL" : annotationKeys.get(jjj)) + "," + meanVector[jjj] + "," + varianceVector[jjj]); +// } +// } + } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDatum.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDatum.java index 7de62a898..83d3e83a5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDatum.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantDatum.java @@ -36,5 +36,4 @@ public class VariantDatum { public double[] originalAnnotations; public boolean isTransition; public boolean isKnown; - public boolean isFiltered; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantGaussianMixtureModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantGaussianMixtureModel.java index f39bb7b09..b84cb5fdc 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantGaussianMixtureModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantGaussianMixtureModel.java @@ -43,13 +43,13 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel private final double MIN_PROB = 1E-30; private final double MIN_SUM_PROB = 1E-20; - private final double[][] mu; - private final double[][] sigma; + private final double[][] mu; // The means for the clusters + private final double[][] sigma; // The variances for the clusters, sigma is really sigma^2 private final double[] pCluster; private final int[] numMaxClusterKnown; private final int[] numMaxClusterNovel; private final double[] clusterTITV; - private final double[] clusterTruePositiveRate; + private final double[] clusterTruePositiveRate; // The true positive rate implied by the cluster's Ti/Tv ratio public VariantGaussianMixtureModel( VariantDataManager _dataManager, final double _targetTITV, final int _numGaussians, final int _numIterations ) { super( _dataManager, _targetTITV ); @@ -76,15 +76,18 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } VariantDatum[] data; - if( numNovel * 2 * 1.3 < dataManager.numVariants ) { - data = new VariantDatum[numNovel*2]; + // Grab a set of data that is all of the novel variants plus 1.5x as many known variants drawn at random + // If there are almost as many novels as known, simply use all the variants + final int numSubset = (int)Math.floor(numNovel*2.5); + if( numSubset * 1.3 < dataManager.numVariants ) { + data = new VariantDatum[numSubset]; int iii = 0; for( final VariantDatum datum : dataManager.data ) { if( !datum.isKnown ) { data[iii++] = datum; } } - while( iii < numNovel*2 ) { // grab an equal number of known variants at random + while( iii < numSubset ) { // grab an equal number of known variants at random final VariantDatum datum = dataManager.data[rand.nextInt(dataManager.numVariants)]; if( datum.isKnown ) { data[iii++] = datum; @@ -95,11 +98,11 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } System.out.println("Clustering with " + numNovel + " novel variants and " + (data.length - numNovel) + " known variants..."); - if( data.length == dataManager.numVariants ) { System.out.println(" (used all variants since 2*numNovel is so large compared to the full set) "); } + if( data.length == dataManager.numVariants ) { System.out.println(" (used all variants since 2.5*numNovel is so large compared to the full set) "); } createClusters( data ); // Using a subset of the data System.out.println("Printing out cluster parameters..."); printClusters( outputPrefix ); - System.out.println("Applying clusters to all the variants..."); + System.out.println("Applying clusters to all variants..."); applyClusters( dataManager.data, outputPrefix ); // Using all the data } @@ -108,7 +111,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel final int numVariants = data.length; final int numAnnotations = data[0].annotations.length; - final double[][] pVarInCluster = new double[numGaussians][numVariants]; + final double[][] pVarInCluster = new double[numGaussians][numVariants]; // Probability that the variant is in that cluster = simply evaluate the multivariate Gaussian final double[] probTi = new double[numGaussians]; final double[] probTv = new double[numGaussians]; @@ -123,11 +126,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel numMaxClusterKnown[kkk] = 0; numMaxClusterNovel[kkk] = 0; pCluster[kkk] = 1.0 / ((double) numGaussians); - //final double[] randMu = new double[numAnnotations]; - //for( int jjj = 0; jjj < numAnnotations; jjj++ ) { - // randMu[jjj] = data[rand.nextInt(numVariants)].annotations[jjj]; - //} - mu[kkk] = data[rand.nextInt(numVariants)].annotations; //randMu; + mu[kkk] = data[rand.nextInt(numVariants)].annotations; final double[] randSigma = new double[numAnnotations]; if( dataManager.isNormalized ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) { @@ -141,6 +140,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel sigma[kkk] = randSigma; } + // The EM loop for( int ttt = 0; ttt < numIterations; ttt++ ) { ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -165,6 +165,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel probTi[kkk] = 0.0; probTv[kkk] = 0.0; } + // Use the cluster's probabilistic Ti/Tv ratio as the indication of the cluster's true positive rate for( int iii = 0; iii < numVariants; iii++ ) { if( data[iii].isTransition ) { // transition for( int kkk = 0; kkk < numGaussians; kkk++ ) { @@ -176,6 +177,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } } + // Calculate which cluster has the maximum probability for this variant for use as a metric of how well clustered the data is double maxProb = pVarInCluster[0][iii]; int maxCluster = 0; for( int kkk = 1; kkk < numGaussians; kkk++ ) { @@ -202,7 +204,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel int clusterNumber = 0; final int numAnnotations = mu[0].length; for( int kkk = 0; kkk < numGaussians; kkk++ ) { - if( numMaxClusterKnown[kkk] + numMaxClusterNovel[kkk] >= 2000 ) { + //BUGBUG: only output the good clusters here to protect against over-fitting? + //if( numMaxClusterKnown[kkk] + numMaxClusterNovel[kkk] >= 2000 ) { outputFile.print(clusterNumber + ","); outputFile.print(numMaxClusterKnown[kkk] + ","); outputFile.print(numMaxClusterNovel[kkk] + ","); @@ -216,7 +219,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } outputFile.println(-1); clusterNumber++; - } + //} } } catch (Exception e) { e.printStackTrace(); @@ -227,11 +230,10 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel public final void applyClusters( final VariantDatum[] data, final String outputPrefix ) { final int numVariants = data.length; - final int numAnnotations = data[0].annotations.length; final double[] pTrueVariant = new double[numVariants]; + final boolean[] markedVariant = new boolean[numVariants]; final double[] pVarInCluster = new double[numGaussians]; - final int[] clusterAssignment = new int[numVariants]; ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Evaluate each variant using the probability of being in each cluster and that cluster's true positive rate @@ -240,50 +242,51 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel evaluateGaussiansForSingleVariant( data[iii], pVarInCluster ); pTrueVariant[iii] = 0.0; + markedVariant[iii] = false; for( int kkk = 0; kkk < numGaussians; kkk++ ) { pTrueVariant[iii] += pVarInCluster[kkk] * clusterTruePositiveRate[kkk]; } - - double maxProb = -1.0;//pVarInCluster[0][iii]; - int maxCluster = -1; - for( int kkk = 0; kkk < numGaussians; kkk++ ) { - if( numMaxClusterKnown[kkk] + numMaxClusterNovel[kkk] >= 2000 && pVarInCluster[kkk] > maxProb ) { - maxProb = pVarInCluster[kkk]; - maxCluster = kkk; - } - } - clusterAssignment[iii] = maxCluster; } PrintStream outputFile = null; try { - outputFile = new PrintStream( outputPrefix + ".data" ); + outputFile = new PrintStream( outputPrefix + ".dat" ); } catch (Exception e) { e.printStackTrace(); System.exit(-1); } - for(int iii = 0; iii < numVariants; iii++) { - outputFile.print(pTrueVariant[iii] + ","); - outputFile.print(clusterAssignment[iii] + ","); - for(int jjj = 0; jjj < numAnnotations; jjj++ ) { - outputFile.print(data[iii].originalAnnotations[jjj] + ","); - } - outputFile.println( (data[iii].isTransition ? 1 : 0) - + "," + (data[iii].isKnown? 1 : 0) - + "," + (data[iii].isFiltered ? 1 : 0)); - } - try { - outputFile = new PrintStream( outputPrefix + ".optimize" ); - } catch (Exception e) { - e.printStackTrace(); - System.exit(-1); - } - for(int iii = 0; iii < numVariants; iii++) { - outputFile.print(String.format("%.4f",pTrueVariant[iii]) + ","); - outputFile.println( (data[iii].isTransition ? 1 : 0) - + "," + (data[iii].isKnown? 1 : 0) - + "," + (data[iii].isFiltered ? 1 : 0)); + int numKnown = 0; + int numNovel = 0; + int numKnownTi = 0; + int numKnownTv = 0; + int numNovelTi = 0; + int numNovelTv = 0; + outputFile.println("pCut,numKnown,numNovel,knownTITV,novelTITV"); + for( double pCut = 1.0; pCut >= 0.0; pCut -= 0.001 ) { + for( int iii = 0; iii < numVariants; iii++ ) { + if( !markedVariant[iii] ) { + if( pTrueVariant[iii] >= pCut ) { + markedVariant[iii] = true; + if( data[iii].isKnown ) { // known + numKnown++; + if( data[iii].isTransition ) { // transition + numKnownTi++; + } else { // transversion + numKnownTv++; + } + } else { // novel + numNovel++; + if( data[iii].isTransition ) { // transition + numNovelTi++; + } else { // transversion + numNovelTv++; + } + } + } + } + } + outputFile.println( pCut + "," + numKnown + "," + numNovel + "," + ( ((double)numKnownTi) / ((double)numKnownTv) ) + "," + ( ((double)numNovelTi) / ((double)numNovelTv) )); } } @@ -324,6 +327,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel double sumProb = 0.0; for( int kkk = 0; kkk < numGaussians; kkk++ ) { + //BUGBUG: only use the good clusters here to protect against over-fitting? double sum = 0.0; for( int jjj = 0; jjj < numAnnotations; jjj++ ) { sum += ( (datum.annotations[jjj] - mu[kkk][jjj]) * (datum.annotations[jjj] - mu[kkk][jjj]) ) @@ -398,7 +402,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel // Clean up extra big or extra small clusters for( int kkk = 0; kkk < numGaussians; kkk++ ) { if( pCluster[kkk] > 0.45 ) { // This is a very large cluster compared to all the others - System.out.println("Very large cluster!"); final int numToReplace = 4; final double[] savedSigma = sigma[kkk]; for( int rrr = 0; rrr < numToReplace; rrr++ ) { @@ -445,15 +448,15 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel } } + // Replace small clusters with another random draw from the dataset for( int kkk = 0; kkk < numGaussians; kkk++ ) { if( pCluster[kkk] < 0.05 * (1.0 / ((double) numGaussians)) ) { // This is a very small cluster compared to all the others - System.out.println("Very small cluster!"); pCluster[kkk] = 1.0 / ((double) numGaussians); mu[kkk] = data[rand.nextInt(numVariants)].annotations; final double[] randSigma = new double[numAnnotations]; if( dataManager.isNormalized ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) { - randSigma[jjj] = 0.6 + 0.4 * rand.nextDouble(); // Explore a wider range + randSigma[jjj] = 0.6 + 0.4 * rand.nextDouble(); // Explore a wider range of possible sigma values since we are tossing out clusters anyway } } else { // BUGBUG: if not normalized then the varianceVector hasn't been calculated --> null pointer for( int jjj = 0; jjj < numAnnotations; jjj++ ) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java index 39998c05a..f6fcd50fd 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantNearestNeighborsModel.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broadinstitute.sting.utils.StingException; + import java.io.PrintStream; /* @@ -44,6 +46,8 @@ public final class VariantNearestNeighborsModel extends VariantOptimizationModel public void run( final String outputPrefix ) { + throw new StingException( "Nearest Neighbors model hasn't been updated yet." ); + /* final int numVariants = dataManager.numVariants; final double[] pTrueVariant = new double[numVariants]; @@ -67,8 +71,8 @@ public final class VariantNearestNeighborsModel extends VariantOptimizationModel for(int iii = 0; iii < numVariants; iii++) { outputFile.print(String.format("%.4f",pTrueVariant[iii]) + ","); outputFile.println( (dataManager.data[iii].isTransition ? 1 : 0) - + "," + (dataManager.data[iii].isKnown? 1 : 0) - + "," + (dataManager.data[iii].isFiltered ? 1 : 0) ); + + "," + (dataManager.data[iii].isKnown? 1 : 0)); } + */ } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java index 5b578325c..c108282f3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantOptimizer.java @@ -10,6 +10,8 @@ import org.broadinstitute.sting.utils.ExpandingArrayList; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.cmdLine.Argument; +import java.io.IOException; + /* * Copyright (c) 2010 The Broad Institute * @@ -51,17 +53,15 @@ public class VariantOptimizer extends RodWalker // Command Line Arguments ///////////////////////////// @Argument(fullName="target_titv", shortName="titv", doc="The target Ti/Tv ratio towards which to optimize. (~~2.2 for whole genome experiments)", required=true) - private double TARGET_TITV = 2.12; - //@Argument(fullName="filter_output", shortName="filter", doc="If specified the optimizer will not only update the QUAL field of the output VCF file but will also filter the variants", required=false) - //private boolean FILTER_OUTPUT = false; + private double TARGET_TITV = 2.1; @Argument(fullName="ignore_input_filters", shortName="ignoreFilters", doc="If specified the optimizer will use variants even if the FILTER column is marked in the VCF file", required=false) private boolean IGNORE_INPUT_FILTERS = false; @Argument(fullName="exclude_annotation", shortName="exclude", doc="The names of the annotations which should be excluded from the calculations", required=false) private String[] EXCLUDED_ANNOTATIONS = null; @Argument(fullName="force_annotation", shortName="force", doc="The names of the annotations which should be forced into the calculations even if they aren't present in every variant", required=false) private String[] FORCED_ANNOTATIONS = null; - @Argument(fullName="output", shortName="output", doc="The output file name", required=false) - private String OUTPUT_FILE = "optimizer.data"; + @Argument(fullName="output_prefix", shortName="output", doc="The output prefix added to output cluster file name and optimization curve pdf file name", required=false) + private String OUTPUT_PREFIX = "optimizer"; @Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian Mixture model", required=false) private int NUM_GAUSSIANS = 32; @Argument(fullName="numIterations", shortName="nI", doc="The number of iterations to be performed in the Gaussian Mixture model", required=false) @@ -70,7 +70,10 @@ public class VariantOptimizer extends RodWalker private int NUM_KNN = 2000; @Argument(fullName = "optimization_model", shortName = "om", doc = "Optimization calculation model to employ -- GAUSSIAN_MIXTURE_MODEL is currently the default, while K_NEAREST_NEIGHBORS is also available for small callsets.", required = false) private VariantOptimizationModel.Model OPTIMIZATION_MODEL = VariantOptimizationModel.Model.GAUSSIAN_MIXTURE_MODEL; - + @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false) + private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript"; + @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false) + private String PATH_TO_RESOURCES = "R/"; ///////////////////////////// // Private Member Variables @@ -87,7 +90,7 @@ public class VariantOptimizer extends RodWalker //--------------------------------------------------------------------------------------------------------------- public void initialize() { - + if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; } } //--------------------------------------------------------------------------------------------------------------- @@ -138,7 +141,7 @@ public class VariantOptimizer extends RodWalker value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE; } } catch( NumberFormatException e ) { - // do nothing, default value is 0.0, + // do nothing, default value is 0.0 } annotationValues[iii++] = value; } @@ -150,7 +153,6 @@ public class VariantOptimizer extends RodWalker variantDatum.annotations = annotationValues; variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0; variantDatum.isKnown = !vc.getAttribute("ID").equals("."); - variantDatum.isFiltered = vc.isFiltered(); // BUGBUG: This field won't be needed in the final version. mapList.add( variantDatum ); } } @@ -195,8 +197,20 @@ public class VariantOptimizer extends RodWalker default: throw new StingException("Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS"); } + + theModel.run( OUTPUT_PREFIX ); - theModel.run( OUTPUT_FILE ); + // Execute Rscript command to plot the optimization curve + // Print out the command line to make it clear to the user what is being executed and how one might modify it + final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + OUTPUT_PREFIX + ".dat" + " " + TARGET_TITV; + System.out.println( rScriptCommandLine ); + + // Execute the RScript command to plot the table of truth values + try { + Runtime.getRuntime().exec( rScriptCommandLine ); + } catch ( IOException e ) { + throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine ); + } } }