Variant Optimizer accepts a dbSNP rod arugment to use in determining known/novel status as opposed to using the rsID in the vcf record. VO generates plots of annotation values used in clustering broken out by knowns and novels. Useful for showing which annotations are approximately Gaussian.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3332 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
76efa757f0
commit
33a9549896
|
|
@ -0,0 +1,19 @@
|
||||||
|
#!/broad/tools/apps/R-2.6.0/bin/Rscript
|
||||||
|
|
||||||
|
args <- commandArgs(TRUE)
|
||||||
|
verbose = TRUE
|
||||||
|
|
||||||
|
input = args[1]
|
||||||
|
annotationName = args[2]
|
||||||
|
|
||||||
|
data = read.table(input,sep=",",head=T)
|
||||||
|
|
||||||
|
outfile = paste(input, ".ClusterReport.pdf", sep="")
|
||||||
|
pdf(outfile, height=7, width=8)
|
||||||
|
|
||||||
|
maxP = max(data$knownDist, data$novelDist)
|
||||||
|
|
||||||
|
plot(data$annotationValue, data$knownDist, ylim=c(0,maxP),type="b",col="orange",lwd=2,xlab=annotationName,ylab="fraction of SNPs")
|
||||||
|
points(data$annotationValue, data$novelDist, type="b",col="blue",lwd=2)
|
||||||
|
legend('topright', c('knowns','novels'),lwd=2,col=c("orange","blue"))
|
||||||
|
dev.off()
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
|
|
@ -89,6 +90,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
||||||
private VCFWriter vcfWriter;
|
private VCFWriter vcfWriter;
|
||||||
private Set<String> ignoreInputFilterSet = null;
|
private Set<String> ignoreInputFilterSet = null;
|
||||||
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
|
private final ArrayList<String> ALLOWED_FORMAT_FIELDS = new ArrayList<String>();
|
||||||
|
private boolean usingDBSNP = false;
|
||||||
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -127,18 +129,25 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
||||||
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
|
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
|
||||||
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
|
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
|
||||||
final TreeSet<String> samples = new TreeSet<String>();
|
final TreeSet<String> samples = new TreeSet<String>();
|
||||||
List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
for ( final ReferenceOrderedDataSource source : dataSources ) {
|
||||||
RMDTrack rod = source.getReferenceOrderedData();
|
final RMDTrack rod = source.getReferenceOrderedData();
|
||||||
if ( rod.getType().equals(VCFCodec.class) ) {
|
if ( rod.getType().equals(VCFCodec.class) ) {
|
||||||
VCFReader reader = new VCFReader(rod.getFile());
|
final VCFReader reader = new VCFReader(rod.getFile());
|
||||||
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
||||||
samples.addAll(vcfSamples);
|
samples.addAll(vcfSamples);
|
||||||
reader.close();
|
reader.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
vcfWriter.writeHeader(vcfHeader);
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
|
||||||
|
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||||
|
final RMDTrack rod = source.getReferenceOrderedData();
|
||||||
|
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||||
|
usingDBSNP = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -155,13 +164,22 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
||||||
}
|
}
|
||||||
|
|
||||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||||
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
|
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||||
if( vc != null && vc.isSNP() ) {
|
final VCFRecord vcf = VariantContextAdaptors.toVCF(vc, ref.getBase(), ALLOWED_FORMAT_FIELDS, false, false);
|
||||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||||
final VariantDatum variantDatum = new VariantDatum();
|
final VariantDatum variantDatum = new VariantDatum();
|
||||||
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||||
variantDatum.isKnown = !vc.getAttribute("ID").equals(".");
|
boolean isKnown = !vc.getAttribute("ID").equals(".");
|
||||||
|
if(usingDBSNP) {
|
||||||
|
isKnown = false;
|
||||||
|
for( VariantContext dbsnpVC : tracker.getVariantContexts(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, null, context.getLocation(), false, false) ) {
|
||||||
|
if(dbsnpVC != null && dbsnpVC.isSNP()) {
|
||||||
|
isKnown=true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
variantDatum.isKnown = isKnown;
|
||||||
|
|
||||||
final double pTrue = theModel.evaluateVariant( vc.getAttributes(), vc.getPhredScaledQual() );
|
final double pTrue = theModel.evaluateVariant( vc.getAttributes(), vc.getPhredScaledQual() );
|
||||||
double recalQual = 400.0 * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) );
|
double recalQual = 400.0 * QualityUtils.phredScaleErrorRate( Math.max(1.0 - pTrue, 0.000000001) );
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -107,6 +107,8 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
|
||||||
trueNegGlobal[kkk] = 0;
|
trueNegGlobal[kkk] = 0;
|
||||||
falseNegGlobal[kkk] = 0;
|
falseNegGlobal[kkk] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -506,6 +506,61 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final void outputClusterReports( final String outputPrefix ) {
|
||||||
|
final double STD_STEP = 0.2;
|
||||||
|
final double MAX_STD = 4.0;
|
||||||
|
final double MIN_STD = -4.0;
|
||||||
|
final int NUM_BINS = (int)Math.floor((Math.abs(MIN_STD) + Math.abs(MAX_STD)) / STD_STEP);
|
||||||
|
final int numAnnotations = dataManager.numAnnotations;
|
||||||
|
int totalCountsKnown = 0;
|
||||||
|
int totalCountsNovel = 0;
|
||||||
|
|
||||||
|
final int counts[][][] = new int[numAnnotations][NUM_BINS][2];
|
||||||
|
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
counts[jjj][iii][0] = 0;
|
||||||
|
counts[jjj][iii][1] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for( VariantDatum datum : dataManager.data ) {
|
||||||
|
final int isKnown = ( datum.isKnown ? 1 : 0 );
|
||||||
|
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
|
||||||
|
int histBin = (int)Math.round((datum.annotations[jjj]-MIN_STD) * (1.0 / STD_STEP));
|
||||||
|
if(histBin < 0) { histBin = 0; }
|
||||||
|
if(histBin > NUM_BINS-1) { histBin = NUM_BINS-1; }
|
||||||
|
if(histBin >= 0 && histBin <= NUM_BINS-1) {
|
||||||
|
counts[jjj][histBin][isKnown]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( isKnown == 1 ) { totalCountsKnown++; }
|
||||||
|
else { totalCountsNovel++; }
|
||||||
|
}
|
||||||
|
|
||||||
|
int annIndex = 0;
|
||||||
|
for( final String annotation : dataManager.annotationKeys ) {
|
||||||
|
PrintStream outputFile;
|
||||||
|
try {
|
||||||
|
outputFile = new PrintStream( outputPrefix + "." + annotation + ".dat" );
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new StingException( "Unable to create output file: " + outputPrefix + ".dat" );
|
||||||
|
}
|
||||||
|
|
||||||
|
outputFile.println("annotationValue,knownDist,novelDist");
|
||||||
|
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
final double annotationValue = (((double)iii * STD_STEP)+MIN_STD) * dataManager.varianceVector[annIndex] + dataManager.meanVector[annIndex];
|
||||||
|
outputFile.println( annotationValue + "," + ( ((double)counts[annIndex][iii][1])/((double)totalCountsKnown) ) +
|
||||||
|
"," + ( ((double)counts[annIndex][iii][0])/((double)totalCountsNovel) ));
|
||||||
|
}
|
||||||
|
|
||||||
|
annIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// BUGBUG: next output the actual cluster on top by integrating out every other annotation
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public final void outputOptimizationCurve( final VariantDatum[] data, final String outputPrefix, final int desiredNumVariants ) {
|
public final void outputOptimizationCurve( final VariantDatum[] data, final String outputPrefix, final int desiredNumVariants ) {
|
||||||
|
|
||||||
final int numVariants = data.length;
|
final int numVariants = data.length;
|
||||||
|
|
@ -719,7 +774,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
||||||
}
|
}
|
||||||
|
|
||||||
final double denom = Math.pow(2.0 * 3.14159, ((double)numAnnotations) / 2.0) * Math.pow(determinant[kkk], 0.5);
|
final double denom = Math.pow(2.0 * 3.14159, ((double)numAnnotations) / 2.0) * Math.pow(determinant[kkk], 0.5);
|
||||||
pVarInCluster[kkk] = (1.0 / ((double) numGaussians)) * (Math.exp( -0.5 * sum )) / denom;
|
pVarInCluster[kkk] = pCluster[kkk] * (Math.exp( -0.5 * sum )) / denom;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
if( isUsingTiTvModel ) {
|
if( isUsingTiTvModel ) {
|
||||||
|
|
|
||||||
|
|
@ -28,14 +28,19 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
|
@ -66,11 +71,16 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
@Argument(fullName="clusterFile", shortName="clusterFile", doc="The output cluster file", required=true)
|
@Argument(fullName="clusterFile", shortName="clusterFile", doc="The output cluster file", required=true)
|
||||||
private String CLUSTER_FILENAME = "optimizer.cluster";
|
private String CLUSTER_FILENAME = "optimizer.cluster";
|
||||||
@Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian Mixture model", required=false)
|
@Argument(fullName="numGaussians", shortName="nG", doc="The number of Gaussians to be used in the Gaussian Mixture model", required=false)
|
||||||
private int NUM_GAUSSIANS = 26;
|
private int NUM_GAUSSIANS = 7;
|
||||||
@Argument(fullName="numIterations", shortName="nI", doc="The number of iterations to be performed in the Gaussian Mixture model", required=false)
|
@Argument(fullName="numIterations", shortName="nI", doc="The number of iterations to be performed in the Gaussian Mixture model", required=false)
|
||||||
private int NUM_ITERATIONS = 10;
|
private int NUM_ITERATIONS = 10;
|
||||||
@Argument(fullName="minVarInCluster", shortName="minVar", doc="The minimum number of variants in a cluster to be considered a valid cluster. It can be used to prevent overfitting.", required=false)
|
@Argument(fullName="minVarInCluster", shortName="minVar", doc="The minimum number of variants in a cluster to be considered a valid cluster. It can be used to prevent overfitting.", required=false)
|
||||||
private int MIN_VAR_IN_CLUSTER = 1000;
|
private int MIN_VAR_IN_CLUSTER = 1000;
|
||||||
|
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||||
|
private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
|
||||||
|
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||||
|
private String PATH_TO_RESOURCES = "R/";
|
||||||
|
|
||||||
|
|
||||||
//@Argument(fullName="knn", shortName="knn", doc="The number of nearest neighbors to be used in the k-Nearest Neighbors model", required=false)
|
//@Argument(fullName="knn", shortName="knn", doc="The number of nearest neighbors to be used in the k-Nearest Neighbors model", required=false)
|
||||||
//private int NUM_KNN = 2000;
|
//private int NUM_KNN = 2000;
|
||||||
|
|
@ -85,6 +95,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
private int numAnnotations = 0;
|
private int numAnnotations = 0;
|
||||||
private static final double INFINITE_ANNOTATION_VALUE = 10000.0;
|
private static final double INFINITE_ANNOTATION_VALUE = 10000.0;
|
||||||
private Set<String> ignoreInputFilterSet = null;
|
private Set<String> ignoreInputFilterSet = null;
|
||||||
|
private boolean usingDBSNP = false;
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -96,6 +107,13 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
if( IGNORE_INPUT_FILTERS != null ) {
|
if( IGNORE_INPUT_FILTERS != null ) {
|
||||||
ignoreInputFilterSet = new TreeSet<String>(Arrays.asList(IGNORE_INPUT_FILTERS));
|
ignoreInputFilterSet = new TreeSet<String>(Arrays.asList(IGNORE_INPUT_FILTERS));
|
||||||
}
|
}
|
||||||
|
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||||
|
for( final ReferenceOrderedDataSource source : dataSources ) {
|
||||||
|
final RMDTrack rod = source.getReferenceOrderedData();
|
||||||
|
if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) {
|
||||||
|
usingDBSNP = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
|
|
@ -116,7 +134,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
|
|
||||||
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
for( final VariantContext vc : tracker.getAllVariantContexts(ref, null, context.getLocation(), false, false) ) {
|
||||||
|
|
||||||
if( vc != null && vc.isSNP() ) {
|
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||||
if( firstVariant ) { // This is the first variant encountered so set up the list of annotations
|
if( firstVariant ) { // This is the first variant encountered so set up the list of annotations
|
||||||
annotationKeys.addAll( vc.getAttributes().keySet() );
|
annotationKeys.addAll( vc.getAttributes().keySet() );
|
||||||
|
|
@ -162,7 +180,16 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
final VariantDatum variantDatum = new VariantDatum();
|
final VariantDatum variantDatum = new VariantDatum();
|
||||||
variantDatum.annotations = annotationValues;
|
variantDatum.annotations = annotationValues;
|
||||||
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||||
variantDatum.isKnown = !vc.getAttribute("ID").equals(".");
|
boolean isKnown = !vc.getAttribute("ID").equals(".");
|
||||||
|
if(usingDBSNP) {
|
||||||
|
isKnown = false;
|
||||||
|
for( VariantContext dbsnpVC : tracker.getVariantContexts(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, null, context.getLocation(), false, false) ) {
|
||||||
|
if(dbsnpVC != null && dbsnpVC.isSNP()) {
|
||||||
|
isKnown=true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
variantDatum.isKnown = isKnown;
|
||||||
|
|
||||||
mapList.add( variantDatum );
|
mapList.add( variantDatum );
|
||||||
}
|
}
|
||||||
|
|
@ -198,7 +225,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
|
dataManager.normalizeData(); // Each data point is now [ (x - mean) / standard deviation ]
|
||||||
|
|
||||||
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
|
// Create either the Gaussian Mixture Model or the Nearest Neighbors model and run it
|
||||||
VariantOptimizationModel theModel;
|
VariantGaussianMixtureModel theModel;
|
||||||
switch (OPTIMIZATION_MODEL) {
|
switch (OPTIMIZATION_MODEL) {
|
||||||
case GAUSSIAN_MIXTURE_MODEL:
|
case GAUSSIAN_MIXTURE_MODEL:
|
||||||
theModel = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS, MIN_VAR_IN_CLUSTER );
|
theModel = new VariantGaussianMixtureModel( dataManager, TARGET_TITV, NUM_GAUSSIANS, NUM_ITERATIONS, MIN_VAR_IN_CLUSTER );
|
||||||
|
|
@ -211,6 +238,27 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
||||||
}
|
}
|
||||||
|
|
||||||
theModel.run( CLUSTER_FILENAME );
|
theModel.run( CLUSTER_FILENAME );
|
||||||
|
|
||||||
|
theModel.outputClusterReports( CLUSTER_FILENAME );
|
||||||
|
|
||||||
|
|
||||||
|
for( final String annotation : annotationKeys ) {
|
||||||
|
// Execute Rscript command to plot the optimization curve
|
||||||
|
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||||
|
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_ClusterReport.R" + " " + CLUSTER_FILENAME + "." + annotation + ".dat " + annotation;
|
||||||
|
System.out.println( rScriptCommandLine );
|
||||||
|
|
||||||
|
// Execute the RScript command to plot the table of truth values
|
||||||
|
try {
|
||||||
|
final Process p = Runtime.getRuntime().exec( rScriptCommandLine );
|
||||||
|
p.waitFor();
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
System.exit(-1);
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue