Variant optimizer now outputs VCF files via ApplyVariantClustersWalker. Documentation to be added to the wiki. It is ready to be used by other people but only with great caution.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3028 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d9398dc347
commit
58a31bab6a
|
|
@ -11,13 +11,14 @@ data = read.table(input,sep=",",head=T)
|
|||
maxVars = max(data$numKnown, data$numNovel)
|
||||
maxTITV = max(data$knownTITV[is.finite(data$knownTITV) & data$numKnown>2000], data$novelTITV[is.finite(data$novelTITV) & data$numNovel > 2000], targetTITV)
|
||||
minTITV = min(data$knownTITV[length(data$knownTITV)], data$novelTITV[length(data$novelTITV)], targetTITV)
|
||||
maxPCut = max(data$pCut[data$numKnown>0 | data$numNovel>0])
|
||||
|
||||
|
||||
outfile = paste(input, ".optimizationCurve.pdf", sep="")
|
||||
pdf(outfile, height=7, width=8)
|
||||
|
||||
par(mar=c(4,4,1,4),cex=1.3)
|
||||
plot(data$pCut, data$knownTITV, axes=F,xlab="Keep variants with p(true) > X",ylab="",ylim=c(minTITV,maxTITV),col="Blue",pch=20)
|
||||
plot(data$pCut, data$knownTITV, axes=F,xlab="Keep variants with QUAL >= X",ylab="",ylim=c(minTITV,maxTITV),xlim=c(0,maxPCut),col="Blue",pch=20)
|
||||
points(data$pCut, data$novelTITV,,col="DarkBlue",pch=20)
|
||||
abline(h=targetTITV,lty=3,col="Blue")
|
||||
axis(side=2,col="DarkBlue")
|
||||
|
|
@ -25,7 +26,7 @@ axis(side=1)
|
|||
mtext("Ti/Tv Ratio", side=2, line=2, col="blue",cex=1.4)
|
||||
legend("left", c("Known Ti/Tv","Novel Ti/Tv"), col=c("Blue","DarkBlue"), pch=c(20,20),cex=0.7)
|
||||
par(new=T)
|
||||
plot(data$pCut, data$numKnown, axes=F,xlab="",ylab="",ylim=c(0,maxVars),col="Green",pch=20)
|
||||
plot(data$pCut, data$numKnown, axes=F,xlab="",ylab="",ylim=c(0,maxVars),xlim=c(0,maxPCut),col="Green",pch=20)
|
||||
points(data$pCut, data$numNovel,col="DarkGreen",pch=20)
|
||||
axis(side=4,col="DarkGreen")
|
||||
mtext("Number of Variants", side=4, line=2, col="DarkGreen",cex=1.4)
|
||||
|
|
|
|||
|
|
@ -3,10 +3,19 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
|
|
@ -47,8 +56,8 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
//@Argument(fullName="target_titv", shortName="titv", doc="The target Ti/Tv ratio towards which to optimize. (~~2.2 for whole genome experiments)", required=false)
|
||||
//private double TARGET_TITV = 2.1;
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The target Ti/Tv ratio towards which to optimize. (~~2.2 for whole genome experiments)", required=true)
|
||||
private double TARGET_TITV = 2.1;
|
||||
@Argument(fullName="desired_num_variants", shortName="dV", doc="The desired number of variants to keep in a theoretically filtered set", required=false)
|
||||
private int DESIRED_NUM_VARIANTS = 0;
|
||||
@Argument(fullName="ignore_input_filters", shortName="ignoreFilters", doc="If specified the optimizer will use variants even if the FILTER column is marked in the VCF file", required=false)
|
||||
|
|
@ -67,7 +76,8 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
/////////////////////////////
|
||||
private final ExpandingArrayList<String> annotationKeys = new ExpandingArrayList<String>();
|
||||
private VariantGaussianMixtureModel theModel = null;
|
||||
private VCFWriter vcfWriter;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -78,7 +88,6 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
public void initialize() {
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
|
||||
VariantOptimizationModel theModel = null;
|
||||
switch (OPTIMIZATION_MODEL) {
|
||||
case GAUSSIAN_MIXTURE_MODEL:
|
||||
theModel = new VariantGaussianMixtureModel( CLUSTER_FILENAME );
|
||||
|
|
@ -89,6 +98,18 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
default:
|
||||
throw new StingException( "Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS" );
|
||||
}
|
||||
|
||||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "The original variant quality score"));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
|
||||
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
|
||||
TreeSet<String> samples = new TreeSet<String>();
|
||||
SampleUtils.getUniquifiedSamplesFromRods(getToolkit(), samples, new HashMap<Pair<String, String>, String>());
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -99,6 +120,35 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
|
||||
public ExpandingArrayList<VariantDatum> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
final ExpandingArrayList<VariantDatum> mapList = new ExpandingArrayList<VariantDatum>();
|
||||
|
||||
if( tracker == null ) { // For some reason RodWalkers get map calls with null trackers
|
||||
return mapList;
|
||||
}
|
||||
|
||||
|
||||
for( ReferenceOrderedDatum rod : tracker.getAllRods() ) {
|
||||
if( rod != null && rod instanceof RodVCF ) {
|
||||
final RodVCF rodVCF = ((RodVCF) rod);
|
||||
if( rodVCF.isSNP() && (IGNORE_INPUT_FILTERS || !rodVCF.isFiltered()) ) {
|
||||
final double pTrue = theModel.evaluateVariant( rodVCF.getInfoValues(), rodVCF.getQual() );
|
||||
final double recalQual = QualityUtils.phredScaleErrorRate( Math.max( 1.0 - pTrue, 0.000000001) );
|
||||
rodVCF.mCurrentRecord.addInfoField("OQ", ((Double)rodVCF.getQual()).toString() );
|
||||
rodVCF.mCurrentRecord.setQual( recalQual );
|
||||
vcfWriter.addRecord( rodVCF.mCurrentRecord );
|
||||
|
||||
VariantDatum variantDatum = new VariantDatum();
|
||||
variantDatum.isTransition = BaseUtils.isTransition((byte)rodVCF.getAlternativeBaseForSNP(), (byte)rodVCF.getReferenceForSNP()); //vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
variantDatum.isKnown = !rodVCF.isNovel(); //!vc.getAttribute("ID").equals(".");
|
||||
variantDatum.qual = recalQual;
|
||||
mapList.add( variantDatum );
|
||||
|
||||
} else { // not a SNP or is filtered so just dump it out to the VCF file
|
||||
vcfWriter.addRecord( rodVCF.mCurrentRecord );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return mapList;
|
||||
}
|
||||
|
||||
|
|
@ -119,7 +169,22 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
|
||||
public void onTraversalDone( ExpandingArrayList<VariantDatum> reduceSum ) {
|
||||
|
||||
final VariantDataManager dataManager = new VariantDataManager( reduceSum, annotationKeys );
|
||||
final VariantDataManager dataManager = new VariantDataManager( reduceSum, theModel.dataManager.annotationKeys );
|
||||
reduceSum.clear(); // Don't need this ever again, clean up some memory
|
||||
|
||||
theModel.outputOptimizationCurve( dataManager.data, OUTPUT_PREFIX, DESIRED_NUM_VARIANTS );
|
||||
|
||||
// Execute Rscript command to plot the optimization curve
|
||||
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||
final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + OUTPUT_PREFIX + ".dat" + " " + TARGET_TITV;
|
||||
System.out.println( rScriptCommandLine );
|
||||
|
||||
// Execute the RScript command to plot the table of truth values
|
||||
try {
|
||||
Runtime.getRuntime().exec( rScriptCommandLine );
|
||||
} catch ( IOException e ) {
|
||||
throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@ import org.broadinstitute.sting.utils.ExpandingArrayList;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
|
|
@ -43,9 +41,9 @@ public class VariantDataManager {
|
|||
public final int numVariants;
|
||||
public final int numAnnotations;
|
||||
public final double[] meanVector;
|
||||
public final double[] varianceVector;
|
||||
public final double[] varianceVector; // This is really the standard deviation
|
||||
public boolean isNormalized;
|
||||
private final ExpandingArrayList<String> annotationKeys;
|
||||
public final ExpandingArrayList<String> annotationKeys;
|
||||
|
||||
public VariantDataManager( final ExpandingArrayList<VariantDatum> dataList, final ExpandingArrayList<String> _annotationKeys ) {
|
||||
numVariants = dataList.size();
|
||||
|
|
@ -53,7 +51,7 @@ public class VariantDataManager {
|
|||
if( numVariants <= 0 ) {
|
||||
throw new StingException( "There are zero variants! (or possibly a problem with integer overflow)" );
|
||||
}
|
||||
numAnnotations = data[0].annotations.length;
|
||||
numAnnotations = _annotationKeys.size() + 1; // +1 for QUAL
|
||||
if( numAnnotations <= 0 ) {
|
||||
throw new StingException( "There are zero annotations! (or possibly a problem with integer overflow)" );
|
||||
}
|
||||
|
|
@ -91,7 +89,7 @@ public class VariantDataManager {
|
|||
throw new StingException("Zero variance is a problem: standard deviation = " + theSTD);
|
||||
}
|
||||
meanVector[jjj] = theMean;
|
||||
varianceVector[jjj] = theSTD * theSTD;
|
||||
varianceVector[jjj] = theSTD;
|
||||
for( int iii = 0; iii < numVariants; iii++ ) {
|
||||
data[iii].annotations[jjj] = ( data[iii].annotations[jjj] - theMean ) / theSTD;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,5 +35,5 @@ public class VariantDatum {
|
|||
public double[] annotations;
|
||||
public boolean isTransition;
|
||||
public boolean isKnown;
|
||||
public double pTrue;
|
||||
public double qual;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.utils.xReadLines;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
|
@ -44,7 +44,7 @@ import java.util.regex.Pattern;
|
|||
|
||||
public final class VariantGaussianMixtureModel extends VariantOptimizationModel implements VariantClusteringModel {
|
||||
|
||||
private final VariantDataManager dataManager;
|
||||
public final VariantDataManager dataManager;
|
||||
private final int numGaussians;
|
||||
private final int numIterations;
|
||||
private final long RANDOM_SEED = 91801305;
|
||||
|
|
@ -59,9 +59,9 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
private final int[] numMaxClusterNovel;
|
||||
private final double[] clusterTITV;
|
||||
private final double[] clusterTruePositiveRate; // The true positive rate implied by the cluster's Ti/Tv ratio
|
||||
//private final int desiredNumVariants;
|
||||
private final int minVarInCluster;
|
||||
|
||||
private static final double INFINITE_ANNOTATION_VALUE = 10000.0;
|
||||
private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*");
|
||||
private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*");
|
||||
|
||||
|
|
@ -78,7 +78,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
numMaxClusterNovel = new int[numGaussians];
|
||||
clusterTITV = new double[numGaussians];
|
||||
clusterTruePositiveRate = new double[numGaussians];
|
||||
//desiredNumVariants = _desiredNumVariants;
|
||||
minVarInCluster = _minVarInCluster;
|
||||
}
|
||||
|
||||
|
|
@ -107,14 +106,25 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
numMaxClusterKnown = null;
|
||||
numMaxClusterNovel = null;
|
||||
clusterTITV = null;
|
||||
clusterTruePositiveRate = null;
|
||||
//desiredNumVariants = _desiredNumVariants;
|
||||
minVarInCluster = 0;
|
||||
|
||||
numGaussians = clusterLines.size();
|
||||
mu = new double[numGaussians][];
|
||||
sigma = new double[numGaussians][];
|
||||
mu = new double[numGaussians][dataManager.numAnnotations];
|
||||
sigma = new double[numGaussians][dataManager.numAnnotations];
|
||||
clusterTruePositiveRate = new double[numGaussians];
|
||||
|
||||
int kkk = 0;
|
||||
for( String line : clusterLines ) {
|
||||
String[] vals = line.split(",");
|
||||
clusterTruePositiveRate[kkk] = Double.parseDouble(vals[4]);
|
||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||
mu[kkk][jjj] = Double.parseDouble(vals[5+jjj]);
|
||||
sigma[kkk][jjj] = Double.parseDouble(vals[5+dataManager.numAnnotations+jjj]);
|
||||
}
|
||||
kkk++;
|
||||
}
|
||||
|
||||
System.out.println("Found " + numGaussians + " clusters and using " + dataManager.numAnnotations + " annotations: " + dataManager.annotationKeys);
|
||||
}
|
||||
|
||||
public final void run( final String clusterFileName ) {
|
||||
|
|
@ -278,27 +288,45 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
public final void applyClusters( final VariantDatum[] data, final String outputPrefix ) {
|
||||
public final double evaluateVariant( final Map<String,String> annotationMap, final double qualityScore ) {
|
||||
final double[] pVarInCluster = new double[numGaussians];
|
||||
final double[] annotations = new double[dataManager.numAnnotations];
|
||||
|
||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||
double value = 0.0;
|
||||
if( dataManager.annotationKeys.get(jjj).equals("QUAL") ) {
|
||||
value = qualityScore;
|
||||
} else {
|
||||
try {
|
||||
final String stringValue = annotationMap.get( dataManager.annotationKeys.get(jjj) );
|
||||
if( stringValue != null ) {
|
||||
value = Double.parseDouble( stringValue );
|
||||
if( Double.isInfinite(value) ) {
|
||||
value = ( value > 0 ? 1.0 : -1.0 ) * INFINITE_ANNOTATION_VALUE;
|
||||
}
|
||||
}
|
||||
} catch( NumberFormatException e ) {
|
||||
// do nothing, default value is 0.0
|
||||
}
|
||||
}
|
||||
|
||||
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||
}
|
||||
|
||||
evaluateGaussiansForSingleVariant( annotations, pVarInCluster );
|
||||
|
||||
double sum = 0;
|
||||
for( int kkk = 0; kkk < numGaussians; kkk++ ) {
|
||||
sum += pVarInCluster[kkk] * clusterTruePositiveRate[kkk];
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
public final void outputOptimizationCurve( final VariantDatum[] data, final String outputPrefix, final int desiredNumVariants ) {
|
||||
|
||||
final int numVariants = data.length;
|
||||
|
||||
final double[] pTrueVariant = new double[numVariants];
|
||||
final boolean[] markedVariant = new boolean[numVariants];
|
||||
final double[] pVarInCluster = new double[numGaussians];
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Evaluate each variant using the probability of being in each cluster and that cluster's true positive rate
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
for( int iii = 0; iii < numVariants; iii++ ) {
|
||||
evaluateGaussiansForSingleVariant( data[iii], pVarInCluster );
|
||||
|
||||
pTrueVariant[iii] = 0.0;
|
||||
markedVariant[iii] = false;
|
||||
for( int kkk = 0; kkk < numGaussians; kkk++ ) {
|
||||
pTrueVariant[iii] += pVarInCluster[kkk] * clusterTruePositiveRate[kkk];
|
||||
}
|
||||
}
|
||||
|
||||
PrintStream outputFile = null;
|
||||
try {
|
||||
|
|
@ -316,10 +344,10 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
int numNovelTv = 0;
|
||||
boolean foundDesiredNumVariants = false;
|
||||
outputFile.println("pCut,numKnown,numNovel,knownTITV,novelTITV");
|
||||
for( double pCut = 1.0; pCut >= 0.0; pCut -= 0.001 ) {
|
||||
for( double pCut = 100.0; pCut >= 0.0; pCut -= 0.1 ) {
|
||||
for( int iii = 0; iii < numVariants; iii++ ) {
|
||||
if( !markedVariant[iii] ) {
|
||||
if( pTrueVariant[iii] >= pCut ) {
|
||||
if( data[iii].qual >= pCut ) {
|
||||
markedVariant[iii] = true;
|
||||
if( data[iii].isKnown ) { // known
|
||||
numKnown++;
|
||||
|
|
@ -340,7 +368,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
}
|
||||
}
|
||||
if( desiredNumVariants != 0 && !foundDesiredNumVariants && (numKnown + numNovel) >= desiredNumVariants ) {
|
||||
System.out.println( "Keeping variants with p(true) >= " + String.format("%.3f",pCut) + " results in a filtered set with: " );
|
||||
System.out.println( "Keeping variants with p(true) >= " + String.format("%.1f",pCut) + " results in a filtered set with: " );
|
||||
System.out.println("\t" + numKnown + " known variants");
|
||||
System.out.println("\t" + numNovel + " novel variants, (dbSNP rate = " + String.format("%.2f",((double) numKnown * 100.0) / ((double) numKnown + numNovel) ) + "%)");
|
||||
System.out.println("\t" + String.format("%.4f known Ti/Tv ratio", ((double)numKnownTi) / ((double)numKnownTv)));
|
||||
|
|
@ -353,7 +381,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
( numNovelTi ==0 || numNovelTv == 0 ? "NaN" : ( ((double)numNovelTi) / ((double)numNovelTv) )));
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
private void evaluateGaussians( final VariantDatum[] data, final double[][] pVarInCluster ) {
|
||||
|
|
@ -386,18 +413,20 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
}
|
||||
|
||||
|
||||
private void evaluateGaussiansForSingleVariant( final VariantDatum datum, final double[] pVarInCluster ) {
|
||||
private void evaluateGaussiansForSingleVariant( final double[] annotations, final double[] pVarInCluster ) {
|
||||
|
||||
final int numAnnotations = datum.annotations.length;
|
||||
final int numAnnotations = annotations.length;
|
||||
|
||||
double sumProb = 0.0;
|
||||
for( int kkk = 0; kkk < numGaussians; kkk++ ) {
|
||||
double sum = 0.0;
|
||||
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
|
||||
sum += ( (datum.annotations[jjj] - mu[kkk][jjj]) * (datum.annotations[jjj] - mu[kkk][jjj]) )
|
||||
sum += ( (annotations[jjj] - mu[kkk][jjj]) * (annotations[jjj] - mu[kkk][jjj]) )
|
||||
/ sigma[kkk][jjj];
|
||||
}
|
||||
pVarInCluster[kkk] = pCluster[kkk] * Math.exp( -0.5 * sum );
|
||||
//BUGBUG: removed pCluster[kkk]*, for the second pass
|
||||
//pVarInCluster[kkk] = pCluster[kkk] * Math.exp( -0.5 * sum );
|
||||
pVarInCluster[kkk] = Math.exp( -0.5 * sum );
|
||||
|
||||
if( pVarInCluster[kkk] < MIN_PROB) { // Very small numbers are a very big problem
|
||||
pVarInCluster[kkk] = MIN_PROB;
|
||||
|
|
|
|||
|
|
@ -53,8 +53,6 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
/////////////////////////////
|
||||
@Argument(fullName="target_titv", shortName="titv", doc="The target Ti/Tv ratio towards which to optimize. (~~2.2 for whole genome experiments)", required=true)
|
||||
private double TARGET_TITV = 2.1;
|
||||
//@Argument(fullName="desired_num_variants", shortName="dV", doc="The desired number of variants to keep in a theoretically filtered set", required=false)
|
||||
//private int DESIRED_NUM_VARIANTS = 0;
|
||||
@Argument(fullName="ignore_input_filters", shortName="ignoreFilters", doc="If specified the optimizer will use variants even if the FILTER column is marked in the VCF file", required=false)
|
||||
private boolean IGNORE_INPUT_FILTERS = false;
|
||||
@Argument(fullName="exclude_annotation", shortName="exclude", doc="The names of the annotations which should be excluded from the calculations", required=false)
|
||||
|
|
@ -74,10 +72,6 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
//private int NUM_KNN = 2000;
|
||||
//@Argument(fullName = "optimization_model", shortName = "om", doc = "Optimization calculation model to employ -- GAUSSIAN_MIXTURE_MODEL is currently the default, while K_NEAREST_NEIGHBORS is also available for small callsets.", required = false)
|
||||
private VariantOptimizationModel.Model OPTIMIZATION_MODEL = VariantOptimizationModel.Model.GAUSSIAN_MIXTURE_MODEL;
|
||||
//@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is probably /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
//private String PATH_TO_RSCRIPT = "/broad/tools/apps/R-2.6.0/bin/Rscript";
|
||||
//@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
//private String PATH_TO_RESOURCES = "R/";
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -115,7 +109,7 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
|
||||
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
|
||||
|
||||
if( vc != null && vc.isSNP() && (IGNORE_INPUT_FILTERS || !vc.isFiltered()) ) {
|
||||
if( vc != null && vc.isSNP() && (IGNORE_INPUT_FILTERS || !vc.isFiltered()) ) {
|
||||
if( firstVariant ) { // This is the first variant encountered so set up the list of annotations
|
||||
annotationKeys.addAll( vc.getAttributes().keySet() );
|
||||
if( annotationKeys.contains("ID") ) { annotationKeys.remove("ID"); } // ID field is added to the vc's INFO field?
|
||||
|
|
@ -203,21 +197,6 @@ public class VariantOptimizer extends RodWalker<ExpandingArrayList<VariantDatum>
|
|||
}
|
||||
|
||||
theModel.run( CLUSTER_FILENAME );
|
||||
|
||||
|
||||
// Functionality moved to different walker
|
||||
|
||||
// Execute Rscript command to plot the optimization curve
|
||||
// Print out the command line to make it clear to the user what is being executed and how one might modify it
|
||||
//final String rScriptCommandLine = PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_OptimizationCurve.R" + " " + OUTPUT_PREFIX + ".dat" + " " + TARGET_TITV;
|
||||
//System.out.println( rScriptCommandLine );
|
||||
|
||||
// Execute the RScript command to plot the table of truth values
|
||||
//try {
|
||||
// Runtime.getRuntime().exec( rScriptCommandLine );
|
||||
//} catch ( IOException e ) {
|
||||
// throw new StingException( "Unable to execute RScript command: " + rScriptCommandLine );
|
||||
//}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class QualityUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||
* Convert a probability to a quality score. Note, this is capped at a quality score which is determined by _eps_.
|
||||
*
|
||||
* @param prob a probability (0.0-1.0)
|
||||
* @param eps min probabilty allowed (0.0-1.0)
|
||||
|
|
|
|||
Loading…
Reference in New Issue