diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 42ad75d17..936e68b84 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -28,8 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.Utils; import java.io.PrintStream; +import java.util.Arrays; /** * Created by IntelliJ IDEA. @@ -38,6 +40,11 @@ import java.io.PrintStream; */ public class VariantDataManager { + public enum TailType { + TWO_TAILED, + SMALL_IS_GOOD, + BIG_IS_GOOD + } protected final static Logger logger = Logger.getLogger(VariantDataManager.class); @@ -46,6 +53,7 @@ public class VariantDataManager { public final int numAnnotations; public final double[] meanVector; public final double[] varianceVector; // This is really the standard deviation + public final TailType[] tailTypes; public boolean isNormalized; public final ExpandingArrayList annotationKeys; @@ -59,6 +67,7 @@ public class VariantDataManager { numAnnotations = 0; meanVector = null; varianceVector = null; + tailTypes = null; } else { numAnnotations = _annotationKeys.size(); if( numAnnotations <= 0 ) { @@ -66,7 +75,9 @@ public class VariantDataManager { } meanVector = new double[numAnnotations]; varianceVector = new double[numAnnotations]; - isNormalized = false; + tailTypes = new TailType[numAnnotations]; + Arrays.fill(tailTypes, TailType.TWO_TAILED); + isNormalized = false; } annotationKeys = _annotationKeys; } @@ -79,6 +90,8 @@ public class VariantDataManager { varianceVector = new double[numAnnotations]; isNormalized = true; annotationKeys = new ExpandingArrayList(); + tailTypes = new TailType[numAnnotations]; + Arrays.fill(tailTypes, TailType.TWO_TAILED); int jjj = 0; for( final String line : annotationLines ) { @@ -90,6 +103,22 @@ public class VariantDataManager { } } + // + // code for dealing with TailTypes + // + + public void setAnnotationType(String annotation, TailType t ) { + int i = annotationKeys.indexOf(annotation); + if ( i == -1 ) throw new UserException.BadInput("Attempt to modify unknown annotation " + annotation + ": cluster file has " + Utils.join(",", annotationKeys)); + tailTypes[i] = t; + logger.info("Updating annotation " + annotation + " to have tail type " + t); + } + + public TailType getAnnotationType(int i) { + return tailTypes[i]; + } + + public void normalizeData() { boolean foundZeroVarianceAnnotation = false; for( int jjj = 0; jjj < numAnnotations; jjj++ ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java index 3375fbbdf..52e16fd16 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java @@ -86,6 +86,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*"); private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*"); + private static final double MAX_SIGMA_TO_BE_IN_GAUSSIAN = 4.0; + public VariantGaussianMixtureModel() { dataManager = null; maxGaussians = 0; @@ -449,7 +451,30 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) { final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false ); - annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj]; + + double z = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj]; +// switch ( dataManager.getAnnotationType(jjj) ) { +// case TWO_TAILED: // z is already fine +// break; +// case SMALL_IS_GOOD: +// if ( z < 0.0 ) { +//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0", +//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z)); +// z = 0.0; +// } +// break; +// case BIG_IS_GOOD: +// if ( z > 0.0 ) { +//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0", +//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z)); +// z = 0.0; +// } +// break; +// default: +// throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj)); +// } + + annotations[jjj] = z; } return evaluateGaussiansForSingleVariant( annotations, log10pVarInCluster ); @@ -783,9 +808,18 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel double value = 0.0; for( int ppp = 0; ppp < numAnnotations; ppp++ ) { final double myMu = mu[kkk][ppp]; - final double myAnn = annotations[ppp]; + double myAnn = annotations[ppp]; final double mySigma = sigmaVals[ppp][jjj]; - value += (myAnn - myMu) * mySigma; + double z = (myAnn - myMu) * mySigma; + + switch ( dataManager.getAnnotationType(jjj) ) { + case TWO_TAILED: break; + case SMALL_IS_GOOD: if( myAnn < myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break; + case BIG_IS_GOOD: if( myAnn > myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break; + default: throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj)); + } + + value += z; } final double jNorm = annotations[jjj] - mu[kkk][jjj]; final double prod = value * jNorm; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index dcd82c7de..7b26ea24e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -1,3 +1,4 @@ + /* * Copyright (c) 2010 The Broad Institute * @@ -121,6 +122,11 @@ public class VariantRecalibrator extends RodWalker USE_ANNOTATIONS = null; + public enum SelectionMetricType { NOVEL_TITV, TRUTH_SENSITIVITY @@ -172,6 +178,25 @@ public class VariantRecalibrator extends RodWalker