Experimental support for sided annotations. Currently not more/less valuable than two-tailed testing. Future experiments are needed
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4864 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
21dc05138a
commit
32d5397c01
|
|
@ -28,8 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -38,6 +40,11 @@ import java.io.PrintStream;
|
|||
*/
|
||||
|
||||
public class VariantDataManager {
|
||||
public enum TailType {
|
||||
TWO_TAILED,
|
||||
SMALL_IS_GOOD,
|
||||
BIG_IS_GOOD
|
||||
}
|
||||
|
||||
protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
|
||||
|
||||
|
|
@ -46,6 +53,7 @@ public class VariantDataManager {
|
|||
public final int numAnnotations;
|
||||
public final double[] meanVector;
|
||||
public final double[] varianceVector; // This is really the standard deviation
|
||||
public final TailType[] tailTypes;
|
||||
public boolean isNormalized;
|
||||
public final ExpandingArrayList<String> annotationKeys;
|
||||
|
||||
|
|
@ -59,6 +67,7 @@ public class VariantDataManager {
|
|||
numAnnotations = 0;
|
||||
meanVector = null;
|
||||
varianceVector = null;
|
||||
tailTypes = null;
|
||||
} else {
|
||||
numAnnotations = _annotationKeys.size();
|
||||
if( numAnnotations <= 0 ) {
|
||||
|
|
@ -66,7 +75,9 @@ public class VariantDataManager {
|
|||
}
|
||||
meanVector = new double[numAnnotations];
|
||||
varianceVector = new double[numAnnotations];
|
||||
isNormalized = false;
|
||||
tailTypes = new TailType[numAnnotations];
|
||||
Arrays.fill(tailTypes, TailType.TWO_TAILED);
|
||||
isNormalized = false;
|
||||
}
|
||||
annotationKeys = _annotationKeys;
|
||||
}
|
||||
|
|
@ -79,6 +90,8 @@ public class VariantDataManager {
|
|||
varianceVector = new double[numAnnotations];
|
||||
isNormalized = true;
|
||||
annotationKeys = new ExpandingArrayList<String>();
|
||||
tailTypes = new TailType[numAnnotations];
|
||||
Arrays.fill(tailTypes, TailType.TWO_TAILED);
|
||||
|
||||
int jjj = 0;
|
||||
for( final String line : annotationLines ) {
|
||||
|
|
@ -90,6 +103,22 @@ public class VariantDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
//
|
||||
// code for dealing with TailTypes
|
||||
//
|
||||
|
||||
public void setAnnotationType(String annotation, TailType t ) {
|
||||
int i = annotationKeys.indexOf(annotation);
|
||||
if ( i == -1 ) throw new UserException.BadInput("Attempt to modify unknown annotation " + annotation + ": cluster file has " + Utils.join(",", annotationKeys));
|
||||
tailTypes[i] = t;
|
||||
logger.info("Updating annotation " + annotation + " to have tail type " + t);
|
||||
}
|
||||
|
||||
public TailType getAnnotationType(int i) {
|
||||
return tailTypes[i];
|
||||
}
|
||||
|
||||
|
||||
public void normalizeData() {
|
||||
boolean foundZeroVarianceAnnotation = false;
|
||||
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
|
||||
|
|
|
|||
|
|
@ -86,6 +86,8 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
private static final Pattern ANNOTATION_PATTERN = Pattern.compile("^@!ANNOTATION.*");
|
||||
private static final Pattern CLUSTER_PATTERN = Pattern.compile("^@!CLUSTER.*");
|
||||
|
||||
private static final double MAX_SIGMA_TO_BE_IN_GAUSSIAN = 4.0;
|
||||
|
||||
public VariantGaussianMixtureModel() {
|
||||
dataManager = null;
|
||||
maxGaussians = 0;
|
||||
|
|
@ -449,7 +451,30 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
|
||||
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
|
||||
final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false );
|
||||
annotations[jjj] = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||
|
||||
double z = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
|
||||
// switch ( dataManager.getAnnotationType(jjj) ) {
|
||||
// case TWO_TAILED: // z is already fine
|
||||
// break;
|
||||
// case SMALL_IS_GOOD:
|
||||
// if ( z < 0.0 ) {
|
||||
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
|
||||
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
|
||||
// z = 0.0;
|
||||
// }
|
||||
// break;
|
||||
// case BIG_IS_GOOD:
|
||||
// if ( z > 0.0 ) {
|
||||
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
|
||||
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
|
||||
// z = 0.0;
|
||||
// }
|
||||
// break;
|
||||
// default:
|
||||
// throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
|
||||
// }
|
||||
|
||||
annotations[jjj] = z;
|
||||
}
|
||||
|
||||
return evaluateGaussiansForSingleVariant( annotations, log10pVarInCluster );
|
||||
|
|
@ -783,9 +808,18 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
double value = 0.0;
|
||||
for( int ppp = 0; ppp < numAnnotations; ppp++ ) {
|
||||
final double myMu = mu[kkk][ppp];
|
||||
final double myAnn = annotations[ppp];
|
||||
double myAnn = annotations[ppp];
|
||||
final double mySigma = sigmaVals[ppp][jjj];
|
||||
value += (myAnn - myMu) * mySigma;
|
||||
double z = (myAnn - myMu) * mySigma;
|
||||
|
||||
switch ( dataManager.getAnnotationType(jjj) ) {
|
||||
case TWO_TAILED: break;
|
||||
case SMALL_IS_GOOD: if( myAnn < myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
|
||||
case BIG_IS_GOOD: if( myAnn > myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
|
||||
default: throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
|
||||
}
|
||||
|
||||
value += z;
|
||||
}
|
||||
final double jNorm = annotations[jjj] - mu[kkk][jjj];
|
||||
final double prod = value * jNorm;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
|
|
@ -121,6 +122,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Argument(fullName = "selectionMetric", shortName = "sm", doc = "Selection metric to use", required=false)
|
||||
private SelectionMetricType SELECTION_METRIC_TYPE = SelectionMetricType.NOVEL_TITV;
|
||||
|
||||
// arguments for handling 1 sided annotations
|
||||
@Hidden
|
||||
@Argument(fullName="use_annotation", shortName="an", doc="Allows us to provide information about the sidedness of annotations. Can be either -AN QD [two-sided], -AN QD- [values less than mean are good] and -AN QD+ [values greater than the mean are good]", required=false)
|
||||
private List<String> USE_ANNOTATIONS = null;
|
||||
|
||||
public enum SelectionMetricType {
|
||||
NOVEL_TITV,
|
||||
TRUTH_SENSITIVITY
|
||||
|
|
@ -172,6 +178,25 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
throw new UserException.BadArgumentValue("OPTIMIZATION_MODEL", "Variant Optimization Model is unrecognized. Implemented options are GAUSSIAN_MIXTURE_MODEL and K_NEAREST_NEIGHBORS" );
|
||||
}
|
||||
|
||||
|
||||
// deal with annotations
|
||||
if ( USE_ANNOTATIONS != null ) {
|
||||
for ( String annotation : USE_ANNOTATIONS ) {
|
||||
VariantDataManager.TailType type = VariantDataManager.TailType.TWO_TAILED;
|
||||
String annotationName = annotation;
|
||||
if ( annotation.endsWith("-") ) {
|
||||
type = VariantDataManager.TailType.SMALL_IS_GOOD;
|
||||
annotationName = annotation.substring(0, annotation.length()-1);
|
||||
}
|
||||
else if ( annotation.endsWith("+") ) {
|
||||
type = VariantDataManager.TailType.BIG_IS_GOOD;
|
||||
annotationName = annotation.substring(0, annotation.length()-1);
|
||||
}
|
||||
|
||||
theModel.dataManager.setAnnotationType( annotationName, type );
|
||||
}
|
||||
}
|
||||
|
||||
boolean foundDBSNP = false;
|
||||
for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
if( d.getName().startsWith("input") ) {
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ class recalibrate extends QScript {
|
|||
@Argument(fullName = "prefix", doc="Prefix argument", required=false)
|
||||
var prefix: String = ""
|
||||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; memoryLimit = Some(4) }
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK { logging_level = "INFO"; jarFile = gatkJarFile; memoryLimit = Some(3) }
|
||||
|
||||
class Target(val name: String, val reference: File, val rodName: String, val VCF: File, val intervals: Option[String], val titvTarget: Double) {
|
||||
def clusterFile = new File(name + ".clusters")
|
||||
|
|
@ -37,11 +37,20 @@ val TGPWExGdA = new Target("1000G.WEx.GdA", hg19, "b37", new File("/humgen/gsa-s
|
|||
|
||||
val targets = List(HiSeq, WEx, LowPassN60, LowPassAugust, TGPWExFH, TGPWExGdA)
|
||||
|
||||
val allTailedAnnotations = List("QD+", "SB-", "HaplotypeScore-", "HRun-")
|
||||
val someTailedAnnotations = List("QD", "SB-", "HaplotypeScore-", "HRun")
|
||||
val twoTailedAnnotations = List("QD", "SB", "HaplotypeScore", "HRun")
|
||||
|
||||
def script = {
|
||||
for (target <- targets) {
|
||||
add(new GenerateVariantClusters(target) )
|
||||
add(new VariantRecalibratorTiTv(target) )
|
||||
add(new VariantRecalibratorNRS(target) )
|
||||
add(new VariantRecalibratorTiTv(target, someTailedAnnotations, ".sb.hs.tailed") )
|
||||
add(new VariantRecalibratorNRS(target, someTailedAnnotations, ".sb.hs.tailed") )
|
||||
add(new VariantRecalibratorTiTv(target, allTailedAnnotations, ".all.tailed") )
|
||||
add(new VariantRecalibratorNRS(target, allTailedAnnotations, ".all.tailed") )
|
||||
add(new VariantRecalibratorTiTv(target, twoTailedAnnotations, ".untailed") )
|
||||
add(new VariantRecalibratorNRS(target, twoTailedAnnotations, ".untailed") )
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +75,7 @@ class GenerateVariantClusters(t: Target) extends org.broadinstitute.sting.queue.
|
|||
}
|
||||
|
||||
|
||||
class VariantRecalibratorBase(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||
class VariantRecalibratorBase(t: Target, ans: List[String]) extends org.broadinstitute.sting.queue.extensions.gatk.VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||
this.reference_sequence = t.reference
|
||||
this.DBSNP = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_129_" + t.rodName + ".rod")
|
||||
this.rodBind :+= RodBind("hapmap", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.2/genotypes_r27_nr." + t.rodName + "_fwd.vcf")
|
||||
|
|
@ -80,18 +89,20 @@ class VariantRecalibratorBase(t: Target) extends org.broadinstitute.sting.queue.
|
|||
this.priorDBSNP = Some(2.0)
|
||||
this.priorHapMap = Some(2.0)
|
||||
this.target_titv = t.titvTarget
|
||||
this.use_annotation ++= ans
|
||||
this.out = new File("/dev/null")
|
||||
}
|
||||
|
||||
class VariantRecalibratorTiTv(t: Target) extends VariantRecalibratorBase(t) {
|
||||
class VariantRecalibratorTiTv(t: Target, ans: List[String], prefix: String) extends VariantRecalibratorBase(t, ans) {
|
||||
this.tranche ++= List("0.1", "1.0", "10.0", "100.0")
|
||||
this.out = new File(t.name + ".titv.recalibrated.vcf")
|
||||
this.tranchesFile = new File(t.name + ".titv.tranches")
|
||||
//this.out = new File(t.name + ".titv.recalibrated.vcf")
|
||||
this.tranchesFile = new File(t.name + prefix + ".titv.tranches")
|
||||
}
|
||||
|
||||
class VariantRecalibratorNRS(t: Target) extends VariantRecalibratorBase(t) {
|
||||
class VariantRecalibratorNRS(t: Target, ans: List[String], prefix: String) extends VariantRecalibratorBase(t,ans) {
|
||||
this.sm = Some(org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator.SelectionMetricType.TRUTH_SENSITIVITY)
|
||||
this.tranche ++= List("50", "25", "10", "5", "2", "1", "0.5", "0.1")
|
||||
this.out = new File(t.name + ".ts.recalibrated.vcf")
|
||||
this.tranchesFile = new File(t.name + ".ts.tranches")
|
||||
//this.out = new File(t.name + ".ts.recalibrated.vcf")
|
||||
this.tranchesFile = new File(t.name + prefix + ".ts.tranches")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue