From 00453919d29da2b06faa387659ead11a2603eb18 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 18 Jan 2011 20:48:19 +0000 Subject: [PATCH] VQSR now only uses the valid polymorphic sites for training and truth sensitivity calculations. Any number of tracks whose ROD binding begins with the name truth can be used as truth sensitivity tracks. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5012 348d0f76-0448-11de-a6fe-93d51630548a --- .../GenerateVariantClustersWalker.java | 8 ++--- .../VariantRecalibrator.java | 33 ++++++++++++------- .../sting/queue/pipeline/VariantCalling.scala | 2 +- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index 83aec2b93..ffd058280 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -196,13 +196,13 @@ public class GenerateVariantClustersWalker extends RodWalker, ExpandingArrayList> { public static final String VQS_LOD_KEY = "VQSLOD"; - public static final double SMALLEST_LOG10_PVAR = -1e6; ///////////////////////////// // Inputs @@ -78,7 +77,7 @@ public class VariantRecalibrator extends RodWalker ignoreInputFilterSet = null; private Set inputNames = new HashSet(); + private Set truthNames = new HashSet(); private NestedHashMap priorCache = new NestedHashMap(); private boolean trustACField = false; private PrintStream tranchesStream = null; @@ -202,6 +202,9 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet(); final TreeSet samples = new TreeSet(); @@ -253,9 +255,18 @@ public class VariantRecalibrator extends RodWalker vcsTruth = tracker.getVariantContexts(ref, "truth", null, context.getLocation(), false, true); - final VariantContext vcTruth = ( vcsTruth.size() != 0 ? vcsTruth.iterator().next() : null ); - nTruthSites += vcTruth != null && vcTruth.isVariant() ? 1 : 0; + final Collection vcsTruth = tracker.getVariantContexts(ref, truthNames, null, context.getLocation(), false, true); + boolean isAtTruthSite = false; + for( final VariantContext vcTruth : vcsTruth ) { + if( vcTruth != null && vcTruth.isVariant() && !vcTruth.isFiltered() && (!vcTruth.hasGenotypes() || vcTruth.isPolymorphic()) ) { + isAtTruthSite = true; + break; + } + } + nTruthSites += isAtTruthSite ? 1 : 0; + + //final VariantContext vcTruth = ( vcsTruth.size() != 0 ? vcsTruth.iterator().next() : null ); + //nTruthSites += vcTruth != null && vcTruth.isVariant() ? 1 : 0; for( final VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), false, false) ) { if( vc != null && vc.isSNP() ) { @@ -271,13 +282,13 @@ public class VariantRecalibrator extends RodWalker attrs = new HashMap(vc.getAttributes()); diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala index 2505b62d8..650e5190e 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala @@ -178,7 +178,7 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) { vr.analysisName = "VariantQualityRecalibration" vr.rodBind :+= new RodBind("input","VCF",raw_vcf) vr.cluster_file = cluster - vr.target_titv = target_titv + vr.target_titv = Some(target_titv) vr.out = out_vcf vr.tranches_file = out_tranches vr.tranche :+= "0.1"