From 3534f412c9b723ab232879f41a1c0599a78bf337 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Mon, 13 Jun 2011 14:45:28 +0000 Subject: [PATCH] Better error message for the case of input variants found in ApplyRecalibration that were never seen during VariantRecalibrator. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5979 348d0f76-0448-11de-a6fe-93d51630548a --- .../ApplyRecalibration.java | 47 ++++++++++--------- .../VariantDataManager.java | 2 +- .../variantrecalibration/VariantDatum.java | 6 +-- .../VariantRecalibrator.java | 4 +- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 6b7c8ed43..48deabc14 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -170,35 +170,36 @@ public class ApplyRecalibration extends RodWalker { for( VariantContext vc : tracker.getVariantContexts(ref, inputNames, null, context.getLocation(), true, false) ) { if( vc != null ) { - if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) ) { + if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { String filterString = null; final Map attrs = new HashMap(vc.getAttributes()); - final Double lod = (Double) lodMap.get( ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop() ); - if( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) { - attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); - for( int i = tranches.size() - 1; i >= 0; i-- ) { - final Tranche tranche = tranches.get(i); - if( lod >= tranche.minVQSLod ) { - if( i == tranches.size() - 1 ) { - filterString = VCFConstants.PASSES_FILTERS_v4; - } else { - filterString = tranche.name; - } - break; + final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); + if( lod == null ) { + throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); + } + + attrs.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); + for( int i = tranches.size() - 1; i >= 0; i-- ) { + final Tranche tranche = tranches.get(i); + if( lod >= tranche.minVQSLod ) { + if( i == tranches.size() - 1 ) { + filterString = VCFConstants.PASSES_FILTERS_v4; + } else { + filterString = tranche.name; } - } - - if( filterString == null ) { - filterString = tranches.get(0).name+"+"; - } - - if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { - final Set filters = new HashSet(); - filters.add(filterString); - vc = VariantContext.modifyFilters(vc, filters); + break; } } + if( filterString == null ) { + filterString = tranches.get(0).name+"+"; + } + + if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { + final Set filters = new HashSet(); + filters.add(filterString); + vc = VariantContext.modifyFilters(vc, filters); + } vcfWriter.add( VariantContext.modifyPErrorFiltersAndAttributes(vc, vc.getNegLog10PError(), vc.getFilters(), attrs), ref.getBase() ); } else { // valid VC but not compatible with this mode, so just emit the variant untouched vcfWriter.add( vc, ref.getBase() ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 0131b1bc9..3c80dcf05 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -253,7 +253,7 @@ public class VariantDataManager { public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) { for( final VariantDatum datum : data ) { - RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.pos.getContig(), datum.pos.getStart(), datum.pos.getStop(), datum.lod)); + RECAL_FILE.println(String.format("%s,%d,%d,%.4f", datum.contig, datum.start, datum.stop, datum.lod)); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index 08e44f6cf..ac875b645 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.broadinstitute.sting.utils.GenomeLoc; - /** * Created by IntelliJ IDEA. * User: rpoplin @@ -22,8 +20,10 @@ public class VariantDatum implements Comparable { public double originalQual; public double prior; public int consensusCount; - public GenomeLoc pos; public int usedForTraining; + public String contig; + public int start; + public int stop; public MultivariateGaussian assignment; // used in K-means implementation public int compareTo( final VariantDatum other ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index f8cfbf17f..aee8101d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -168,7 +168,9 @@ public class VariantRecalibrator extends RodWalker