From ea793d8e27712804c26c91a128d7a0508eb5e277 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Apr 2012 21:21:29 -0400 Subject: [PATCH 1/6] Khalid pressured me into adding an integration test that makes sure we don't fail on reads with adjacent I and D events. --- .../genotyper/UnifiedGenotyperIntegrationTest.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 015f11048..4d00f6113 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -66,6 +66,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("test Multiple SNP alleles", spec); } + @Test + public void testBadRead() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH -I " + validationDataLocation + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + Arrays.asList("7678827a2ee21870a41c09d28d26b996")); + executeTest("test bad read", spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing compressed output From 6d03bce0d3a04eaa5ad328861d2707041e227132 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Apr 2012 22:38:18 -0400 Subject: [PATCH 2/6] Important refactoring of the VQSR recal file format: we now use a VCF instead of a CSV file. The most important reason for this change is that we no longer need to read the entire recal file into memory up front in ApplyRecalibration. For 1000G calling this was prohibitive in terms of memory requirements. Now we go through the rod system and pull in just the records we need at a given position. As an added bonus, once BCF2 is live we can drastically cut down the sizes of these recal files (which can grow large for whole genome calling). --- .../ApplyRecalibration.java | 47 +++++++------------ .../VariantDataManager.java | 38 +++++++++++---- .../variantrecalibration/VariantDatum.java | 6 +-- .../VariantRecalibrator.java | 9 ++-- 4 files changed, 54 insertions(+), 46 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 6b36f4e1b..fd0997802 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -37,14 +37,11 @@ import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; import java.io.File; -import java.io.FileNotFoundException; import java.util.*; /** @@ -98,9 +95,9 @@ public class ApplyRecalibration extends RodWalker { @Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true) public List> input; @Input(fullName="recal_file", shortName="recalFile", doc="The input recal file used by ApplyRecalibration", required=true) - private File RECAL_FILE; + protected RodBinding recal; @Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true) - private File TRANCHES_FILE; + protected File TRANCHES_FILE; ///////////////////////////// // Outputs @@ -123,8 +120,6 @@ public class ApplyRecalibration extends RodWalker { ///////////////////////////// final private List tranches = new ArrayList(); final private Set inputNames = new HashSet(); - final private NestedHashMap lodMap = new NestedHashMap(); - final private NestedHashMap annotationMap = new NestedHashMap(); final private Set ignoreInputFilterSet = new TreeSet(); //--------------------------------------------------------------------------------------------------------------- @@ -174,20 +169,6 @@ public class ApplyRecalibration extends RodWalker { final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); - - try { - logger.info("Reading in recalibration table..."); - for ( final String line : new XReadLines( RECAL_FILE ) ) { - final String[] vals = line.split(","); - lodMap.put( Double.parseDouble(vals[3]), vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys - annotationMap.put( vals[4], vals[0], Integer.parseInt(vals[1]), Integer.parseInt(vals[2]) ); // value comes before the keys - } - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotReadInputFile(RECAL_FILE, e); - } catch ( Exception e ) { - throw new UserException.MalformedFile(RECAL_FILE, "Could not parse LOD and annotation information in input recal file. File is somehow malformed."); - } - } //--------------------------------------------------------------------------------------------------------------- @@ -202,21 +183,27 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) { + for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) { if( vc != null ) { - if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { - VariantContextBuilder builder = new VariantContextBuilder(vc); - String filterString = null; - final Double lod = (Double) lodMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); - final String worstAnnotation = (String) annotationMap.get( vc.getChr(), vc.getStart(), vc.getEnd() ); - if( lod == null ) { + if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { + + final VariantContext recalDatum = tracker.getFirstValue(recal, context.getLocation()); + if( recalDatum == null ) { throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); } + final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY); + if( lod == Double.NEGATIVE_INFINITY ) { + throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc ); + } + + VariantContextBuilder builder = new VariantContextBuilder(vc); + String filterString = null; + // Annotate the new record with its VQSLOD and the worst performing annotation - builder.attribute(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", lod)); - builder.attribute(VariantRecalibrator.CULPRIT_KEY, worstAnnotation); + builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod); + builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY)); for( int i = tranches.size() - 1; i >= 0; i-- ) { final Tranche tranche = tranches.get(i); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index a957bfd85..6f82d0885 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -30,14 +30,16 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -285,11 +287,31 @@ public class VariantDataManager { (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples()); } - public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) { + public void writeOutRecalibrationTable( final VCFWriter recalWriter ) { + // we need to sort in coordinate order in order to produce a valid VCF + Collections.sort( data, new Comparator() { + public int compare(VariantDatum vd1, VariantDatum vd2) { + return vd1.loc.compareTo(vd2.loc); + }} ); + + // create dummy alleles to be used + final List alleles = new ArrayList(2); + alleles.add(Allele.create("N", true)); + alleles.add(Allele.create("", false)); + + final VCFHeader vcfHeader = new VCFHeader( null, Collections.emptySet() ); + recalWriter.writeHeader(vcfHeader); + + // to be used for the important INFO tags + final HashMap attributes = new HashMap(3); + for( final VariantDatum datum : data ) { - RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s", - datum.contig, datum.start, datum.stop, datum.lod, - (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"))); + attributes.put(VCFConstants.END_KEY, datum.loc.getStop()); + attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod)); + attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")); + + VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles).attributes(attributes); + recalWriter.add(builder.make()); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java index eb9e98fcb..32350f0fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDatum.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broadinstitute.sting.utils.GenomeLoc; + /** * Created by IntelliJ IDEA. * User: rpoplin @@ -46,9 +48,7 @@ public class VariantDatum implements Comparable { public double originalQual; public double prior; public int consensusCount; - public String contig; - public int start; - public int stop; + public GenomeLoc loc; public int worstAnnotation; public MultivariateGaussian assignment; // used in K-means implementation diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 3cdcf4982..58bbec7d9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -37,6 +37,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; @@ -136,7 +137,7 @@ public class VariantRecalibrator extends RodWalker Date: Tue, 17 Apr 2012 23:17:28 -0400 Subject: [PATCH 3/6] Minor tweaks and updated integration tests MD5s --- .../ApplyRecalibration.java | 97 +++++++++++-------- .../VariantDataManager.java | 5 +- .../VariantRecalibrator.java | 17 +++- ...ntRecalibrationWalkersIntegrationTest.java | 4 +- 4 files changed, 69 insertions(+), 54 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index fd0997802..26f881063 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -109,7 +109,7 @@ public class ApplyRecalibration extends RodWalker { // Command Line Arguments ///////////////////////////// @Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering", required=false) - private double TS_FILTER_LEVEL = 99.0; + protected double TS_FILTER_LEVEL = 99.0; @Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false) private String[] IGNORE_INPUT_FILTERS = null; @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both SNPs and indels simultaneously.", required = false) @@ -183,58 +183,69 @@ public class ApplyRecalibration extends RodWalker { return 1; } - for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) { - if( vc != null ) { + final List VCs = tracker.getValues(input, context.getLocation()); + final List recals = tracker.getValues(recal, context.getLocation()); - if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { + for( final VariantContext vc : VCs ) { - final VariantContext recalDatum = tracker.getFirstValue(recal, context.getLocation()); - if( recalDatum == null ) { - throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); - } + if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) { - final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY); - if( lod == Double.NEGATIVE_INFINITY ) { - throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc ); - } - - VariantContextBuilder builder = new VariantContextBuilder(vc); - String filterString = null; - - // Annotate the new record with its VQSLOD and the worst performing annotation - builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod); - builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY)); - - for( int i = tranches.size() - 1; i >= 0; i-- ) { - final Tranche tranche = tranches.get(i); - if( lod >= tranche.minVQSLod ) { - if( i == tranches.size() - 1 ) { - filterString = VCFConstants.PASSES_FILTERS_v4; - } else { - filterString = tranche.name; - } - break; - } - } - - if( filterString == null ) { - filterString = tranches.get(0).name+"+"; - } - - if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { - builder.filters(filterString); - } - - vcfWriter.add( builder.make() ); - } else { // valid VC but not compatible with this mode, so just emit the variant untouched - vcfWriter.add( vc ); + final VariantContext recalDatum = getMatchingRecalVC(vc, recals); + if( recalDatum == null ) { + throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); } + + final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY); + if( lod == Double.NEGATIVE_INFINITY ) { + throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc ); + } + + VariantContextBuilder builder = new VariantContextBuilder(vc); + String filterString = null; + + // Annotate the new record with its VQSLOD and the worst performing annotation + builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod); + builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY)); + + for( int i = tranches.size() - 1; i >= 0; i-- ) { + final Tranche tranche = tranches.get(i); + if( lod >= tranche.minVQSLod ) { + if( i == tranches.size() - 1 ) { + filterString = VCFConstants.PASSES_FILTERS_v4; + } else { + filterString = tranche.name; + } + break; + } + } + + if( filterString == null ) { + filterString = tranches.get(0).name+"+"; + } + + if( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { + builder.filters(filterString); + } + + vcfWriter.add( builder.make() ); + } else { // valid VC but not compatible with this mode, so just emit the variant untouched + vcfWriter.add( vc ); } } return 1; // This value isn't used for anything } + private static VariantContext getMatchingRecalVC(final VariantContext target, final List recalVCs) { + for( final VariantContext recalVC : recalVCs ) { + if ( target.getEnd() == recalVC.getEnd() ) { + return recalVC; + } + } + + return null; + } + //--------------------------------------------------------------------------------------------------------------- // // reduce diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 6f82d0885..e2d1692d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -299,9 +299,6 @@ public class VariantDataManager { alleles.add(Allele.create("N", true)); alleles.add(Allele.create("", false)); - final VCFHeader vcfHeader = new VCFHeader( null, Collections.emptySet() ); - recalWriter.writeHeader(vcfHeader); - // to be used for the important INFO tags final HashMap attributes = new HashMap(3); @@ -310,7 +307,7 @@ public class VariantDataManager { attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod)); attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")); - VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles).attributes(attributes); + VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStart(), alleles).attributes(attributes); recalWriter.add(builder.make()); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 58bbec7d9..f86908dbe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -37,7 +37,8 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter; +import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.io.Resource; @@ -137,9 +138,11 @@ public class VariantRecalibrator extends RodWalkeremptySet() ); + recalWriter = new StandardVCFWriter(recalFile, getMasterSequenceDictionary(), false); + recalWriter.writeHeader(vcfHeader); } //--------------------------------------------------------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index c81891ac6..91a06bd42 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -27,7 +27,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf", "0ddd1e0e483d2eaf56004615cea23ec7", // tranches - "58780f63182e139fdbe17f6c18b5b774", // recal file + "f8e21a1987960b950db1f0d98be45352", // recal file "f67d844b6252a55452cf4167b77530b1"); // cut VCF @DataProvider(name = "VRTest") @@ -74,7 +74,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf", "6d7ee4cb651c8b666e4a4523363caaff", // tranches - "4759b111a5aa53975d46e0f22c7983bf", // recal file + "ee5b408c8434a594496118875690c438", // recal file "5d7e07d8813db96ba3f3dfe4737f83d1"); // cut VCF @DataProvider(name = "VRIndelTest") From 4448a3ea76fae50d9efcb4b27a59833ab44674ce Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Apr 2012 23:54:10 -0400 Subject: [PATCH 5/6] Final tweaks. Added an integration test to cover the case of SNPs and indels that start at the same position. --- .../variantrecalibration/ApplyRecalibration.java | 12 +++++++++--- ...riantRecalibrationWalkersIntegrationTest.java | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 26f881063..898401e1b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -195,16 +195,22 @@ public class ApplyRecalibration extends RodWalker { throw new UserException("Encountered input variant which isn't found in the input recal file. Please make sure VariantRecalibrator and ApplyRecalibration were run on the same set of input variants. First seen at: " + vc ); } - final double lod = recalDatum.getAttributeAsDouble(VariantRecalibrator.VQS_LOD_KEY, Double.NEGATIVE_INFINITY); - if( lod == Double.NEGATIVE_INFINITY ) { + final String lodString = recalDatum.getAttributeAsString(VariantRecalibrator.VQS_LOD_KEY, null); + if( lodString == null ) { throw new UserException("Encountered a malformed record in the input recal file. There is no lod for the record at: " + vc ); } + final double lod; + try { + lod = Double.valueOf(lodString); + } catch (NumberFormatException e) { + throw new UserException("Encountered a malformed record in the input recal file. The lod is unreadable for the record at: " + vc ); + } VariantContextBuilder builder = new VariantContextBuilder(vc); String filterString = null; // Annotate the new record with its VQSLOD and the worst performing annotation - builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod); + builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY)); for( int i = tranches.size() - 1; i >= 0; i-- ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 91a06bd42..11e093a6c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -118,5 +118,21 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { Arrays.asList(params.cutVCFMD5)); executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec); } + + @Test + public void testApplyRecalibrationSnpAndIndelTogether() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + b37KGReference + + " -T ApplyRecalibration" + + " -L 20:1000100-1000500" + + " -mode BOTH" + + " -NO_HEADER" + + " -input " + validationDataLocation + "VQSR.mixedTest.input" + + " -o %s" + + " -tranchesFile " + validationDataLocation + "VQSR.mixedTest.tranches" + + " -recalFile " + validationDataLocation + "VQSR.mixedTest.recal", + Arrays.asList("08060b7f5c9cf3bb1692b50c58fd5a4b")); + executeTest("testApplyRecalibrationSnpAndIndelTogether", spec); + } } From 8a844566268c55b159dbfa7016405ff08437ea3c Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 18 Apr 2012 11:24:04 -0400 Subject: [PATCH 6/6] Following Eric's awesome update to change the VQSR recal file into a VCF file, the ApplyRecalibration step is now scatter/gather-able and tree reducible. --- .../walkers/variantrecalibration/ApplyRecalibration.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 898401e1b..5b1d69f14 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -83,8 +84,8 @@ import java.util.*; * */ -@PartitionBy(PartitionType.NONE) -public class ApplyRecalibration extends RodWalker { +@PartitionBy(PartitionType.LOCUS) +public class ApplyRecalibration extends RodWalker implements TreeReducible { ///////////////////////////// // Inputs @@ -266,6 +267,10 @@ public class ApplyRecalibration extends RodWalker { return 1; // This value isn't used for anything } + public Integer treeReduce( final Integer lhs, final Integer rhs ) { + return 1; // This value isn't used for anything + } + public void onTraversalDone( final Integer reduceSum ) { } }