From dd5fe8291dae2c3037c29418f50febc06059c10d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 11 Aug 2011 08:36:00 -0400 Subject: [PATCH 1/6] Fixing up some comments in the BQSR --- .../walkers/recalibration/CountCovariatesWalker.java | 12 ++++++------ .../recalibration/TableRecalibrationWalker.java | 3 +-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 914f54363..b167885b9 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -100,7 +100,7 @@ public class CountCovariatesWalker extends LocusWalker requestedCovariates = new ArrayList(); // A list to hold the covariate objects that were requested - private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) - private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen) + private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) + private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen) public static class CountedData { private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file @@ -136,7 +136,7 @@ public class CountCovariatesWalker extends LocusWalker Date: Thu, 11 Aug 2011 08:58:30 -0400 Subject: [PATCH 2/6] Fixed broken RodBinding defaults. -- Verified now to be correct at runtime -- UnitTest covers this -- createTypeDefault now takes a Type, not a Class, so that parameterized classes can have their parameter fetched in the defaults. --- .../sting/commandline/ArgumentSource.java | 2 +- .../commandline/ArgumentTypeDescriptor.java | 10 ++++--- .../sting/commandline/ParsingEngine.java | 2 +- .../OutputStreamArgumentTypeDescriptor.java | 4 +-- .../SAMFileWriterArgumentTypeDescriptor.java | 2 +- .../VCFWriterArgumentTypeDescriptor.java | 2 +- .../commandline/ParsingEngineUnitTest.java | 28 +++++++++++++++---- 7 files changed, 34 insertions(+), 16 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java index c3402bd0a..e0e2ac378 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentSource.java @@ -185,7 +185,7 @@ public class ArgumentSource { * @return A default value for the given type. */ public Object createTypeDefault(ParsingEngine parsingEngine) { - return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType()); + return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType()); } /** diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java index 64c00a726..d1d4ff914 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java @@ -96,12 +96,13 @@ public abstract class ArgumentTypeDescriptor { /** * Generates a default for the given type. + * * @param parsingEngine the parsing engine used to validate this argument type descriptor. * @param source Source of the command-line argument. * @param type Type of value to create, in case the command-line argument system wants influence. * @return A default value for the given type. */ - public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); } + public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); } /** * Given the given argument source and attributes, synthesize argument definitions for command-line arguments. @@ -323,8 +324,9 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor { public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); } @Override - public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Class type) { - return RodBinding.makeUnbound((Class)type); + public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) { + Class parameterType = getParameterizedTypeClass(type); + return RodBinding.makeUnbound((Class)parameterType); } @Override @@ -646,7 +648,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { + public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(multiplexer == null || multiplexedIds == null) throw new ReviewedStingException("No multiplexed ids available"); diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 9b543142b..fbf8c6516 100755 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -364,7 +364,7 @@ public class ParsingEngine { */ private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) { // Nothing to load - if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired())) + if( argumentMatches.size() == 0 && ! source.createsTypeDefault() ) return; // Target instance into which to inject the value. diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java index b70d9eeec..da4eb3955 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/OutputStreamArgumentTypeDescriptor.java @@ -75,12 +75,12 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { + public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(!source.isRequired()) throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); OutputStreamStub stub = new OutputStreamStub(defaultOutputStream); engine.addOutput(stub); - return createInstanceOfClass(type,stub); + return createInstanceOfClass((Class)type,stub); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 8d67a837e..43ec934ed 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -99,7 +99,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor } @Override - public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { + public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(!source.isRequired()) throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 6e5499339..98026554b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -114,7 +114,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { } @Override - public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { + public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(!source.isRequired()) throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); diff --git a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java index 366401ad6..79e9172dd 100755 --- a/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/commandline/ParsingEngineUnitTest.java @@ -632,8 +632,8 @@ public class ParsingEngineUnitTest extends BaseTest { // -------------------------------------------------------------------------------- private class SingleRodBindingArgProvider { - @Input(fullName="binding", shortName="V", required=false) - public RodBinding binding = RodBinding.makeUnbound(Feature.class); + @Input(fullName="binding", shortName="V", required=true) + public RodBinding binding; } @Test @@ -656,7 +656,7 @@ public class ParsingEngineUnitTest extends BaseTest { private class ShortNameOnlyRodBindingArgProvider { @Input(shortName="short", required=false) - public RodBinding binding = RodBinding.makeUnbound(Feature.class); + public RodBinding binding; // = RodBinding.makeUnbound(Feature.class); } @Test @@ -677,22 +677,38 @@ public class ParsingEngineUnitTest extends BaseTest { Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 1, "Tags aren't correctly set"); } + private class OptionalRodBindingArgProvider { + @Input(fullName="binding", shortName="V", required=false) + public RodBinding binding; + + @Input(fullName="bindingNull", shortName="VN", required=false) + public RodBinding bindingNull = null; + } + @Test - public void unbasicRodBindingArgumentTest() { + public void optionalRodBindingArgumentTest() { final String[] commandLine = new String[] {}; - parsingEngine.addArgumentSource( SingleRodBindingArgProvider.class ); + parsingEngine.addArgumentSource( OptionalRodBindingArgProvider.class ); parsingEngine.parse( commandLine ); parsingEngine.validate(); - SingleRodBindingArgProvider argProvider = new SingleRodBindingArgProvider(); + OptionalRodBindingArgProvider argProvider = new OptionalRodBindingArgProvider(); parsingEngine.loadArgumentsIntoObject( argProvider ); + Assert.assertNotNull(argProvider.binding, "Default value not applied corrected to RodBinding"); Assert.assertEquals(argProvider.binding.getName(), RodBinding.UNBOUND_VARIABLE_NAME, "Name isn't set properly"); Assert.assertEquals(argProvider.binding.getSource(), RodBinding.UNBOUND_SOURCE, "Source isn't set to its expected value"); Assert.assertEquals(argProvider.binding.getType(), Feature.class, "Type isn't set to its expected value"); Assert.assertEquals(argProvider.binding.isBound(), false, "Bound() isn't returning its expected value"); Assert.assertEquals(argProvider.binding.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set"); + + Assert.assertNotNull(argProvider.bindingNull, "Default value not applied corrected to RodBinding"); + Assert.assertEquals(argProvider.bindingNull.getName(), RodBinding.UNBOUND_VARIABLE_NAME, "Name isn't set properly"); + Assert.assertEquals(argProvider.bindingNull.getSource(), RodBinding.UNBOUND_SOURCE, "Source isn't set to its expected value"); + Assert.assertEquals(argProvider.bindingNull.getType(), VariantContext.class, "Type isn't set to its expected value"); + Assert.assertEquals(argProvider.bindingNull.isBound(), false, "Bound() isn't returning its expected value"); + Assert.assertEquals(argProvider.bindingNull.getTags().getPositionalTags().size(), 0, "Tags aren't correctly set"); } @Test(expectedExceptions = UserException.class) From ea42ee4a9575724c2f487f5e17166297c935118e Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 11 Aug 2011 09:58:42 -0400 Subject: [PATCH 3/6] Updating BQSR for the new rod binding system. --- .../recalibration/CountCovariatesWalker.java | 51 +++++-------------- .../RecalibrationWalkersIntegrationTest.java | 38 +++----------- 2 files changed, 20 insertions(+), 69 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index b167885b9..b4739f366 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -25,15 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import org.broad.tribble.bed.BEDCodec; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Gather; -import org.broadinstitute.sting.commandline.Output; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,8 +37,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -94,12 +87,17 @@ public class CountCovariatesWalker extends LocusWalker> knownSites = Collections.emptyList(); + @Output + PrintStream out; @Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file") @Gather(CountCovariatesGatherer.class) public PrintStream RECAL_FILE; @@ -190,20 +188,8 @@ public class CountCovariatesWalker extends LocusWalker 0; - // Only use data from non-dbsnp sites // Assume every mismatch at a non-dbsnp site is indicative of poor quality CountedData counter = new CountedData(); - if( !isSNP ) { + if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed // For each read at this locus for( final PileupElement p : context.getBasePileup() ) { final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead(); @@ -358,8 +335,6 @@ public class CountCovariatesWalker extends LocusWalker e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:dbsnp,VCF3 " + validationDataLocation + "vcfexample3.vcf" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -standard" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testCountCovariatesVCF", spec); - } - } - @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); @@ -260,7 +236,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:bed,bed " + validationDataLocation + "recalibrationTest.bed" + + " -knownSites:bed " + validationDataLocation + "recalibrationTest.bed" + " -T CountCovariates" + " -I " + bam + " -L 1:10,000,000-10,200,000" + @@ -284,10 +260,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + + " -knownSites:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -knownSites " + b36dbSNP129 + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -312,7 +288,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -knownSites " + b36dbSNP129 + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + From e71255d3c29b848c41a763dbbb4111721421c0d3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 11 Aug 2011 11:01:21 -0400 Subject: [PATCH 4/6] GATKDocsExample walker -- Shows the best practice for documentating a walker with the GATKdocs -- See http://www.broadinstitute.org/gsa/wiki/index.php/GATKdocs#Writing_GATKdocs_for_your_walkers for a brief discussion --- .../sting/gatk/examples/GATKDocsExample.java | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java new file mode 100644 index 000000000..4541a0537 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.examples; + +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; + +/** + * [Short one sentence description of this walker] + * + *

+ * [Functionality of this walker] + *

+ * + *

Input

+ *

+ * [Input description] + *

+ * + *

Output

+ *

+ * [Output description] + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T $WalkerName
+ *  
+ * + * @author Your Name + * @since Date created + */ +public class GATKDocsExample extends RodWalker { + /** + * Put detailed documentation about the argument here. No need to duplicate the summary information + * in doc annotation field, as that will be added before this text in the documentation page. + * + * Notes: + *
    + *
  • This field can contain HTML as a normal javadoc
  • + *
  • Don't include information about the default value, as gatkdocs adds this automatically
  • + *
  • Try your best to describe in detail the behavior of the argument, as ultimately confusing + * docs here will just result in user posts on the forum
  • + *
+ */ + @Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false) + private boolean myWalkerArgument = false; + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; } + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { return value + sum; } + public void onTraversalDone(Integer result) { } +} From c7b9a9ef0a7702b38464419d2ffdf61f52157a13 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 11 Aug 2011 11:02:11 -0400 Subject: [PATCH 5/6] Updating UnifiedGenotyper to use the new rod binding system. --- .../GenotypeLikelihoodsCalculationModel.java | 2 + ...elGenotypeLikelihoodsCalculationModel.java | 2 +- ...NPGenotypeLikelihoodsCalculationModel.java | 7 ++-- .../genotyper/UnifiedArgumentCollection.java | 9 +++++ .../walkers/genotyper/UnifiedGenotyper.java | 37 +++++++++++++++---- .../genotyper/UnifiedGenotyperEngine.java | 5 ++- .../UnifiedGenotyperIntegrationTest.java | 10 ++--- 7 files changed, 54 insertions(+), 18 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 8261cd588..594c1dd28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Map; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 897e1a668..41b340058 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -321,7 +321,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap.clear(); if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles") ) { + for( final VariantContext vc_input : tracker.getValues(UAC.alleles) ) { if( vc_input != null && allowableTypes.contains(vc_input.getType()) && ref.getLocus().getStart() == vc_input.getStart()) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9205e33a0..477155241 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -57,13 +58,13 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; } - public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger) { + public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { if ( tracker == null || ref == null || logger == null ) throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger); VariantContext vc = null; // search for usable record - for( final VariantContext vc_input : tracker.getValues(VariantContext.class, "alleles", ref.getLocus()) ) { + for( final VariantContext vc_input : tracker.getValues(allelesBinding) ) { if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) { if ( vc == null ) { vc = vc_input; @@ -95,7 +96,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC if ( alternateAlleleToUse != null ) { bestAlternateAllele = alternateAlleleToUse.getBases()[0]; } else if ( useAlleleFromVCF ) { - VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger); + VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger, UAC.alleles); // ignore places where we don't have a variant if ( vc == null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 52bf3f715..1a76bfd07 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -27,6 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Hidden; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; @@ -61,6 +64,11 @@ public class UnifiedArgumentCollection { @Argument(fullName = "computeSLOD", shortName = "sl", doc = "If provided, we will calculate the SLOD", required = false) public boolean COMPUTE_SLOD = false; + /** + * When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding + */ + @Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when in GENOTYPE_MODE = GENOTYPE_GIVEN_ALLELES", required=false) + public RodBinding alleles; // control the error modes @Hidden @@ -168,6 +176,7 @@ public class UnifiedArgumentCollection { uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES; + uac.alleles = alleles; uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA; uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0f2d73c3a..5202d97d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,10 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -69,9 +66,35 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; } - public RodBinding getSnpEffRodBinding() { return null; } - public List> getCompRodBindings() { return Collections.emptyList(); } - public List> getResourceRodBindings() { return Collections.emptyList(); } + + /** + * The INFO field will be annotated with information on the most biologically-significant effect + * listed in the SnpEff output file for each variant. + */ + @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) + public RodBinding snpEffFile; + public RodBinding getSnpEffRodBinding() { return snpEffFile; } + + /** + * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated + * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are filtered in the comp track will be ignored. + * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). + */ + @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) + public List> comps = Collections.emptyList(); + public List> getCompRodBindings() { return comps; } + + /** + * An external resource VCF file or files from which to annotate. + * + * One can add annotations from one of the resource VCFs to the output. + * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource', + * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position. + * If multiple records in the rod overlap the given position, one is chosen arbitrarily. + */ + @Input(fullName="resource", shortName = "resource", doc="external resource VCF file", required=false) + public List> resources = Collections.emptyList(); + public List> getResourceRodBindings() { return resources; } // control the output @Output(doc="File to which variants should be written",required=true) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 77f1c5e25..dc728ff6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -58,6 +58,7 @@ public class UnifiedGenotyperEngine { // the unified argument collection private final UnifiedArgumentCollection UAC; + public UnifiedArgumentCollection getUAC() { return UAC; } // the annotation engine private final VariantAnnotatorEngine annotationEngine; @@ -232,7 +233,7 @@ public class UnifiedGenotyperEngine { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { VariantContext vc; if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger); + VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles); if ( vcInput == null ) return null; vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()); @@ -630,7 +631,7 @@ public class UnifiedGenotyperEngine { // no extended event pileup // if we're genotyping given alleles and we have a requested SNP at this position, do SNP if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { - VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger); + VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger, UAC.alleles); if (vcInput == null) return null; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 88c5116b1..d11fb7e29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, + baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, Arrays.asList(md5)); executeTest("test MultiSample Pilot2 with alleles passed in", spec2); } @@ -53,12 +53,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testWithAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, + baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, Arrays.asList("811ddc0bd8322b14f14f58df8c627aa9")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, + baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, Arrays.asList("5cf08dd7ac3d218082f7be3915ce0b15")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -286,13 +286,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testWithIndelAllelesPassedIn() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, Arrays.asList("69b0b3f089c80b9864294d838a061336")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, Arrays.asList("c90174cfd7dd68bdef36fe2c60145e10")); From b705d9cf15c2a0d39b5c0e589f45dec6d122f641 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 11 Aug 2011 13:17:16 -0400 Subject: [PATCH 6/6] Oops, these VariantAnnotator input bindings aren't needed during the UG --- .../walkers/genotyper/UnifiedGenotyper.java | 32 ++----------------- 1 file changed, 3 insertions(+), 29 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 5202d97d0..cbda870aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -66,35 +66,9 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; } - - /** - * The INFO field will be annotated with information on the most biologically-significant effect - * listed in the SnpEff output file for each variant. - */ - @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="SnpEff file", required=false) - public RodBinding snpEffFile; - public RodBinding getSnpEffRodBinding() { return snpEffFile; } - - /** - * If a record in the 'variant' track overlaps with a record from the provided comp track, the INFO field will be annotated - * as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field). Records that are filtered in the comp track will be ignored. - * Note that 'dbSNP' has been special-cased (see the --dbsnp argument). - */ - @Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false) - public List> comps = Collections.emptyList(); - public List> getCompRodBindings() { return comps; } - - /** - * An external resource VCF file or files from which to annotate. - * - * One can add annotations from one of the resource VCFs to the output. - * For example, if you want to annotate your 'variant' VCF with the AC field value from the rod bound to 'resource', - * you can specify '-E resource.AC' and records in the output VCF will be annotated with 'resource.AC=N' when a record exists in that rod at the given position. - * If multiple records in the rod overlap the given position, one is chosen arbitrarily. - */ - @Input(fullName="resource", shortName = "resource", doc="external resource VCF file", required=false) - public List> resources = Collections.emptyList(); - public List> getResourceRodBindings() { return resources; } + public RodBinding getSnpEffRodBinding() { return null; } + public List> getCompRodBindings() { return Collections.emptyList(); } + public List> getResourceRodBindings() { return Collections.emptyList(); } // control the output @Output(doc="File to which variants should be written",required=true)