From 2e486d83e212b5a587840981adda290c03126d7f Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Mon, 23 Jul 2012 10:05:42 -0400 Subject: [PATCH] Updating HaplotypeCaller docs and expanding integration tests. --- .../haplotypecaller/GenotypingEngine.java | 1 - .../haplotypecaller/HaplotypeCaller.java | 36 +++++++++++-------- .../HaplotypeCallerIntegrationTest.java | 12 +++++++ .../walkers/genotyper/UnifiedGenotyper.java | 4 +-- 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index bdd04cba1..e2445e926 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -446,7 +446,6 @@ public class GenotypingEngine { } @Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"}) - @Ensures({"result >= 0.0", "result <= 1.0"}) protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) { final int total = x11 + x12 + x21 + x22; final double pa1b1 = ((double) x11) / ((double) total); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 546f53002..343e6c96c 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -82,10 +82,19 @@ import java.util.*; * -jar GenomeAnalysisTK.jar * -T HaplotypeCaller * -R reference/human_g1k_v37.fasta - * -I input.bam + * -I sample1.bam [-I sample2.bam ...] \ + * --dbsnp dbSNP.vcf \ + * -stand_call_conf [50.0] \ + * -stand_emit_conf 10.0 \ + * [-L targets.interval_list] * -o output.raw.snps.indels.vcf * * + *

Caveats

+ * + * * @author rpoplin * @since 8/22/11 */ @@ -95,7 +104,7 @@ import java.util.*; public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** - * A raw, unfiltered, highly specific callset in VCF format. + * A raw, unfiltered, highly sensitive callset in VCF format. */ @Output(doc="File to which variants should be written", required = true) protected VariantContextWriter vcfWriter = null; @@ -103,17 +112,16 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false) protected PrintStream graphWriter = null; - @Argument(fullName = "assembler", shortName = "assembler", doc = "Assembler to use; currently only SIMPLE_DE_BRUIJN is available.", required = false) - protected LocalAssemblyEngine.ASSEMBLER ASSEMBLER_TO_USE = LocalAssemblyEngine.ASSEMBLER.SIMPLE_DE_BRUIJN; - - @Argument(fullName="keepRG", shortName="keepRG", doc="keepRG", required = false) + @Hidden + @Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false) protected String keepRG = null; - + + @Hidden @Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false) protected int MNP_LOOK_AHEAD = 0; - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = true) - protected int MIN_PRUNE_FACTOR = 0; + @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false) + protected int MIN_PRUNE_FACTOR = 1; @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false) protected boolean GENOTYPE_FULL_ACTIVE_REGION = false; @@ -121,10 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false) protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false; - @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="gcpHMM", required = false) + @Advanced + @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false) protected int gcpHMM = 10; - @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage per sample to downsample each region to", required = false) + @Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false) protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000; @Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false) @@ -185,9 +194,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false) protected boolean DEBUG; - @Argument(fullName="doBanded", shortName="doBanded", doc="If specified, use the banded option", required = false) - protected boolean doBanded = false; - // the assembly engine LocalAssemblyEngine assemblyEngine = null; @@ -275,7 +281,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter ); - likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, doBanded ); + likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false ); genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE ); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 70b558054..a87703423 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -32,5 +32,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6"); } + + private void HCTestComplexVariants(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerMultiSampleComplex() { + HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); + } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 10c8c0d69..b3a0b2a5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -67,7 +67,7 @@ import java.util.*; * *

Output

*

- * A raw, unfiltered, highly specific callset in VCF format. + * A raw, unfiltered, highly sensitive callset in VCF format. *

* *

Example generic command for multi-sample SNP calling

@@ -150,7 +150,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif public boolean alwaysAppendDbsnpId() { return false; } /** - * A raw, unfiltered, highly specific callset in VCF format. + * A raw, unfiltered, highly sensitive callset in VCF format. */ @Output(doc="File to which variants should be written",required=true) protected VariantContextWriter writer = null;