Updating HaplotypeCaller docs and expanding integration tests.

This commit is contained in:
Ryan Poplin 2012-07-23 10:05:42 -04:00
parent cd4b0dda71
commit 2e486d83e2
4 changed files with 35 additions and 18 deletions

View File

@ -446,7 +446,6 @@ public class GenotypingEngine {
}
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
@Ensures({"result >= 0.0", "result <= 1.0"})
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
final int total = x11 + x12 + x21 + x22;
final double pa1b1 = ((double) x11) / ((double) total);

View File

@ -82,10 +82,19 @@ import java.util.*;
* -jar GenomeAnalysisTK.jar
* -T HaplotypeCaller
* -R reference/human_g1k_v37.fasta
* -I input.bam
* -I sample1.bam [-I sample2.bam ...] \
* --dbsnp dbSNP.vcf \
* -stand_call_conf [50.0] \
* -stand_emit_conf 10.0 \
* [-L targets.interval_list]
* -o output.raw.snps.indels.vcf
* </pre>
*
* <h2>Caveats</h2>
* <ul>
* <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li>
* </ul>
*
* @author rpoplin
* @since 8/22/11
*/
@ -95,7 +104,7 @@ import java.util.*;
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
* A raw, unfiltered, highly specific callset in VCF format.
* A raw, unfiltered, highly sensitive callset in VCF format.
*/
@Output(doc="File to which variants should be written", required = true)
protected VariantContextWriter vcfWriter = null;
@ -103,17 +112,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Output(fullName="graphOutput", shortName="graph", doc="File to which debug assembly graph information should be written", required = false)
protected PrintStream graphWriter = null;
@Argument(fullName = "assembler", shortName = "assembler", doc = "Assembler to use; currently only SIMPLE_DE_BRUIJN is available.", required = false)
protected LocalAssemblyEngine.ASSEMBLER ASSEMBLER_TO_USE = LocalAssemblyEngine.ASSEMBLER.SIMPLE_DE_BRUIJN;
@Argument(fullName="keepRG", shortName="keepRG", doc="keepRG", required = false)
@Hidden
@Argument(fullName="keepRG", shortName="keepRG", doc="Only use read from this read group when making calls (but use all reads to build the assembly)", required = false)
protected String keepRG = null;
@Hidden
@Argument(fullName="mnpLookAhead", shortName="mnpLookAhead", doc = "The number of bases to combine together to form MNPs out of nearby consecutive SNPs on the same haplotype", required = false)
protected int MNP_LOOK_AHEAD = 0;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = true)
protected int MIN_PRUNE_FACTOR = 0;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
@Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
@ -121,10 +129,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="gcpHMM", required = false)
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage per sample to downsample each region to", required = false)
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@Argument(fullName="useExpandedTriggerSet", shortName="expandedTriggers", doc = "If specified, use additional, experimental triggers designed to capture larger indels but which may lead to an increase in the false positive rate", required=false)
@ -185,9 +194,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information about each triggering active region", required = false)
protected boolean DEBUG;
@Argument(fullName="doBanded", shortName="doBanded", doc="If specified, use the banded option", required = false)
protected boolean doBanded = false;
// the assembly engine
LocalAssemblyEngine assemblyEngine = null;
@ -275,7 +281,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, doBanded );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, false );
genotypingEngine = new GenotypingEngine( DEBUG, MNP_LOOK_AHEAD, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
}

View File

@ -32,5 +32,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "ff370c42c8b09a29f1aeff5ac57c7ea6");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 3";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
}
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
}
}

View File

@ -67,7 +67,7 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* A raw, unfiltered, highly specific callset in VCF format.
* A raw, unfiltered, highly sensitive callset in VCF format.
* </p>
*
* <h2>Example generic command for multi-sample SNP calling</h2>
@ -150,7 +150,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
public boolean alwaysAppendDbsnpId() { return false; }
/**
* A raw, unfiltered, highly specific callset in VCF format.
* A raw, unfiltered, highly sensitive callset in VCF format.
*/
@Output(doc="File to which variants should be written",required=true)
protected VariantContextWriter writer = null;