From 671330950d6c0249f92e37b0dd68dea58ced23e8 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 17 Aug 2011 15:46:31 -0400 Subject: [PATCH] Updated Beagle walker for gatkdocs format. Pushed unsupported, undocumented arguments to @Hidden --- .../beagle/BeagleOutputToVCFWalker.java | 37 +++++++++++---- .../beagle/ProduceBeagleInputWalker.java | 45 ++++++++++++++++--- 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 40e6748ed..aca176bc2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -48,6 +48,31 @@ import static java.lang.Math.log10; /** * Takes files produced by Beagle imputation engine and creates a vcf with modified annotations. + * + *

This walker is intended to be run after Beagle has successfully executed. The full calling sequence for using Beagle along with the GATK is:

+ * + *

1. Run ProduceBeagleInputWalker.

+ *

2. Run Beagle.

+ *

3. Uncompress the output files.

+ *

4. Run BeagleOutputToVCFWalker.

+ * + * + * Note that this walker requires all input files produced by Beagle. + * + * + *

Example

+ *
+ *     java -Xmx4000m -jar dist/GenomeAnalysisTK.jar \
+ *      -R reffile.fasta -T BeagleOutputToVCF \
+ *      -B:variant,VCF input_vcf.vcf \
+ *      -B:beagleR2,BEAGLE /myrun.beagle_output.r2 \
+ *      -B:beaglePhased,BEAGLE /myrun.beagle_output.phased \
+ *      -B:beagleProbs,BEAGLE /myrun.beagle_output.gprobs \
+ *      --out output_vcf.vcf
+ *      
+ +

Note that Beagle produces some of these files compressed as .gz, so they must be decompressed with gunzip before the walker is run.

+ */ public class BeagleOutputToVCFWalker extends RodWalker { @@ -57,22 +82,18 @@ public class BeagleOutputToVCFWalker extends RodWalker { @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) public RodBinding comp; - @Input(fullName="beagleR2", shortName = "beagleR2", doc="VCF file", required=true) + @Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true) public RodBinding beagleR2; - @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="VCF file", required=true) + @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true) public RodBinding beagleProbs; - @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="VCF file", required=true) + @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true) public RodBinding beaglePhased; - @Output(doc="File to which variants should be written",required=true) + @Output(doc="VCF File to which variants should be written",required=true) protected VCFWriter vcfWriter = null; - @Argument(fullName="output_file", shortName="output", doc="Please use --out instead" ,required=false) - @Deprecated - protected String oldOutputArg; - @Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. 
Useful for imputing a sample's genotypes from a reference panel" ,required=false) public boolean DONT_FILTER_MONOMORPHIC_SITES = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java index c1508cf83..6ac817555 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInputWalker.java @@ -48,19 +48,45 @@ import java.io.PrintStream; import java.util.*; /** - * Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file + * Converts the input VCF into a format accepted by the Beagle imputation/analysis program. + *

+ * + *

Input

+ *

+ * A VCF with variants to convert to Beagle format + *

+ * + *

Outputs

+ *

+ * A single text file which can be fed to Beagle + *

+ *

+ * Optional: A file with a list of markers + *

+ * + *

Examples

+ *
+ *     java -Xmx2g -jar dist/GenomeAnalysisTK.jar -L 20 \
+ *      -R reffile.fasta -T ProduceBeagleInput \
+ *      -B:variant,VCF path_to_input_vcf/inputvcf.vcf -o path_to_beagle_output/beagle_output
+ * 
+ * */ + public class ProduceBeagleInputWalker extends RodWalker { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); - @Input(fullName="validation", shortName = "validation", doc="Input VCF file", required=false) + @Hidden + @Input(fullName="validation", shortName = "validation", doc="Validation VCF file", required=false) public RodBinding validation; + @Output(doc="File to which BEAGLE input should be written",required=true) protected PrintStream beagleWriter = null; - @Output(doc="File to which BEAGLE markers should be written", shortName="markers", fullName = "markers", required = false) + @Hidden + @Output(doc="File to which BEAGLE markers should be written", shortName="markers", fullName = "markers", required = false) protected PrintStream markers = null; int markerCounter = 1; @@ -73,14 +99,19 @@ public class ProduceBeagleInputWalker extends RodWalker { @Argument(doc="VQSqual key", shortName = "vqskey", required=false) protected String VQSLOD_KEY = "VQSqual"; - @Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false) + @Hidden + @Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false) public double insertedNoCallRate = 0; - @Argument(fullName = "validation_genotype_ptrue", shortName = "valp", doc = "Flat probability to assign to validation genotypes. Will override GL field.", required = false) + @Hidden + @Argument(fullName = "validation_genotype_ptrue", shortName = "valp", doc = "Flat probability to assign to validation genotypes. 
Will override GL field.", required = false) public double validationPrior = -1.0; - @Argument(fullName = "validation_bootstrap", shortName = "bs", doc = "Proportion of records to be used in bootstrap set", required = false) + @Hidden + @Argument(fullName = "validation_bootstrap", shortName = "bs", doc = "Proportion of records to be used in bootstrap set", required = false) public double bootstrap = 0.0; - @Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false) + @Hidden + @Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false) VCFWriter bootstrapVCFOutput = null; + @Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false) public boolean CHECK_IS_MALE_ON_CHR_X = false;