From 3873dccb35eb904d2824c4ae8e7aad0c84c55635 Mon Sep 17 00:00:00 2001 From: delangel Date: Wed, 26 May 2010 21:03:23 +0000 Subject: [PATCH] First fully functional (though preliminary) version of walker that takes an input VCF and outputs a Beagle .bgl file that can be used for missing genotype calls/haplotype imputation. For now, only supported input format is likelihood format for unrelated individuals. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3444 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/ProduceBeagleInputWalker.java | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/ProduceBeagleInputWalker.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ProduceBeagleInputWalker.java new file mode 100755 index 000000000..375d10288 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ProduceBeagleInputWalker.java @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.vcf.VCFCodec; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.ArgumentCollection; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; + +import java.io.PrintStream; +import java.util.*; + +/** + * Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF fiel + */ +public class ProduceBeagleInputWalker extends RodWalker { + + @Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false) + public PrintStream beagleWriter = null; + + final TreeSet samples = new TreeSet(); + + + + public void initialize() { + + final List dataSources = this.getToolkit().getRodDataSources(); + for ( final ReferenceOrderedDataSource source : dataSources ) { + final RMDTrack rod = source.getReferenceOrderedData(); + if ( rod.getType().equals(VCFCodec.class) ) { + final VCFReader reader = new VCFReader(rod.getFile()); + final Set vcfSamples = reader.getHeader().getGenotypeSamples(); + samples.addAll(vcfSamples); + reader.close(); + } + } + + + if ( beagleWriter != null ) { + beagleWriter.print("marker alleleA alleleB"); + for ( String sample : samples ) + beagleWriter.print(String.format(" %s %s %s", sample, sample, sample)); + + beagleWriter.println(); + } + + } + public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { + + if( tracker != null ) { + EnumSet vc = EnumSet.of(VariantContext.Type.SNP); + GenomeLoc loc = context.getLocation(); + VariantContext vc_eval; + + + try { + vc_eval = tracker.getVariantContext(ref,"eval", vc, loc, true); + } catch (java.util.NoSuchElementException e) { + return 0; + } + + // output marker ID to Beagle input file + beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString())); + + for (Allele allele: vc_eval.getAlleles()) { + // TODO -- check whether this is really needed by Beagle + String bglPrintString; + if (allele.isNoCall() || allele.isNull()) + bglPrintString = "0"; + else + bglPrintString = allele.toString().substring(0,1); // get rid of * in case of reference allele + + beagleWriter.print(String.format("%s ", bglPrintString)); + } + + Map genotypes = vc_eval.getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) + return null; + for ( String sample : samples ) { + // use sample as key into genotypes structure + Genotype genotype = genotypes.get(sample); + String gls = "0 0 0 "; + if (genotype.isCalled()) { + String[] glArray = genotype.getAttributeAsString("GL").split(","); + + for (String gl : glArray) { + Double d_gl = -Double.valueOf(gl); + beagleWriter.print(String.format("%5.2f ",d_gl)); + } + } + else + beagleWriter.print(gls); // write 0 likelihoods for uncalled genotypes. + + } + + beagleWriter.println(); + + } + return 1; + + } + + public Integer reduceInit() { + return 0; // Nothing to do here + } + + public Integer reduce( Integer value, Integer sum ) { + return 0; // Nothing to do here + } + + public void onTraversalDone( Integer sum ) { + + } + +}