gatk-3.8/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ProduceBeagleInputWalker.java

153 lines
5.9 KiB
Java
Executable File

/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFCodec;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.io.PrintStream;
import java.util.*;
/**
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF fiel
*/
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public PrintStream beagleWriter = null;
final TreeSet<String> samples = new TreeSet<String>();
public void initialize() {
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for ( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(VCFCodec.class) ) {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
reader.close();
}
}
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
beagleWriter.println();
}
}
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if( tracker != null ) {
EnumSet<VariantContext.Type> vc = EnumSet.of(VariantContext.Type.SNP);
GenomeLoc loc = context.getLocation();
VariantContext vc_eval;
try {
vc_eval = tracker.getVariantContext(ref,"eval", vc, loc, true);
} catch (java.util.NoSuchElementException e) {
return 0;
}
// output marker ID to Beagle input file
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
for (Allele allele: vc_eval.getAlleles()) {
// TODO -- check whether this is really needed by Beagle
String bglPrintString;
if (allele.isNoCall() || allele.isNull())
bglPrintString = "0";
else
bglPrintString = allele.toString().substring(0,1); // get rid of * in case of reference allele
beagleWriter.print(String.format("%s ", bglPrintString));
}
Map<String, Genotype> genotypes = vc_eval.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
for ( String sample : samples ) {
// use sample as key into genotypes structure
Genotype genotype = genotypes.get(sample);
String gls = "0 0 0 ";
if (genotype.isCalled()) {
String[] glArray = genotype.getAttributeAsString("GL").split(",");
for (String gl : glArray) {
Double d_gl = -Double.valueOf(gl);
beagleWriter.print(String.format("%5.2f ",d_gl));
}
}
else
beagleWriter.print(gls); // write 0 likelihoods for uncalled genotypes.
}
beagleWriter.println();
}
return 1;
}
public Integer reduceInit() {
return 0; // Nothing to do here
}
public Integer reduce( Integer value, Integer sum ) {
return 0; // Nothing to do here
}
public void onTraversalDone( Integer sum ) {
}
}