No longer uses VCFRecord.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3762 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e75b3e13bd
commit
f130d29318
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
import org.broad.tribble.vcf.VCFRecord;
|
|
||||||
import org.broad.tribble.vcf.VCFConstants;
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
@ -33,26 +32,25 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF file
|
* Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input variant file
|
||||||
* @help.summary Produces an input file to Beagle imputation engine, listing genotype likelihoods for each sample in input VCF file
|
|
||||||
*/
|
*/
|
||||||
@Requires(value={},referenceMetaData=@RMD(name="vcf",type= VCFRecord.class))
|
@Requires(value={},referenceMetaData=@RMD(name=ProduceBeagleInputWalker.ROD_NAME, type=ReferenceOrderedDatum.class))
|
||||||
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
public static final String ROD_NAME = "variant";
|
||||||
|
|
||||||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = true)
|
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = true)
|
||||||
public PrintStream beagleWriter = null;
|
public PrintStream beagleWriter = null;
|
||||||
@Argument(fullName = "genotypes_file", shortName = "genotypes", doc = "File to print reference genotypes for error analysis", required = false)
|
@Argument(fullName = "genotypes_file", shortName = "genotypes", doc = "File to print reference genotypes for error analysis", required = false)
|
||||||
|
|
@ -60,24 +58,13 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false)
|
@Argument(fullName = "inserted_nocall_rate", shortName = "nc_rate", doc = "Rate (0-1) at which genotype no-calls will be randomly inserted, for testing", required = false)
|
||||||
public double insertedNoCallRate = 0;
|
public double insertedNoCallRate = 0;
|
||||||
|
|
||||||
final TreeSet<String> samples = new TreeSet<String>();
|
private TreeSet<String> samples = null;
|
||||||
Random generator;
|
private Random generator;
|
||||||
|
|
||||||
public void initialize() {
|
|
||||||
|
|
||||||
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
|
||||||
for ( final ReferenceOrderedDataSource source : dataSources ) {
|
|
||||||
final RMDTrack rod = source.getReferenceOrderedData();
|
|
||||||
if ( rod.getRecordType().equals(VCFRecord.class) ) {
|
|
||||||
final VCFReader reader = new VCFReader(rod.getFile());
|
|
||||||
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
|
||||||
samples.addAll(vcfSamples);
|
|
||||||
reader.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
private void initialize(Set<String> sampleNames) {
|
||||||
generator = new Random();
|
generator = new Random();
|
||||||
beagleWriter.print("marker alleleA alleleB");
|
beagleWriter.print("marker alleleA alleleB");
|
||||||
|
samples = new TreeSet<String>(sampleNames);
|
||||||
for ( String sample : samples )
|
for ( String sample : samples )
|
||||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||||
|
|
||||||
|
|
@ -85,21 +72,26 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
||||||
if (beagleGenotypesWriter != null)
|
if (beagleGenotypesWriter != null)
|
||||||
beagleGenotypesWriter.println("dummy header");
|
beagleGenotypesWriter.println("dummy header");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
|
|
||||||
if( tracker != null ) {
|
if( tracker != null ) {
|
||||||
GenomeLoc loc = context.getLocation();
|
GenomeLoc loc = context.getLocation();
|
||||||
VariantContext vc_eval;
|
VariantContext vc_eval;
|
||||||
|
|
||||||
vc_eval = tracker.getVariantContext(ref,"vcf", null, loc, false);
|
vc_eval = tracker.getVariantContext(ref, ROD_NAME, null, loc, false);
|
||||||
if ( vc_eval == null || vc_eval.isFiltered() )
|
if ( vc_eval == null || vc_eval.isFiltered() )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
if ( samples == null ) {
|
||||||
|
initialize(vc_eval.getSampleNames());
|
||||||
|
}
|
||||||
|
|
||||||
// output marker ID to Beagle input file
|
// output marker ID to Beagle input file
|
||||||
beagleWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
|
beagleWriter.print(String.format("%s ", vc_eval.getLocation().toString()));
|
||||||
|
|
||||||
if (beagleGenotypesWriter != null)
|
if (beagleGenotypesWriter != null)
|
||||||
beagleGenotypesWriter.print(String.format("%s ",vc_eval.getLocation().toString()));
|
beagleGenotypesWriter.print(String.format("%s ", vc_eval.getLocation().toString()));
|
||||||
|
|
||||||
for (Allele allele: vc_eval.getAlleles()) {
|
for (Allele allele: vc_eval.getAlleles()) {
|
||||||
// TODO -- check whether this is really needed by Beagle
|
// TODO -- check whether this is really needed by Beagle
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue