No longer uses VCFRecord

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3763 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-12 13:57:39 +00:00
parent f130d29318
commit 0c4a32843c
2 changed files with 144 additions and 157 deletions

View File

@ -428,4 +428,9 @@ public class VariantContextUtils {
public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) { public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) {
return uniqify ? sampleName + "." + trackName : sampleName; return uniqify ? sampleName + "." + trackName : sampleName;
} }
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.getFilters(), attributes);
}
} }

View File

@ -25,23 +25,20 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature; import org.broadinstitute.sting.gatk.refdata.features.beagle.BeagleFeature;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broad.tribble.vcf.*;
import java.io.*; import java.io.*;
import java.util.*; import java.util.*;
@ -51,11 +48,11 @@ import static java.lang.Math.log10;
/** /**
* Takes files produced by Beagle imputation engine and creates a vcf with modified annotations. * Takes files produced by Beagle imputation engine and creates a vcf with modified annotations.
*/ */
@Requires(value={},referenceMetaData=@RMD(name=BeagleOutputToVCFWalker.INPUT_ROD_NAME,type= VCFRecord.class)) @Requires(value={},referenceMetaData=@RMD(name=BeagleOutputToVCFWalker.INPUT_ROD_NAME, type=ReferenceOrderedDatum.class))
public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> { public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
private VCFWriter vcfWriter; public static final String INPUT_ROD_NAME = "variant";
@Argument(fullName="output_file", shortName="output", doc="VCF file to which output should be written", required=true) @Argument(fullName="output_file", shortName="output", doc="VCF file to which output should be written", required=true)
private String OUTPUT_FILE = null; private String OUTPUT_FILE = null;
@ -63,45 +60,33 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="nocall_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) @Argument(fullName="nocall_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false)
private double noCallThreshold = 0.0; private double noCallThreshold = 0.0;
public static final String INPUT_ROD_NAME = "inputvcf";
protected static String line = null; protected static String line = null;
private VCFWriter vcfWriter;
// protected HashMap<String,BeagleSampleRecord> beagleSampleRecords; // protected HashMap<String,BeagleSampleRecord> beagleSampleRecords;
final TreeSet<String> samples = new TreeSet<String>(); TreeSet<String> samples = null;
private final double MIN_PROB_ERROR = 0.000001; private final double MIN_PROB_ERROR = 0.000001;
private final double MAX_GENOTYPE_QUALITY = 6.0; private final double MAX_GENOTYPE_QUALITY = 6.0;
public void initialize() { private void initialize(Set<String> sampleNames) {
// setup the header fields // setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFFormatHeaderLine("OG",1,VCFHeaderLineType.String, "Original Genotype input to Beagle")); hInfo.add(new VCFFormatHeaderLine("OG",1, VCFHeaderLineType.String, "Original Genotype input to Beagle"));
hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site")); hInfo.add(new VCFInfoHeaderLine("R2", 1, VCFHeaderLineType.Float, "r2 Value reported by Beagle on each site"));
hInfo.add(new VCFInfoHeaderLine("GenotypesChanged", 1, VCFHeaderLineType.Flag, "r2 Value reported by Beagle on each site")); hInfo.add(new VCFInfoHeaderLine("GenotypesChanged", 1, VCFHeaderLineType.Flag, "r2 Value reported by Beagle on each site"));
hInfo.add(new VCFHeaderLine("source", "BeagleImputation")); hInfo.add(new VCFHeaderLine("source", "BeagleImputation"));
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
// Open output file specified by output VCF ROD // Open output file specified by output VCF ROD
vcfWriter = new VCFWriter(new File(OUTPUT_FILE)); vcfWriter = new VCFWriter(new File(OUTPUT_FILE));
for( final ReferenceOrderedDataSource source : dataSources ) { samples = new TreeSet<String>(sampleNames);
final RMDTrack rod = source.getReferenceOrderedData();
if( rod.getRecordType().equals(VCFRecord.class) && rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
reader.close();
}
}
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader); vcfWriter.writeHeader(vcfHeader);
@ -118,6 +103,9 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
if ( vc_input == null ) if ( vc_input == null )
return 0; return 0;
if ( samples == null ) {
initialize(vc_input.getSampleNames());
}
List<Object> r2rods = tracker.getReferenceMetaData("beagleR2"); List<Object> r2rods = tracker.getReferenceMetaData("beagleR2");
@ -154,7 +142,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
boolean genotypesChangedByBeagle = false; boolean genotypesChangedByBeagle = false;
for ( Map.Entry<String, Genotype> originalGenotypes : vc_input.getGenotypes().entrySet() ) { for ( Map.Entry<String, Genotype> originalGenotypes : vc_input.getGenotypes().entrySet() ) {
Genotype g = originalGenotypes.getValue(); Genotype g = originalGenotypes.getValue();
Set<String> filters = new LinkedHashSet<String>(g.getFilters()); Set<String> filters = new LinkedHashSet<String>(g.getFilters());
@ -238,7 +225,6 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
og = a1+"/"+a2; og = a1+"/"+a2;
// See if Beagle switched genotypes // See if Beagle switched genotypes
if (!((refAllele.equals(originalAlleleA) && altAllele.equals(originalAlleleB) || if (!((refAllele.equals(originalAlleleA) && altAllele.equals(originalAlleleB) ||
(refAllele.equals(originalAlleleB) && altAllele.equals(originalAlleleA))))){ (refAllele.equals(originalAlleleB) && altAllele.equals(originalAlleleA))))){
@ -255,10 +241,8 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
} }
VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getLocation(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.getFilters(), vc_input.getAttributes()); VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getLocation(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.getFilters(), vc_input.getAttributes());
Set<Allele> altAlleles = filteredVC.getAlternateAlleles(); Set<Allele> altAlleles = filteredVC.getAlternateAlleles();
StringBuffer altAlleleCountString = new StringBuffer(); StringBuffer altAlleleCountString = new StringBuffer();
for ( Allele allele : altAlleles ) { for ( Allele allele : altAlleles ) {
@ -267,22 +251,20 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
altAlleleCountString.append(filteredVC.getChromosomeCount(allele)); altAlleleCountString.append(filteredVC.getChromosomeCount(allele));
} }
VCFRecord vcf = VariantContextAdaptors.toVCF(filteredVC, ref.getBase()); HashMap<String, Object> attributes = new HashMap<String, Object>(filteredVC.getAttributes());
if ( filteredVC.getChromosomeCount() > 0 ) { if ( filteredVC.getChromosomeCount() > 0 ) {
vcf.addInfoField(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", filteredVC.getChromosomeCount())); attributes.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", filteredVC.getChromosomeCount()));
if ( altAlleleCountString.length() > 0 ) { if ( altAlleleCountString.length() > 0 ) {
vcf.addInfoField(VCFConstants.ALLELE_COUNT_KEY, altAlleleCountString.toString()); attributes.put(VCFConstants.ALLELE_COUNT_KEY, altAlleleCountString.toString());
vcf.addInfoField(VCFConstants.ALLELE_FREQUENCY_KEY, String.format("%4.2f", attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, String.format("%4.2f",
Double.valueOf(altAlleleCountString.toString())/(filteredVC.getChromosomeCount()))); Double.valueOf(altAlleleCountString.toString())/(filteredVC.getChromosomeCount())));
} }
} }
attributes.put("GenotypesChanged", (genotypesChangedByBeagle)? "1":"0" );
attributes.put("R2", beagleR2Feature.getR2value().toString() );
vcf.addInfoField("GenotypesChanged", (genotypesChangedByBeagle)? "1":"0" ); vcfWriter.add(VariantContextUtils.modifyAttributes(filteredVC, attributes), new byte[]{ref.getBase()});
vcf.addInfoField("R2", beagleR2Feature.getR2value().toString() );
vcfWriter.addRecord(vcf);
return 1; return 1;