VariantFiltration now outputs VCF. Important changes:

1. VariantsToVCF can now be called statically to output VCF for a single ROD instance; this is temporary until we have a VCF ROD.
2. VariantFiltration now outputs only 2 files, both mandatory: all variants that pass filters in geli text, and all variants in VCF.
If there are any problems, go find Aaron.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-09-09 20:04:32 +00:00
parent dd0085c428
commit cb31d5a0ab
15 changed files with 203 additions and 131 deletions

View File

@ -67,4 +67,9 @@ public class VECAlleleBalance extends RatioFilter {
public String getStudyInfo() { public String getStudyInfo() {
return (exclude ? "fail" : "pass") + "\t" + ratio; return (exclude ? "fail" : "pass") + "\t" + ratio;
} }
public String getVCFFilterString() {
return "AlleleBalance" + ratio;
}
} }

View File

@ -56,5 +56,9 @@ public class VECClusteredSnps implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A"); return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
} }
public String getVCFFilterString() {
return "ClusteredSnp";
}
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
} }

View File

@ -46,5 +46,9 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + depth; return (exclude ? "fail" : "pass") + "\t" + depth;
} }
public String getVCFFilterString() {
return "DoC" + depth;
}
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
} }

View File

@ -47,6 +47,10 @@ public class VECFisherStrand implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + pValue; return (exclude ? "fail" : "pass") + "\t" + pValue;
} }
public String getVCFFilterString() {
return "strand" + pValue;
}
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) { public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {

View File

@ -116,4 +116,8 @@ public class VECHomopolymer implements VariantExclusionCriterion {
public double inclusionProbability() { public double inclusionProbability() {
return exclude ? 0.0 : 1.0; return exclude ? 0.0 : 1.0;
} }
public String getVCFFilterString() {
return "homopolymer";
}
} }

View File

@ -52,5 +52,9 @@ public class VECIndelArtifact implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + source; return (exclude ? "fail" : "pass") + "\t" + source;
} }
public String getVCFFilterString() {
return "InDel";
}
public boolean useZeroQualityReads() { return false; } public boolean useZeroQualityReads() { return false; }
} }

View File

@ -35,4 +35,7 @@ public class VECLodThreshold implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + lod; return (exclude ? "fail" : "pass") + "\t" + lod;
} }
public String getVCFFilterString() {
return "LOD" + lod;
}
} }

View File

@ -33,6 +33,10 @@ public class VECLodThresholdByCoverage implements VariantExclusionCriterion {
return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod); return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod);
} }
public String getVCFFilterString() {
return "LODbyDoC";
}
public boolean useZeroQualityReads() { public boolean useZeroQualityReads() {
return false; return false;
} }

View File

@ -41,5 +41,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + rms; return (exclude ? "fail" : "pass") + "\t" + rms;
} }
public String getVCFFilterString() {
return "MQ" + rms;
}
public boolean useZeroQualityReads() { return true; } public boolean useZeroQualityReads() { return true; }
} }

View File

@ -41,5 +41,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
return (exclude ? "fail" : "pass") + "\t" + mq0Count; return (exclude ? "fail" : "pass") + "\t" + mq0Count;
} }
public String getVCFFilterString() {
return "MQzero" + mq0Count;
}
public boolean useZeroQualityReads() { return true; } public boolean useZeroQualityReads() { return true; }
} }

View File

@ -20,6 +20,10 @@ public class VECNull implements VariantExclusionCriterion {
return ""; return "";
} }
public String getVCFFilterString() {
return "";
}
public boolean useZeroQualityReads() { public boolean useZeroQualityReads() {
return false; return false;
} }

View File

@ -63,4 +63,8 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
public String getStudyInfo() { public String getStudyInfo() {
return (exclude ? "fail" : "pass") + "\t" + ratio; return (exclude ? "fail" : "pass") + "\t" + ratio;
} }
public String getVCFFilterString() {
return "onOffGenotype" + ratio;
}
} }

View File

@ -10,7 +10,8 @@ public interface VariantExclusionCriterion {
public double inclusionProbability(); public double inclusionProbability();
public String getStudyHeader(); public String getStudyHeader();
public String getStudyInfo(); public String getStudyInfo();
public String getVCFFilterString();
public boolean useZeroQualityReads(); public boolean useZeroQualityReads();
} }

View File

@ -5,11 +5,14 @@ import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.*; import java.util.*;
import java.io.*;
/** /**
@ -19,7 +22,10 @@ import java.util.*;
*/ */
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class)) @Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> { public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD; @Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
@Argument(fullName="includedOutput", shortName="included", doc="File to which all variants passing filters should be written", required=true) public String INCLUDED_OUT;
@Argument(fullName="annotatedOutput", shortName="annotated", doc="File to which all variants should be written with annotations - for debugging/parameterizing", required=false) public String ANNOTATED_OUT;
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES; @Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS; @Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS;
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9; @Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
@ -29,9 +35,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
private List<Class<? extends IndependentVariantFeature>> featureClasses; private List<Class<? extends IndependentVariantFeature>> featureClasses;
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses; private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
private PrintWriter variantsWriter; private VCFWriter vcfWriter;
private PrintWriter paramsWriter; private VCFHeader vcfHeader;
private HashMap<String, PrintWriter> exclusionWriters; private PrintWriter annotatedWriter;
private PrintWriter includedWriter;
private HashMap<String, String> sampleNames = new HashMap<String, String>();
private ArrayList<IndependentVariantFeature> requestedFeatures; private ArrayList<IndependentVariantFeature> requestedFeatures;
private ArrayList<VariantExclusionCriterion> requestedExclusions; private ArrayList<VariantExclusionCriterion> requestedExclusions;
@ -57,12 +65,15 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (LIST) { listFiltersAndExit(); } if (LIST) { listFiltersAndExit(); }
try { try {
variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls"); sampleNames.put(sampleName.toUpperCase(), "variant");
variantsWriter.println(GeliTextWriter.headerLine); vcfHeader = VariantsToVCF.getHeader(this.getToolkit().getArguments(), sampleNames.keySet());
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out"); includedWriter = new PrintWriter(INCLUDED_OUT);
paramsWriter.print("Chr\tPosition\t"); includedWriter.println(GeliTextWriter.headerLine);
if ( ANNOTATED_OUT != null ) {
annotatedWriter = new PrintWriter(ANNOTATED_OUT);
annotatedWriter.print("Chr\tPosition\t");
}
requestedFeatures = new ArrayList<IndependentVariantFeature>(); requestedFeatures = new ArrayList<IndependentVariantFeature>();
requestedExclusions = new ArrayList<VariantExclusionCriterion>(); requestedExclusions = new ArrayList<VariantExclusionCriterion>();
@ -85,7 +96,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
ivf.initialize(requestedFeatureArgs); ivf.initialize(requestedFeatureArgs);
requestedFeatures.add(ivf); requestedFeatures.add(ivf);
paramsWriter.print(ivf.getStudyHeader() + "\t"); if ( annotatedWriter != null )
annotatedWriter.print(ivf.getStudyHeader() + "\t");
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName())); throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
@ -100,9 +112,6 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
} }
// Initialize requested exclusion criteria
exclusionWriters = new HashMap<String, PrintWriter>();
if (EXCLUSIONS != null) { if (EXCLUSIONS != null) {
for (String requestedExclusionString : EXCLUSIONS) { for (String requestedExclusionString : EXCLUSIONS) {
String[] requestedExclusionPieces = requestedExclusionString.split(":"); String[] requestedExclusionPieces = requestedExclusionString.split(":");
@ -129,12 +138,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
vec.initialize(requestedArgs); vec.initialize(requestedArgs);
requestedExclusions.add(vec); requestedExclusions.add(vec);
paramsWriter.print(vec.getStudyHeader() + "\t"); if ( annotatedWriter != null )
annotatedWriter.print(vec.getStudyHeader() + "\t");
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
writer.println(GeliTextWriter.headerLine);
exclusionWriters.put(exclusionClassName, writer);
} catch (InstantiationException e) { } catch (InstantiationException e) {
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName())); throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
} catch (IllegalAccessException e) { } catch (IllegalAccessException e) {
@ -149,7 +154,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
} }
paramsWriter.print("inDbSNP\tinHapMap\tisHet\n"); if ( annotatedWriter != null )
annotatedWriter.print("inDbSNP\tinHapMap\tisHet\n");
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
throw new StingException(String.format("Could not open file(s) for writing")); throw new StingException(String.format("Could not open file(s) for writing"));
} }
@ -235,7 +241,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println("Original:\n" + variant); } if (VERBOSE) { out.println("Original:\n" + variant); }
GenomeLoc loc = context.getAlignmentContext(true).getLocation(); GenomeLoc loc = context.getAlignmentContext(true).getLocation();
paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t"); if ( annotatedWriter != null )
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
// Apply features that modify the likelihoods and LOD scores // Apply features that modify the likelihoods and LOD scores
for ( IndependentVariantFeature ivf : requestedFeatures ) { for ( IndependentVariantFeature ivf : requestedFeatures ) {
@ -247,7 +254,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); } if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
paramsWriter.print(ivf.getStudyInfo() + "\t"); if ( annotatedWriter != null )
annotatedWriter.print(ivf.getStudyInfo() + "\t");
} }
// Apply exclusion tests that score the variant call // Apply exclusion tests that score the variant call
@ -256,6 +264,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
// Use the filters to score the variant // Use the filters to score the variant
String filterFailureString = "";
double jointInclusionProbability = 1.0; double jointInclusionProbability = 1.0;
for ( VariantExclusionCriterion vec : requestedExclusions ) { for ( VariantExclusionCriterion vec : requestedExclusions ) {
vec.compute(variantContextWindow); vec.compute(variantContextWindow);
@ -267,23 +276,20 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
exclusionResults.put(exclusionClassName, inclusionProbability); exclusionResults.put(exclusionClassName, inclusionProbability);
if (inclusionProbability < INCLUSION_THRESHOLD) { if (inclusionProbability < INCLUSION_THRESHOLD) {
PrintWriter ewriter = exclusionWriters.get(exclusionClassName); filterFailureString += vec.getVCFFilterString() + ";";
if (ewriter != null) {
ewriter.println(variant);
ewriter.flush();
}
} }
if (VERBOSE) { if (VERBOSE) {
out.print(exclusionClassName + "=" + inclusionProbability + ";"); out.print(exclusionClassName + "=" + inclusionProbability + ";");
} }
paramsWriter.print(vec.getStudyInfo() + "\t"); if ( annotatedWriter != null )
annotatedWriter.print(vec.getStudyInfo() + "\t");
} }
// Decide whether we should keep the call or not // Decide whether we should keep the call or not
if (jointInclusionProbability >= INCLUSION_THRESHOLD) { if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
variantsWriter.println(variant); includedWriter.println(variant);
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); } if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
} else { } else {
@ -291,13 +297,21 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
} }
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null); rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
if ( dbsnp == null ) { if ( annotatedWriter != null ) {
paramsWriter.print("false\tfalse\t"); if ( dbsnp == null )
} else { annotatedWriter.print("false\tfalse\t");
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t"); else
annotatedWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
annotatedWriter.println(GenotypeUtils.isHet(variant));
} }
paramsWriter.println(GenotypeUtils.isHet(variant)); List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
if ( !filterFailureString.equals("") )
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
vcfWriter.addRecord(new VCFRecord(vcfHeader, map, "GT:GQ:DP", gt));
}
} }
/** /**
@ -330,11 +344,9 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
out.printf("Processed %d loci.\n", result); out.printf("Processed %d loci.\n", result);
variantsWriter.close(); vcfWriter.close();
paramsWriter.close(); includedWriter.close();
if ( annotatedWriter != null )
for (PrintWriter ewriter : exclusionWriters.values()) { annotatedWriter.close();
ewriter.close();
}
} }
} }

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf; package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.*;
@ -12,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File; import java.io.*;
import java.util.*; import java.util.*;
public class VariantsToVCF extends RefWalker<Integer, Integer> { public class VariantsToVCF extends RefWalker<Integer, Integer> {
@ -22,43 +23,47 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
private VCFWriter vcfwriter = null; private VCFWriter vcfwriter = null;
private VCFHeader vcfheader = null; private VCFHeader vcfheader = null;
private HashSet<String> sampleNames = null; private TreeMap<String, String> sampleNames = null;
public void initialize() { public void initialize() {
sampleNames = new TreeMap<String, String>();
GATKArgumentCollection args = this.getToolkit().getArguments();
for (String rodName : args.RODBindings) {
//out.println(rodName);
String[] rodPieces = rodName.split(",");
String sampleName = rodPieces[0];
if (sampleName.startsWith("NA"))
sampleNames.put(sampleName.toUpperCase(), sampleName.toUpperCase());
}
vcfheader = getHeader(args, sampleNames.keySet());
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
}
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {
Map<String, String> metaData = new HashMap<String, String>(); Map<String, String> metaData = new HashMap<String, String>();
List<String> additionalColumns = new ArrayList<String>(); List<String> additionalColumns = new ArrayList<String>();
sampleNames = new HashSet<String>();
Calendar cal = Calendar.getInstance(); Calendar cal = Calendar.getInstance();
metaData.put("format", "VCRv3.2"); metaData.put("format", "VCRv3.2");
metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH))); metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
metaData.put("source", "VariantsToVCF"); metaData.put("source", "VariantsToVCF");
metaData.put("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()); metaData.put("reference", args.referenceFile.getAbsolutePath());
additionalColumns.add("FORMAT"); additionalColumns.add("FORMAT");
additionalColumns.addAll(sampleNames);
for (String rodName : this.getToolkit().getArguments().RODBindings) { return new VCFHeader(metaData, additionalColumns);
//out.println(rodName);
String[] rodPieces = rodName.split(",");
String sampleName = rodPieces[0];
if (sampleName.startsWith("NA")) {
additionalColumns.add(sampleName);
sampleNames.add(sampleName.toUpperCase());
}
}
vcfheader = new VCFHeader(metaData, additionalColumns);
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
} }
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) { public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; } if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
for (ReferenceOrderedDatum rod : tracker.getAllRods()) { for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
if (rod != null && sampleNames.contains(rod.getName().toUpperCase())) { if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) {
return true; return true;
} }
} }
@ -67,17 +72,28 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
} }
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
return 1;
}
return 0;
}
public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS,String> map,
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
int[] alleleCounts = new int[4];
int numSNPs = 0; int numSNPs = 0;
int numRefs = 0; int numRefs = 0;
int[] alleleNames = { 0, 1, 2, 3 }; int[] alleleNames = { 0, 1, 2, 3 };
int[] alleleCounts = new int[4];
double snpQual = 0.0; double snpQual = 0.0;
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase()); int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
for (String name : vcfheader.getGenotypeSamples()) { for (String name : vcfheader.getGenotypeSamples()) {
ReferenceOrderedDatum rod = tracker.lookup(name, null); ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
if (rod != null) { if (rod != null) {
AllelicVariant av = (AllelicVariant) rod; AllelicVariant av = (AllelicVariant) rod;
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence()); String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
@ -122,77 +138,72 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
} }
} }
if (numSNPs > 0) { if (numSNPs == 0)
Integer[] perm = Utils.SortPermutation(alleleCounts); return false;
int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )", Integer[] perm = Utils.SortPermutation(alleleCounts);
context.getLocation(), int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
ref.getBase(), int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
);
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) { rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
out.println("[multistate] " + infoString);
return 0; String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
} else { context.getLocation(),
if (VERBOSE) { ref.getBase(),
out.println("[locus_info] " + infoString); BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
} BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
);
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
out.println("[multistate] " + infoString);
return false;
} else {
if (VERBOSE) {
out.println("[locus_info] " + infoString);
} }
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
map.put(field,String.valueOf(1));
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
map.put(field, context.getContig());
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
map.put(field, String.valueOf(context.getPosition()));
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
map.put(field, String.valueOf(ref.getBases()));
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
map.put(field, String.valueOf("0"));
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
String infostr = ".";
ArrayList<String> info = new ArrayList<String>();
if (dbsnp != null) { info.add("DB=1"); }
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
for (int i = 0; i < info.size(); i++) {
if (i == 0) { infostr = ""; }
infostr += info.get(i);
if (i < info.size() - 1) {
infostr += ";";
}
}
map.put(field, infostr);
}
}
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
return 1;
} }
return 0; for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
map.put(field,String.valueOf(1));
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
map.put(field, context.getContig());
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
map.put(field, String.valueOf(context.getPosition()));
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
map.put(field, String.valueOf(ref.getBases()));
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
map.put(field, String.valueOf("0"));
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
String infostr = ".";
ArrayList<String> info = new ArrayList<String>();
if (dbsnp != null) { info.add("DB=1"); }
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
for (int i = 0; i < info.size(); i++) {
if (i == 0) { infostr = ""; }
infostr += info.get(i);
if (i < info.size() - 1) {
infostr += ";";
}
}
map.put(field, infostr);
}
}
return true;
} }
public Integer reduceInit() { public Integer reduceInit() {