VariantFiltration now outputs VCF. Important changes:
1. VariantsToVCF can now be called statically to output VCF for a single ROD instance; this is temporary until we have a VCF ROD. 2. VariantFiltration now outputs only 2 files, both mandatory: all variants that pass filters in geli text, and all variants in VCF. If there are any problems, go find Aaron. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dd0085c428
commit
cb31d5a0ab
|
|
@ -67,4 +67,9 @@ public class VECAlleleBalance extends RatioFilter {
|
|||
public String getStudyInfo() {
|
||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "AlleleBalance" + ratio;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,5 +56,9 @@ public class VECClusteredSnps implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "ClusteredSnp";
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -46,5 +46,9 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + depth;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "DoC" + depth;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "strand" + pValue;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
|
||||
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
||||
|
|
|
|||
|
|
@ -116,4 +116,8 @@ public class VECHomopolymer implements VariantExclusionCriterion {
|
|||
public double inclusionProbability() {
|
||||
return exclude ? 0.0 : 1.0;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "homopolymer";
|
||||
}
|
||||
}
|
||||
|
|
@ -52,5 +52,9 @@ public class VECIndelArtifact implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + source;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "InDel";
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return false; }
|
||||
}
|
||||
|
|
@ -35,4 +35,7 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + lod;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "LOD" + lod;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,10 @@ public class VECLodThresholdByCoverage implements VariantExclusionCriterion {
|
|||
return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod);
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "LODbyDoC";
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,5 +41,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + rms;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "MQ" + rms;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -41,5 +41,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
|
|||
return (exclude ? "fail" : "pass") + "\t" + mq0Count;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "MQzero" + mq0Count;
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() { return true; }
|
||||
}
|
||||
|
|
@ -20,6 +20,10 @@ public class VECNull implements VariantExclusionCriterion {
|
|||
return "";
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "";
|
||||
}
|
||||
|
||||
public boolean useZeroQualityReads() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,4 +63,8 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
|||
public String getStudyInfo() {
|
||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||
}
|
||||
|
||||
public String getVCFFilterString() {
|
||||
return "onOffGenotype" + ratio;
|
||||
}
|
||||
}
|
||||
|
|
@ -10,7 +10,8 @@ public interface VariantExclusionCriterion {
|
|||
public double inclusionProbability();
|
||||
|
||||
public String getStudyHeader();
|
||||
|
||||
public String getStudyInfo();
|
||||
public String getVCFFilterString();
|
||||
|
||||
public boolean useZeroQualityReads();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,11 +5,14 @@ import org.broadinstitute.sting.gatk.refdata.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -19,7 +22,10 @@ import java.util.*;
|
|||
*/
|
||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
|
||||
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
|
||||
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
|
||||
@Argument(fullName="includedOutput", shortName="included", doc="File to which all variants passing filters should be written", required=true) public String INCLUDED_OUT;
|
||||
@Argument(fullName="annotatedOutput", shortName="annotated", doc="File to which all variants should be written with annotations - for debugging/parameterizing", required=false) public String ANNOTATED_OUT;
|
||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
||||
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS;
|
||||
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
||||
|
|
@ -29,9 +35,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
||||
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
||||
|
||||
private PrintWriter variantsWriter;
|
||||
private PrintWriter paramsWriter;
|
||||
private HashMap<String, PrintWriter> exclusionWriters;
|
||||
private VCFWriter vcfWriter;
|
||||
private VCFHeader vcfHeader;
|
||||
private PrintWriter annotatedWriter;
|
||||
private PrintWriter includedWriter;
|
||||
private HashMap<String, String> sampleNames = new HashMap<String, String>();
|
||||
|
||||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
||||
|
|
@ -57,12 +65,15 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
if (LIST) { listFiltersAndExit(); }
|
||||
|
||||
try {
|
||||
variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
|
||||
variantsWriter.println(GeliTextWriter.headerLine);
|
||||
|
||||
paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out");
|
||||
paramsWriter.print("Chr\tPosition\t");
|
||||
|
||||
sampleNames.put(sampleName.toUpperCase(), "variant");
|
||||
vcfHeader = VariantsToVCF.getHeader(this.getToolkit().getArguments(), sampleNames.keySet());
|
||||
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
|
||||
includedWriter = new PrintWriter(INCLUDED_OUT);
|
||||
includedWriter.println(GeliTextWriter.headerLine);
|
||||
if ( ANNOTATED_OUT != null ) {
|
||||
annotatedWriter = new PrintWriter(ANNOTATED_OUT);
|
||||
annotatedWriter.print("Chr\tPosition\t");
|
||||
}
|
||||
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
||||
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
||||
|
||||
|
|
@ -85,7 +96,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
ivf.initialize(requestedFeatureArgs);
|
||||
requestedFeatures.add(ivf);
|
||||
|
||||
paramsWriter.print(ivf.getStudyHeader() + "\t");
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print(ivf.getStudyHeader() + "\t");
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
||||
} catch (IllegalAccessException e) {
|
||||
|
|
@ -100,9 +112,6 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
// Initialize requested exclusion criteria
|
||||
exclusionWriters = new HashMap<String, PrintWriter>();
|
||||
|
||||
if (EXCLUSIONS != null) {
|
||||
for (String requestedExclusionString : EXCLUSIONS) {
|
||||
String[] requestedExclusionPieces = requestedExclusionString.split(":");
|
||||
|
|
@ -129,12 +138,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
vec.initialize(requestedArgs);
|
||||
requestedExclusions.add(vec);
|
||||
|
||||
paramsWriter.print(vec.getStudyHeader() + "\t");
|
||||
|
||||
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
|
||||
writer.println(GeliTextWriter.headerLine);
|
||||
|
||||
exclusionWriters.put(exclusionClassName, writer);
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print(vec.getStudyHeader() + "\t");
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
||||
} catch (IllegalAccessException e) {
|
||||
|
|
@ -149,7 +154,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
paramsWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||
}
|
||||
|
|
@ -235,7 +241,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||
|
||||
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
||||
paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
||||
|
||||
// Apply features that modify the likelihoods and LOD scores
|
||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||
|
|
@ -247,7 +254,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||
|
||||
paramsWriter.print(ivf.getStudyInfo() + "\t");
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print(ivf.getStudyInfo() + "\t");
|
||||
}
|
||||
|
||||
// Apply exclusion tests that score the variant call
|
||||
|
|
@ -256,6 +264,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// Use the filters to score the variant
|
||||
String filterFailureString = "";
|
||||
double jointInclusionProbability = 1.0;
|
||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||
vec.compute(variantContextWindow);
|
||||
|
|
@ -267,23 +276,20 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
exclusionResults.put(exclusionClassName, inclusionProbability);
|
||||
|
||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
|
||||
if (ewriter != null) {
|
||||
ewriter.println(variant);
|
||||
ewriter.flush();
|
||||
}
|
||||
filterFailureString += vec.getVCFFilterString() + ";";
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||
}
|
||||
|
||||
paramsWriter.print(vec.getStudyInfo() + "\t");
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.print(vec.getStudyInfo() + "\t");
|
||||
}
|
||||
|
||||
// Decide whether we should keep the call or not
|
||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||
variantsWriter.println(variant);
|
||||
includedWriter.println(variant);
|
||||
|
||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||
} else {
|
||||
|
|
@ -291,13 +297,21 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
||||
if ( dbsnp == null ) {
|
||||
paramsWriter.print("false\tfalse\t");
|
||||
} else {
|
||||
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||
if ( annotatedWriter != null ) {
|
||||
if ( dbsnp == null )
|
||||
annotatedWriter.print("false\tfalse\t");
|
||||
else
|
||||
annotatedWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||
annotatedWriter.println(GenotypeUtils.isHet(variant));
|
||||
}
|
||||
|
||||
paramsWriter.println(GenotypeUtils.isHet(variant));
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
|
||||
if ( !filterFailureString.equals("") )
|
||||
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
|
||||
vcfWriter.addRecord(new VCFRecord(vcfHeader, map, "GT:GQ:DP", gt));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -330,11 +344,9 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
out.printf("Processed %d loci.\n", result);
|
||||
|
||||
variantsWriter.close();
|
||||
paramsWriter.close();
|
||||
|
||||
for (PrintWriter ewriter : exclusionWriters.values()) {
|
||||
ewriter.close();
|
||||
}
|
||||
vcfWriter.close();
|
||||
includedWriter.close();
|
||||
if ( annotatedWriter != null )
|
||||
annotatedWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
|
@ -12,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
|||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||
|
|
@ -22,43 +23,47 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
|
||||
private VCFWriter vcfwriter = null;
|
||||
private VCFHeader vcfheader = null;
|
||||
private HashSet<String> sampleNames = null;
|
||||
private TreeMap<String, String> sampleNames = null;
|
||||
|
||||
public void initialize() {
|
||||
sampleNames = new TreeMap<String, String>();
|
||||
GATKArgumentCollection args = this.getToolkit().getArguments();
|
||||
|
||||
for (String rodName : args.RODBindings) {
|
||||
//out.println(rodName);
|
||||
String[] rodPieces = rodName.split(",");
|
||||
String sampleName = rodPieces[0];
|
||||
|
||||
if (sampleName.startsWith("NA"))
|
||||
sampleNames.put(sampleName.toUpperCase(), sampleName.toUpperCase());
|
||||
}
|
||||
|
||||
vcfheader = getHeader(args, sampleNames.keySet());
|
||||
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
|
||||
}
|
||||
|
||||
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {
|
||||
Map<String, String> metaData = new HashMap<String, String>();
|
||||
List<String> additionalColumns = new ArrayList<String>();
|
||||
sampleNames = new HashSet<String>();
|
||||
|
||||
Calendar cal = Calendar.getInstance();
|
||||
|
||||
metaData.put("format", "VCRv3.2");
|
||||
metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
|
||||
metaData.put("source", "VariantsToVCF");
|
||||
metaData.put("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath());
|
||||
|
||||
metaData.put("reference", args.referenceFile.getAbsolutePath());
|
||||
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.addAll(sampleNames);
|
||||
|
||||
for (String rodName : this.getToolkit().getArguments().RODBindings) {
|
||||
//out.println(rodName);
|
||||
|
||||
String[] rodPieces = rodName.split(",");
|
||||
String sampleName = rodPieces[0];
|
||||
|
||||
if (sampleName.startsWith("NA")) {
|
||||
additionalColumns.add(sampleName);
|
||||
sampleNames.add(sampleName.toUpperCase());
|
||||
}
|
||||
}
|
||||
|
||||
vcfheader = new VCFHeader(metaData, additionalColumns);
|
||||
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
|
||||
return new VCFHeader(metaData, additionalColumns);
|
||||
}
|
||||
|
||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
|
||||
|
||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
||||
if (rod != null && sampleNames.contains(rod.getName().toUpperCase())) {
|
||||
if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -67,17 +72,28 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
|
||||
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
|
||||
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
|
||||
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS,String> map,
|
||||
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
|
||||
int[] alleleCounts = new int[4];
|
||||
int numSNPs = 0;
|
||||
int numRefs = 0;
|
||||
int[] alleleNames = { 0, 1, 2, 3 };
|
||||
int[] alleleCounts = new int[4];
|
||||
double snpQual = 0.0;
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
|
||||
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||
|
||||
for (String name : vcfheader.getGenotypeSamples()) {
|
||||
ReferenceOrderedDatum rod = tracker.lookup(name, null);
|
||||
ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
|
||||
if (rod != null) {
|
||||
AllelicVariant av = (AllelicVariant) rod;
|
||||
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
|
||||
|
|
@ -122,77 +138,72 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
if (numSNPs > 0) {
|
||||
Integer[] perm = Utils.SortPermutation(alleleCounts);
|
||||
int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
|
||||
int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
|
||||
if (numSNPs == 0)
|
||||
return false;
|
||||
|
||||
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
|
||||
|
||||
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
|
||||
context.getLocation(),
|
||||
ref.getBase(),
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
||||
);
|
||||
Integer[] perm = Utils.SortPermutation(alleleCounts);
|
||||
int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
|
||||
int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
|
||||
|
||||
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
|
||||
out.println("[multistate] " + infoString);
|
||||
return 0;
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
out.println("[locus_info] " + infoString);
|
||||
}
|
||||
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
|
||||
|
||||
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
|
||||
context.getLocation(),
|
||||
ref.getBase(),
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
||||
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
||||
);
|
||||
|
||||
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
|
||||
out.println("[multistate] " + infoString);
|
||||
return false;
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
out.println("[locus_info] " + infoString);
|
||||
}
|
||||
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||
map.put(field,String.valueOf(1));
|
||||
|
||||
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
|
||||
map.put(field, context.getContig());
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
|
||||
map.put(field, String.valueOf(context.getPosition()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
|
||||
map.put(field, String.valueOf(ref.getBases()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
|
||||
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
|
||||
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
||||
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
||||
map.put(field, String.valueOf("0"));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
||||
String infostr = ".";
|
||||
ArrayList<String> info = new ArrayList<String>();
|
||||
|
||||
if (dbsnp != null) { info.add("DB=1"); }
|
||||
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
|
||||
|
||||
for (int i = 0; i < info.size(); i++) {
|
||||
if (i == 0) { infostr = ""; }
|
||||
|
||||
infostr += info.get(i);
|
||||
|
||||
if (i < info.size() - 1) {
|
||||
infostr += ";";
|
||||
}
|
||||
}
|
||||
|
||||
map.put(field, infostr);
|
||||
}
|
||||
}
|
||||
|
||||
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
|
||||
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||
map.put(field,String.valueOf(1));
|
||||
|
||||
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
|
||||
map.put(field, context.getContig());
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
|
||||
map.put(field, String.valueOf(context.getPosition()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
|
||||
map.put(field, String.valueOf(ref.getBases()));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
|
||||
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
|
||||
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
||||
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
||||
map.put(field, String.valueOf("0"));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
||||
String infostr = ".";
|
||||
ArrayList<String> info = new ArrayList<String>();
|
||||
|
||||
if (dbsnp != null) { info.add("DB=1"); }
|
||||
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
|
||||
|
||||
for (int i = 0; i < info.size(); i++) {
|
||||
if (i == 0) { infostr = ""; }
|
||||
|
||||
infostr += info.get(i);
|
||||
|
||||
if (i < info.size() - 1) {
|
||||
infostr += ";";
|
||||
}
|
||||
}
|
||||
|
||||
map.put(field, infostr);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public Integer reduceInit() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue