VariantFiltration now outputs VCF. Important changes:
1. VariantsToVCF can now be called statically to output VCF for a single ROD instance; this is temporary until we have a VCF ROD. 2. VariantFiltration now outputs only 2 files, both mandatory: all variants that pass filters in geli text, and all variants in VCF. If there are any problems, go find Aaron. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
dd0085c428
commit
cb31d5a0ab
|
|
@ -67,4 +67,9 @@ public class VECAlleleBalance extends RatioFilter {
|
||||||
public String getStudyInfo() {
|
public String getStudyInfo() {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "AlleleBalance" + ratio;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,5 +56,9 @@ public class VECClusteredSnps implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
|
return (exclude ? "fail" : "pass") + "\t" + (exclude ? distance : "N/A");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "ClusteredSnp";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
@ -46,5 +46,9 @@ public class VECDepthOfCoverage implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + depth;
|
return (exclude ? "fail" : "pass") + "\t" + depth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "DoC" + depth;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,10 @@ public class VECFisherStrand implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
return (exclude ? "fail" : "pass") + "\t" + pValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "strand" + pValue;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
|
|
||||||
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
public boolean strandTest(char ref, AlignmentContext context, int allele1, int allele2, double threshold, StringBuffer out) {
|
||||||
|
|
|
||||||
|
|
@ -116,4 +116,8 @@ public class VECHomopolymer implements VariantExclusionCriterion {
|
||||||
public double inclusionProbability() {
|
public double inclusionProbability() {
|
||||||
return exclude ? 0.0 : 1.0;
|
return exclude ? 0.0 : 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "homopolymer";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -52,5 +52,9 @@ public class VECIndelArtifact implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + source;
|
return (exclude ? "fail" : "pass") + "\t" + source;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "InDel";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return false; }
|
public boolean useZeroQualityReads() { return false; }
|
||||||
}
|
}
|
||||||
|
|
@ -35,4 +35,7 @@ public class VECLodThreshold implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + lod;
|
return (exclude ? "fail" : "pass") + "\t" + lod;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "LOD" + lod;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,10 @@ public class VECLodThresholdByCoverage implements VariantExclusionCriterion {
|
||||||
return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod);
|
return String.format("%s\t%d\t%f", exclude ? "fail" : "pass", (int) depth, lod);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "LODbyDoC";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
public boolean useZeroQualityReads() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,5 +41,9 @@ public class VECMappingQuality implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + rms;
|
return (exclude ? "fail" : "pass") + "\t" + rms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "MQ" + rms;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
public boolean useZeroQualityReads() { return true; }
|
||||||
}
|
}
|
||||||
|
|
@ -41,5 +41,9 @@ public class VECMappingQualityZero implements VariantExclusionCriterion {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + mq0Count;
|
return (exclude ? "fail" : "pass") + "\t" + mq0Count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "MQzero" + mq0Count;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() { return true; }
|
public boolean useZeroQualityReads() { return true; }
|
||||||
}
|
}
|
||||||
|
|
@ -20,6 +20,10 @@ public class VECNull implements VariantExclusionCriterion {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
public boolean useZeroQualityReads() {
|
public boolean useZeroQualityReads() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -63,4 +63,8 @@ public class VECOnOffGenotypeRatio extends RatioFilter {
|
||||||
public String getStudyInfo() {
|
public String getStudyInfo() {
|
||||||
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
return (exclude ? "fail" : "pass") + "\t" + ratio;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getVCFFilterString() {
|
||||||
|
return "onOffGenotype" + ratio;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -10,7 +10,8 @@ public interface VariantExclusionCriterion {
|
||||||
public double inclusionProbability();
|
public double inclusionProbability();
|
||||||
|
|
||||||
public String getStudyHeader();
|
public String getStudyHeader();
|
||||||
|
|
||||||
public String getStudyInfo();
|
public String getStudyInfo();
|
||||||
|
public String getVCFFilterString();
|
||||||
|
|
||||||
public boolean useZeroQualityReads();
|
public boolean useZeroQualityReads();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,14 @@ import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsToVCF;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -19,7 +22,10 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="variant",type= RodGeliText.class))
|
||||||
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
@Argument(fullName="variants_out_head", shortName="VOH", doc="File to which modified variants should be written") public String VARIANTS_OUT_HEAD;
|
@Argument(fullName="vcfOutput", shortName="vcf", doc="VCF file to which all variants should be written with annotations", required=true) public File VCF_OUT;
|
||||||
|
@Argument(fullName="sampleName", shortName="sample", doc="Temporary hack to get VCF to work: the sample (NA-ID) corresponding to the variants", required=true) public String sampleName;
|
||||||
|
@Argument(fullName="includedOutput", shortName="included", doc="File to which all variants passing filters should be written", required=true) public String INCLUDED_OUT;
|
||||||
|
@Argument(fullName="annotatedOutput", shortName="annotated", doc="File to which all variants should be written with annotations - for debugging/parameterizing", required=false) public String ANNOTATED_OUT;
|
||||||
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
@Argument(fullName="features", shortName="F", doc="Feature test (optionally with arguments) to apply to genotype posteriors. Syntax: 'testname[:arguments]'", required=false) public String[] FEATURES;
|
||||||
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS;
|
@Argument(fullName="exclusion_criterion", shortName="X", doc="Exclusion test (optionally with arguments) to apply to variant call. Syntax: 'testname[:key1=arg1,key2=arg2,...]'", required=false) public String[] EXCLUSIONS;
|
||||||
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
@Argument(fullName="inclusion_threshold", shortName="IT", doc="The product of the probability to include variants based on these filters must be greater than the value specified here in order to be included", required=false) public Double INCLUSION_THRESHOLD = 0.9;
|
||||||
|
|
@ -29,9 +35,11 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
private List<Class<? extends IndependentVariantFeature>> featureClasses;
|
||||||
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
private List<Class<? extends VariantExclusionCriterion>> exclusionClasses;
|
||||||
|
|
||||||
private PrintWriter variantsWriter;
|
private VCFWriter vcfWriter;
|
||||||
private PrintWriter paramsWriter;
|
private VCFHeader vcfHeader;
|
||||||
private HashMap<String, PrintWriter> exclusionWriters;
|
private PrintWriter annotatedWriter;
|
||||||
|
private PrintWriter includedWriter;
|
||||||
|
private HashMap<String, String> sampleNames = new HashMap<String, String>();
|
||||||
|
|
||||||
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
private ArrayList<IndependentVariantFeature> requestedFeatures;
|
||||||
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
private ArrayList<VariantExclusionCriterion> requestedExclusions;
|
||||||
|
|
@ -57,12 +65,15 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
if (LIST) { listFiltersAndExit(); }
|
if (LIST) { listFiltersAndExit(); }
|
||||||
|
|
||||||
try {
|
try {
|
||||||
variantsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".included.geli.calls");
|
sampleNames.put(sampleName.toUpperCase(), "variant");
|
||||||
variantsWriter.println(GeliTextWriter.headerLine);
|
vcfHeader = VariantsToVCF.getHeader(this.getToolkit().getArguments(), sampleNames.keySet());
|
||||||
|
vcfWriter = new VCFWriter(vcfHeader, VCF_OUT);
|
||||||
paramsWriter = new PrintWriter(VARIANTS_OUT_HEAD + ".params.out");
|
includedWriter = new PrintWriter(INCLUDED_OUT);
|
||||||
paramsWriter.print("Chr\tPosition\t");
|
includedWriter.println(GeliTextWriter.headerLine);
|
||||||
|
if ( ANNOTATED_OUT != null ) {
|
||||||
|
annotatedWriter = new PrintWriter(ANNOTATED_OUT);
|
||||||
|
annotatedWriter.print("Chr\tPosition\t");
|
||||||
|
}
|
||||||
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
requestedFeatures = new ArrayList<IndependentVariantFeature>();
|
||||||
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
requestedExclusions = new ArrayList<VariantExclusionCriterion>();
|
||||||
|
|
||||||
|
|
@ -85,7 +96,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
ivf.initialize(requestedFeatureArgs);
|
ivf.initialize(requestedFeatureArgs);
|
||||||
requestedFeatures.add(ivf);
|
requestedFeatures.add(ivf);
|
||||||
|
|
||||||
paramsWriter.print(ivf.getStudyHeader() + "\t");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print(ivf.getStudyHeader() + "\t");
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
throw new StingException(String.format("Cannot instantiate feature class '%s': must be concrete class", featureClass.getSimpleName()));
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
@ -100,9 +112,6 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize requested exclusion criteria
|
|
||||||
exclusionWriters = new HashMap<String, PrintWriter>();
|
|
||||||
|
|
||||||
if (EXCLUSIONS != null) {
|
if (EXCLUSIONS != null) {
|
||||||
for (String requestedExclusionString : EXCLUSIONS) {
|
for (String requestedExclusionString : EXCLUSIONS) {
|
||||||
String[] requestedExclusionPieces = requestedExclusionString.split(":");
|
String[] requestedExclusionPieces = requestedExclusionString.split(":");
|
||||||
|
|
@ -129,12 +138,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
vec.initialize(requestedArgs);
|
vec.initialize(requestedArgs);
|
||||||
requestedExclusions.add(vec);
|
requestedExclusions.add(vec);
|
||||||
|
|
||||||
paramsWriter.print(vec.getStudyHeader() + "\t");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print(vec.getStudyHeader() + "\t");
|
||||||
PrintWriter writer = new PrintWriter(VARIANTS_OUT_HEAD + ".excluded." + exclusionClassName + ".geli.calls");
|
|
||||||
writer.println(GeliTextWriter.headerLine);
|
|
||||||
|
|
||||||
exclusionWriters.put(exclusionClassName, writer);
|
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
throw new StingException(String.format("Cannot instantiate exclusion class '%s': must be concrete class", exclusionClass.getSimpleName()));
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
@ -149,7 +154,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
paramsWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print("inDbSNP\tinHapMap\tisHet\n");
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
throw new StingException(String.format("Could not open file(s) for writing"));
|
throw new StingException(String.format("Could not open file(s) for writing"));
|
||||||
}
|
}
|
||||||
|
|
@ -235,7 +241,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
if (VERBOSE) { out.println("Original:\n" + variant); }
|
if (VERBOSE) { out.println("Original:\n" + variant); }
|
||||||
|
|
||||||
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
GenomeLoc loc = context.getAlignmentContext(true).getLocation();
|
||||||
paramsWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print(loc.getContig() + "\t" + loc.getStart() + "\t");
|
||||||
|
|
||||||
// Apply features that modify the likelihoods and LOD scores
|
// Apply features that modify the likelihoods and LOD scores
|
||||||
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
for ( IndependentVariantFeature ivf : requestedFeatures ) {
|
||||||
|
|
@ -247,7 +254,8 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
if (VERBOSE) { out.println(rationalizeClassName(ivf.getClass()) + ":\n " + variant); }
|
||||||
|
|
||||||
paramsWriter.print(ivf.getStudyInfo() + "\t");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print(ivf.getStudyInfo() + "\t");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply exclusion tests that score the variant call
|
// Apply exclusion tests that score the variant call
|
||||||
|
|
@ -256,6 +264,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the filters to score the variant
|
// Use the filters to score the variant
|
||||||
|
String filterFailureString = "";
|
||||||
double jointInclusionProbability = 1.0;
|
double jointInclusionProbability = 1.0;
|
||||||
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
for ( VariantExclusionCriterion vec : requestedExclusions ) {
|
||||||
vec.compute(variantContextWindow);
|
vec.compute(variantContextWindow);
|
||||||
|
|
@ -267,23 +276,20 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
exclusionResults.put(exclusionClassName, inclusionProbability);
|
exclusionResults.put(exclusionClassName, inclusionProbability);
|
||||||
|
|
||||||
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
if (inclusionProbability < INCLUSION_THRESHOLD) {
|
||||||
PrintWriter ewriter = exclusionWriters.get(exclusionClassName);
|
filterFailureString += vec.getVCFFilterString() + ";";
|
||||||
if (ewriter != null) {
|
|
||||||
ewriter.println(variant);
|
|
||||||
ewriter.flush();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
out.print(exclusionClassName + "=" + inclusionProbability + ";");
|
||||||
}
|
}
|
||||||
|
|
||||||
paramsWriter.print(vec.getStudyInfo() + "\t");
|
if ( annotatedWriter != null )
|
||||||
|
annotatedWriter.print(vec.getStudyInfo() + "\t");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decide whether we should keep the call or not
|
// Decide whether we should keep the call or not
|
||||||
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
if (jointInclusionProbability >= INCLUSION_THRESHOLD) {
|
||||||
variantsWriter.println(variant);
|
includedWriter.println(variant);
|
||||||
|
|
||||||
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
if (VERBOSE) { out.println("] JointInclusionProbability:" + jointInclusionProbability + " State:included\n"); }
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -291,13 +297,21 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
rodDbSNP dbsnp = (rodDbSNP) context.getTracker().lookup("dbSNP", null);
|
||||||
if ( dbsnp == null ) {
|
if ( annotatedWriter != null ) {
|
||||||
paramsWriter.print("false\tfalse\t");
|
if ( dbsnp == null )
|
||||||
} else {
|
annotatedWriter.print("false\tfalse\t");
|
||||||
paramsWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
else
|
||||||
|
annotatedWriter.print(dbsnp.isSNP() + "\t" + dbsnp.isHapmap() + "\t");
|
||||||
|
annotatedWriter.println(GenotypeUtils.isHet(variant));
|
||||||
}
|
}
|
||||||
|
|
||||||
paramsWriter.println(GenotypeUtils.isHet(variant));
|
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||||
|
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||||
|
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
|
||||||
|
if ( !filterFailureString.equals("") )
|
||||||
|
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
|
||||||
|
vcfWriter.addRecord(new VCFRecord(vcfHeader, map, "GT:GQ:DP", gt));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -330,11 +344,9 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
out.printf("Processed %d loci.\n", result);
|
out.printf("Processed %d loci.\n", result);
|
||||||
|
|
||||||
variantsWriter.close();
|
vcfWriter.close();
|
||||||
paramsWriter.close();
|
includedWriter.close();
|
||||||
|
if ( annotatedWriter != null )
|
||||||
for (PrintWriter ewriter : exclusionWriters.values()) {
|
annotatedWriter.close();
|
||||||
ewriter.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
package org.broadinstitute.sting.playground.gatk.walkers.variantstovcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
|
@ -12,7 +13,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||||
|
|
@ -22,43 +23,47 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||||
|
|
||||||
private VCFWriter vcfwriter = null;
|
private VCFWriter vcfwriter = null;
|
||||||
private VCFHeader vcfheader = null;
|
private VCFHeader vcfheader = null;
|
||||||
private HashSet<String> sampleNames = null;
|
private TreeMap<String, String> sampleNames = null;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
sampleNames = new TreeMap<String, String>();
|
||||||
|
GATKArgumentCollection args = this.getToolkit().getArguments();
|
||||||
|
|
||||||
|
for (String rodName : args.RODBindings) {
|
||||||
|
//out.println(rodName);
|
||||||
|
String[] rodPieces = rodName.split(",");
|
||||||
|
String sampleName = rodPieces[0];
|
||||||
|
|
||||||
|
if (sampleName.startsWith("NA"))
|
||||||
|
sampleNames.put(sampleName.toUpperCase(), sampleName.toUpperCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
vcfheader = getHeader(args, sampleNames.keySet());
|
||||||
|
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VCFHeader getHeader(GATKArgumentCollection args, Set<String> sampleNames) {
|
||||||
Map<String, String> metaData = new HashMap<String, String>();
|
Map<String, String> metaData = new HashMap<String, String>();
|
||||||
List<String> additionalColumns = new ArrayList<String>();
|
List<String> additionalColumns = new ArrayList<String>();
|
||||||
sampleNames = new HashSet<String>();
|
|
||||||
|
|
||||||
Calendar cal = Calendar.getInstance();
|
Calendar cal = Calendar.getInstance();
|
||||||
|
|
||||||
metaData.put("format", "VCRv3.2");
|
metaData.put("format", "VCRv3.2");
|
||||||
metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
|
metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH)));
|
||||||
metaData.put("source", "VariantsToVCF");
|
metaData.put("source", "VariantsToVCF");
|
||||||
metaData.put("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath());
|
metaData.put("reference", args.referenceFile.getAbsolutePath());
|
||||||
|
|
||||||
additionalColumns.add("FORMAT");
|
additionalColumns.add("FORMAT");
|
||||||
|
additionalColumns.addAll(sampleNames);
|
||||||
|
|
||||||
for (String rodName : this.getToolkit().getArguments().RODBindings) {
|
return new VCFHeader(metaData, additionalColumns);
|
||||||
//out.println(rodName);
|
|
||||||
|
|
||||||
String[] rodPieces = rodName.split(",");
|
|
||||||
String sampleName = rodPieces[0];
|
|
||||||
|
|
||||||
if (sampleName.startsWith("NA")) {
|
|
||||||
additionalColumns.add(sampleName);
|
|
||||||
sampleNames.add(sampleName.toUpperCase());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
vcfheader = new VCFHeader(metaData, additionalColumns);
|
|
||||||
vcfwriter = new VCFWriter(vcfheader, VCF_OUT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
|
if (BaseUtils.simpleBaseToBaseIndex(ref) > -1) { return true; }
|
||||||
|
|
||||||
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
for (ReferenceOrderedDatum rod : tracker.getAllRods()) {
|
||||||
if (rod != null && sampleNames.contains(rod.getName().toUpperCase())) {
|
if (rod != null && sampleNames.keySet().contains(rod.getName().toUpperCase())) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -67,17 +72,28 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||||
|
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||||
|
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
|
||||||
|
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
|
||||||
|
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean generateVCFRecord(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context,
|
||||||
|
VCFHeader vcfheader, List<VCFGenotypeRecord> gt, Map<VCFHeader.HEADER_FIELDS,String> map,
|
||||||
|
Map<String, String> sampleNamesToRods, PrintStream out, boolean SUPPRESS_MULTISTATE, boolean VERBOSE) {
|
||||||
|
int[] alleleCounts = new int[4];
|
||||||
int numSNPs = 0;
|
int numSNPs = 0;
|
||||||
int numRefs = 0;
|
int numRefs = 0;
|
||||||
int[] alleleNames = { 0, 1, 2, 3 };
|
int[] alleleNames = { 0, 1, 2, 3 };
|
||||||
int[] alleleCounts = new int[4];
|
|
||||||
double snpQual = 0.0;
|
double snpQual = 0.0;
|
||||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
|
||||||
|
|
||||||
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||||
|
|
||||||
for (String name : vcfheader.getGenotypeSamples()) {
|
for (String name : vcfheader.getGenotypeSamples()) {
|
||||||
ReferenceOrderedDatum rod = tracker.lookup(name, null);
|
ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null);
|
||||||
if (rod != null) {
|
if (rod != null) {
|
||||||
AllelicVariant av = (AllelicVariant) rod;
|
AllelicVariant av = (AllelicVariant) rod;
|
||||||
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
|
String lod = String.format("%d", av.getVariationConfidence() > 99 ? 99 : (int) av.getVariationConfidence());
|
||||||
|
|
@ -122,77 +138,72 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numSNPs > 0) {
|
if (numSNPs == 0)
|
||||||
Integer[] perm = Utils.SortPermutation(alleleCounts);
|
return false;
|
||||||
int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
|
|
||||||
int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
|
|
||||||
|
|
||||||
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
|
|
||||||
|
|
||||||
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
|
Integer[] perm = Utils.SortPermutation(alleleCounts);
|
||||||
context.getLocation(),
|
int[] sortedCounts = Utils.PermuteArray(alleleCounts, perm);
|
||||||
ref.getBase(),
|
int[] sortedNames = Utils.PermuteArray(alleleNames, perm);
|
||||||
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
|
||||||
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
|
||||||
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
|
||||||
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
|
||||||
);
|
|
||||||
|
|
||||||
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
|
rodDbSNP dbsnp = (rodDbSNP) tracker.lookup("dbsnp", null);
|
||||||
out.println("[multistate] " + infoString);
|
|
||||||
return 0;
|
String infoString = String.format("locus=%s ref=%c allele_count=( %c:%d %c:%d %c:%d %c:%d )",
|
||||||
} else {
|
context.getLocation(),
|
||||||
if (VERBOSE) {
|
ref.getBase(),
|
||||||
out.println("[locus_info] " + infoString);
|
BaseUtils.baseIndexToSimpleBase(sortedNames[0]), sortedCounts[0],
|
||||||
}
|
BaseUtils.baseIndexToSimpleBase(sortedNames[1]), sortedCounts[1],
|
||||||
|
BaseUtils.baseIndexToSimpleBase(sortedNames[2]), sortedCounts[2],
|
||||||
|
BaseUtils.baseIndexToSimpleBase(sortedNames[3]), sortedCounts[3]
|
||||||
|
);
|
||||||
|
|
||||||
|
if (SUPPRESS_MULTISTATE && sortedCounts[2] > 0) {
|
||||||
|
out.println("[multistate] " + infoString);
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
if (VERBOSE) {
|
||||||
|
out.println("[locus_info] " + infoString);
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
|
||||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
|
||||||
map.put(field,String.valueOf(1));
|
|
||||||
|
|
||||||
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
|
|
||||||
map.put(field, context.getContig());
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
|
|
||||||
map.put(field, String.valueOf(context.getPosition()));
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
|
|
||||||
map.put(field, String.valueOf(ref.getBases()));
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
|
|
||||||
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
|
|
||||||
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
|
||||||
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
|
||||||
map.put(field, String.valueOf("0"));
|
|
||||||
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
|
||||||
String infostr = ".";
|
|
||||||
ArrayList<String> info = new ArrayList<String>();
|
|
||||||
|
|
||||||
if (dbsnp != null) { info.add("DB=1"); }
|
|
||||||
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
|
|
||||||
|
|
||||||
for (int i = 0; i < info.size(); i++) {
|
|
||||||
if (i == 0) { infostr = ""; }
|
|
||||||
|
|
||||||
infostr += info.get(i);
|
|
||||||
|
|
||||||
if (i < info.size() - 1) {
|
|
||||||
infostr += ";";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
map.put(field, infostr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
|
|
||||||
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||||
|
map.put(field,String.valueOf(1));
|
||||||
|
|
||||||
|
if (field == VCFHeader.HEADER_FIELDS.CHROM) {
|
||||||
|
map.put(field, context.getContig());
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.POS) {
|
||||||
|
map.put(field, String.valueOf(context.getPosition()));
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.REF) {
|
||||||
|
map.put(field, String.valueOf(ref.getBases()));
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.ALT) {
|
||||||
|
map.put(field, String.valueOf(BaseUtils.baseIndexToSimpleBase(sortedNames[3])));
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.ID) {
|
||||||
|
map.put(field, (dbsnp == null) ? "." : dbsnp.name);
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
||||||
|
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
||||||
|
map.put(field, String.valueOf("0"));
|
||||||
|
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
||||||
|
String infostr = ".";
|
||||||
|
ArrayList<String> info = new ArrayList<String>();
|
||||||
|
|
||||||
|
if (dbsnp != null) { info.add("DB=1"); }
|
||||||
|
if (dbsnp != null && dbsnp.isHapmap()) { info.add("H2=1"); }
|
||||||
|
|
||||||
|
for (int i = 0; i < info.size(); i++) {
|
||||||
|
if (i == 0) { infostr = ""; }
|
||||||
|
|
||||||
|
infostr += info.get(i);
|
||||||
|
|
||||||
|
if (i < info.size() - 1) {
|
||||||
|
infostr += ";";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
map.put(field, infostr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue