Adding support for other simple header line types (e.g. ALT) and cleaning up the interface a bit.
This commit is contained in:
parent
400b0d4422
commit
d7d15019dd
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
|
@ -41,8 +42,8 @@ import java.util.*;
|
||||||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||||
|
|
||||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -158,7 +157,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
}
|
}
|
||||||
|
|
||||||
// FORMAT and INFO fields
|
// FORMAT and INFO fields
|
||||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
headerInfo.addAll(getSupportedHeaderStrings());
|
||||||
|
|
||||||
// FILTER fields
|
// FILTER fields
|
||||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||||
|
|
@ -167,6 +166,20 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
return headerInfo;
|
return headerInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||||
|
* @return a set of VCF format lines
|
||||||
|
*/
|
||||||
|
private static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||||
|
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||||
|
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||||
|
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||||
|
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||||
|
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute at a given locus.
|
* Compute at a given locus.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -358,9 +358,8 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.write(key);
|
mWriter.write(key);
|
||||||
|
|
||||||
if ( !entry.getValue().equals("") ) {
|
if ( !entry.getValue().equals("") ) {
|
||||||
int numVals = 1;
|
|
||||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||||
if ( metaData != null && (metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() > 0) ) {
|
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
|
||||||
mWriter.write("=");
|
mWriter.write("=");
|
||||||
mWriter.write(entry.getValue());
|
mWriter.write(entry.getValue());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* A class representing a key=value entry for ALT fields in the VCF header
|
||||||
|
*/
|
||||||
|
public class VCFAltHeaderLine extends VCFSimpleHeaderLine {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a VCF ALT header line
|
||||||
|
*
|
||||||
|
* @param name the name for this header line
|
||||||
|
* @param description the description for this header line
|
||||||
|
*/
|
||||||
|
public VCFAltHeaderLine(String name, String description) {
|
||||||
|
super(name, description, SupportedHeaderLineType.ALT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a VCF ALT header line from a header line string
|
||||||
|
*
|
||||||
|
* @param line the header line
|
||||||
|
* @param version the vcf header version
|
||||||
|
*/
|
||||||
|
protected VCFAltHeaderLine(String line, VCFHeaderVersion version) {
|
||||||
|
super(line, version, SupportedHeaderLineType.ALT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -89,6 +89,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
* @param count the count for this header line
|
* @param count the count for this header line
|
||||||
* @param type the type for this header line
|
* @param type the type for this header line
|
||||||
* @param description the description for this header line
|
* @param description the description for this header line
|
||||||
|
* @param lineType the header line type
|
||||||
*/
|
*/
|
||||||
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||||
super(lineType.toString(), "");
|
super(lineType.toString(), "");
|
||||||
|
|
@ -108,6 +109,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
* @param count the count type for this header line
|
* @param count the count type for this header line
|
||||||
* @param type the type for this header line
|
* @param type the type for this header line
|
||||||
* @param description the description for this header line
|
* @param description the description for this header line
|
||||||
|
* @param lineType the header line type
|
||||||
*/
|
*/
|
||||||
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||||
super(lineType.toString(), "");
|
super(lineType.toString(), "");
|
||||||
|
|
@ -124,6 +126,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
*
|
*
|
||||||
* @param line the header line
|
* @param line the header line
|
||||||
* @param version the VCF header version
|
* @param version the VCF header version
|
||||||
|
* @param lineType the header line type
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,10 @@
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author ebanks
|
* @author ebanks
|
||||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||||
*/
|
*/
|
||||||
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
|
||||||
|
|
||||||
private String name;
|
|
||||||
private String description;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF filter header line
|
* create a VCF filter header line
|
||||||
|
|
@ -22,12 +13,7 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
||||||
* @param description the description for this header line
|
* @param description the description for this header line
|
||||||
*/
|
*/
|
||||||
public VCFFilterHeaderLine(String name, String description) {
|
public VCFFilterHeaderLine(String name, String description) {
|
||||||
super("FILTER", "");
|
super(name, description, SupportedHeaderLineType.FILTER);
|
||||||
this.name = name;
|
|
||||||
this.description = description;
|
|
||||||
|
|
||||||
if ( name == null || description == null )
|
|
||||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -37,34 +23,6 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader
|
||||||
* @param version the vcf header version
|
* @param version the vcf header version
|
||||||
*/
|
*/
|
||||||
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
|
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
|
||||||
super("FILTER", "");
|
super(line, version, SupportedHeaderLineType.FILTER);
|
||||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
|
||||||
name = mapping.get("ID");
|
|
||||||
description = mapping.get("Description");
|
|
||||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
|
||||||
description = UNBOUND_DESCRIPTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected String toStringEncoding() {
|
|
||||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
|
||||||
map.put("ID", name);
|
|
||||||
map.put("Description", description);
|
|
||||||
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
|
||||||
return false;
|
|
||||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
|
||||||
return name.equals(other.name) &&
|
|
||||||
description.equals(other.description);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() {
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDescription() {
|
|
||||||
return description;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -17,7 +17,7 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
|
||||||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* A class representing a key=value entry for simple VCF header types
|
||||||
|
*/
|
||||||
|
public abstract class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||||
|
|
||||||
|
public enum SupportedHeaderLineType {
|
||||||
|
FILTER, ALT;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String name;
|
||||||
|
private String description;
|
||||||
|
|
||||||
|
// our type of line, i.e. filter, alt, etc
|
||||||
|
private final SupportedHeaderLineType lineType;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a simple VCF header line (e.g. FILTER or ALT)
|
||||||
|
*
|
||||||
|
* @param name the name for this header line
|
||||||
|
* @param description the description for this header line
|
||||||
|
* @param lineType the header line type
|
||||||
|
*/
|
||||||
|
public VCFSimpleHeaderLine(String name, String description, SupportedHeaderLineType lineType) {
|
||||||
|
super(lineType.toString(), "");
|
||||||
|
this.lineType = lineType;
|
||||||
|
this.name = name;
|
||||||
|
this.description = description;
|
||||||
|
|
||||||
|
if ( name == null || description == null )
|
||||||
|
throw new IllegalArgumentException(String.format("Invalid VCFSimpleHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a simple VCF header line by parsing a header line string
|
||||||
|
*
|
||||||
|
* @param line the header line
|
||||||
|
* @param version the vcf header version
|
||||||
|
* @param lineType the header line type
|
||||||
|
*/
|
||||||
|
protected VCFSimpleHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||||
|
super(lineType.toString(), "");
|
||||||
|
this.lineType = lineType;
|
||||||
|
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||||
|
name = mapping.get("ID");
|
||||||
|
description = mapping.get("Description");
|
||||||
|
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||||
|
description = UNBOUND_DESCRIPTION;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String toStringEncoding() {
|
||||||
|
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||||
|
map.put("ID", name);
|
||||||
|
map.put("Description", description);
|
||||||
|
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if ( !(o instanceof VCFSimpleHeaderLine) )
|
||||||
|
return false;
|
||||||
|
VCFSimpleHeaderLine other = (VCFSimpleHeaderLine)o;
|
||||||
|
return name.equals(other.name) &&
|
||||||
|
description.equals(other.description);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getDescription() {
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -180,19 +180,4 @@ public class VCFUtils {
|
||||||
|
|
||||||
return new HashSet<VCFHeaderLine>(map.values());
|
return new HashSet<VCFHeaderLine>(map.values());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
|
||||||
* @return a set of VCF format lines
|
|
||||||
*/
|
|
||||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
|
||||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
|
||||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
|
||||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
|
||||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
|
||||||
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, -1, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; if site is not biallelic, number of likelihoods if n*(n+1)/2"));
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue