Support for VCF 4.1 header counts
This commit is contained in:
parent
6e7b5e1e7a
commit
e3748675db
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
|
||||||
// public String getIndelBases()
|
// public String getIndelBases()
|
||||||
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
public List<String> getKeyNames() { return Arrays.asList("AD"); }
|
||||||
|
|
||||||
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
|
/**
 * Builds the header line describing the annotation emitted by this class.
 *
 * @return a single-element list holding a VCFFormatHeaderLine keyed by the first
 *         entry of getKeyNames(), declared with an unbounded count and Integer type
 */
public List<VCFFormatHeaderLine> getDescriptions() {
    final String key = getKeyNames().get(0);
    final VCFFormatHeaderLine depthLine = new VCFFormatHeaderLine(
            key,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.Integer,
            "Allelic depths for the ref and alt alleles in the order listed");
    return Arrays.asList(depthLine);
}
|
||||||
}
|
}
|
||||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
|
@ -201,7 +202,7 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
|
||||||
VCFHeaderLineType.Integer,
|
VCFHeaderLineType.Integer,
|
||||||
"Total read depth per sample, including MQ0"),
|
"Total read depth per sample, including MQ0"),
|
||||||
new VCFFormatHeaderLine(getKeyNames().get(1),
|
new VCFFormatHeaderLine(getKeyNames().get(1),
|
||||||
VCFCompoundHeaderLine.UNBOUNDED,
|
VCFHeaderLineCount.UNBOUNDED,
|
||||||
VCFHeaderLineType.Float,
|
VCFHeaderLineType.Float,
|
||||||
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
|
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
|
@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
|
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
|
/**
 * Builds the header line describing the annotation emitted by this class.
 *
 * @return a single-element list holding the "Samples" VCFInfoHeaderLine, declared
 *         with an unbounded count and String type
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final VCFInfoHeaderLine samplesLine = new VCFInfoHeaderLine(
            "Samples",
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "List of polymorphic samples");
    return Arrays.asList(samplesLine);
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -360,14 +360,7 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
if ( !entry.getValue().equals("") ) {
|
if ( !entry.getValue().equals("") ) {
|
||||||
int numVals = 1;
|
int numVals = 1;
|
||||||
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
|
||||||
if ( metaData != null )
|
if ( metaData != null && (metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() > 0) ) {
|
||||||
numVals = metaData.getCount();
|
|
||||||
|
|
||||||
// take care of unbounded encoding
|
|
||||||
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
|
|
||||||
numVals = 1;
|
|
||||||
|
|
||||||
if ( numVals > 0 ) {
|
|
||||||
mWriter.write("=");
|
mWriter.write("=");
|
||||||
mWriter.write(entry.getValue());
|
mWriter.write(entry.getValue());
|
||||||
}
|
}
|
||||||
|
|
@ -423,7 +416,7 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
|
|
||||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
|
||||||
if ( metaData != null ) {
|
if ( metaData != null ) {
|
||||||
int numInFormatField = metaData.getCount();
|
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
|
||||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||||
// For example, if Number=2, the string has to be ".,."
|
// For example, if Number=2, the string has to be ".,."
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
|
|
||||||
// the field types
|
// the field types
|
||||||
private String name;
|
private String name;
|
||||||
private int count;
|
private int count = -1;
|
||||||
|
private VCFHeaderLineCount countType;
|
||||||
private String description;
|
private String description;
|
||||||
private VCFHeaderLineType type;
|
private VCFHeaderLineType type;
|
||||||
|
|
||||||
// access methods
|
// access methods
|
||||||
public String getName() { return name; }
|
public String getName() { return name; }
|
||||||
public int getCount() { return count; }
|
|
||||||
public String getDescription() { return description; }
|
public String getDescription() { return description; }
|
||||||
public VCFHeaderLineType getType() { return type; }
|
public VCFHeaderLineType getType() { return type; }
|
||||||
|
public VCFHeaderLineCount getCountType() { return countType; }
|
||||||
|
public int getCount() {
|
||||||
|
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||||
|
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
//
|
// utility method
|
||||||
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
|
public int getCount(int numAltAlleles) {
|
||||||
|
int myCount;
|
||||||
|
switch ( countType ) {
|
||||||
|
case INTEGER: myCount = count; break;
|
||||||
|
case UNBOUNDED: myCount = -1; break;
|
||||||
|
case A: myCount = numAltAlleles; break;
|
||||||
|
case G: myCount = ((numAltAlleles + 1) * (numAltAlleles + 2) / 2); break;
|
||||||
|
default: throw new ReviewedStingException("Unknown count type: " + countType);
|
||||||
|
}
|
||||||
|
return myCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNumberToUnbounded() {
|
||||||
|
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||||
|
count = -1;
|
||||||
|
}
|
||||||
|
|
||||||
// our type of line, i.e. format, info, etc
|
// our type of line, i.e. format, info, etc
|
||||||
private final SupportedHeaderLineType lineType;
|
private final SupportedHeaderLineType lineType;
|
||||||
|
|
||||||
// line numerical values are allowed to be unbounded (or unknown), which is
|
|
||||||
// marked with a dot (.)
|
|
||||||
public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF format header line
|
* create a VCF format header line
|
||||||
*
|
*
|
||||||
|
|
@ -74,6 +93,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||||
super(lineType.toString(), "");
|
super(lineType.toString(), "");
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
this.countType = VCFHeaderLineCount.INTEGER;
|
||||||
this.count = count;
|
this.count = count;
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.description = description;
|
this.description = description;
|
||||||
|
|
@ -81,6 +101,24 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create a compound header line whose number of values is expressed as a count
 * type rather than a fixed integer. The integer count field is not assigned here
 * and keeps its initial value.
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 * @param lineType     the kind of header line being built; its string form is
 *                     passed to the superclass constructor
 */
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
    super(lineType.toString(), "");
    this.lineType = lineType;
    this.name = name;
    this.type = type;
    this.description = description;
    this.countType = count;
    // validation runs last, once every field has been populated
    validate();
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF format header line
|
* create a VCF format header line
|
||||||
*
|
*
|
||||||
|
|
@ -92,9 +130,22 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
super(lineType.toString(), "");
|
super(lineType.toString(), "");
|
||||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||||
name = mapping.get("ID");
|
name = mapping.get("ID");
|
||||||
count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
|
count = -1;
|
||||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
final String numberStr = mapping.get("Number");
|
||||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
|
||||||
|
countType = VCFHeaderLineCount.A;
|
||||||
|
} else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
|
||||||
|
countType = VCFHeaderLineCount.G;
|
||||||
|
} else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
|
||||||
|
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
|
||||||
|
((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
|
||||||
|
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
|
||||||
|
countType = VCFHeaderLineCount.UNBOUNDED;
|
||||||
|
} else {
|
||||||
|
countType = VCFHeaderLineCount.INTEGER;
|
||||||
|
count = Integer.valueOf(numberStr);
|
||||||
|
|
||||||
|
}
|
||||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||||
|
|
@ -121,7 +172,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
protected String toStringEncoding() {
|
protected String toStringEncoding() {
|
||||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||||
map.put("ID", name);
|
map.put("ID", name);
|
||||||
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
|
Object number;
|
||||||
|
switch ( countType ) {
|
||||||
|
case A: number = VCFConstants.PER_ALLELE_COUNT; break;
|
||||||
|
case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
|
||||||
|
case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
|
||||||
|
case INTEGER:
|
||||||
|
default: number = count;
|
||||||
|
}
|
||||||
|
map.put("Number", number);
|
||||||
map.put("Type", type);
|
map.put("Type", type);
|
||||||
map.put("Description", description);
|
map.put("Description", description);
|
||||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||||
|
|
@ -136,15 +195,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
||||||
if ( !(o instanceof VCFCompoundHeaderLine) )
|
if ( !(o instanceof VCFCompoundHeaderLine) )
|
||||||
return false;
|
return false;
|
||||||
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
|
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
|
||||||
return name.equals(other.name) &&
|
return equalsExcludingDescription(other) &&
|
||||||
count == other.count &&
|
description.equals(other.description);
|
||||||
description.equals(other.description) &&
|
|
||||||
type == other.type &&
|
|
||||||
lineType == other.lineType;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
|
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
|
||||||
return count == other.count &&
|
return count == other.count &&
|
||||||
|
countType == other.countType &&
|
||||||
type == other.type &&
|
type == other.type &&
|
||||||
lineType == other.lineType &&
|
lineType == other.lineType &&
|
||||||
name.equals(other.name);
|
name.equals(other.name);
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,8 @@ public final class VCFConstants {
|
||||||
public static final String MISSING_DEPTH_v3 = "-1";
|
public static final String MISSING_DEPTH_v3 = "-1";
|
||||||
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
||||||
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
||||||
|
public static final String PER_ALLELE_COUNT = "A";
|
||||||
|
public static final String PER_GENOTYPE_COUNT = "G";
|
||||||
public static final String EMPTY_ALLELE = ".";
|
public static final String EMPTY_ALLELE = ".";
|
||||||
public static final String EMPTY_GENOTYPE = "./.";
|
public static final String EMPTY_GENOTYPE = "./.";
|
||||||
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
|
||||||
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
|
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create a FORMAT header line whose number of values is expressed as a count type.
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 */
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
    // Fix: this must register as a FORMAT line. Passing INFO made the superclass key
    // and serialize the line as INFO, disagreeing with the line-parsing constructor
    // of this class, which uses SupportedHeaderLineType.FORMAT.
    // NOTE(review): another constructor of this class throws "Flag is an unsupported
    // type for format fields"; confirm whether this overload needs the same guard.
    super(name, count, type, description, SupportedHeaderLineType.FORMAT);
}
|
||||||
|
|
||||||
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
||||||
super(line, version, SupportedHeaderLineType.FORMAT);
|
super(line, version, SupportedHeaderLineType.FORMAT);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
/**
 * The ways a VCF header line can declare how many values its field carries.
 */
public enum VCFHeaderLineCount {
    /** a fixed integer number of values */
    INTEGER,
    /** one value per alternate allele (written as "A" in the header) */
    A,
    /** one value per genotype (written as "G" in the header) */
    G,
    /** an unbounded number of values */
    UNBOUNDED
}
|
||||||
|
|
@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
|
||||||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Create an INFO header line whose number of values is expressed as a count type.
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 */
public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
    super(
            name,
            count,
            type,
            description,
            SupportedHeaderLineType.INFO);
}
|
||||||
|
|
||||||
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
|
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
|
||||||
super(line, version, SupportedHeaderLineType.INFO);
|
super(line, version, SupportedHeaderLineType.INFO);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue