Support for VCF 4.1 header counts

This commit is contained in:
Eric Banks 2011-07-11 17:40:45 -04:00
parent 6e7b5e1e7a
commit e3748675db
9 changed files with 101 additions and 30 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -142,5 +143,5 @@ public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnot
// public String getIndelBases()
/** Returns the single key ("AD") for this annotation; getDescriptions() uses the first entry as the ID of its header line. */
public List<String> getKeyNames() { return Arrays.asList("AD"); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFCompoundHeaderLine.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed")); }
/**
 * Describes the FORMAT header line for this annotation.
 *
 * @return a one-element list holding an Integer-typed line with an unbounded
 *         number of values, keyed by the first entry of getKeyNames()
 */
public List<VCFFormatHeaderLine> getDescriptions() {
    final String key = getKeyNames().get(0);
    final VCFFormatHeaderLine depthLine = new VCFFormatHeaderLine(
            key,
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.Integer,
            "Allelic depths for the ref and alt alleles in the order listed");
    return Arrays.asList(depthLine);
}
}

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -200,8 +201,8 @@ public class ReadDepthAndAllelicFractionBySample implements GenotypeAnnotation {
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFCompoundHeaderLine.UNBOUNDED,
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -65,5 +66,5 @@ public class SampleList implements InfoFieldAnnotation {
/** Returns the single key ("Samples") produced by this annotation. */
public List<String> getKeyNames() { return Arrays.asList("Samples"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("Samples", VCFInfoHeaderLine.UNBOUNDED, VCFHeaderLineType.String, "List of polymorphic samples")); }
/**
 * Describes the INFO header line for this annotation.
 *
 * @return a one-element list holding a String-typed "Samples" line with an
 *         unbounded number of values
 */
public List<VCFInfoHeaderLine> getDescriptions() {
    final VCFInfoHeaderLine samplesLine = new VCFInfoHeaderLine(
            "Samples",
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.String,
            "List of polymorphic samples");
    return Arrays.asList(samplesLine);
}
}

View File

@ -360,14 +360,7 @@ public class StandardVCFWriter implements VCFWriter {
if ( !entry.getValue().equals("") ) {
int numVals = 1;
VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
if ( metaData != null )
numVals = metaData.getCount();
// take care of unbounded encoding
if ( numVals == VCFInfoHeaderLine.UNBOUNDED )
numVals = 1;
if ( numVals > 0 ) {
// Write "=value" unless the header explicitly declares this key with an integer count of zero.
// A key with no header line at all is treated as carrying a value, so its value is not silently
// dropped; getCount() is only reached for INTEGER count types, where it cannot throw.
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
    mWriter.write("=");
    mWriter.write(entry.getValue());
}
@ -423,7 +416,7 @@ public class StandardVCFWriter implements VCFWriter {
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(key);
if ( metaData != null ) {
int numInFormatField = metaData.getCount();
int numInFormatField = metaData.getCount(vc.getAlternateAlleles().size());
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
// For example, if Number=2, the string has to be ".,."

View File

@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.Map;
@ -43,26 +45,43 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
// the field types
private String name;
private int count;
private int count = -1;
private VCFHeaderLineCount countType;
private String description;
private VCFHeaderLineType type;
// access methods
/** @return the name of this header line (the value stored under "ID" in its text encoding) */
public String getName() { return name; }
public int getCount() { return count; }
/** @return the description text of this header line */
public String getDescription() { return description; }
/** @return the value type declared for this header line */
public VCFHeaderLineType getType() { return type; }
/** @return how this line declares its number of values (see VCFHeaderLineCount) */
public VCFHeaderLineCount getCountType() { return countType; }
/**
 * Returns the fixed number of values declared by this header line.
 * Only meaningful when the count type is INTEGER.
 *
 * @return the integer count
 * @throws ReviewedStingException if the count type is anything other than INTEGER
 */
public int getCount() {
    if ( countType == VCFHeaderLineCount.INTEGER )
        return count;
    throw new ReviewedStingException("Asking for header line count when type is not an integer");
}
//
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
// utility method
/**
 * Resolves the number of values this line implies for a record with the given
 * number of alternate alleles.
 *
 * @param numAltAlleles the number of alternate alleles in the record
 * @return the stored count for INTEGER; -1 for UNBOUNDED; numAltAlleles for A;
 *         (numAltAlleles + 1) * (numAltAlleles + 2) / 2 for G
 * @throws ReviewedStingException if the count type is not one of the known values
 */
public int getCount(int numAltAlleles) {
    switch ( countType ) {
        case INTEGER:
            return count;
        case UNBOUNDED:
            return -1;
        case A:
            return numAltAlleles;
        case G:
            // unordered pairs drawn from (numAltAlleles + 1) alleles
            return (numAltAlleles + 1) * (numAltAlleles + 2) / 2;
        default:
            throw new ReviewedStingException("Unknown count type: " + countType);
    }
}
/**
 * Marks this header line as having an unbounded number of values:
 * the count type becomes UNBOUNDED and the stored integer count is reset to -1.
 */
public void setNumberToUnbounded() {
    this.count = -1;
    this.countType = VCFHeaderLineCount.UNBOUNDED;
}
// our type of line, i.e. format, info, etc
private final SupportedHeaderLineType lineType;
// line numerical values are allowed to be unbounded (or unknown), which is
// marked with a dot (.)
public static final int UNBOUNDED = -1; // the value we store internally for unbounded types
/**
* create a VCF format header line
*
@ -74,6 +93,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
super(lineType.toString(), "");
this.name = name;
this.countType = VCFHeaderLineCount.INTEGER;
this.count = count;
this.type = type;
this.description = description;
@ -81,6 +101,24 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
validate();
}
/**
 * create a VCF compound header line whose number of values is described by a count type
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line; must not be null or INTEGER,
 *                     since this constructor receives no integer count to go with it
 * @param type         the type for this header line
 * @param description  the description for this header line
 * @param lineType     the kind of header line being created (e.g. info or format)
 * @throws IllegalArgumentException if count is null or INTEGER
 */
protected VCFCompoundHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
    super(lineType.toString(), "");
    // An INTEGER count type without an accompanying integer would leave the internal count at -1,
    // which would then be written out as "Number=-1"; reject it up front instead.
    if ( count == null || count == VCFHeaderLineCount.INTEGER )
        throw new IllegalArgumentException("A non-integer count type is required when no integer count is given; got " + count);
    this.name = name;
    this.countType = count;
    this.type = type;
    this.description = description;
    this.lineType = lineType;
    validate();
}
/**
* create a VCF format header line
*
@ -92,9 +130,22 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
super(lineType.toString(), "");
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
name = mapping.get("ID");
count = (version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) ?
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
count = -1;
final String numberStr = mapping.get("Number");
if ( numberStr.equals(VCFConstants.PER_ALLELE_COUNT) ) {
countType = VCFHeaderLineCount.A;
} else if ( numberStr.equals(VCFConstants.PER_GENOTYPE_COUNT) ) {
countType = VCFHeaderLineCount.G;
} else if ( ((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1) &&
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v4)) ||
((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) &&
numberStr.equals(VCFConstants.UNBOUNDED_ENCODING_v3)) ) {
countType = VCFHeaderLineCount.UNBOUNDED;
} else {
countType = VCFHeaderLineCount.INTEGER;
count = Integer.valueOf(numberStr);
}
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
@ -121,7 +172,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
protected String toStringEncoding() {
Map<String,Object> map = new LinkedHashMap<String,Object>();
map.put("ID", name);
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
Object number;
switch ( countType ) {
case A: number = VCFConstants.PER_ALLELE_COUNT; break;
case G: number = VCFConstants.PER_GENOTYPE_COUNT; break;
case UNBOUNDED: number = VCFConstants.UNBOUNDED_ENCODING_v4; break;
case INTEGER:
default: number = count;
}
map.put("Number", number);
map.put("Type", type);
map.put("Description", description);
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
@ -136,15 +195,13 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( !(o instanceof VCFCompoundHeaderLine) )
return false;
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
return name.equals(other.name) &&
count == other.count &&
description.equals(other.description) &&
type == other.type &&
lineType == other.lineType;
return equalsExcludingDescription(other) &&
description.equals(other.description);
}
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
return count == other.count &&
countType == other.countType &&
type == other.type &&
lineType == other.lineType &&
name.equals(other.name);

View File

@ -99,6 +99,8 @@ public final class VCFConstants {
public static final String MISSING_DEPTH_v3 = "-1";
public static final String UNBOUNDED_ENCODING_v4 = ".";
public static final String UNBOUNDED_ENCODING_v3 = "-1";
// header "Number" value that is parsed into the per-alternate-allele count type (VCFHeaderLineCount.A)
public static final String PER_ALLELE_COUNT = "A";
// header "Number" value that is parsed into the per-genotype count type (VCFHeaderLineCount.G)
public static final String PER_GENOTYPE_COUNT = "G";
public static final String EMPTY_ALLELE = ".";
public static final String EMPTY_GENOTYPE = "./.";
public static final double MAX_GENOTYPE_QUAL = 99.0;

View File

@ -16,6 +16,10 @@ public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
}
/**
 * create a VCF format header line whose number of values is given as a count type
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line
 * @param type         the type for this header line; Flag is not allowed
 * @param description  the description for this header line
 * @throws IllegalArgumentException if type is Flag, which format fields do not support
 */
public VCFFormatHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
    // must register as a FORMAT line: with INFO here the line would be keyed and encoded as an INFO line
    super(name, count, type, description, SupportedHeaderLineType.FORMAT);
    if ( type == VCFHeaderLineType.Flag )
        throw new IllegalArgumentException("Flag is an unsupported type for format fields");
}
/**
 * create a VCF format header line from its textual form
 *
 * @param line     the header line text, handed to the superclass together with the FORMAT line type
 * @param version  the VCF version of the header the line comes from
 */
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.FORMAT);
}

View File

@ -0,0 +1,8 @@
package org.broadinstitute.sting.utils.codecs.vcf;
/**
 * The ways a VCF header line can declare how many values its field carries.
 */
public enum VCFHeaderLineCount {
    /** a fixed, explicitly stated number of values */
    INTEGER,
    /** one value per alternate allele */
    A,
    /** one value per genotype */
    G,
    /** an unknown or unbounded number of values */
    UNBOUNDED
}

View File

@ -13,6 +13,10 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
/**
 * create a VCF info header line whose number of values is given as a count type
 *
 * @param name         the name for this header line
 * @param count        the count type for this header line
 * @param type         the type for this header line
 * @param description  the description for this header line
 */
public VCFInfoHeaderLine(String name, VCFHeaderLineCount count, VCFHeaderLineType type, String description) {
super(name, count, type, description, SupportedHeaderLineType.INFO);
}
/**
 * create a VCF info header line from its textual form
 *
 * @param line     the header line text, handed to the superclass together with the INFO line type
 * @param version  the VCF version of the header the line comes from
 */
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
super(line, version, SupportedHeaderLineType.INFO);
}