diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java index 75b396bd9..994a2575d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/StandardVCFWriter.java @@ -167,8 +167,7 @@ public class StandardVCFWriter extends IndexingVCFWriter { vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, false); super.add(vc); - Map alleleMap = new HashMap(vc.getAlleles().size()); - alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup + Map alleleMap = buildAlleleMap(vc); // CHROM mWriter.write(vc.getChr()); @@ -184,7 +183,6 @@ public class StandardVCFWriter extends IndexingVCFWriter { mWriter.write(VCFConstants.FIELD_SEPARATOR); // REF - alleleMap.put(vc.getReference(), "0"); String refString = vc.getReference().getDisplayString(); mWriter.write(refString); mWriter.write(VCFConstants.FIELD_SEPARATOR); @@ -192,13 +190,11 @@ public class StandardVCFWriter extends IndexingVCFWriter { // ALT if ( vc.isVariant() ) { Allele altAllele = vc.getAlternateAllele(0); - alleleMap.put(altAllele, "1"); String alt = altAllele.getDisplayString(); mWriter.write(alt); for (int i = 1; i < vc.getAlternateAlleles().size(); i++) { altAllele = vc.getAlternateAllele(i); - alleleMap.put(altAllele, String.valueOf(i+1)); alt = altAllele.getDisplayString(); mWriter.write(","); mWriter.write(alt); @@ -260,6 +256,18 @@ public class StandardVCFWriter extends IndexingVCFWriter { } } + public static Map buildAlleleMap(final VariantContext vc) { + final Map alleleMap = new HashMap(vc.getAlleles().size()+1); + alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup + + final List alleles = vc.getAlleles(); + for ( int i = 0; i < alleles.size(); i++ ) { + alleleMap.put(alleles.get(i), String.valueOf(i)); + } + + return alleleMap; + } + // -------------------------------------------------------------------------------- // // implementation functions @@ -440,7 +448,7 @@ public class StandardVCFWriter extends IndexingVCFWriter { return result; } - private static List calcVCFGenotypeKeys(VariantContext vc) { + public static List calcVCFGenotypeKeys(VariantContext vc) { Set keys = new HashSet(); boolean sawGoodGT = false; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 3faad46e2..7af4f35ad 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -3,8 +3,11 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; @@ -250,11 +253,11 @@ public class VariantContext implements Feature { // to enable tribble integratio * @param validationToPerform set of validation steps to take */ protected VariantContext(String source, String ID, - String contig, long start, long stop, - Collection alleles, GenotypesContext genotypes, - double log10PError, Set filters, Map attributes, - Byte referenceBaseForIndel, - EnumSet validationToPerform ) { + String contig, long start, long stop, + Collection alleles, GenotypesContext genotypes, + double log10PError, Set filters, Map attributes, + Byte referenceBaseForIndel, + EnumSet validationToPerform ) { if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); } this.contig = contig; this.start = start; @@ -808,7 +811,7 @@ public class VariantContext implements Feature { // to enable tribble integratio return getCalledChrCount(new HashSet(0)); } - /** + /** * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS) * * @param sampleIds IDs of samples to take into account. If empty then all samples are included. @@ -819,8 +822,8 @@ public class VariantContext implements Feature { // to enable tribble integratio GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds); for ( final Genotype g : genotypes) { - for ( final Allele a : g.getAlleles() ) - n += a.isNoCall() ? 0 : 1; + for ( final Allele a : g.getAlleles() ) + n += a.isNoCall() ? 0 : 1; } return n; @@ -848,7 +851,7 @@ public class VariantContext implements Feature { // to enable tribble integratio GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds); for ( final Genotype g : genotypes ) { - n += g.getAlleles(a).size(); + n += g.getAlleles(a).size(); } return n; @@ -1015,7 +1018,7 @@ public class VariantContext implements Feature { // to enable tribble integratio public void validateChromosomeCounts() { if ( !hasGenotypes() ) return; - + // AN if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) { int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString()); @@ -1249,6 +1252,89 @@ public class VariantContext implements Feature { // to enable tribble integratio return alleleList; } + // --------------------------------------------------------------------------------------------------------- + // + // Fully decode + // + // --------------------------------------------------------------------------------------------------------- + public VariantContext fullyDecode(final VCFHeader header) { + final VariantContextBuilder builder = new VariantContextBuilder(this); + fullyDecodeInfo(builder, header); + fullyDecodeGenotypes(builder, header); + return builder.make(); + } + + private final void fullyDecodeInfo(final VariantContextBuilder builder, final VCFHeader header) { + builder.attributes(fullyDecodeAttributes(getAttributes(), header)); + } + + private final Map fullyDecodeAttributes(final Map attributes, final VCFHeader header) { + final Map newAttributes = new HashMap(attributes.size()); + + for ( final Map.Entry attr : attributes.entrySet() ) { + final String field = attr.getKey(); + final VCFCompoundHeaderLine format = getMetaDataForField(header, field); + final Object decoded = decodeValue(field, attr.getValue(), format); + if ( decoded != null ) + newAttributes.put(field, decoded); + } + + return newAttributes; + } + + private final Object decodeValue(final String field, final Object value, final VCFCompoundHeaderLine format) { + if ( value instanceof String ) { + final String string = (String)value; + if ( string.indexOf(",") != -1 ) { + final String[] splits = string.split(","); + final List values = new ArrayList(splits.length); + for ( int i = 0; i < splits.length; i++ ) + values.add(decodeOne(field, splits[i], format)); + return values; + } else { + return decodeOne(field, string, format); + } + } else { + return value; + } + } + + private final Object decodeOne(final String field, final String string, final VCFCompoundHeaderLine format) { + if ( string.equals(VCFConstants.MISSING_VALUE_v4) ) + return null; + else { + switch ( format.getType() ) { + case Character: return string; + case Flag: return Boolean.valueOf(string); + case String: return string; + case Integer: return Integer.valueOf(string); + case Float: return Float.valueOf(string); + default: throw new ReviewedStingException("Unexpected type for field" + field); + } + } + } + + private final void fullyDecodeGenotypes(final VariantContextBuilder builder, final VCFHeader header) { + final GenotypesContext gc = new GenotypesContext(); + for ( final Genotype g : getGenotypes() ) { + gc.add(fullyDecodeGenotypes(g, header)); + } + builder.genotypesNoValidation(gc); + } + + private final Genotype fullyDecodeGenotypes(final Genotype g, final VCFHeader header) { + final Map map = fullyDecodeAttributes(g.getAttributes(), header); + return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), map, g.isPhased()); + } + + public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) { + VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field); + if ( metaData == null ) metaData = header.getInfoHeaderLine(field); + if ( metaData == null ) + throw new UserException.MalformedVCF("Fully decoding VariantContext requires header line for all fields, but none was found for " + field); + return metaData; + } + // --------------------------------------------------------------------------------------------------------- // // tribble integration routines -- not for public consumption @@ -1279,7 +1365,7 @@ public class VariantContext implements Feature { // to enable tribble integratio // optimization: for bi-allelic sites, just return the 1only alt allele if ( isBiallelic() ) return getAlternateAllele(0); - + Allele best = null; int maxAC1 = 0; for ( Allele a : getAlternateAlleles() ) {