From 69ee4d0454d124dcfd29adb24e88e02a071851f0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 22 May 2012 07:14:46 -0400 Subject: [PATCH] Moved getMetaDataForField to VariantContextUtils --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 7 ++----- .../utils/variantcontext/VariantContext.java | 15 ++------------- .../variantcontext/VariantContextUtils.java | 19 +++++++++++++++++++ .../variantcontext/writer/BCF2Writer.java | 4 ++-- .../org/broadinstitute/sting/WalkerTest.java | 11 +++++++++-- 5 files changed, 34 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 7bb0e16c5..083f9ba72 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -37,10 +37,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; +import org.broadinstitute.sting.utils.variantcontext.*; import java.io.ByteArrayInputStream; import java.io.FileInputStream; @@ -292,7 +289,7 @@ public class BCF2Codec implements FeatureCodec, ReferenceDepende for ( int i = 0; i < numInfoFields; i++ ) { final String key = getDictionaryString(); Object value = decoder.decodeTypedValue(); - final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, key); + final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key); if ( metaData.getType() == 
VCFHeaderLineType.Flag ) value = true; // special case for flags infoFieldEntries.put(key, value); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 9c1baef69..0ea1325da 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -3,11 +3,8 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; @@ -1295,7 +1292,7 @@ public class VariantContext implements Feature { // to enable tribble integratio for ( final Map.Entry attr : attributes.entrySet() ) { final String field = attr.getKey(); - final VCFCompoundHeaderLine format = getMetaDataForField(header, field); + final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); if ( decoded != null ) @@ -1359,14 +1356,6 @@ public class VariantContext implements Feature { // to enable tribble integratio return new Genotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.getFilters(), map, g.isPhased()); } - public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) { - VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field); - if ( metaData == null ) metaData = 
header.getInfoHeaderLine(field); - if ( metaData == null ) - throw new UserException.MalformedVCF("Fully decoding VariantContext requires header line for all fields, but none was found for " + field); - return metaData; - } - // --------------------------------------------------------------------------------------------------------- // // tribble integration routines -- not for public consumption diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index b720f8558..ee4bdaa99 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -49,6 +49,7 @@ public class VariantContextUtils { final public static JexlEngine engine = new JexlEngine(); public static final int DEFAULT_PLOIDY = 2; + private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = true; static { engine.setSilent(false); // will throw errors now for selects that don't evaluate properly @@ -268,6 +269,24 @@ public class VariantContextUtils { } + private static Set MISSING_KEYS_WARNED_ABOUT = new HashSet(); + public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) { + VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field); + if ( metaData == null ) metaData = header.getInfoHeaderLine(field); + if ( metaData == null ) { + if ( ASSUME_MISSING_FIELDS_ARE_STRINGS ) { + if ( ! 
MISSING_KEYS_WARNED_ABOUT.contains(field) ) { + MISSING_KEYS_WARNED_ABOUT.add(field); + logger.warn("Field " + field + " missing from VCF header, assuming it is an unbounded string type"); + } + return new VCFInfoHeaderLine(field, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Auto-generated string header for " + field); + } + else + throw new UserException.MalformedVCF("Fully decoding VariantContext requires header line for all fields, but none was found for " + field); + } + return metaData; + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 1bfcfc0b5..c7e55bacb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -226,7 +226,7 @@ class BCF2Writer extends IndexingVariantContextWriter { } private final int getNGenotypeFieldValues(final String field, final VariantContext vc) { - final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field); + final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field); assert metaData != null; // field is supposed to be in header int nFields = metaData.getCount(vc.getNAlleles() - 1); @@ -298,7 +298,7 @@ class BCF2Writer extends IndexingVariantContextWriter { // TODO -- need to generalize so we can enable vectors of compressed genotype ints // TODO -- no sense in allocating these over and over private final VCFToBCFEncoding prepFieldValueForEncoding(final String field, final Object value) { - final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field); + final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field); final boolean 
isList = value instanceof List; final Object toType = isList ? ((List)value).get(0) : value; diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index d02d0a7b3..bde635baa 100755 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -47,7 +47,7 @@ import java.io.IOException; import java.util.*; public class WalkerTest extends BaseTest { - private static final boolean GENERATE_SHADOW_BCF = false; + private static final boolean GENERATE_SHADOW_BCF = true; private static final boolean ENABLE_PHONE_HOME_FOR_TESTS = false; private static final boolean ENABLE_ON_THE_FLY_CHECK_FOR_VCF_INDEX = false; @@ -144,6 +144,7 @@ public class WalkerTest extends BaseTest { List exts = null; Class expectedException = null; boolean includeImplicitArgs = true; + boolean includeShadowBCF = true; // the default output path for the integration test private File outputFileLocation = null; @@ -183,13 +184,19 @@ public class WalkerTest extends BaseTest { args = args + (ENABLE_PHONE_HOME_FOR_TESTS ? String.format(" -et %s ", GATKRunReport.PhoneHomeOption.STANDARD) : String.format(" -et %s -K %s ", GATKRunReport.PhoneHomeOption.NO_ET, gatkKeyFile)); - if ( GENERATE_SHADOW_BCF ) + if ( includeShadowBCF && GENERATE_SHADOW_BCF ) args = args + " --generateShadowBCF "; } return args; } + /** + * When the input VCF files are malformed and cannot be fixed, + * this method tells the engine not to generate a shadow BCF, + * because attempting to do so would ultimately fail. + */ + public void disableShadowBCF() { this.includeShadowBCF = false; } public void setOutputFileLocation(File outputFileLocation) { this.outputFileLocation = outputFileLocation; }