From 549293b6f7b960e41e629177ea7610e4afb19596 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 20 Jun 2012 20:00:17 -0400 Subject: [PATCH] Bugfixes towards final BCF2 implementation -- MLAC and MLAF in PoolCaller now use standard MLE_AC and MLE_AF -- VCFDiffableReader disables onTheFly fixing of VCF header fields so comparisons are easier when headers are changing -- Flag fields with FLAG_KEY=0 are parsed as though FLAG_KEY were entirely absent in AbstractVCFCodec to fix bug where FLAG_KEY=0 was being translated into FLAG_KEY in output VCF, making a false flag value a true one -- Fix the GT field value in VariantContextTestProviders so it isn't fixed 1000s of times during testing -- Keys whose value is null are put into the VariantContext info attributes now --- .../walkers/diffengine/VCFDiffableReader.java | 1 + .../utils/codecs/vcf/AbstractVCFCodec.java | 35 ++++++++++++++++--- .../codecs/vcf/VCFStandardHeaderLines.java | 4 +-- .../utils/variantcontext/VariantContext.java | 25 +++++++------ .../VariantContextTestProvider.java | 2 +- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index 7c03929ae..df5f5adf1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -65,6 +65,7 @@ public class VCFDiffableReader implements DiffableReader { br.close(); // must be read as state is stored in reader itself + AbstractVCFCodec.disableOnTheFlyModifications(); FeatureReader reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false); VCFHeader header = (VCFHeader)reader.getHeader(); for ( VCFHeaderLine headerLine : header.getMetaData() ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index baeec462d..77aed0e0b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -22,6 +22,7 @@ import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec extends AsciiFeatureCodec implements NameAwareCodec { public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); + protected static boolean doOnTheFlyModifications = true; protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -58,6 +59,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec protected Map stringCache = new HashMap(); + protected boolean warnedAboutNoEqualsForNonFlag = false; + protected AbstractVCFCodec() { super(VariantContext.class); } @@ -168,7 +171,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec } this.header = new VCFHeader(metaData, sampleNames); - this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header); + if ( doOnTheFlyModifications ) + this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header); return this.header; } @@ -426,6 +430,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false); if ( infoValueSplitSize == 1 ) { value = infoValueArray[0]; + final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key); + if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) { + // deal with the case where a flag field has =0, such as DB=0, by skipping the add + continue; + } } else { ArrayList valueList = new ArrayList(infoValueSplitSize); for ( int j = 0; j < infoValueSplitSize; j++ ) @@ -435,10 +444,17 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec } else { key = infoFieldArray[i]; final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key); - if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) - generateException("Found info key " + key + " without a = value, but the header says the field is of type " - + headerLine.getType() + " but this construct is only value for FLAG type fields"); - value = true; + if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) { + if ( ! warnedAboutNoEqualsForNonFlag ) { + log.warn("Found info key " + key + " without a = value, but the header says the field is of type " + + headerLine.getType() + " but this construct is only value for FLAG type fields"); + warnedAboutNoEqualsForNonFlag = true; + } + + value = VCFConstants.MISSING_VALUE_v4; + } else { + value = true; + } } attributes.put(key, value); @@ -828,4 +844,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]); return values; } + + /** + * Forces all VCFCodecs to not perform any on the fly modifications to the VCF header + * of VCF records. Useful primarily for raw comparisons such as when comparing + * raw VCF records + */ + public static final void disableOnTheFlyModifications() { + doOnTheFlyModifications = false; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java index 38e3c44f1..84c60d9d1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFStandardHeaderLines.java @@ -243,9 +243,7 @@ public class VCFStandardHeaderLines { } @Requires("line != null") - @Ensures({ - "standards.containsKey(line.getID())", - "old(standards.values().size()) > standards.values().size()"}) + @Ensures({"standards.containsKey(line.getID())"}) public void add(final T line) { if ( standards.containsKey(line.getID()) ) throw new ReviewedStingException("Attempting to add multiple standard header lines for ID " + line.getID()); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 19cdf337f..8908782f1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1376,21 +1376,20 @@ public class VariantContext implements Feature { // to enable tribble integratio final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field); final Object decoded = decodeValue(field, attr.getValue(), format); - if ( decoded != null ) { - if ( ! allowMissingValuesComparedToHeader - && format.getCountType() != VCFHeaderLineCount.UNBOUNDED - && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements - final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; - final int expSize = format.getCount(this.getNAlleles() - 1); - if ( obsSize != expSize ) { - throw new UserException.MalformedVCFHeader("Discordant field size detected for field " + - field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " + - "but the header says this should have " + expSize + " values based on header record " + - format); - } + if ( decoded != null && + ! allowMissingValuesComparedToHeader + && format.getCountType() != VCFHeaderLineCount.UNBOUNDED + && format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements + final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1; + final int expSize = format.getCount(this.getNAlleles() - 1); + if ( obsSize != expSize ) { + throw new UserException.MalformedVCFHeader("Discordant field size detected for field " + + field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " + + "but the header says this should have " + expSize + " values based on header record " + + format); } - newAttributes.put(field, decoded); } + newAttributes.put(field, decoded); } return newAttributes; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 90bd6b979..ae6c2c503 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -177,7 +177,7 @@ public class VariantContextTestProvider { addHeaderLine(metaData, "STRING20", 20, VCFHeaderLineType.String); addHeaderLine(metaData, "VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String); - addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.Integer); + addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.String); addHeaderLine(metaData, "GQ", 1, VCFHeaderLineType.Integer); addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer); addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);