From 3093a20a5544845c97e4917d492878010778b5cd Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 1 Jul 2010 22:02:16 +0000 Subject: [PATCH] fixing VCF header format and info fields so that they properly emit the unbounded count value for vcf4 or vcf3. Eric, we should update the vcf4 spec page to indicate format fields are allowed to use the unbounded count as well (if this is true). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3707 348d0f76-0448-11de-a6fe-93d51630548a --- .../tribble/vcf/VCFFormatHeaderLine.java | 11 ++++- .../tribble/vcf/VCFHeaderLineTranslator.java | 6 +-- .../broad/tribble/vcf/VCFInfoHeaderLine.java | 11 +++-- .../utils/genotype/vcf/VCFHeaderUnitTest.java | 40 +++++++++++++++++++ 4 files changed, 57 insertions(+), 11 deletions(-) diff --git a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java index ce8418d8e..989ed2c71 100755 --- a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java @@ -39,6 +39,11 @@ public class VCFFormatHeaderLine extends VCFHeaderLine { private String mDescription; private FORMAT_TYPE mType; + // format line numerical values are allowed to be unbounded (or unknown), which is + // marked with a dot (.) + public static int UNBOUNDED = -1; + public static String UNBOUNDED_ENCODING_VCF4 = "."; + public static String UNBOUNDED_ENCODING_VCF3 = "-1"; /** * create a VCF format header line @@ -67,7 +72,9 @@ public class VCFFormatHeaderLine extends VCFHeaderLine { super("FORMAT", "", version); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); mName = mapping.get("ID"); - mCount = Integer.valueOf(mapping.get("Number")); + mCount = version == VCFHeaderVersion.VCF4_0 ? + mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? 
UNBOUNDED : Integer.valueOf(mapping.get("Number")) : + mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); mType = FORMAT_TYPE.valueOf(mapping.get("Type")); mDescription = mapping.get("Description"); } @@ -78,7 +85,7 @@ public class VCFFormatHeaderLine extends VCFHeaderLine { else if (mVersion == VCFHeaderVersion.VCF4_0) { Map map = new LinkedHashMap(); map.put("ID",mName); - map.put("Number",mCount); + map.put("Number",mCount == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : mCount); map.put("Type",mType); map.put("Description",mDescription); return "FORMAT=" + VCFHeaderLineTranslator.toValue(this.mVersion,map); diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java b/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java index 84b5ca1c9..9aa7ae1dc 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java +++ b/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java @@ -3,11 +3,7 @@ package org.broad.tribble.vcf; import java.util.*; /** - * Created by IntelliJ IDEA. - * User: aaron - * Date: Jun 17, 2010 - * Time: 12:28:46 PM - * To change this template use File | Settings | File Templates. + * A class for translating between vcf header versions */ public class VCFHeaderLineTranslator { private static Map mapping; diff --git a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java index 4d8d2c400..8a460879d 100755 --- a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java @@ -45,8 +45,9 @@ public class VCFInfoHeaderLine extends VCFHeaderLine { // info line numerical values are allowed to be unbounded (or unknown), which is // marked with a dot (.) 
- public static int UNBOUNDED = Integer.MIN_VALUE; - public static String UNBOUNDED_ENCODING = "."; + public static int UNBOUNDED = -1; + public static String UNBOUNDED_ENCODING_VCF4 = "."; + public static String UNBOUNDED_ENCODING_VCF3 = "-1"; /** * create a VCF info header line @@ -74,7 +75,9 @@ public class VCFInfoHeaderLine extends VCFHeaderLine { super("INFO", "", version); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); mName = mapping.get("ID"); - mCount = mapping.get("Number").equals(UNBOUNDED_ENCODING) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); + mCount = version == VCFHeaderVersion.VCF4_0 ? + mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) : + mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); mType = INFO_TYPE.valueOf(mapping.get("Type")); mDescription = mapping.get("Description"); } @@ -85,7 +88,7 @@ public class VCFInfoHeaderLine extends VCFHeaderLine { else if (mVersion == VCFHeaderVersion.VCF4_0) { Map map = new LinkedHashMap(); map.put("ID",mName); - map.put("Number",mCount == UNBOUNDED ? UNBOUNDED_ENCODING : mCount); + map.put("Number",mCount == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? 
UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : mCount); map.put("Type",mType); map.put("Description",mDescription); return "INFO=" + VCFHeaderLineTranslator.toValue(this.mVersion,map); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index e21697640..d4106b78e 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -34,6 +34,17 @@ public class VCFHeaderUnitTest extends BaseTest { checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456"); } + @Test + public void testVCF4ToVCF3Alternate() { + VCF4Codec codec = new VCF4Codec(); + List headerFields = new ArrayList(); + for (String str : VCF3_3headerStrings_with_negitiveOne) + headerFields.add(str); + Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")); + codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3); + checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3"); + } + @Test public void testVCF4ToVCF4() { VCF4Codec codec = new VCF4Codec(); @@ -44,6 +55,16 @@ public class VCFHeaderUnitTest extends BaseTest { checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35"); } + @Test + public void testVCF4ToVCF4_alternate() { + VCF4Codec codec = new VCF4Codec(); + List headerFields = new ArrayList(); + for (String str : VCF3_3headerStrings_with_negitiveOne) + headerFields.add(str); + Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")); + checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088"); + } + private void checkMD5ofHeaderFile(VCF4Codec codec, String md5sum) { File myTempFile = null; PrintWriter pw = null; @@ -81,4 +102,23 @@ public class VCFHeaderUnitTest extends BaseTest { "##FORMAT=", "##FORMAT=", }; + + public String[] 
VCF3_3headerStrings_with_negitiveOne = { + "##fileformat=VCFv4.0", + "##filedate=2010-06-21", + "##reference=NCBI36", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##INFO=", + "##FILTER=", + "##FORMAT=", + "##FORMAT=", + "##FORMAT=", + }; }