Fixing VCF header format and info fields so that they properly emit the unbounded count value for VCF4 or VCF3. Eric, we should update the VCF4 spec page to indicate that format fields are allowed to use the unbounded count as well (if this is true).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3707 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
61c07c6f90
commit
3093a20a55
|
|
@ -39,6 +39,11 @@ public class VCFFormatHeaderLine extends VCFHeaderLine {
|
|||
private String mDescription;
|
||||
private FORMAT_TYPE mType;
|
||||
|
||||
// format line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.) in VCF4 and with -1 in VCF3
|
||||
public static int UNBOUNDED = -1;
|
||||
public static String UNBOUNDED_ENCODING_VCF4 = ".";
|
||||
public static String UNBOUNDED_ENCODING_VCF3 = "-1";
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
|
|
@ -67,7 +72,9 @@ public class VCFFormatHeaderLine extends VCFHeaderLine {
|
|||
super("FORMAT", "", version);
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
mName = mapping.get("ID");
|
||||
mCount = Integer.valueOf(mapping.get("Number"));
|
||||
mCount = version == VCFHeaderVersion.VCF4_0 ?
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
mType = FORMAT_TYPE.valueOf(mapping.get("Type"));
|
||||
mDescription = mapping.get("Description");
|
||||
}
|
||||
|
|
@ -78,7 +85,7 @@ public class VCFFormatHeaderLine extends VCFHeaderLine {
|
|||
else if (mVersion == VCFHeaderVersion.VCF4_0) {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID",mName);
|
||||
map.put("Number",mCount);
|
||||
map.put("Number",mCount == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : mCount);
|
||||
map.put("Type",mType);
|
||||
map.put("Description",mDescription);
|
||||
return "FORMAT=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
|
||||
|
|
|
|||
|
|
@ -3,11 +3,7 @@ package org.broad.tribble.vcf;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: aaron
|
||||
* Date: Jun 17, 2010
|
||||
* Time: 12:28:46 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
* A class for translating between vcf header versions
|
||||
*/
|
||||
public class VCFHeaderLineTranslator {
|
||||
private static Map<VCFHeaderVersion,VCFLineParser> mapping;
|
||||
|
|
|
|||
|
|
@ -45,8 +45,9 @@ public class VCFInfoHeaderLine extends VCFHeaderLine {
|
|||
|
||||
// info line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.) in VCF4 and with -1 in VCF3
|
||||
public static int UNBOUNDED = Integer.MIN_VALUE;
|
||||
public static String UNBOUNDED_ENCODING = ".";
|
||||
public static int UNBOUNDED = -1;
|
||||
public static String UNBOUNDED_ENCODING_VCF4 = ".";
|
||||
public static String UNBOUNDED_ENCODING_VCF3 = "-1";
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
|
|
@ -74,7 +75,9 @@ public class VCFInfoHeaderLine extends VCFHeaderLine {
|
|||
super("INFO", "", version);
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
mName = mapping.get("ID");
|
||||
mCount = mapping.get("Number").equals(UNBOUNDED_ENCODING) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
mCount = version == VCFHeaderVersion.VCF4_0 ?
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(UNBOUNDED_ENCODING_VCF3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
mType = INFO_TYPE.valueOf(mapping.get("Type"));
|
||||
mDescription = mapping.get("Description");
|
||||
}
|
||||
|
|
@ -85,7 +88,7 @@ public class VCFInfoHeaderLine extends VCFHeaderLine {
|
|||
else if (mVersion == VCFHeaderVersion.VCF4_0) {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID",mName);
|
||||
map.put("Number",mCount == UNBOUNDED ? UNBOUNDED_ENCODING : mCount);
|
||||
map.put("Number",mCount == UNBOUNDED ? (mVersion == VCFHeaderVersion.VCF4_0 ? UNBOUNDED_ENCODING_VCF4 : UNBOUNDED_ENCODING_VCF3) : mCount);
|
||||
map.put("Type",mType);
|
||||
map.put("Description",mDescription);
|
||||
return "INFO=" + VCFHeaderLineTranslator.toValue(this.mVersion,map);
|
||||
|
|
|
|||
|
|
@ -34,6 +34,17 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
|
||||
}
|
||||
|
||||
/**
 * Parses the VCF3_3headerStrings_with_negitiveOne fixture with a VCF4Codec, switches the
 * resulting header to VCFHeaderVersion.VCF3_3, and hands the codec plus an expected MD5
 * string to checkMD5ofHeaderFile.
 *
 * The fixture contains INFO and FORMAT lines with "Number=.", so this exercises the
 * unbounded count when the header version is changed to VCF3_3.
 */
@Test
|
||||
public void testVCF4ToVCF3Alternate() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings_with_negitiveOne)
|
||||
headerFields.add(str);
|
||||
// NOTE(review): 17 is presumably the 16 fixture lines plus the #CHROM column line -- confirm against createHeader
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
// change the version on the parsed header before it is checked
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
|
||||
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF4() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
|
|
@ -44,6 +55,16 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35");
|
||||
}
|
||||
|
||||
/**
 * Parses the VCF3_3headerStrings_with_negitiveOne fixture with a VCF4Codec and hands the
 * codec plus an expected MD5 string to checkMD5ofHeaderFile, without changing the header
 * version after parsing.
 *
 * The fixture contains INFO and FORMAT lines with "Number=.", so this exercises the
 * unbounded count on the header exactly as the codec produced it.
 */
@Test
|
||||
public void testVCF4ToVCF4_alternate() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings_with_negitiveOne)
|
||||
headerFields.add(str);
|
||||
// NOTE(review): 17 is presumably the 16 fixture lines plus the #CHROM column line -- confirm against createHeader
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088");
|
||||
}
|
||||
|
||||
private void checkMD5ofHeaderFile(VCF4Codec codec, String md5sum) {
|
||||
File myTempFile = null;
|
||||
PrintWriter pw = null;
|
||||
|
|
@ -81,4 +102,23 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
||||
};
|
||||
|
||||
/**
 * Header fixture whose INFO line YY and FORMAT line TT use the unbounded count ("Number=.").
 *
 * NOTE(review): the name says VCF3_3 and "negitiveOne" (sic), but the lines declare
 * fileformat VCFv4.0 and use "." rather than "-1" -- confirm whether the name or the
 * contents reflect the intent.
 */
public String[] VCF3_3headerStrings_with_negitiveOne = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
||||
"##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">",
|
||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
||||
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">",
|
||||
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">",
|
||||
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">",
|
||||
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">",
|
||||
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">",
|
||||
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">",
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
||||
"##FORMAT=<ID=TT, Number=., Type=Integer, Description=\"Lots of TTs\">",
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue