Part two: resolved TODOs in CombineVariants, fixes for InferredGeneticContext, plus some additional tests and clean-up.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3721 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-07-05 21:07:53 +00:00
parent 36edc60ccc
commit 86031f4034
7 changed files with 125 additions and 30 deletions

View File

@ -7,7 +7,7 @@ import java.util.Map;
/**
* a base class for compound header lines, which include info lines and format lines (so far)
*/
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine {
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
public enum SupportedHeaderLineType {
INFO(true), FORMAT(false);

View File

@ -9,7 +9,7 @@ import java.util.Map;
* @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header
*/
public class VCFFilterHeaderLine extends VCFHeaderLine {
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
private String name;
private String description;

View File

@ -9,7 +9,7 @@ public enum VCFHeaderLineType {
public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) {
switch (this) {
case Integer:
return java.lang.Integer.valueOf(value); // the java.lang is needed since we use Integer as a enum name
return Math.round(java.lang.Float.valueOf(value)); // this seems like we're allowing off spec values, but use it for now
case Float:
return java.lang.Float.valueOf(value);
case String:

View File

@ -213,7 +213,7 @@ final class InferredGeneticContext {
// return selected;
// }
public String getAttributeAsString(String key) { return (String)getAttribute(key); }
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); }
// Returns the attribute stored under the given key, cast to Integer and auto-unboxed to int.
// NOTE(review): presumably throws ClassCastException when the stored value is not an Integer
// (e.g. a String), and NullPointerException on unboxing when the attribute is absent -- confirm
// against getAttribute(), which is not visible here.
public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); }
// Returns the attribute stored under the given key, cast to Double and auto-unboxed to double.
// NOTE(review): presumably throws ClassCastException when the stored value is not a Double
// (an Integer or Float would not be widened by this cast), and NullPointerException on unboxing
// when the attribute is absent -- confirm against getAttribute(), which is not visible here.
public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); }

View File

@ -124,12 +124,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if ( ! lineName.equals(otherName) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other );
} else {
//String lineName = ((VCFInfoHeaderLine) line).getName();
//String otherName = ((VCFFilterHeaderLine) other).getName();
String lineName = ((VCFCompoundHeaderLine) line).getName();
String otherName = ((VCFCompoundHeaderLine) other).getName();
// todo -- aaron, please complete these comparisons when INFO and Format header lines are made into one
//if ( (lineType != null && ! lineType.equals(otherType)) || (lineCount != null && !lineCounts.equals(otherCount)))
// throw new IllegalStateException("Incompatible header types: " + line + " " + other );
// if the names are the same, but the values are different, we need to quit
if (lineName.equals(otherName) && !line.equals(other))
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
}
} else {
map.put(key, line);

View File

@ -0,0 +1,102 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
 * Tests for the header-merging piece of the combine variants code
 * ({@code CombineVariants.smartMergeHeaders}).
 *
 * Every test merges the full header from {@code VCFHeaderUnitTest.VCF4headerStrings} with one of
 * the smaller headers declared below, and checks either the size of the merged line set or that
 * the merge is rejected with an {@link IllegalStateException}.
 */
public class CombineVariantsUnitTest {

    // this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
    public static String[] VCF4headerStringsSmallSubset = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    // same as the small subset, except the AF INFO line declares a different type
    public static String[] VCF4headerStringsBrokenInfo = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Integer, Description=\"Dindel estimated population allele frequency\">", // float to integer
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    // same as the small subset, except the GT FORMAT line declares a different count
    public static String[] VCF4headerStringsBrokenFormat = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=6, Type=String, Description=\"Genotype\">", // changed 1 to 6 here
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    /**
     * Feeds the given header lines (plus a fixed column-name line) to a fresh codec and asserts
     * that the codec reports having consumed all of them.
     *
     * @param headerStr the "##" header lines to parse
     * @return the codec holding the parsed header
     */
    private VCF4Codec createHeader(String[] headerStr) {
        VCF4Codec codec = new VCF4Codec();
        List<String> headerFields = new ArrayList<String>();
        for (String str : headerStr) {
            headerFields.add(str);
        }
        Assert.assertEquals(headerStr.length + 1 /* for the # line */,
                codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
        return codec;
    }

    /**
     * Parses the full reference header and the supplied header, then merges the two.
     *
     * @param otherHeaderStrings header lines to merge with VCFHeaderUnitTest.VCF4headerStrings
     * @return the merged header lines as returned by CombineVariants.smartMergeHeaders
     */
    private Set<VCFHeaderLine> mergeWithFullHeader(String[] otherHeaderStrings) {
        VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
        VCFHeader two = createHeader(otherHeaderStrings).getHeader(VCFHeader.class);
        // kept as ArrayList: the parameter type of smartMergeHeaders is declared elsewhere
        ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
        headers.add(one);
        headers.add(two);
        return CombineVariants.smartMergeHeaders(headers);
    }

    /** Merging a header with a strict subset of itself must not add or drop any lines. */
    @Test
    public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
        Set<VCFHeaderLine> lines = mergeWithFullHeader(VCF4headerStringsSmallSubset);
        Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length, lines.size());
    }

    /** An INFO line with the same ID but a different type must make the merge fail. */
    @Test(expected=IllegalStateException.class)
    public void testHeadersInfoDifferentValues() {
        // no assertion on the result: the merge itself is expected to throw
        mergeWithFullHeader(VCF4headerStringsBrokenInfo);
    }

    /** A FORMAT line with the same ID but a different count must make the merge fail. */
    @Test(expected=IllegalStateException.class)
    public void testHeadersFormatDifferentValues() {
        // no assertion on the result: the merge itself is expected to throw
        mergeWithFullHeader(VCF4headerStringsBrokenFormat);
    }
}

View File

@ -21,45 +21,38 @@ import java.util.List;
*/
public class VCFHeaderUnitTest extends BaseTest {
@Test
public void testVCF4ToVCF3() {
private VCF4Codec createHeader(String[] headerStr) {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings)
for (String str : headerStr)
headerFields.add(str);
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
return codec;
}
@Test
public void testVCF4ToVCF3() {
VCF4Codec codec = createHeader(VCF4headerStrings);
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
}
@Test
public void testVCF4ToVCF3Alternate() {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings_with_negitiveOne)
headerFields.add(str);
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
}
@Test
public void testVCF4ToVCF4() {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings)
headerFields.add(str);
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
VCF4Codec codec = createHeader(VCF4headerStrings);
checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35");
}
@Test
public void testVCF4ToVCF4_alternate() {
VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings_with_negitiveOne)
headerFields.add(str);
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088");
}
@ -92,7 +85,7 @@ public class VCFHeaderUnitTest extends BaseTest {
}
public String[] VCF3_3headerStrings = {
public static String[] VCF4headerStrings = {
"##fileformat=VCFv4.0",
"##filedate=2010-06-21",
"##reference=NCBI36",
@ -111,7 +104,7 @@ public class VCFHeaderUnitTest extends BaseTest {
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
};
public String[] VCF3_3headerStrings_with_negitiveOne = {
public static String[] VCF4headerStrings_with_negitiveOne = {
"##fileformat=VCFv4.0",
"##filedate=2010-06-21",
"##reference=NCBI36",