Part two: TODOs in CombineVariants, fixes for InferredGeneticContext, and some other tests and clean-up.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3721 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-07-05 21:07:53 +00:00
parent 36edc60ccc
commit 86031f4034
7 changed files with 125 additions and 30 deletions

View File

@ -7,7 +7,7 @@ import java.util.Map;
/** /**
* a base class for compound header lines, which include info lines and format lines (so far) * a base class for compound header lines, which include info lines and format lines (so far)
*/ */
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine { public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
public enum SupportedHeaderLineType { public enum SupportedHeaderLineType {
INFO(true), FORMAT(false); INFO(true), FORMAT(false);

View File

@ -9,7 +9,7 @@ import java.util.Map;
* @author ebanks * @author ebanks
* A class representing a key=value entry for FILTER fields in the VCF header * A class representing a key=value entry for FILTER fields in the VCF header
*/ */
public class VCFFilterHeaderLine extends VCFHeaderLine { public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
private String name; private String name;
private String description; private String description;

View File

@ -9,7 +9,7 @@ public enum VCFHeaderLineType {
public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) { public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) {
switch (this) { switch (this) {
case Integer: case Integer:
return java.lang.Integer.valueOf(value); // the java.lang is needed since we use Integer as a enum name return Math.round(java.lang.Float.valueOf(value)); // this seems like we're allowing off spec values, but use it for now
case Float: case Float:
return java.lang.Float.valueOf(value); return java.lang.Float.valueOf(value);
case String: case String:

View File

@ -213,7 +213,7 @@ final class InferredGeneticContext {
// return selected; // return selected;
// } // }
public String getAttributeAsString(String key) { return (String)getAttribute(key); } public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); }
public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); } public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); }
public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); } public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); }

View File

@ -124,12 +124,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if ( ! lineName.equals(otherName) ) if ( ! lineName.equals(otherName) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other ); throw new IllegalStateException("Incompatible header types: " + line + " " + other );
} else { } else {
//String lineName = ((VCFInfoHeaderLine) line).getName(); String lineName = ((VCFCompoundHeaderLine) line).getName();
//String otherName = ((VCFFilterHeaderLine) other).getName(); String otherName = ((VCFCompoundHeaderLine) other).getName();
// todo -- aaron, please complete these comparisons when INFO and Format header lines are made into one // if the names are the same, but the values are different, we need to quit
//if ( (lineType != null && ! lineType.equals(otherType)) || (lineCount != null && !lineCounts.equals(otherCount))) if (lineName.equals(otherName) && !line.equals(other))
// throw new IllegalStateException("Incompatible header types: " + line + " " + other ); throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
} }
} else { } else {
map.put(key, line); map.put(key, line);

View File

@ -0,0 +1,102 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
/**
 * Unit tests for the VCF header merging performed by
 * {@code CombineVariants.smartMergeHeaders}.
 *
 * <p>Each test merges the reference header from {@link VCFHeaderUnitTest#VCF4headerStrings}
 * with one of the smaller headers declared below: a compatible subset, a header whose
 * INFO line disagrees with the reference, and a header whose FORMAT line disagrees.
 */
public class CombineVariantsUnitTest {

    /** The column header line appended after the "##" meta-data lines when a header is parsed. */
    private static final String COLUMN_HEADER_LINE = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";

    // this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
    public static String[] VCF4headerStringsSmallSubset = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    // altered info field
    public static String[] VCF4headerStringsBrokenInfo = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Integer, Description=\"Dindel estimated population allele frequency\">", // float to integer
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    // altered format field
    public static String[] VCF4headerStringsBrokenFormat = {
            "##fileformat=VCFv4.0",
            "##filedate=2010-06-21",
            "##reference=NCBI36",
            "##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
            "##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
            "##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
            "##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
            "##FORMAT=<ID=GT, Number=6, Type=String, Description=\"Genotype\">", // changed 1 to 6 here
            "##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
            "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
    };

    /**
     * Feeds the given meta-data lines, followed by the column header line, to a fresh codec.
     *
     * @param headerStr the "##" meta-data lines of the header, in order
     * @return the codec that parsed the header
     */
    private VCF4Codec createHeader(String[] headerStr) {
        VCF4Codec codec = new VCF4Codec();
        List<String> headerFields = new ArrayList<String>();
        for (String str : headerStr) {
            headerFields.add(str);
        }
        // the codec is expected to report one more line than we supplied: the "#CHROM..." line
        Assert.assertEquals(headerStr.length + 1 /* for the # line */,
                codec.createHeader(headerFields, COLUMN_HEADER_LINE));
        return codec;
    }

    /**
     * Parses both sets of header lines and returns the resulting headers in the order given.
     *
     * @param first  meta-data lines of the first header
     * @param second meta-data lines of the second header
     * @return a two-element list holding the parsed headers
     */
    private ArrayList<VCFHeader> parseHeaders(String[] first, String[] second) {
        VCFHeader one = createHeader(first).getHeader(VCFHeader.class);
        VCFHeader two = createHeader(second).getHeader(VCFHeader.class);
        ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
        headers.add(one);
        headers.add(two);
        return headers;
    }

    /**
     * Asserts that merging the reference header with {@code conflictingHeader} is rejected
     * with an {@link IllegalStateException}.
     *
     * <p>The headers are parsed before the guarded region, so an exception thrown while
     * building the fixtures cannot be mistaken for the merge rejecting the collision.
     *
     * @param conflictingHeader meta-data lines that redefine a line of the reference header
     */
    private void assertMergeRejected(String[] conflictingHeader) {
        ArrayList<VCFHeader> headers = parseHeaders(VCFHeaderUnitTest.VCF4headerStrings, conflictingHeader);
        try {
            CombineVariants.smartMergeHeaders(headers);
        } catch (IllegalStateException expected) {
            return; // the collision was detected, which is what we want
        }
        Assert.fail("Expected an IllegalStateException when merging headers with conflicting definitions");
    }

    /** Merging a header with a strict subset of itself must yield exactly the larger header's lines. */
    @Test
    public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
        ArrayList<VCFHeader> headers = parseHeaders(VCFHeaderUnitTest.VCF4headerStrings, VCF4headerStringsSmallSubset);
        Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
        Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length, lines.size());
    }

    /** An INFO line with the same ID but a different type must make the merge fail. */
    @Test
    public void testHeadersInfoDifferentValues() {
        assertMergeRejected(VCF4headerStringsBrokenInfo);
    }

    /** A FORMAT line with the same ID but a different count must make the merge fail. */
    @Test
    public void testHeadersFormatDifferentValues() {
        assertMergeRejected(VCF4headerStringsBrokenFormat);
    }
}

View File

@ -21,45 +21,38 @@ import java.util.List;
*/ */
public class VCFHeaderUnitTest extends BaseTest { public class VCFHeaderUnitTest extends BaseTest {
@Test private VCF4Codec createHeader(String[] headerStr) {
public void testVCF4ToVCF3() {
VCF4Codec codec = new VCF4Codec(); VCF4Codec codec = new VCF4Codec();
List<String> headerFields = new ArrayList<String>(); List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings) for (String str : headerStr)
headerFields.add(str); headerFields.add(str);
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")); Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
return codec;
}
@Test
public void testVCF4ToVCF3() {
VCF4Codec codec = createHeader(VCF4headerStrings);
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3); codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456"); checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
} }
@Test @Test
public void testVCF4ToVCF3Alternate() { public void testVCF4ToVCF3Alternate() {
VCF4Codec codec = new VCF4Codec(); VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings_with_negitiveOne)
headerFields.add(str);
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3); codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3"); checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
} }
@Test @Test
public void testVCF4ToVCF4() { public void testVCF4ToVCF4() {
VCF4Codec codec = new VCF4Codec(); VCF4Codec codec = createHeader(VCF4headerStrings);
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings)
headerFields.add(str);
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35"); checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35");
} }
@Test @Test
public void testVCF4ToVCF4_alternate() { public void testVCF4ToVCF4_alternate() {
VCF4Codec codec = new VCF4Codec(); VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
List<String> headerFields = new ArrayList<String>();
for (String str : VCF3_3headerStrings_with_negitiveOne)
headerFields.add(str);
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088"); checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088");
} }
@ -92,7 +85,7 @@ public class VCFHeaderUnitTest extends BaseTest {
} }
public String[] VCF3_3headerStrings = { public static String[] VCF4headerStrings = {
"##fileformat=VCFv4.0", "##fileformat=VCFv4.0",
"##filedate=2010-06-21", "##filedate=2010-06-21",
"##reference=NCBI36", "##reference=NCBI36",
@ -111,7 +104,7 @@ public class VCFHeaderUnitTest extends BaseTest {
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">", "##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
}; };
public String[] VCF3_3headerStrings_with_negitiveOne = { public static String[] VCF4headerStrings_with_negitiveOne = {
"##fileformat=VCFv4.0", "##fileformat=VCFv4.0",
"##filedate=2010-06-21", "##filedate=2010-06-21",
"##reference=NCBI36", "##reference=NCBI36",