Part two: TODOs in CombineVariants, fixes for InferredGeneticContext, and some other tests and clean-up.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3721 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
36edc60ccc
commit
86031f4034
|
|
@ -7,7 +7,7 @@ import java.util.Map;
|
|||
/**
|
||||
* a base class for compound header lines, which include info lines and format lines (so far)
|
||||
*/
|
||||
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine {
|
||||
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
public enum SupportedHeaderLineType {
|
||||
INFO(true), FORMAT(false);
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import java.util.Map;
|
|||
* @author ebanks
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine {
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ public enum VCFHeaderLineType {
|
|||
public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) {
|
||||
switch (this) {
|
||||
case Integer:
|
||||
return java.lang.Integer.valueOf(value); // the java.lang is needed since we use Integer as a enum name
|
||||
return Math.round(java.lang.Float.valueOf(value)); // this seems like we're allowing off spec values, but use it for now
|
||||
case Float:
|
||||
return java.lang.Float.valueOf(value);
|
||||
case String:
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ final class InferredGeneticContext {
|
|||
// return selected;
|
||||
// }
|
||||
|
||||
public String getAttributeAsString(String key) { return (String)getAttribute(key); }
|
||||
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); }
|
||||
public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); }
|
||||
public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); }
|
||||
|
||||
|
|
|
|||
|
|
@ -124,12 +124,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
if ( ! lineName.equals(otherName) )
|
||||
throw new IllegalStateException("Incompatible header types: " + line + " " + other );
|
||||
} else {
|
||||
//String lineName = ((VCFInfoHeaderLine) line).getName();
|
||||
//String otherName = ((VCFFilterHeaderLine) other).getName();
|
||||
String lineName = ((VCFCompoundHeaderLine) line).getName();
|
||||
String otherName = ((VCFCompoundHeaderLine) other).getName();
|
||||
|
||||
// todo -- aaron, please complete these comparisons when INFO and Format header lines are made into one
|
||||
//if ( (lineType != null && ! lineType.equals(otherType)) || (lineCount != null && !lineCounts.equals(otherCount)))
|
||||
// throw new IllegalStateException("Incompatible header types: " + line + " " + other );
|
||||
// if the names are the same, but the values are different, we need to quit
|
||||
if (lineName.equals(otherName) && !line.equals(other))
|
||||
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
|
||||
}
|
||||
} else {
|
||||
map.put(key, line);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* test out pieces of the combine variants code
|
||||
*/
|
||||
public class CombineVariantsUnitTest {
|
||||
|
||||
// this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
|
||||
public static String[] VCF4headerStringsSmallSubset = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
||||
};
|
||||
|
||||
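// altered info field: identical to VCF4headerStringsSmallSubset except that the AF INFO line declares Type=Integer instead of Type=Float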
|
||||
public static String[] VCF4headerStringsBrokenInfo = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
||||
"##INFO=<ID=AF, Number=1, Type=Integer, Description=\"Dindel estimated population allele frequency\">", // float to integer
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
||||
};
|
||||
|
||||
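// altered format field: identical to VCF4headerStringsSmallSubset except that the GT FORMAT line declares Number=6 instead of Number=1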
|
||||
public static String[] VCF4headerStringsBrokenFormat = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
||||
"##FORMAT=<ID=GT, Number=6, Type=String, Description=\"Genotype\">", // changed 1 to 6 here
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
||||
};
|
||||
|
||||
/**
 * Builds a fresh VCF4Codec and feeds it the given header lines plus a fixed
 * "#CHROM ... INFO" column line.
 *
 * Asserts that the value returned by codec.createHeader equals
 * headerStr.length + 1 (the extra one accounting for the "#" column line);
 * presumably that return value is the number of header lines consumed --
 * confirm against VCF4Codec.
 *
 * @param headerStr the header lines to load into the codec
 * @return the codec on which createHeader has been called
 */
private VCF4Codec createHeader(String[] headerStr) {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : headerStr)
|
||||
headerFields.add(str);
|
||||
Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
return codec;
|
||||
}
|
||||
|
||||
/**
 * Merges the header built from VCFHeaderUnitTest.VCF4headerStrings with the
 * header built from VCF4headerStringsSmallSubset using
 * CombineVariants.smartMergeHeaders, and checks that the merged set contains
 * exactly VCF4headerStrings.length lines.
 */
@Test
|
||||
public void testHeadersWhereOneIsAStrictSubsetOfTheOther() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
|
||||
VCFHeader two = createHeader(VCF4headerStringsSmallSubset).getHeader(VCFHeader.class);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
|
||||
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
|
||||
}
|
||||
|
||||
/**
 * Merges the header built from VCFHeaderUnitTest.VCF4headerStrings with the
 * header built from VCF4headerStringsBrokenInfo. The test passes only if an
 * IllegalStateException is thrown (declared via the expected attribute).
 */
@Test(expected=IllegalStateException.class)
|
||||
public void testHeadersInfoDifferentValues() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
|
||||
VCFHeader two = createHeader(VCF4headerStringsBrokenInfo).getHeader(VCFHeader.class);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
|
||||
// not reached if the merge above throws the expected IllegalStateException
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
|
||||
}
|
||||
|
||||
/**
 * Merges the header built from VCFHeaderUnitTest.VCF4headerStrings with the
 * header built from VCF4headerStringsBrokenFormat. The test passes only if an
 * IllegalStateException is thrown (declared via the expected attribute).
 */
@Test(expected=IllegalStateException.class)
|
||||
public void testHeadersFormatDifferentValues() {
|
||||
VCFHeader one = createHeader(VCFHeaderUnitTest.VCF4headerStrings).getHeader(VCFHeader.class);
|
||||
VCFHeader two = createHeader(VCF4headerStringsBrokenFormat).getHeader(VCFHeader.class);
|
||||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = CombineVariants.smartMergeHeaders(headers);
|
||||
// not reached if the merge above throws the expected IllegalStateException
Assert.assertEquals(VCFHeaderUnitTest.VCF4headerStrings.length,lines.size());
|
||||
}
|
||||
}
|
||||
|
|
@ -21,45 +21,38 @@ import java.util.List;
|
|||
*/
|
||||
public class VCFHeaderUnitTest extends BaseTest {
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF3() {
|
||||
private VCF4Codec createHeader(String[] headerStr) {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings)
|
||||
for (String str : headerStr)
|
||||
headerFields.add(str);
|
||||
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
Assert.assertEquals(headerStr.length+1 /* for the # line */,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
return codec;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF3() {
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings);
|
||||
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
|
||||
checkMD5ofHeaderFile(codec, "5873e029bd50d6836b86438bccd15456");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF3Alternate() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings_with_negitiveOne)
|
||||
headerFields.add(str);
|
||||
Assert.assertEquals(17,codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
|
||||
codec.getHeader(VCFHeader.class).setVersion(VCFHeaderVersion.VCF3_3);
|
||||
checkMD5ofHeaderFile(codec, "e750fd0919704d10813dfe57ac1a0df3");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF4() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings)
|
||||
headerFields.add(str);
|
||||
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings);
|
||||
checkMD5ofHeaderFile(codec, "4648aa1169257e0a8a9d30131adb5f35");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCF4ToVCF4_alternate() {
|
||||
VCF4Codec codec = new VCF4Codec();
|
||||
List<String> headerFields = new ArrayList<String>();
|
||||
for (String str : VCF3_3headerStrings_with_negitiveOne)
|
||||
headerFields.add(str);
|
||||
Assert.assertEquals(17, codec.createHeader(headerFields, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"));
|
||||
VCF4Codec codec = createHeader(VCF4headerStrings_with_negitiveOne);
|
||||
checkMD5ofHeaderFile(codec, "ad8c4cf85e868b0261ab49ee2c613088");
|
||||
}
|
||||
|
||||
|
|
@ -92,7 +85,7 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
|
||||
public String[] VCF3_3headerStrings = {
|
||||
public static String[] VCF4headerStrings = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
|
|
@ -111,7 +104,7 @@ public class VCFHeaderUnitTest extends BaseTest {
|
|||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
||||
};
|
||||
|
||||
public String[] VCF3_3headerStrings_with_negitiveOne = {
|
||||
public static String[] VCF4headerStrings_with_negitiveOne = {
|
||||
"##fileformat=VCFv4.0",
|
||||
"##filedate=2010-06-21",
|
||||
"##reference=NCBI36",
|
||||
|
|
|
|||
Loading…
Reference in New Issue