No one should call the createHeader method(s) directly; callers should instead go through the full readHeader method, because it sets the version first. I therefore made the createHeader methods package-private and merged them. I also updated the various unit tests that were calling createHeader and dangerously assuming that the header version defaulted to 4.0 (which it no longer does).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5934 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
32ac7be86a
commit
420d8feff6
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.vcf.VCFCodec;
|
import org.broad.tribble.vcf.VCFCodec;
|
||||||
import org.broad.tribble.vcf.VCFHeader;
|
import org.broad.tribble.vcf.VCFHeader;
|
||||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||||
|
|
@ -9,6 +10,7 @@ import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.StringBufferInputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
@ -19,54 +21,50 @@ import java.util.Set;
|
||||||
public class CombineVariantsUnitTest {
|
public class CombineVariantsUnitTest {
|
||||||
|
|
||||||
// this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
|
// this header is a small subset of the header in VCFHeaderUnitTest: VCF4headerStrings
|
||||||
public static String[] VCF4headerStringsSmallSubset = {
|
public static String VCF4headerStringsSmallSubset =
|
||||||
"##fileformat=VCFv4.0",
|
"##fileformat=VCFv4.0\n" +
|
||||||
"##filedate=2010-06-21",
|
"##filedate=2010-06-21\n"+
|
||||||
"##reference=NCBI36",
|
"##reference=NCBI36\n"+
|
||||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n"+
|
||||||
};
|
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||||
|
|
||||||
// altered info field
|
// altered info field
|
||||||
public static String[] VCF4headerStringsBrokenInfo = {
|
public static String VCF4headerStringsBrokenInfo =
|
||||||
"##fileformat=VCFv4.0",
|
"##fileformat=VCFv4.0\n"+
|
||||||
"##filedate=2010-06-21",
|
"##filedate=2010-06-21\n"+
|
||||||
"##reference=NCBI36",
|
"##reference=NCBI36\n"+
|
||||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||||
"##INFO=<ID=AF, Number=1, Type=String, Description=\"Dindel estimated population allele frequency\">", // string to integer
|
"##INFO=<ID=AF, Number=1, Type=String, Description=\"Dindel estimated population allele frequency\">\n"+ // string to integer
|
||||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n"+
|
||||||
};
|
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||||
|
|
||||||
// altered format field
|
// altered format field
|
||||||
public static String[] VCF4headerStringsBrokenFormat = {
|
public static String VCF4headerStringsBrokenFormat =
|
||||||
"##fileformat=VCFv4.0",
|
"##fileformat=VCFv4.0\n"+
|
||||||
"##filedate=2010-06-21",
|
"##filedate=2010-06-21\n"+
|
||||||
"##reference=NCBI36",
|
"##reference=NCBI36\n"+
|
||||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||||
"##FORMAT=<ID=GT, Number=6, Type=String, Description=\"Genotype\">", // changed 1 to 6 here
|
"##FORMAT=<ID=GT, Number=6, Type=String, Description=\"Genotype\">\n"+ // changed 1 to 6 here
|
||||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n"+
|
||||||
};
|
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||||
|
|
||||||
private VCFHeader createHeader(String[] headerStr) {
|
private VCFHeader createHeader(String headerStr) {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
List<String> headerFields = new ArrayList<String>();
|
VCFHeader head = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
|
||||||
for (String str : headerStr)
|
|
||||||
headerFields.add(str);
|
|
||||||
VCFHeader head = (VCFHeader)codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
|
|
||||||
Assert.assertEquals(head.getMetaData().size(), headerStr.length /* for the # line */);
|
|
||||||
return head;
|
return head;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -78,7 +76,7 @@ public class CombineVariantsUnitTest {
|
||||||
headers.add(one);
|
headers.add(one);
|
||||||
headers.add(two);
|
headers.add(two);
|
||||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStrings.length);
|
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions=IllegalStateException.class)
|
@Test(expectedExceptions=IllegalStateException.class)
|
||||||
|
|
@ -89,7 +87,7 @@ public class CombineVariantsUnitTest {
|
||||||
headers.add(one);
|
headers.add(one);
|
||||||
headers.add(two);
|
headers.add(two);
|
||||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStrings.length);
|
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -100,6 +98,6 @@ public class CombineVariantsUnitTest {
|
||||||
headers.add(one);
|
headers.add(one);
|
||||||
headers.add(two);
|
headers.add(two);
|
||||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStrings.length);
|
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.vcf.*;
|
import org.broad.tribble.vcf.*;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
|
@ -9,6 +10,7 @@ import org.testng.annotations.Test;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringBufferInputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -21,13 +23,10 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
public class VCFHeaderUnitTest extends BaseTest {
|
public class VCFHeaderUnitTest extends BaseTest {
|
||||||
|
|
||||||
private VCFHeader createHeader(String[] headerStr) {
|
private VCFHeader createHeader(String headerStr) {
|
||||||
VCFCodec codec = new VCFCodec();
|
VCFCodec codec = new VCFCodec();
|
||||||
List<String> headerFields = new ArrayList<String>();
|
VCFHeader header = (VCFHeader)codec.readHeader(new AsciiLineReader(new StringBufferInputStream(headerStr)));
|
||||||
for (String str : headerStr)
|
Assert.assertEquals(header.getMetaData().size(), VCF4headerStringCount);
|
||||||
headerFields.add(str);
|
|
||||||
VCFHeader header = (VCFHeader)codec.createHeader(headerFields,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO");
|
|
||||||
Assert.assertEquals(header.getMetaData().size(), headerStr.length /* for the # line */);
|
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -39,7 +38,7 @@ public class VCFHeaderUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVCF4ToVCF4_alternate() {
|
public void testVCF4ToVCF4_alternate() {
|
||||||
VCFHeader header = createHeader(VCF4headerStrings_with_negitiveOne);
|
VCFHeader header = createHeader(VCF4headerStrings_with_negativeOne);
|
||||||
checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088");
|
checkMD5ofHeaderFile(header, "ad8c4cf85e868b0261ab49ee2c613088");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -59,41 +58,45 @@ public class VCFHeaderUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(md5sum.equals(md5SumFile(myTempFile)));
|
Assert.assertTrue(md5sum.equals(md5SumFile(myTempFile)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String[] VCF4headerStrings = {
|
public static int VCF4headerStringCount = 16;
|
||||||
"##fileformat=VCFv4.0",
|
|
||||||
"##filedate=2010-06-21",
|
public static String VCF4headerStrings =
|
||||||
"##reference=NCBI36",
|
"##fileformat=VCFv4.0\n"+
|
||||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
"##filedate=2010-06-21\n"+
|
||||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">",
|
"##reference=NCBI36\n"+
|
||||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||||
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">",
|
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||||
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">",
|
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||||
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">",
|
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n"+
|
||||||
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">",
|
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n"+
|
||||||
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">",
|
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n"+
|
||||||
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">",
|
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n"+
|
||||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n"+
|
||||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n"+
|
||||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">",
|
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||||
};
|
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||||
|
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n"+
|
||||||
|
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||||
|
|
||||||
|
|
||||||
|
public static String VCF4headerStrings_with_negativeOne =
|
||||||
|
"##fileformat=VCFv4.0\n"+
|
||||||
|
"##filedate=2010-06-21\n"+
|
||||||
|
"##reference=NCBI36\n"+
|
||||||
|
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||||
|
"##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">\n"+
|
||||||
|
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||||
|
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">\n"+
|
||||||
|
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">\n"+
|
||||||
|
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">\n"+
|
||||||
|
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">\n"+
|
||||||
|
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">\n"+
|
||||||
|
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">\n"+
|
||||||
|
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||||
|
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||||
|
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||||
|
"##FORMAT=<ID=TT, Number=., Type=Integer, Description=\"Lots of TTs\">\n"+
|
||||||
|
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n";
|
||||||
|
|
||||||
public static String[] VCF4headerStrings_with_negitiveOne = {
|
|
||||||
"##fileformat=VCFv4.0",
|
|
||||||
"##filedate=2010-06-21",
|
|
||||||
"##reference=NCBI36",
|
|
||||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">",
|
|
||||||
"##INFO=<ID=YY, Number=., Type=Integer, Description=\"Some weird value that has lots of parameters\">",
|
|
||||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">",
|
|
||||||
"##INFO=<ID=CA, Number=1, Type=String, Description=\"Pilot 1 callability mask\">",
|
|
||||||
"##INFO=<ID=HP, Number=1, Type=Integer, Description=\"Reference homopolymer tract length\">",
|
|
||||||
"##INFO=<ID=NS, Number=1, Type=Integer, Description=\"Number of samples with data\">",
|
|
||||||
"##INFO=<ID=DB, Number=0, Type=Flag, Description=\"dbSNP membership build 129 - type match and indel sequence length match within 25 bp\">",
|
|
||||||
"##INFO=<ID=NR, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on reverse strand\">",
|
|
||||||
"##INFO=<ID=NF, Number=1, Type=Integer, Description=\"Number of reads covering non-ref variant on forward strand\">",
|
|
||||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">",
|
|
||||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">",
|
|
||||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">",
|
|
||||||
"##FORMAT=<ID=TT, Number=., Type=Integer, Description=\"Lots of TTs\">",
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue