Adding changes for VCF 4, mostly in the way we handle VCF headers. The header fields are now aware of the differences between VCF format versions. There was also a bunch of clean-up of out-of-spec VCF files used in the tests (mismatched VCF file format fields, etc.), and updates to the associated integration tests. Also added some logging statements for BTI.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3584 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-06-18 08:23:23 +00:00
parent 32f6781ac7
commit b978d5946b
12 changed files with 38 additions and 41 deletions

View File

@ -252,6 +252,7 @@ public class GenomeAnalysisEngine {
for (String str : rodNames.keySet())
if (str.equals(rodName)) {
logger.info("Adding interval list from track (ROD) named " + rodName);
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str).getReferenceOrderedData());
ret.addAll(intervalGenerator.toGenomeLocList());
}

View File

@ -25,10 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -156,7 +153,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
// FORMAT and INFO fields
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3));
// all of the arguments from the argument collection
Set<Object> args = new HashSet<Object>();

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeaderVersion;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
@ -68,7 +69,7 @@ public class GenotypeWriterFactory {
// VCF
if ( writer instanceof VCFGenotypeWriter ) {
if ( headerInfo == null )
headerInfo = new HashSet<VCFHeaderLine>(VCFGenotypeRecord.getSupportedHeaderStrings());
headerInfo = new HashSet<VCFHeaderLine>(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3));
((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo);
}
// GELI

View File

@ -1,9 +1,6 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.apache.log4j.Logger;
@ -57,7 +54,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// set up the header fields
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
hInfo.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString()));
// set up the allowed genotype format fields
if ( headerInfo != null ) {

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFHeaderVersion;
import org.broad.tribble.vcf.VCFRecord;
import java.io.*;
@ -52,11 +53,11 @@ public class VCFWriter {
// the fileformat field needs to be written first
TreeSet<VCFHeaderLine> nonFormatMetaData = new TreeSet<VCFHeaderLine>();
for ( VCFHeaderLine line : header.getMetaData() ) {
if ( line.getKey().equals(VCFHeader.FILE_FORMAT_KEY) ) {
if ( line.getKey().equals(VCFHeaderVersion.VCF3_3.getFormatString()) ) {
mWriter.write(VCFHeader.METADATA_INDICATOR + line.toString() + "\n");
}
else if ( line.getKey().equals(VCFHeader.OLD_FILE_FORMAT_KEY) ) {
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + line.toString().substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n");
else if ( line.getKey().equals(VCFHeaderVersion.VCF3_2.getFormatString()) ) {
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF3_2.getFormatString() + "=" + VCFHeaderVersion.VCF3_2.getVersionString() + "\n");
} else {
nonFormatMetaData.add(line);
}

View File

@ -70,7 +70,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testGenotypesToVCFUsingVCFInput() {
List<String> md5 = new ArrayList<String>();
md5.add("92d661a3789e55078197666eb9ee7020");
md5.add("3f920c6a443764b183e4765b4e4d00b0");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + oneKGLocation + "reference/human_b36_both.fasta" +

View File

@ -63,7 +63,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
+ " -R " + seqLocation + "references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -A SecondBaseSkew"
+ " -sample variant -B variant,VCF," + validationDataLocation + "FHS_pileup_test_chr15.vcf"
+ " -o %s -L chr15:46347148";
String expected_md5 = "465f92e689110afeb308c201cb6e8c5a";
String expected_md5 = "f4aff94a713fcf4dc5750d6a6a884ad3";
WalkerTestSpec spec = new WalkerTestSpec(test_args,1,Arrays.asList(expected_md5));
executeTest("Testing on locus with many indels", spec);
}

View File

@ -30,7 +30,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
case 2: return "-L chr22:20660081-20660083 -L chr22:29198100-29198104 -L chr22:29821330-29821334";
default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo);
}
}
}
public static String secondBaseTestmd5( int testNo ) {
switch ( testNo ) {
@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5a91f6b50fc136d7d3f8735dbc64defe"));
Arrays.asList("92369e62b3bade625992bc1741e9367a"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("0a561b161a06e68b88417ff5fe365871"));
Arrays.asList("1a394cdace4ff7698b603c2252ee9365"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("dff7c888fbd142b6eff27c3233c1292d"));
Arrays.asList("5a8c9c18da89b052e7f3797cd391bff6"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("45c9d5136900d9ab347f489d6f442bb4"));
Arrays.asList("0c8607c69d761bd058b944d1e98708b6"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("16e578597eed68609268c00886f2842a"));
Arrays.asList("006642aabd41e806d3ea64bf5441f1d2"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("0e947087c44479faefe616f6b6f7d272"));
Arrays.asList("c66ed83d2a981c43546d05120809ed57"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7c13af3226a92bfc3826c37b30f179e0"));
Arrays.asList("3b570e13bd0e1cb5750da268178ce656"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("369ad538eacdc0943b58a807d92d823c"));
Arrays.asList("42e9c57cd5d266fd7837354fc8495176"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("07af9983127c62e96accc03db2fb523e"));
Arrays.asList("374da32e1b9540bfe8ee6ea5b896babd"));
executeTest("not passing it any reads", spec);
}
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTag() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("286887826ab99bb2864f7f4db195e59e"));
Arrays.asList("05a9dcb87efc65e5c726c201460192d9"));
executeTest("getting DB tag", spec);
}
}

View File

@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testSimpleVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn", 1,
Arrays.asList("d9124d2b0fb5bec5bc50c26a16b4e900"));
Arrays.asList("a1970effe9c51923d52af9034e778de4"));
executeTest("testSimpleVenn", spec);
}
@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testSNPConcordance() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1,
Arrays.asList("df1fbc744947f316f65f51a21368b0e4"));
Arrays.asList("e7a0d52c266ba3c76283111674c7168f"));
executeTest("testSNPConcordance", spec);
}
@ -30,7 +30,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testNWayVenn() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -B set3,VCF," + validationDataLocation + "CEU.sample.vcf -CT NWayVenn", 1,
Arrays.asList("aa835ae5368b35f376b844d7f8ef2976"));
Arrays.asList("e65fc811137fca7d6c32125240c7468f"));
executeTest("testNWayVenn", spec);
}
@ -38,7 +38,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testMulti() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "NA12878.example1.vcf -B set2,VCF," + validationDataLocation + "NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1,
Arrays.asList("c9ef68cc3b7dc08f1d2b49170e6560ab"));
Arrays.asList("ddc2507590e28743e9cb4b132cb066e7"));
executeTest("testMulti", spec);
}
@ -46,7 +46,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest {
public void testComplex() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B set1,VCF," + validationDataLocation + "complexExample.vcf -B set2,VCF," + validationDataLocation + "complexExample.vcf -CT NWayVenn", 1,
Arrays.asList("7bc72ec5f8b0fda5d59ebd2526b53e48"));
Arrays.asList("250df7bde7a8cf9c7ee7c5704183ea88"));
executeTest("testComplex", spec);
}
}

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("af14c7d03e3516b61f2702c9e4a7780f"));
Arrays.asList("d2f0a3c6e598d9054de9ea7bd914a12d"));
executeTest("test no action", spec);
}
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b2b18929289bb47f07bcc23d4cec94c4"));
Arrays.asList("4dad2ddfb74785c5c80c2a73709e543a"));
executeTest("test clustered SNPs", spec);
}
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("af048053eaef84fc4d61c51c50be1e0a"));
Arrays.asList("70fc964e73fda55ed3f2c0b0ae4bcec1"));
executeTest("test mask", spec);
}
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("43a580d9c684a496f21d0d42939dd910"));
Arrays.asList("bde496530822a81197df31f5ecf98033"));
executeTest("test filter #1", spec);
}
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2bd1e8975d8105dec2fb6055fbf00569"));
Arrays.asList("750f5076e906901dabec78f804f035fa"));
executeTest("test filter #2", spec);
}
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("3479158ffd02a45371b5103277a30a53"));
Arrays.asList("7f876c9187948d27c0cd39f5a0395e0e"));
executeTest("test filter with separate names #2", spec);
}
}

View File

@ -39,7 +39,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testVariantRecalibrator() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "412bdb2eb4ca8f7ee9dfb39cda676c95" );
e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "cf8dac86df97d55f866dd379434bbdc2" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String vcf = entry.getKey();

View File

@ -66,8 +66,8 @@ public class VCFWriterUnitTest extends BaseTest {
* @return a fake VCF header
*/
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
metaData.add(new VCFHeaderLine("two", "2"));
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString(),VCFHeaderVersion.VCF3_3));
metaData.add(new VCFHeaderLine("two", "2",VCFHeaderVersion.VCF3_3));
additionalColumns.add("FORMAT");
additionalColumns.add("extra1");
additionalColumns.add("extra2");