From ae6c014884757239fe8aa53ad05ef60013514862 Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 25 May 2010 21:03:45 +0000 Subject: [PATCH] Fixed UG parallelization bug. Better integration test to catch this in the future. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3432 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GenotypeWriterStorage.java | 5 +--- .../utils/genotype/GenotypeWriterFactory.java | 4 ++++ .../vcf/VCFGenotypeWriterAdapter.java | 24 +++++++++++-------- .../UnifiedGenotyperIntegrationTest.java | 18 ++++++++++---- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index 145169d09..0cab18e38 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.io.storage; import java.io.*; import java.util.Set; -import java.util.HashSet; -import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.*; @@ -73,8 +71,7 @@ public abstract class GenotypeWriterStorage implements this.stream = null; writer = GenotypeWriterFactory.create(stub.getFormat(), file); Set samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); - // TODO: this line is a problem, creating with an empty hashset eliminates any genotype FORMAT fields in the calls (besides GT) - GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet()); + GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, null); } public void addCall(VariantContext vc, String refAllele) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index d20d32a02..501b92d2e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype; import net.sf.samtools.SAMFileHeader; import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.geli.*; import org.broadinstitute.sting.utils.genotype.glf.*; @@ -10,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; import java.io.PrintStream; import java.util.Set; +import java.util.HashSet; /** @@ -65,6 +67,8 @@ public class GenotypeWriterFactory { Set headerInfo) { // VCF if ( writer instanceof VCFGenotypeWriter ) { + if ( headerInfo == null ) + headerInfo = new HashSet(VCFGenotypeRecord.getSupportedHeaderStrings()); ((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo); } // GELI diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index dbc81bba7..cbd9cdae2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -35,7 +35,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { private VALIDATION_STRINGENCY validationStringency = VALIDATION_STRINGENCY.STRICT; // allowed genotype format strings - private List allowedGenotypeFormatStrings; + private List allowedGenotypeFormatStrings = null; public VCFGenotypeWriterAdapter(File writeTo) { if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); @@ -59,18 +59,22 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { // set up the header fields Set hInfo = new TreeSet(); hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION)); - hInfo.addAll(headerInfo); - + + // set up the allowed genotype format fields + if ( headerInfo != null ) { + for ( VCFHeaderLine field : headerInfo ) { + hInfo.add(field); + if ( field instanceof VCFFormatHeaderLine) { + if ( allowedGenotypeFormatStrings == null ) + allowedGenotypeFormatStrings = new ArrayList(); + allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); + } + } + } + // set up the sample names mHeader = new VCFHeader(hInfo, mSampleNames); mWriter.writeHeader(mHeader); - - // set up the allowed genotype format fields - allowedGenotypeFormatStrings = new ArrayList(); - for ( VCFHeaderLine field : headerInfo ) { - if ( field instanceof VCFFormatHeaderLine) - allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); - } } /** finish writing, closing any open files. */ diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 918ff0f39..e6ba6b44a 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -57,15 +57,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- // - // testing joint estimation model + // testing parallelization // // -------------------------------------------------------------------------------------------------------------- + @Test public void testParallelization() { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -nt 4", 1, - Arrays.asList("60bf51595eda646f5ccf0b0bf5db95d8")); - executeTest("test parallelization", spec); + String md5 = "638911a5d7b155076afc79a3d3f50548"; + + WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1, + Arrays.asList(md5)); + executeTest("test parallelization (single thread)", spec1); + + WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1, + Arrays.asList(md5)); + executeTest("test parallelization (multithread)", spec2); } // --------------------------------------------------------------------------------------------------------------