Fixed UnifiedGenotyper (UG) parallelization bug. Improved the integration test to catch this in the future.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3432 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-05-25 21:03:45 +00:00
parent 434e920da9
commit ae6c014884
4 changed files with 32 additions and 19 deletions

View File

@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import java.io.*;
import java.util.Set;
import java.util.HashSet;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.*;
@ -73,8 +71,7 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
this.stream = null;
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
// TODO: this line is a problem, creating with an empty hashset eliminates any genotype FORMAT fields in the calls (besides GT)
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, null);
}
public void addCall(VariantContext vc, String refAllele) {

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
@ -10,6 +11,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.io.PrintStream;
import java.util.Set;
import java.util.HashSet;
/**
@ -65,6 +67,8 @@ public class GenotypeWriterFactory {
Set<VCFHeaderLine> headerInfo) {
// VCF
if ( writer instanceof VCFGenotypeWriter ) {
if ( headerInfo == null )
headerInfo = new HashSet<VCFHeaderLine>(VCFGenotypeRecord.getSupportedHeaderStrings());
((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo);
}
// GELI

View File

@ -35,7 +35,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
private VALIDATION_STRINGENCY validationStringency = VALIDATION_STRINGENCY.STRICT;
// allowed genotype format strings
private List<String> allowedGenotypeFormatStrings;
private List<String> allowedGenotypeFormatStrings = null;
public VCFGenotypeWriterAdapter(File writeTo) {
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
@ -59,18 +59,22 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// set up the header fields
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
hInfo.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
hInfo.addAll(headerInfo);
// set up the allowed genotype format fields
if ( headerInfo != null ) {
for ( VCFHeaderLine field : headerInfo ) {
hInfo.add(field);
if ( field instanceof VCFFormatHeaderLine) {
if ( allowedGenotypeFormatStrings == null )
allowedGenotypeFormatStrings = new ArrayList<String>();
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
}
}
// set up the sample names
mHeader = new VCFHeader(hInfo, mSampleNames);
mWriter.writeHeader(mHeader);
// set up the allowed genotype format fields
allowedGenotypeFormatStrings = new ArrayList<String>();
for ( VCFHeaderLine field : headerInfo ) {
if ( field instanceof VCFFormatHeaderLine)
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
}
/** finish writing, closing any open files. */

View File

@ -57,15 +57,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
//
// testing joint estimation model
// testing parallelization
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Integration test for running UnifiedGenotyper with multiple threads.
 *
 * Builds walker test specs over the NA12878 chr1 10-11mb SLX BAM and runs each
 * through executeTest. The single-thread and "-nt 2" specs use the same
 * interval and the same expected md5 string, so both runs are checked against
 * one shared expectation.
 *
 * NOTE(review): this text appears to come from a diff view without +/- markers;
 * the first spec ("-nt 4", wider interval, different expected string) may be
 * the pre-change version of this test rather than live code -- confirm against
 * the actual file.
 */
@Test
public void testParallelization() {
// spec: 4 threads ("-nt 4") over 1:10,000,000-10,400,000, with its own expected string
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,400,000 -nt 4", 1,
Arrays.asList("60bf51595eda646f5ccf0b0bf5db95d8"));
executeTest("test parallelization", spec);
// expected string shared by the two specs below
String md5 = "638911a5d7b155076afc79a3d3f50548";
// spec1: no "-nt" argument, interval 1:10,000,000-10,075,000
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
Arrays.asList(md5));
executeTest("test parallelization (single thread)", spec1);
// spec2: identical command line to spec1 plus "-nt 2"; expects the same md5 as spec1
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1,
Arrays.asList(md5));
executeTest("test parallelization (multithread)", spec2);
}
// --------------------------------------------------------------------------------------------------------------