From 96972c3a5c1ba067b0961c0035bab448c02d8d11 Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 16 Oct 2009 04:57:50 +0000 Subject: [PATCH] a fix for a bug Eric found: if your first call contains fewer samples than calls at other loci, your VCFHeader got set up incorrectly. Also moved a bunch of Lists over to Sets for consistency. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1859 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/genotyper/UnifiedGenotyper.java | 6 ++++-- .../walkers/variantstovcf/VariantsToVCF.java | 2 +- .../utils/genotype/GenotypeWriterFactory.java | 20 +++++++++++++++---- .../vcf/VCFGenotypeWriterAdapter.java | 10 ++++++---- .../sting/utils/genotype/vcf/VCFHeader.java | 6 +++--- .../sting/utils/genotype/vcf/VCFReader.java | 2 +- .../utils/genotype/vcf/VCFHeaderTest.java | 2 +- .../utils/genotype/vcf/VCFRecordTest.java | 7 ++----- .../utils/genotype/vcf/VCFWriterTest.java | 4 ++-- 9 files changed, 36 insertions(+), 23 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index cb012d4fd..ca9e45415 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -98,10 +98,12 @@ public class UnifiedGenotyper extends LocusWalker, Genot if ( VARIANTS_FILE != null ) writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName()); + this.getToolkit().getArguments().referenceFile.getName(), + samples); else writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName()); + this.getToolkit().getArguments().referenceFile.getName(), + 
samples); callsMetrics = new CallMetrics(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 229cbdc5d..fc4a91b1f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -49,7 +49,7 @@ public class VariantsToVCF extends RefWalker { public static VCFHeader getHeader(GATKArgumentCollection args, Set sampleNames) { Map metaData = new HashMap(); - List additionalColumns = new ArrayList(); + Set additionalColumns = new HashSet(); // Don't output the data for now because it kills our unit test MD5s and is optional // TODO - figure out what to do here diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 5f3e8f460..69dea8392 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -9,6 +9,8 @@ import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter; import java.io.File; import java.io.PrintStream; +import java.util.List; +import java.util.Set; /** @@ -31,7 +33,12 @@ public class GenotypeWriterFactory { * @param destination the destination file * @return the genotype writer object */ - public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) { + public static GenotypeWriter create(GENOTYPE_FORMAT format, + SAMFileHeader header, + File destination, + String source, + String referenceName, + Set sampleNames ) { switch (format) { case GLF: return new GLFWriter(header.toString(), destination); @@ -40,20 +47,25 @@ public class GenotypeWriterFactory 
{ case GELI_BINARY: return new GeliAdapter(destination, header); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination); + return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); } } - public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, PrintStream destination, String source, String referenceName ) { + public static GenotypeWriter create(GENOTYPE_FORMAT format, + SAMFileHeader header, + PrintStream destination, + String source, + String referenceName, + Set sampleNames ) { switch (format) { case GELI: return new GeliTextWriter(destination); case GLF: return new GLFWriter(header.toString(), destination); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination); + return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames); default: throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 116f7e798..5227c2ca3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -21,23 +21,26 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { private String mSource; private String mReferenceName; private boolean mInitialized = false; + private final Set mSampleNames = new HashSet(); private final File mFile; private final OutputStream mStream; - public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) { + public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set sampleNames) { mReferenceName = 
referenceName; mSource = source; mFile = writeTo; if (mFile == null) throw new RuntimeException("VCF output file must not be null"); mStream = null; + mSampleNames.addAll(sampleNames); } - public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo) { + public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set sampleNames) { mReferenceName = referenceName; mSource = source; mFile = null; mStream = writeTo; if (mStream == null) throw new RuntimeException("VCF output stream must not be null"); + mSampleNames.addAll(sampleNames); } @@ -49,7 +52,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { */ private void lazyInitialize(List genotypes, File file, OutputStream stream) { Map hInfo = new HashMap(); - List sampleNames = getSampleNames(genotypes); // setup the header fields hInfo.put("format", "VCRv3.2"); @@ -57,7 +59,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { hInfo.put("reference", mReferenceName); // setup the sample names - mHeader = new VCFHeader(hInfo, sampleNames); + mHeader = new VCFHeader(hInfo, mSampleNames); if (mFile == null) mWriter = new VCFWriter(mHeader, stream); else diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java index 34af9b685..5c1a3cfbf 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java @@ -23,7 +23,7 @@ public class VCFHeader { private final Map mMetaData = new HashMap(); // the list of auxillary tags - private final List mGenotypeSampleNames = new ArrayList(); + private final Set mGenotypeSampleNames = new HashSet(); // the character string that indicates meta data public static final String METADATA_INDICATOR = "##"; @@ -56,7 +56,7 @@ public class VCFHeader { * @param metaData the meta data associated with this header * @param 
genotypeSampleNames the genotype format field, and the sample names */ - public VCFHeader(Map metaData, List genotypeSampleNames) { + public VCFHeader(Map metaData, Set genotypeSampleNames) { for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); for (String col : genotypeSampleNames) { if (!col.equals("FORMAT")) @@ -107,7 +107,7 @@ public class VCFHeader { * * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false */ - public List getGenotypeSamples() { + public Set getGenotypeSamples() { return mGenotypeSampleNames; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index d84691339..3550b9b61 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -126,7 +126,7 @@ public class VCFReader implements Iterator, Iterable { protected VCFHeader createHeader(List headerStrings) { Map metaData = new HashMap(); - List auxTags = new ArrayList(); + Set auxTags = new HashSet(); // iterate over all the passed in strings for (String str : headerStrings) { Matcher matcher = pMeta.matcher(str); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java index 06b36a472..0d56101f3 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java @@ -19,7 +19,7 @@ public class VCFHeaderTest extends BaseTest { private Set headerFields = new LinkedHashSet(); private Map metaData = new HashMap(); - private List additionalColumns = new ArrayList(); + private Set additionalColumns = new HashSet(); /** * give it fake data, and make sure we get back the right fake data diff --git 
a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java index d49c7278c..d1fdafd48 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java @@ -4,10 +4,7 @@ import org.broadinstitute.sting.BaseTest; import org.junit.Assert; import org.junit.Test; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -126,7 +123,7 @@ public class VCFRecordTest extends BaseTest { */ public static VCFHeader createFakeHeader() { Map metaData = new HashMap(); - List additionalColumns = new ArrayList(); + Set additionalColumns = new HashSet(); metaData.put("format", "VCRv3.2"); // required metaData.put("two", "2"); additionalColumns.add("FORMAT"); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index 8e7f793f1..3bce24427 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -18,7 +18,7 @@ import java.util.*; public class VCFWriterTest extends BaseTest { private Set headerFields = new LinkedHashSet(); private Map metaData = new HashMap(); - private List additionalColumns = new ArrayList(); + private Set additionalColumns = new HashSet(); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); /** test, using the writer and reader, that we can output and input a VCF file without problems */ @@ -45,7 +45,7 @@ public class VCFWriterTest extends BaseTest { * create a fake header of known quantity * @return a fake VCF header */ - public static VCFHeader createFakeHeader(Map metaData, List additionalColumns) { + public static VCFHeader createFakeHeader(Map metaData, Set additionalColumns) 
{ metaData.put("format", "VCRv3.2"); // required metaData.put("two", "2"); additionalColumns.add("FORMAT");