diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
index 972144ab8..b97475000 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java
@@ -38,10 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.writer.Options;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.PrintStream;
+import java.io.*;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
@@ -58,6 +55,8 @@ public class VariantContextWriterStorage implements Storage
* Class VCFHeader
@@ -37,6 +43,7 @@ import java.util.*;
* A class representing the VCF header
*/
public class VCFHeader {
+ final protected static Logger logger = Logger.getLogger(VCFHeader.class);
// the mandatory header fields
public enum HEADER_FIELDS {
@@ -164,10 +171,10 @@ public class VCFHeader {
for ( VCFHeaderLine line : mMetaData ) {
if ( line instanceof VCFInfoHeaderLine ) {
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
- mInfoMetaData.put(infoLine.getID(), infoLine);
+ addMetaDataMapBinding(mInfoMetaData, infoLine);
} else if ( line instanceof VCFFormatHeaderLine ) {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
- mFormatMetaData.put(formatLine.getID(), formatLine);
+ addMetaDataMapBinding(mFormatMetaData, formatLine);
} else if ( line instanceof VCFContigHeaderLine ) {
contigMetaData.add((VCFContigHeaderLine)line);
} else {
@@ -176,6 +183,21 @@ public class VCFHeader {
}
}
+ /**
+ * Add line to map, issuing warnings about duplicates
+ *
+ * @param map
+ * @param line
+ * @param
+ */
+ private final void addMetaDataMapBinding(final Map map, T line) {
+ final String key = line.getID();
+ if ( map.containsKey(key) )
+ logger.warn("Found duplicate VCF header lines for " + key + "; keeping the first only" );
+ else
+ map.put(key, line);
+ }
+
/**
* get the header fields in order they're presented in the input file (which is now required to be
* the order presented in the spec).
@@ -221,13 +243,17 @@ public class VCFHeader {
return mGenotypeSampleNames;
}
+ public int getNGenotypeSamples() {
+ return mGenotypeSampleNames.size();
+ }
+
/**
* do we have genotyping data?
*
* @return true if we have genotyping columns, false otherwise
*/
public boolean hasGenotypingData() {
- return mGenotypeSampleNames.size() > 0;
+ return getNGenotypeSamples() > 0;
}
/**
@@ -244,6 +270,14 @@ public class VCFHeader {
return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
}
+ public Collection getInfoHeaderLines() {
+ return mInfoMetaData.values();
+ }
+
+ public Collection getFormatHeaderLines() {
+ return mFormatMetaData.values();
+ }
+
/**
* @param id the header key name
* @return the meta data line, or null if there is none
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index bd37c482d..6739f1d0e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -866,7 +866,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return chromosome count
*/
public int getCalledChrCount() {
- return getCalledChrCount(new HashSet(0));
+ final Set noSamples = Collections.emptySet();
+ return getCalledChrCount(noSamples);
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
index 4d2eae4ab..62da3174d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java
@@ -159,9 +159,8 @@ public final class BCF2Encoder {
encodePrimitive(Float.floatToIntBits((float)value), BCF2Type.FLOAT);
}
+ @Requires("size >= 0")
public final void encodeType(final int size, final BCF2Type type) throws IOException {
- if ( size < 0 ) throw new ReviewedStingException("BUG: size < 0");
-
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
encodeStream.write(typeByte);
if ( BCF2Utils.willOverflow(size) ) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
index 665c2cc21..00ab1a2ed 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
@@ -57,16 +57,16 @@ public class BCF2FieldWriterManager {
* @param stringDictionary a map from VCFHeader strings to their offsets for encoding
*/
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map stringDictionary) {
- for (final VCFHeaderLine line : header.getMetaData()) {
- if ( line instanceof VCFInfoHeaderLine ) {
- final String field = ((VCFInfoHeaderLine) line).getID();
- final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, stringDictionary);
- add(siteWriters, field, writer);
- } else if ( line instanceof VCFFormatHeaderLine ) {
- final String field = ((VCFFormatHeaderLine) line).getID();
- final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, stringDictionary);
- add(genotypesWriters, field, writer);
- }
+ for (final VCFInfoHeaderLine line : header.getInfoHeaderLines()) {
+ final String field = line.getID();
+ final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, line, encoder, stringDictionary);
+ add(siteWriters, field, writer);
+ }
+
+ for (final VCFFormatHeaderLine line : header.getFormatHeaderLines()) {
+ final String field = line.getID();
+ final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, line, encoder, stringDictionary);
+ add(genotypesWriters, field, writer);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
index f72cb81ec..b328dfc59 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java
@@ -90,6 +90,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
private final Map contigDictionary = new HashMap();
private final Map stringDictionaryMap = new LinkedHashMap();
private final boolean doNotWriteGenotypes;
+ private String[] sampleNames = null;
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
@@ -122,6 +123,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
stringDictionaryMap.put(dict.get(i), i);
}
+ sampleNames = header.getGenotypeSamples().toArray(new String[header.getNGenotypeSamples()]);
+
// setup the field encodings
fieldManager.setup(header, encoder, stringDictionaryMap);
@@ -289,8 +292,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
writer.start(encoder, vc);
- for ( final String name : header.getGenotypeSamples() ) {
- // todo -- can we optimize this get (string -> genotype) which can be expensive
+ for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null )
// we don't have any data about g at all
@@ -327,8 +329,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
@Requires("! strings.isEmpty()")
@Ensures("BCF2Type.INTEGERS.contains(result)")
private final BCF2Type encodeStringsByRef(final Collection strings) throws IOException {
- assert ! strings.isEmpty();
-
final List offsets = new ArrayList(strings.size());
BCF2Type maxType = BCF2Type.INT8; // start with the smallest size