My interpretation of the VCF spec is that the FORMAT field should only be present if there is genotype/sample data. So the VCFCodec now throws an exception when it encounters a header that declares the FORMAT column but lists no sample columns after it. I had to fix one of the integration test VCFs.

2011-09-21 22:23:28 -04:00 · 2011-09-21 22:23:28 -04:00 · 8f8b59a932
parent e53cb79d42
commit 8f8b59a932
3 changed files with 32 additions and 29 deletions
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
                    }
                    arrayIndex++;
                }
                boolean sawFormatTag = false;
                if ( arrayIndex < strings.length ) {
                    if ( !strings[arrayIndex].equals("FORMAT") )
                        throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                    sawFormatTag = true;
                    arrayIndex++;
                }
-                while (arrayIndex < strings.length)
+                while ( arrayIndex < strings.length )
                    auxTags.add(strings[arrayIndex++]);
                if ( sawFormatTag && auxTags.size() == 0 )
                    throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
            } else {
                if ( str.startsWith("##INFO=") ) {
                    VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
@ -200,10 +206,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
     * @return a VariantContext
     */
    public Feature decode(String line) {
        // Thin public entry point: parsing is delegated to reallyDecode and its result is returned unchanged.
        return reallyDecode(line);
    }
    private Feature reallyDecode(String line) {
        // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
        if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
@ -35,9 +35,6 @@ public class VCFHeader {
    // the header string indicator
    public static final String HEADER_INDICATOR = "#";
    /** do we have genotyping data? */
    private boolean hasGenotypingData = false;
    // were the input samples sorted originally (or are we sorting them)?
    private boolean samplesWereAlreadySorted = true;
@ -57,17 +54,15 @@ public class VCFHeader {
     * create a VCF header, given a list of meta data and auxiliary tags
     *
     * @param metaData            the meta data associated with this header
-     * @param genotypeSampleNames the genotype format field, and the sample names
+     * @param genotypeSampleNames the sample names
     */
    public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
        mMetaData = new TreeSet<VCFHeaderLine>();
        if ( metaData != null )
            mMetaData.addAll(metaData);
-        for (String col : genotypeSampleNames) {
+
-            if (!col.equals("FORMAT"))
+        mGenotypeSampleNames.addAll(genotypeSampleNames);
-                mGenotypeSampleNames.add(col);
+
        }
        if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
        loadVCFVersion();
        loadMetaDataMaps();
@ -157,7 +152,7 @@ public class VCFHeader {
     * @return true if we have genotyping columns, false otherwise
     */
    public boolean hasGenotypingData() {
-        return hasGenotypingData;
+        return mGenotypeSampleNames.size() > 0;
    }
    /**
@ -171,7 +166,7 @@ public class VCFHeader {
    /** @return the column count */
    public int getColumnCount() {
-        return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
+        return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
    }
    /**
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException {
        }
    }
    /**
     * Exception reported when the header of a provided VCF file is malformed.
     * The caller-supplied detail is appended to a fixed "malformed header" prefix.
     */
    public static class MalformedVCFHeader extends UserException {
        /**
         * @param message description of what is wrong with the header; it is substituted into
         *                the fixed text "The provided VCF file has a malformed header: %s"
         */
        public MalformedVCFHeader(String message) {
            super(String.format("The provided VCF file has a malformed header: %s", message));
        }
    }
    public static class ReadMissingReadGroup extends MalformedBAM {
        public ReadMissingReadGroup(SAMRecord read) {
            super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK.  Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));