Added a wrapper exception for anything that goes wrong in VCF parsing; this way the problematic file line is emitted no matter what happens. This makes debugging a lot easier, especially in large files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2739 348d0f76-0448-11de-a6fe-93d51630548a
2010-01-29 19:58:51 +00:00 · 2010-01-29 19:58:51 +00:00 · ac2a207b0b
parent e7f5c93fe5
commit ac2a207b0b
2 changed files with 45 additions and 28 deletions
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParseException.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParseException.java
@ -0,0 +1,14 @@
+package org.broadinstitute.sting.utils.genotype.vcf;
+
+/**
+ * An exception into which all VCF parsing failures are funneled, so that the line we choked on can be emitted along with the original cause.
+ */
+public class VCFParseException extends RuntimeException {
+    public VCFParseException(String message) {
+        super(message);
+    }
+
+    public VCFParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java
@ -191,41 +191,44 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
     *
     * @param line    the line from the file
     * @param mHeader the VCF header
-     *
     * @return the VCFRecord
     */
    public static VCFRecord createRecord(String line, VCFHeader mHeader) {
-        // things we need to make a VCF record
-        Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
-        String tokens[] = line.split("\\s+");
+        try {
+            // things we need to make a VCF record
+            Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
+            String tokens[] = line.split("\\s+");

-        // check to ensure that the column count of tokens is right
-        if (tokens.length != mHeader.getColumnCount()) {
-            throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
-        }
-
-        int index = 0;
-        for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
-            values.put(field, tokens[index++]);
-        // if we have genotyping data, we try and extract the genotype fields
-        if (mHeader.hasGenotypingData()) {
-            String mFormatString = tokens[index];
-            String keyStrings[] = mFormatString.split(":");
-            List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
-            index++;
-			String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
-            for (String str : mHeader.getGenotypeSamples()) {
-                genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
-                index++;
+            // check to ensure that the column count of tokens is right
+            if (tokens.length != mHeader.getColumnCount()) {
+                throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
            }
-            VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
-            // associate the genotypes with this new record
-            for ( VCFGenotypeRecord gr : genotypeRecords )
-                gr.setVCFRecord(vrec);
-            return vrec;

+            int index = 0;
+            for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
+                values.put(field, tokens[index++]);
+            // if we have genotyping data, we try and extract the genotype fields
+            if (mHeader.hasGenotypingData()) {
+                String mFormatString = tokens[index];
+                String keyStrings[] = mFormatString.split(":");
+                List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
+                index++;
+                String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
+                for (String str : mHeader.getGenotypeSamples()) {
+                    genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
+                    index++;
+                }
+                VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
+                // associate the genotypes with this new record
+                for (VCFGenotypeRecord gr : genotypeRecords)
+                    gr.setVCFRecord(vrec);
+                return vrec;
+
+            }
+            return new VCFRecord(values);
+        } catch (Exception e) {
+            throw new VCFParseException("VCF Reader failed to parsing, on line = " + line, e);
        }
-        return new VCFRecord(values);
    }

    /**