added a wrapper exception for anything that goes wrong in VCF parsing; this way the problematic file line is emitted, no matter what happens. Makes debugging a lot easier, especially in large files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2739 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-01-29 19:58:51 +00:00
parent e7f5c93fe5
commit ac2a207b0b
2 changed files with 45 additions and 28 deletions

View File

@ -0,0 +1,14 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* an exception to funnel all parsing exceptions into; this way we can emit the line we choked on as well
*/
public class VCFParseException extends RuntimeException {
public VCFParseException(String message) {
super(message);
}
public VCFParseException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@ -191,41 +191,44 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
*
* @param line the line from the file
* @param mHeader the VCF header
*
* @return the VCFRecord
*/
public static VCFRecord createRecord(String line, VCFHeader mHeader) {
// things we need to make a VCF record
Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
String tokens[] = line.split("\\s+");
try {
// things we need to make a VCF record
Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
String tokens[] = line.split("\\s+");
// check to ensure that the column count of tokens is right
if (tokens.length != mHeader.getColumnCount()) {
throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
}
int index = 0;
for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
values.put(field, tokens[index++]);
// if we have genotyping data, we try and extract the genotype fields
if (mHeader.hasGenotypingData()) {
String mFormatString = tokens[index];
String keyStrings[] = mFormatString.split(":");
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
index++;
String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
for (String str : mHeader.getGenotypeSamples()) {
genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
index++;
// check to ensure that the column count of tokens is right
if (tokens.length != mHeader.getColumnCount()) {
throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
}
VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
// associate the genotypes with this new record
for ( VCFGenotypeRecord gr : genotypeRecords )
gr.setVCFRecord(vrec);
return vrec;
int index = 0;
for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
values.put(field, tokens[index++]);
// if we have genotyping data, we try and extract the genotype fields
if (mHeader.hasGenotypingData()) {
String mFormatString = tokens[index];
String keyStrings[] = mFormatString.split(":");
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
index++;
String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
for (String str : mHeader.getGenotypeSamples()) {
genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
index++;
}
VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
// associate the genotypes with this new record
for (VCFGenotypeRecord gr : genotypeRecords)
gr.setVCFRecord(vrec);
return vrec;
}
return new VCFRecord(values);
} catch (Exception e) {
throw new VCFParseException("VCF Reader failed to parsing, on line = " + line, e);
}
return new VCFRecord(values);
}
/**