My interpretation of the VCF spec is that the FORMAT field should only be present if there is genotype/sample data, so the VCFCodec now throws an exception when it encounters a header that has a FORMAT column but no sample columns after it. I had to fix one of the integration test VCFs accordingly.
This commit is contained in:
parent
e53cb79d42
commit
8f8b59a932
|
|
@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
}
|
||||
arrayIndex++;
|
||||
}
|
||||
|
||||
boolean sawFormatTag = false;
|
||||
if ( arrayIndex < strings.length ) {
|
||||
if ( !strings[arrayIndex].equals("FORMAT") )
|
||||
throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
|
||||
sawFormatTag = true;
|
||||
arrayIndex++;
|
||||
}
|
||||
|
||||
while (arrayIndex < strings.length)
|
||||
while ( arrayIndex < strings.length )
|
||||
auxTags.add(strings[arrayIndex++]);
|
||||
|
||||
if ( sawFormatTag && auxTags.size() == 0 )
|
||||
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
|
||||
|
||||
} else {
|
||||
if ( str.startsWith("##INFO=") ) {
|
||||
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
|
||||
|
|
@ -200,28 +206,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @return a VariantContext
|
||||
*/
|
||||
public Feature decode(String line) {
|
||||
return reallyDecode(line);
|
||||
}
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||
|
||||
private Feature reallyDecode(String line) {
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||
// our header cannot be null, we need the genotype sample names and counts
|
||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||
|
||||
// our header cannot be null, we need the genotype sample names and counts
|
||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||
if (parts == null)
|
||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||
|
||||
if (parts == null)
|
||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||
|
||||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||
" tokens, and saw " + nParts + " )", lineNo);
|
||||
|
||||
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns. Otherwise check that we have nine (normal columns + genotyping data)
|
||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||
" tokens, and saw " + nParts + " )", lineNo);
|
||||
|
||||
return parseVCFLine(parts);
|
||||
return parseVCFLine(parts);
|
||||
}
|
||||
|
||||
protected void generateException(String message) {
|
||||
|
|
|
|||
|
|
@ -35,9 +35,6 @@ public class VCFHeader {
|
|||
// the header string indicator
|
||||
public static final String HEADER_INDICATOR = "#";
|
||||
|
||||
/** do we have genotying data? */
|
||||
private boolean hasGenotypingData = false;
|
||||
|
||||
// were the input samples sorted originally (or are we sorting them)?
|
||||
private boolean samplesWereAlreadySorted = true;
|
||||
|
||||
|
|
@ -57,17 +54,15 @@ public class VCFHeader {
|
|||
* create a VCF header, given a list of meta data and auxiliary tags
|
||||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
||||
* @param genotypeSampleNames the sample names
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
if ( metaData != null )
|
||||
mMetaData.addAll(metaData);
|
||||
for (String col : genotypeSampleNames) {
|
||||
if (!col.equals("FORMAT"))
|
||||
mGenotypeSampleNames.add(col);
|
||||
}
|
||||
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
|
||||
|
||||
mGenotypeSampleNames.addAll(genotypeSampleNames);
|
||||
|
||||
loadVCFVersion();
|
||||
loadMetaDataMaps();
|
||||
|
||||
|
|
@ -157,7 +152,7 @@ public class VCFHeader {
|
|||
* @return true if we have genotyping columns, false otherwise
|
||||
*/
|
||||
public boolean hasGenotypingData() {
|
||||
return hasGenotypingData;
|
||||
return mGenotypeSampleNames.size() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -171,7 +166,7 @@ public class VCFHeader {
|
|||
|
||||
/** @return the column count */
|
||||
public int getColumnCount() {
|
||||
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
|
||||
return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * User-facing exception for a VCF file whose header is malformed.
 * The supplied detail message is appended to a fixed prefix
 * ("The provided VCF file has a malformed header: ").
 */
public static class MalformedVCFHeader extends UserException {
|
||||
/**
 * @param message description of what is wrong with the header; it is
 *                formatted into the final exception message after the fixed prefix
 */
public MalformedVCFHeader(String message) {
|
||||
super(String.format("The provided VCF file has a malformed header: %s", message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||
public ReadMissingReadGroup(SAMRecord read) {
|
||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
||||
|
|
|
|||
Loading…
Reference in New Issue