Bugfixes towards final BCF2 implementation

-- MLAC and MLAF in PoolCaller now use standard MLE_AC and MLE_AF
-- VCFDiffableReader disables onTheFly fixing of VCF header fields so comparisons are easier when headers are changing
-- AbstractVCFCodec now parses a flag field written as FLAG_KEY=0 as though FLAG_KEY were entirely absent. This fixes a bug where FLAG_KEY=0 was translated into a bare FLAG_KEY in the output VCF, turning a false flag value into a true one
-- Fix the GT field value in VariantContextTestProviders so it isn't fixed 1000s of times during testing
-- Keys whose value is null are put into the VariantContext info attributes now
This commit is contained in:
Mark DePristo 2012-06-20 20:00:17 -04:00
parent 66337a9899
commit 549293b6f7
5 changed files with 45 additions and 22 deletions

View File

@ -65,6 +65,7 @@ public class VCFDiffableReader implements DiffableReader {
br.close();
// must be read as state is stored in reader itself
AbstractVCFCodec.disableOnTheFlyModifications();
FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(file.getAbsolutePath(), new VCFCodec(), false);
VCFHeader header = (VCFHeader)reader.getHeader();
for ( VCFHeaderLine headerLine : header.getMetaData() ) {

View File

@ -22,6 +22,7 @@ import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected static boolean doOnTheFlyModifications = true;
protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
@ -58,6 +59,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
protected Map<String, String> stringCache = new HashMap<String, String>();
protected boolean warnedAboutNoEqualsForNonFlag = false;
/**
 * Creates the codec, passing {@code VariantContext.class} to the
 * {@code AsciiFeatureCodec} superclass constructor.
 * Protected, so it is only reachable from subclasses and same-package code.
 */
protected AbstractVCFCodec() {
super(VariantContext.class);
}
@ -168,7 +171,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
}
this.header = new VCFHeader(metaData, sampleNames);
this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
if ( doOnTheFlyModifications )
this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
return this.header;
}
@ -426,6 +430,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
int infoValueSplitSize = ParsingUtils.split(str, infoValueArray, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR, false);
if ( infoValueSplitSize == 1 ) {
value = infoValueArray[0];
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) {
// deal with the case where a flag field has =0, such as DB=0, by skipping the add
continue;
}
} else {
ArrayList<String> valueList = new ArrayList<String>(infoValueSplitSize);
for ( int j = 0; j < infoValueSplitSize; j++ )
@ -435,10 +444,17 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
} else {
key = infoFieldArray[i];
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag )
generateException("Found info key " + key + " without a = value, but the header says the field is of type "
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
value = true;
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
if ( ! warnedAboutNoEqualsForNonFlag ) {
log.warn("Found info key " + key + " without a = value, but the header says the field is of type "
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
warnedAboutNoEqualsForNonFlag = true;
}
value = VCFConstants.MISSING_VALUE_v4;
} else {
value = true;
}
}
attributes.put(key, value);
@ -828,4 +844,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
return values;
}
/**
 * Turns off on-the-fly modifications for all VCF codecs by setting the static
 * {@code doOnTheFlyModifications} flag to {@code false}.
 *
 * Because the flag is a static field, the change is not limited to one codec
 * instance. Useful primarily for raw comparisons, such as when comparing raw
 * VCF records, where automatic repairs would hide real differences.
 *
 * NOTE(review): the earlier wording said modifications "to the VCF header of VCF
 * records" -- presumably "header or records"; confirm whether any record-level
 * changes are gated by this flag in addition to header repair.
 */
public static final void disableOnTheFlyModifications() {
doOnTheFlyModifications = false;
}
}

View File

@ -243,9 +243,7 @@ public class VCFStandardHeaderLines {
}
@Requires("line != null")
@Ensures({
"standards.containsKey(line.getID())",
"old(standards.values().size()) > standards.values().size()"})
@Ensures({"standards.containsKey(line.getID())"})
public void add(final T line) {
if ( standards.containsKey(line.getID()) )
throw new ReviewedStingException("Attempting to add multiple standard header lines for ID " + line.getID());

View File

@ -1376,21 +1376,20 @@ public class VariantContext implements Feature { // to enable tribble integratio
final VCFCompoundHeaderLine format = VariantContextUtils.getMetaDataForField(header, field);
final Object decoded = decodeValue(field, attr.getValue(), format);
if ( decoded != null ) {
if ( ! allowMissingValuesComparedToHeader
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
final int expSize = format.getCount(this.getNAlleles() - 1);
if ( obsSize != expSize ) {
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
"but the header says this should have " + expSize + " values based on header record " +
format);
}
if ( decoded != null &&
! allowMissingValuesComparedToHeader
&& format.getCountType() != VCFHeaderLineCount.UNBOUNDED
&& format.getType() != VCFHeaderLineType.Flag ) { // we expect exactly the right number of elements
final int obsSize = decoded instanceof List ? ((List) decoded).size() : 1;
final int expSize = format.getCount(this.getNAlleles() - 1);
if ( obsSize != expSize ) {
throw new UserException.MalformedVCFHeader("Discordant field size detected for field " +
field + " at " + getChr() + ":" + getStart() + ". Field had " + obsSize + " values " +
"but the header says this should have " + expSize + " values based on header record " +
format);
}
newAttributes.put(field, decoded);
}
newAttributes.put(field, decoded);
}
return newAttributes;

View File

@ -177,7 +177,7 @@ public class VariantContextTestProvider {
addHeaderLine(metaData, "STRING20", 20, VCFHeaderLineType.String);
addHeaderLine(metaData, "VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.Integer);
addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.String);
addHeaderLine(metaData, "GQ", 1, VCFHeaderLineType.Integer);
addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);