Bugfix for BCF2

-- Always decode the genotypes block when writing out a BCF file.  If the header changes (which we currently cannot detect easily), the dictionary keys used in the genotypes block may be invalid.  Temporarily added a private static boolean that turns off writing of the undecoded genotype blocks until Eric and his team rewrite the header.

Signed-off-by: Mark DePristo <depristo@broadinstitute.org>
This commit is contained in:
Mark DePristo 2012-07-24 15:26:26 -04:00
parent a99f3d6adf
commit 3066894215
2 changed files with 29 additions and 9 deletions

View File

@ -160,7 +160,7 @@ public class BCF2FieldWriterManager {
/**
* Get a site writer specialized to encode values for site info field
* @param field key found in the VCF header INFO records
* @return
* @return non-null writer if one can be found, or null if none exists for field
*/
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
return getWriter(field, siteWriters);
@ -169,17 +169,14 @@ public class BCF2FieldWriterManager {
/**
* Get a genotypes writer specialized to encode values for genotypes field
* @param field key found in the VCF header FORMAT records
* @return
* @return non-null writer if one can be found, or null if none exists for field
*/
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
return getWriter(field, genotypesWriters);
}
@Requires({"map != null", "key != null"})
@Ensures("result != null")
public <T> T getWriter(final String key, final Map<String, T> map) {
final T writer = map.get(key);
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
return writer;
return map.get(key);
}
}

View File

@ -83,6 +83,14 @@ import java.util.*;
* @since 06/12
*/
class BCF2Writer extends IndexingVariantContextWriter {
/**
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
* is found in the input VC. This can be very dangerous as the genotype encoding
* depends on the exact ordering of the header.
*
* TODO -- enable when the new smart VCF header code is created by Eric Banks
*/
private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
@ -285,8 +293,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildInfo( VariantContext vc ) throws IOException {
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
final String key = infoFieldEntry.getKey();
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
final String field = infoFieldEntry.getKey();
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
writer.start(encoder, vc);
writer.site(encoder, vc);
writer.done(encoder, vc);
@ -295,7 +304,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
final BCF2Codec.LazyData lazyData = getLazyData(vc);
if ( lazyData != null ) {
if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lazyData != null ) {
// we never decoded any data from this BCF file, so just pass it back
return lazyData.bytes;
} else {
@ -303,6 +312,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
@ -316,6 +326,19 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
}
/**
 * Fails with a descriptive error when a field (INFO or FORMAT) shows up on a record being
 * written out but no header line exists for it.
 *
 * This method never returns normally; it always throws.
 *
 * @param vc the VariantContext carrying the field; its chromosome, start and source are reported in the message
 * @param field the key of the offending field
 * @param fieldType label for the kind of field, inserted verbatim into the message (callers pass "INFO" or "FORMAT")
 * @throws UserException always, describing the field and where it was found
 */
private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
    // Position of the record, rendered as chr:start
    final String location = vc.getChr() + ":" + vc.getStart();
    final String message = "Found field " + field + " in the " + fieldType + " fields of VariantContext at "
            + location + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader";
    throw new UserException(message);
}
// --------------------------------------------------------------------------------
//
// Low-level block encoding