Bugfix for BCF2
-- Always decode the genotypes block when writing out a BCF file. If the header changes (and we currently have no easy way to detect this), the dictionary keys used in the genotypes block may be invalid. Temporarily added a private static boolean that turns off writing of the undecoded genotype blocks until Eric and his team rewrite the header. Signed-off-by: Mark DePristo <depristo@broadinstitute.org>
This commit is contained in:
parent
a99f3d6adf
commit
3066894215
|
|
@ -160,7 +160,7 @@ public class BCF2FieldWriterManager {
|
|||
/**
|
||||
* Get a site writer specialized to encode values for site info field
|
||||
* @param field key found in the VCF header INFO records
|
||||
* @return
|
||||
* @return non-null writer if one can be found, or null if none exists for field
|
||||
*/
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
|
||||
return getWriter(field, siteWriters);
|
||||
|
|
@ -169,17 +169,14 @@ public class BCF2FieldWriterManager {
|
|||
/**
|
||||
* Get a genotypes writer specialized to encode values for genotypes field
|
||||
* @param field key found in the VCF header FORMAT records
|
||||
* @return
|
||||
* @return non-null writer if one can be found, or null if none exists for field
|
||||
*/
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
|
||||
return getWriter(field, genotypesWriters);
|
||||
}
|
||||
|
||||
@Requires({"map != null", "key != null"})
|
||||
@Ensures("result != null")
|
||||
public <T> T getWriter(final String key, final Map<String, T> map) {
|
||||
final T writer = map.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
return writer;
|
||||
return map.get(key);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,14 @@ import java.util.*;
|
|||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
/**
|
||||
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
||||
* is found in the input VC. This can be very dangerous as the genotype encoding
|
||||
* depends on the exact ordering of the header.
|
||||
*
|
||||
* TODO -- enable when the new smart VCF header code is created by Eric Banks
|
||||
*/
|
||||
private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
|
||||
|
||||
|
|
@ -285,8 +293,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private void buildInfo( VariantContext vc ) throws IOException {
|
||||
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
|
||||
final String key = infoFieldEntry.getKey();
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
|
||||
final String field = infoFieldEntry.getKey();
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
|
||||
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
|
||||
writer.start(encoder, vc);
|
||||
writer.site(encoder, vc);
|
||||
writer.done(encoder, vc);
|
||||
|
|
@ -295,7 +304,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
if ( lazyData != null ) {
|
||||
if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lazyData != null ) {
|
||||
// we never decoded any data from this BCF file, so just pass it back
|
||||
return lazyData.bytes;
|
||||
} else {
|
||||
|
|
@ -303,6 +312,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
|
||||
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
|
|
@ -316,6 +326,19 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws a UserException with a meaningful message when a field (INFO or FORMAT) is found when writing out a file
|
||||
* but there's no header line for it.
|
||||
*
|
||||
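* @param vc the VariantContext in which the field was found; its contig, start and source are reported in the message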
|
||||
* @param field the name of the field for which no writer was found
|
||||
* @param fieldType the kind of field being written, e.g. "INFO" or "FORMAT"; used only in the error message
|
||||
*/
|
||||
private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
|
||||
throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
|
||||
vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level block encoding
|
||||
|
|
|
|||
Loading…
Reference in New Issue