Smarter infrastructure to decode genotypes in BCF
-- Eliminated the large intermediate map from field name to a list of List<Integer> values that was needed to create genotypes without the GenotypeBuilder. The new code is cleaner and simply fills in an array of GenotypeBuilders as it moves through the column layout in BCF2. -- We now create, once, decoders specialized for each genotype field (GT, AD, etc.) that can be optimized for putting data into the GenotypeBuilder. In a subsequent commit these will actually use lower-level BCF2 decoders to create the low-level ints and int[]s, avoiding the intermediate List<Integer> form. -- Further reduced the amount of data to be computed in the DiffEngine. The DiffEngine algorithm needs to be rethought to be efficient...
This commit is contained in:
parent
889e3c4583
commit
17fbd103d0
|
|
@ -51,6 +51,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
||||||
private final BCF2Decoder decoder = new BCF2Decoder();
|
private final BCF2Decoder decoder = new BCF2Decoder();
|
||||||
private boolean skipGenotypes = false;
|
private boolean skipGenotypes = false;
|
||||||
private final static int MAX_HEADER_SIZE = 0x08000000;
|
private final static int MAX_HEADER_SIZE = 0x08000000;
|
||||||
|
private BCF2GenotypeFieldDecoders gtFieldDecoders = null;
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -128,6 +129,9 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
||||||
// create the string dictionary
|
// create the string dictionary
|
||||||
dictionary = parseDictionary(header);
|
dictionary = parseDictionary(header);
|
||||||
|
|
||||||
|
// prepare the genotype field decoders
|
||||||
|
gtFieldDecoders = new BCF2GenotypeFieldDecoders(header);
|
||||||
|
|
||||||
// position right before next line (would be right before first real record byte at end of header)
|
// position right before next line (would be right before first real record byte at end of header)
|
||||||
return new FeatureCodecHeader(header, inputStream.getPosition());
|
return new FeatureCodecHeader(header, inputStream.getPosition());
|
||||||
}
|
}
|
||||||
|
|
@ -216,7 +220,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
||||||
return new SitesInfoForDecoding(pos, nFormatFields, nSamples, alleles);
|
return new SitesInfoForDecoding(pos, nFormatFields, nSamples, alleles);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static class SitesInfoForDecoding {
|
protected final static class SitesInfoForDecoding {
|
||||||
final int pos;
|
final int pos;
|
||||||
final int nFormatFields;
|
final int nFormatFields;
|
||||||
final int nSamples;
|
final int nSamples;
|
||||||
|
|
@ -361,6 +365,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private final ArrayList<String> parseDictionary(final VCFHeader header) {
|
private final ArrayList<String> parseDictionary(final VCFHeader header) {
|
||||||
final ArrayList<String> dict = BCF2Utils.makeDictionary(header);
|
final ArrayList<String> dict = BCF2Utils.makeDictionary(header);
|
||||||
|
|
||||||
|
|
@ -374,4 +379,8 @@ public class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDepende
|
||||||
protected VCFHeader getHeader() {
|
protected VCFHeader getHeader() {
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String field) {
|
||||||
|
return gtFieldDecoders.getDecoder(field);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,210 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
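* An efficient scheme for decoding BCF2 genotype fields: each field name maps to a specialized decoder (with a generic fallback) that writes values directly into GenotypeBuilders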
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class BCF2GenotypeFieldDecoders {
|
||||||
|
// initialized once per instance (one per codec) so that parallel codecs can work independently
|
||||||
|
private final HashMap<String, Decoder> genotypeFieldDecoder = new HashMap<String, Decoder>();
|
||||||
|
private final Decoder defaultDecoder = new GenericDecoder();
|
||||||
|
|
||||||
|
public BCF2GenotypeFieldDecoders(final VCFHeader header) {
|
||||||
|
// TODO -- fill in appropriate decoders for each FORMAT field in the header
|
||||||
|
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FLDecoder());
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
|
||||||
|
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_QUALITY_KEY, new GQDecoder());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Genotype field decoder
|
||||||
|
//
|
||||||
|
// -----------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return decoder appropriate for field, or the generic decoder if no
|
||||||
|
* specialized one is bound
|
||||||
|
* @param field the GT field to decode
|
||||||
|
* @return a non-null decoder
|
||||||
|
*/
|
||||||
|
@Requires("field != null")
|
||||||
|
@Ensures("result != null")
|
||||||
|
public Decoder getDecoder(final String field) {
|
||||||
|
final Decoder d = genotypeFieldDecoder.get(field);
|
||||||
|
return d == null ? defaultDecoder : d;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a field (implicit from creation) encoded as
|
||||||
|
* typeDescriptor in the decoder object in the GenotypeBuilders
|
||||||
|
* one for each sample in order.
|
||||||
|
*
|
||||||
|
* The way this works is that this decode method
|
||||||
|
* iterates over the builders, decoding a genotype field
|
||||||
|
* in BCF2 for each sample from decoder.
|
||||||
|
*
|
||||||
|
* This system allows us to easily use specialized
|
||||||
|
* decoders for specific genotype field values. For example,
|
||||||
|
* we use a special decoder to directly read the BCF2 data for
|
||||||
|
* the PL field into an int[] rather than the generic List of Integer
|
||||||
|
*/
|
||||||
|
public interface Decoder {
|
||||||
|
public void decode(final List<Allele> siteAlleles,
|
||||||
|
final String field,
|
||||||
|
final BCF2Decoder decoder,
|
||||||
|
final byte typeDescriptor,
|
||||||
|
final List<GenotypeBuilder> gbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class GTDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
// TODO -- fast path for size == 2 (diploid)
|
||||||
|
final List<Integer> encoded = (List<Integer>)decoder.decodeTypedValue(typeDescriptor);
|
||||||
|
if ( encoded == null )
|
||||||
|
// no called sample GT = .
|
||||||
|
gb.alleles(null);
|
||||||
|
else {
|
||||||
|
// we have at least some alleles to decode
|
||||||
|
final List<Allele> gt = new ArrayList<Allele>(encoded.size());
|
||||||
|
|
||||||
|
for ( final Integer encode : encoded ) {
|
||||||
|
if ( encode == null ) {
|
||||||
|
// absent, as are all following by definition
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
final int offset = encode >> 1;
|
||||||
|
if ( offset == 0 )
|
||||||
|
gt.add(Allele.NO_CALL);
|
||||||
|
else
|
||||||
|
gt.add(siteAlleles.get(offset - 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gb.alleles(gt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class DPDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
final Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||||
|
if ( value != null )
|
||||||
|
gb.DP((Integer)value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class GQDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
final Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||||
|
if ( value != null )
|
||||||
|
gb.GQ((Integer)value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ADDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
final int[] AD = decodeIntArray(decoder.decodeTypedValue(typeDescriptor));
|
||||||
|
if ( AD != null )
|
||||||
|
gb.AD(AD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class PLDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
final int[] PL = decodeIntArray(decoder.decodeTypedValue(typeDescriptor));
|
||||||
|
if ( PL != null )
|
||||||
|
gb.PL(PL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class FLDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
throw new ReviewedStingException("Genotype filter not implemented in BCF2 yet");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class GenericDecoder implements Decoder {
|
||||||
|
@Override
|
||||||
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||||
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
|
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||||
|
if ( value != null ) { // don't add missing values
|
||||||
|
if ( value instanceof List && ((List)value).size() == 1)
|
||||||
|
value = ((List)value).get(0);
|
||||||
|
gb.attribute(field, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int[] decodeIntArray(final Object value) {
|
||||||
|
// todo -- decode directly into int[]
|
||||||
|
final List<Integer> pls = (List<Integer>)value;
|
||||||
|
if ( pls != null ) { // we have a PL field
|
||||||
|
final int[] x = new int[pls.size()];
|
||||||
|
for ( int j = 0; j < x.length; j++ )
|
||||||
|
x[j] = pls.get(j);
|
||||||
|
return x;
|
||||||
|
} else
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,8 +25,6 @@
|
||||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
|
@ -60,9 +58,11 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||||
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
logger.info("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||||
|
|
||||||
// load our bytep[] data into the decoder
|
// load our byte[] data into the decoder
|
||||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||||
|
|
||||||
|
// TODO -- fast path for sites only
|
||||||
|
|
||||||
// go ahead and decode everyone
|
// go ahead and decode everyone
|
||||||
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
final List<String> samples = new ArrayList<String>(codec.getHeader().getGenotypeSamples());
|
||||||
|
|
||||||
|
|
@ -71,119 +71,32 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||||
"different numbers of samples per record. Saw " + samples.size() +
|
"different numbers of samples per record. Saw " + samples.size() +
|
||||||
" samples in header but have a record with " + nSamples + " samples");
|
" samples in header but have a record with " + nSamples + " samples");
|
||||||
|
|
||||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(decoder, nFields, nSamples);
|
// create and initialize the genotypes array
|
||||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
final ArrayList<GenotypeBuilder> builders = new ArrayList<GenotypeBuilder>(nSamples);
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder();
|
|
||||||
for ( int i = 0; i < nSamples; i++ ) {
|
for ( int i = 0; i < nSamples; i++ ) {
|
||||||
// all of the information we need for each genotype, with default values
|
builders.add(new GenotypeBuilder(samples.get(i)));
|
||||||
gb.reset();
|
|
||||||
gb.name(samples.get(i));
|
|
||||||
|
|
||||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
|
||||||
final String field = entry.getKey();
|
|
||||||
Object value = entry.getValue().get(i);
|
|
||||||
try {
|
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
|
||||||
gb.alleles(decodeGenotypeAlleles(siteAlleles, (List<Integer>)value));
|
|
||||||
} else if ( field.equals(VCFConstants.DEPTH_KEY) ) {
|
|
||||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
|
||||||
gb.DP((Integer)value);
|
|
||||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
|
||||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
|
||||||
gb.GQ((Integer)value);
|
|
||||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
|
||||||
final int[] PLs = decodeIntArray(value);
|
|
||||||
if ( PLs != null )
|
|
||||||
gb.PL(PLs);
|
|
||||||
} else if ( field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS) ) {
|
|
||||||
final int[] AD = decodeIntArray(value);
|
|
||||||
if ( AD != null )
|
|
||||||
gb.AD(AD);
|
|
||||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
|
||||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
|
||||||
//filters = new HashSet<String>(values.get(i));
|
|
||||||
} else { // add to attributes
|
|
||||||
if ( value != null ) { // don't add missing values
|
|
||||||
if ( value instanceof List && ((List)value).size() == 1)
|
|
||||||
value = ((List)value).get(0);
|
|
||||||
gb.attribute(field, value);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} catch ( ClassCastException e ) {
|
|
||||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
|
||||||
+ " inconsistent with the value observed in the decoded value in the "
|
|
||||||
+ " BCF file. Value was " + value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final Genotype g = gb.make();
|
|
||||||
genotypes.add(g);
|
|
||||||
}
|
|
||||||
|
|
||||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
|
||||||
}
|
|
||||||
|
|
||||||
private final int[] decodeIntArray(final Object value) {
|
|
||||||
// todo -- decode directly into int[]
|
|
||||||
final List<Integer> pls = (List<Integer>)value;
|
|
||||||
if ( pls != null ) { // we have a PL field
|
|
||||||
final int[] x = new int[pls.size()];
|
|
||||||
for ( int j = 0; j < x.length; j++ )
|
|
||||||
x[j] = pls.get(j);
|
|
||||||
return x;
|
|
||||||
} else
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final List<Allele> decodeGenotypeAlleles(final ArrayList<Allele> siteAlleles, final List<Integer> encoded) {
|
|
||||||
if ( encoded == null )
|
|
||||||
// no called sample GT = .
|
|
||||||
return Collections.emptyList();
|
|
||||||
else {
|
|
||||||
// we have at least some alleles to decode
|
|
||||||
final List<Allele> gt = new ArrayList<Allele>(encoded.size());
|
|
||||||
for ( final Integer encode : encoded ) {
|
|
||||||
if ( encode == null ) // absent, as are all following by definition
|
|
||||||
return gt;
|
|
||||||
else {
|
|
||||||
final int offset = encode >> 1;
|
|
||||||
if ( offset == 0 )
|
|
||||||
gt.add(Allele.NO_CALL);
|
|
||||||
else
|
|
||||||
gt.add(siteAlleles.get(offset - 1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return gt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final BCF2Decoder decoder,
|
|
||||||
final int nFields,
|
|
||||||
final int nSamples) {
|
|
||||||
assert (nFields > 0 && nSamples > 0) || (nFields == 0 && nSamples == 0);
|
|
||||||
|
|
||||||
if ( nFields == 0 ) // fast path exit for sites only file
|
|
||||||
return Collections.emptyMap();
|
|
||||||
else {
|
|
||||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
|
||||||
|
|
||||||
for ( int i = 0; i < nFields; i++ ) {
|
for ( int i = 0; i < nFields; i++ ) {
|
||||||
|
// get the field name
|
||||||
final int offset = (Integer) decoder.decodeTypedValue();
|
final int offset = (Integer) decoder.decodeTypedValue();
|
||||||
final String field = codec.getDictionaryString(offset);
|
final String field = codec.getDictionaryString(offset);
|
||||||
|
|
||||||
// the type of each element
|
// the type of each element
|
||||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||||
for ( int j = 0; j < nSamples; j++ )
|
try {
|
||||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
assert ! map.containsKey(field);
|
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||||
|
+ " inconsistent with the value observed in the decoded value");
|
||||||
map.put(field, values);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return map;
|
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||||
}
|
for ( final GenotypeBuilder gb : builders )
|
||||||
|
genotypes.add(gb.make());
|
||||||
|
|
||||||
|
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -265,6 +265,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
/**
|
/**
|
||||||
* @return Returns true if this Genotype has PL field values
|
* @return Returns true if this Genotype has PL field values
|
||||||
*/
|
*/
|
||||||
|
@Ensures("(result && getLikelihoods() != null) || (! result && getLikelihoods() == null)")
|
||||||
public boolean hasLikelihoods() {
|
public boolean hasLikelihoods() {
|
||||||
return getPL() != null;
|
return getPL() != null;
|
||||||
}
|
}
|
||||||
|
|
@ -284,7 +285,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
|
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
|
||||||
* @return null or a GenotypesLikelihood object for this sample's PL field
|
* @return null or a GenotypesLikelihood object for this sample's PL field
|
||||||
*/
|
*/
|
||||||
@Ensures({"hasLikelihoods() && result != null", "! hasLikelihoods() && result == null"})
|
@Ensures("(hasLikelihoods() && result != null) || (! hasLikelihoods() && result == null)")
|
||||||
public GenotypeLikelihoods getLikelihoods() {
|
public GenotypeLikelihoods getLikelihoods() {
|
||||||
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
|
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,8 @@ public class MD5DB {
|
||||||
/**
|
/**
|
||||||
* Subdirectory under the ant build directory where we store integration test md5 results
|
* Subdirectory under the ant build directory where we store integration test md5 results
|
||||||
*/
|
*/
|
||||||
private static final int MAX_RECORDS_TO_READ = 10000;
|
private static final int MAX_RECORDS_TO_READ = 1000;
|
||||||
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = 1000;
|
private static final int MAX_RAW_DIFFS_TO_SUMMARIZE = 100;
|
||||||
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
public static final String LOCAL_MD5_DB_DIR = "integrationtests";
|
||||||
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
public static final String GLOBAL_MD5_DB_DIR = "/humgen/gsa-hpprojects/GATK/data/integrationtests";
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue