Simple optimizations for BCF2Encoder
- Inline encodeString that doesn't go through an intermediate List<Byte>
- Inline encodeString that uses byte[] directly, so that we can go straight from Allele.getBases() to BCF2
- Fast paths for atomic Float and atomic Integer values that avoid creating an intermediate list
- Final UG integration test update
This commit is contained in:
parent
68eed7b313
commit
0384ce5d34
|
|
@ -100,13 +100,17 @@ public final class BCF2Encoder {
|
|||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedString(final String s) throws IOException {
|
||||
encodeTypedString(s.getBytes());
|
||||
}
|
||||
|
||||
@Ensures("encodeStream.size() > old(encodeStream.size())")
|
||||
public final void encodeTypedString(final byte[] s) throws IOException {
|
||||
if ( s == null )
|
||||
encodeType(0, BCF2Type.CHAR);
|
||||
else {
|
||||
encodeType(s.length(), BCF2Type.CHAR);
|
||||
for ( int i = 0; i < s.length(); i++ ) {
|
||||
final byte c = (byte)s.charAt(i);
|
||||
encodeRawChar(c);
|
||||
encodeType(s.length, BCF2Type.CHAR);
|
||||
for ( int i = 0; i < s.length; i++ ) {
|
||||
encodeRawChar(s[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -389,17 +389,30 @@ public abstract class BCF2FieldEncoder {
|
|||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class Float extends BCF2FieldEncoder {
|
||||
final boolean isAtomic;
|
||||
|
||||
public Float(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.FLOAT);
|
||||
isAtomic = hasConstantNumElements() && numElements() == 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
int count = 0;
|
||||
for ( final double d : doubles ) {
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
// TODO -- can be restructured to avoid toList operation
|
||||
if ( isAtomic ) {
|
||||
// fast path for fields with 1 fixed float value
|
||||
if ( value != null ) {
|
||||
encoder.encodeRawFloat((Double)value);
|
||||
count++;
|
||||
}
|
||||
} else {
|
||||
// handle generic case
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
for ( final double d : doubles ) {
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
|
|
@ -445,6 +458,30 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Specialized int encoder for atomic (non-list) integers
|
||||
*/
|
||||
public static class AtomicInt extends BCF2FieldEncoder {
|
||||
public AtomicInt(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
if ( value != null ) {
|
||||
encoder.encodeRawInt((Integer)value, type);
|
||||
count++;
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
public static class GenericInts extends BCF2FieldEncoder {
|
||||
public GenericInts(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
|
|
|
|||
|
|
@ -201,6 +201,9 @@ public abstract class BCF2FieldWriter {
|
|||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// TODO
|
||||
// TODO this piece of code alone consumes about 10% of the runtime because of the vc.getGenotypes() iteration
|
||||
// TODO
|
||||
encodingType = BCF2Type.INT8;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int[] pls = ige.getValues(g);
|
||||
|
|
|
|||
|
|
@ -114,7 +114,10 @@ public class BCF2FieldWriterManager {
|
|||
case Float:
|
||||
return new BCF2FieldEncoder.Float(line, dict);
|
||||
case Integer:
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
if ( line.isFixedCount() && line.getCount() == 1 )
|
||||
return new BCF2FieldEncoder.AtomicInt(line, dict);
|
||||
else
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -257,7 +257,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
final String s = needsPadding ? VariantContextUtils.padAllele(vc,allele) : allele.getDisplayString();
|
||||
byte[] s = allele.getBases();
|
||||
if ( needsPadding )
|
||||
s = VariantContextUtils.padAllele(vc,allele).getBytes();
|
||||
encoder.encodeTypedString(s);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -342,7 +342,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("adcf53b8dcfde7f2c657745751549bfe"));
|
||||
Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue