diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 2f4ec4612..2c1d99546 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -100,13 +100,17 @@ public final class BCF2Encoder { @Ensures("encodeStream.size() > old(encodeStream.size())") public final void encodeTypedString(final String s) throws IOException { + encodeTypedString(s == null ? null : s.getBytes()); /* a null String must still be encoded as a zero-length CHAR vector, as it was before the byte[] overload was split out */ + } + + @Ensures("encodeStream.size() > old(encodeStream.size())") + public final void encodeTypedString(final byte[] s) throws IOException { if ( s == null ) encodeType(0, BCF2Type.CHAR); else { - encodeType(s.length(), BCF2Type.CHAR); - for ( int i = 0; i < s.length(); i++ ) { - final byte c = (byte)s.charAt(i); - encodeRawChar(c); + encodeType(s.length, BCF2Type.CHAR); + for ( int i = 0; i < s.length; i++ ) { + encodeRawChar(s[i]); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index 4266d1cfb..b466038dd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -389,17 +389,30 @@ public abstract class BCF2FieldEncoder { // ---------------------------------------------------------------------- public static class Float extends BCF2FieldEncoder { + final boolean isAtomic; + public Float(final VCFCompoundHeaderLine headerLine, final Map dict ) { super(headerLine, dict, BCF2Type.FLOAT); + isAtomic = hasConstantNumElements() && numElements() == 1; } @Override public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final 
int minValues) throws IOException { - final List doubles = toList(Double.class, value); int count = 0; - for ( final double d : doubles ) { - encoder.encodeRawFloat(d); - count++; + // TODO -- can be restructured to avoid toList operation + if ( isAtomic ) { + // fast path for fields with 1 fixed float value + if ( value != null ) { + encoder.encodeRawFloat((Double)value); + count++; + } + } else { + // handle generic case + final List doubles = toList(Double.class, value); + for ( final double d : doubles ) { + encoder.encodeRawFloat(d); + count++; + } } for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type); } @@ -445,6 +458,30 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- + /** + * Specialized int encoder for atomic (non-list) integers + */ + public static class AtomicInt extends BCF2FieldEncoder { + public AtomicInt(final VCFCompoundHeaderLine headerLine, final Map dict ) { + super(headerLine, dict, null); + } + + @Override + public BCF2Type getDynamicType(final Object value) { + return value == null ? 
BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value); + } + + @Override + public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException { + int count = 0; + if ( value != null ) { + encoder.encodeRawInt((Integer)value, type); + count++; + } + for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type); + } + } + public static class GenericInts extends BCF2FieldEncoder { public GenericInts(final VCFCompoundHeaderLine headerLine, final Map dict ) { super(headerLine, dict, null); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 9e0c27045..0a54bc5d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -201,6 +201,9 @@ public abstract class BCF2FieldWriter { @Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { + // TODO + // TODO this piece of code consumes like 10% of the runtime alone because of the vc.getGenotypes() iteration + // TODO encodingType = BCF2Type.INT8; for ( final Genotype g : vc.getGenotypes() ) { final int[] pls = ige.getValues(g); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 00ab1a2ed..9f5bbc487 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -114,7 +114,10 @@ public class BCF2FieldWriterManager { case Float: return new BCF2FieldEncoder.Float(line, dict); case Integer: - return new 
BCF2FieldEncoder.GenericInts(line, dict); + if ( line.isFixedCount() && line.getCount() == 1 ) + return new BCF2FieldEncoder.AtomicInt(line, dict); + else + return new BCF2FieldEncoder.GenericInts(line, dict); default: throw new ReviewedStingException("Unexpected type for field " + line.getID()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index ae5e2bdcb..37c8e83ff 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -257,7 +257,9 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildAlleles( VariantContext vc ) throws IOException { final boolean needsPadding = VariantContextUtils.needsPadding(vc); for ( final Allele allele : vc.getAlleles() ) { - final String s = needsPadding ? VariantContextUtils.padAllele(vc,allele) : allele.getDisplayString(); + byte[] s = allele.getBases(); + if ( needsPadding ) + s = VariantContextUtils.padAllele(vc,allele).getBytes(); encoder.encodeTypedString(s); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index e4a70db9d..f2b5075c6 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -342,7 +342,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + 
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("adcf53b8dcfde7f2c657745751549bfe")); + Arrays.asList("5c7db047ae9417d37c6bbda1d8ea6019")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); }