diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 122fc1dc9..574677334 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -434,7 +434,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD final public int nGenotypeFields; final public byte[] bytes; - @Requires({"nGenotypeField > 0", "bytes != null"}) + @Requires({"nGenotypeFields > 0", "bytes != null"}) public LazyData(final int nGenotypeFields, final byte[] bytes) { this.nGenotypeFields = nGenotypeFields; this.bytes = bytes; @@ -446,7 +446,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD return getDictionaryString((Integer) decoder.decodeTypedValue()); } - @Requires("offset >= dictionary.size()") + @Requires("offset < dictionary.size()") @Ensures("result != null") protected final String getDictionaryString(final int offset) { return dictionary.get(offset); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java index cea2d9491..52953a927 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java @@ -163,7 +163,7 @@ public final class BCF2Encoder { encodeStream.write(typeByte); if ( BCF2Utils.willOverflow(size) ) { // write in the overflow size - encodeTyped(size, determineIntegerType(size)); + encodeTyped(size, BCF2Utils.determineIntegerType(size)); } } @@ -181,42 +181,13 @@ public final class BCF2Encoder { // // -------------------------------------------------------------------------------- - public final BCF2Type determineIntegerType(final int[] values) { - // literally a copy of the code below, but there's no general way to unify lists and arrays in java - BCF2Type maxType = BCF2Type.INT8; - for ( final int value : values ) { - final BCF2Type type1 = determineIntegerType(value); - switch ( type1 ) { - case INT8: break; - case INT16: maxType = BCF2Type.INT16; break; - case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new ReviewedStingException("Unexpected integer type " + type1 ); - } - } - return maxType; - } - - public final BCF2Type determineIntegerType(final List values) { - BCF2Type maxType = BCF2Type.INT8; - for ( final int value : values ) { - final BCF2Type type1 = determineIntegerType(value); - switch ( type1 ) { - case INT8: break; - case INT16: maxType = BCF2Type.INT16; break; - case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new ReviewedStingException("Unexpected integer type " + type1 ); - } - } - return maxType; - } - - public final BCF2Type determineIntegerType(final int value) { - for ( final BCF2Type potentialType : BCF2Utils.INTEGER_TYPES_BY_SIZE ) { - if ( potentialType.withinRange(value) ) - return potentialType; - } - - throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value); + public void encodeString(final String s, final int sizeToWrite) throws IOException { + final byte[] bytes = s.getBytes(); + for ( int i = 0; i < sizeToWrite; i++ ) + if ( i < bytes.length ) + encodeRawChar(bytes[i]); + else + encodeRawMissingValue(BCF2Type.CHAR); } /** @@ -245,7 +216,7 @@ public final class BCF2Encoder { final Object toType = arg instanceof List ? ((List)arg).get(0) : arg; if ( toType instanceof Integer ) - return determineIntegerType((Integer)toType); + return BCF2Utils.determineIntegerType((Integer) toType); else if ( toType instanceof String ) return BCF2Type.CHAR; else if ( toType instanceof Double ) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index fceb60a57..60e82b800 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -219,4 +219,42 @@ public final class BCF2Utils { else return new File( path + ".bcf" ); } + + public final static BCF2Type determineIntegerType(final int value) { + for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { + if ( potentialType.withinRange(value) ) + return potentialType; + } + + throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value); + } + + public final static BCF2Type determineIntegerType(final int[] values) { + // literally a copy of the code below, but there's no general way to unify lists and arrays in java + BCF2Type maxType = BCF2Type.INT8; + for ( final int value : values ) { + final BCF2Type type1 = determineIntegerType(value); + switch ( type1 ) { + case INT8: break; + case INT16: maxType = BCF2Type.INT16; break; + case INT32: return BCF2Type.INT32; // fast path for largest possible value + default: throw new ReviewedStingException("Unexpected integer type " + type1 ); + } + } + return maxType; + } + + public final static BCF2Type determineIntegerType(final List values) { + BCF2Type maxType = BCF2Type.INT8; + for ( final int value : values ) { + final BCF2Type type1 = determineIntegerType(value); + switch ( type1 ) { + case INT8: break; + case INT16: maxType = BCF2Type.INT16; break; + case INT32: return BCF2Type.INT32; // fast path for largest possible value + default: throw new ReviewedStingException("Unexpected integer type " + type1 ); + } + } + return maxType; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java index fd762342e..87e5efcef 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFConstants.java @@ -114,7 +114,5 @@ public final class VCFConstants { public static final String EMPTY_GENOTYPE = "./."; public static final int MAX_GENOTYPE_QUAL = 99; - public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; - public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00"; public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 400f85761..713dc219b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -390,7 +390,6 @@ public final class GenotypeBuilder { * * @return */ - @Requires("filters != null") public GenotypeBuilder unfiltered() { if ( extendedAttributes != null ) extendedAttributes.remove(VCFConstants.GENOTYPE_FILTER_KEY); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 547f0425d..2a4b251bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -111,8 +111,7 @@ public class VariantContextUtils { if ( AN == 0 ) { alleleFreqs.add(0.0); } else { - // todo -- this is a performance problem - final Double freq = Double.valueOf(String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes))); + final Double freq = (double)foundersAltChromosomes / totalFoundersChromosomes; alleleFreqs.add(freq); } } @@ -155,17 +154,6 @@ public class VariantContextUtils { builder.attributes(calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues, founderIds)); } - public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) { - int precision = 1; - - while ( maxValue > 1 ) { - precision++; - maxValue /= 10.0; - } - - return "%." + precision + "f"; - } - public static Genotype removePLs(Genotype g) { if ( g.hasLikelihoods() ) return new GenotypeBuilder(g).noPL().make(); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index ee65497f1..2574753bd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -209,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter { if ( !vc.hasLog10PError() ) mWriter.write(VCFConstants.MISSING_VALUE_v4); else - mWriter.write(getQualValue(vc.getPhredScaledQual())); + mWriter.write(formatQualValue(vc.getPhredScaledQual())); mWriter.write(VCFConstants.FIELD_SEPARATOR); // FILTER @@ -277,10 +277,13 @@ class VCFWriter extends IndexingVariantContextWriter { return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); } - private String getQualValue(double qual) { - String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual); - if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) ) - s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length()); + private static final String QUAL_FORMAT_STRING = "%.2f"; + private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00"; + + private String formatQualValue(double qual) { + String s = String.format(QUAL_FORMAT_STRING, qual); + if ( s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM) ) + s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length()); return s; } @@ -431,12 +434,39 @@ class VCFWriter extends IndexingVariantContextWriter { mWriter.write(encoding); } + /** + * Takes a double value and pretty prints it to a String for display + * + * Large doubles => gets %.2f style formatting + * Doubles < 1 / 10 but > 1/100 => get %.3f style formatting + * Double < 1/100 => %.3e formatting + * @param d + * @return + */ + public static final String formatVCFDouble(final double d) { + String format = "%.2f"; + if ( d < 0.1 ) { + if ( d < 0.01 ) { + if ( Math.abs(d) >= 1e-20 ) + format = "%.3e"; + else { + // return a zero format + return "0.00"; + } + } else { + format = "%.3f"; + } + } + + return String.format(format, d); + } + public static String formatVCFField(Object val) { String result; if ( val == null ) result = VCFConstants.MISSING_VALUE_v4; else if ( val instanceof Double ) - result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val); + result = formatVCFDouble((Double) val); else if ( val instanceof Boolean ) result = (Boolean)val ? "" : null; // empty string for true, null for false else if ( val instanceof List ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java index 464dcd807..31f704b85 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.testng.Assert; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderUnitTest; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index 4615d3337..52cbd63e7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -317,8 +317,8 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { @Test(dataProvider = "BestIntTypeTests") public void determineBestEncoding(final List ints, final BCF2Type expectedType) throws IOException { BCF2Encoder encoder = new BCF2Encoder(); - Assert.assertEquals(encoder.determineIntegerType(ints), expectedType); - Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType); + Assert.assertEquals(BCF2Utils.determineIntegerType(ints), expectedType); + Assert.assertEquals(BCF2Utils.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType); } // ----------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java similarity index 99% rename from public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java index 7f1a90f0b..70460ae01 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFHeaderUnitTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.genotype.vcf; +package org.broadinstitute.sting.utils.codecs.vcf; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; diff --git a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java similarity index 76% rename from public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index 08f5bb4d8..0a9eac792 100644 --- a/public/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.utils.genotype.vcf; +package org.broadinstitute.sting.utils.variantcontext.writer; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.AbstractFeatureReader; @@ -15,10 +15,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.variantcontext.*; +import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory; import org.testng.Assert; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -147,4 +149,37 @@ public class VCFWriterUnitTest extends BaseTest { } Assert.assertEquals(index, additionalColumns.size()); } + + @DataProvider(name = "VCFWriterDoubleFormatTestData") + public Object[][] makeVCFWriterDoubleFormatTestData() { + List tests = new ArrayList(); + tests.add(new Object[]{1.0, "1.00"}); + tests.add(new Object[]{10.1, "10.10"}); + tests.add(new Object[]{10.01, "10.01"}); + tests.add(new Object[]{10.012, "10.01"}); + tests.add(new Object[]{10.015, "10.02"}); + tests.add(new Object[]{0.0, "0.00"}); + tests.add(new Object[]{0.5, "0.50"}); + tests.add(new Object[]{0.55, "0.55"}); + tests.add(new Object[]{0.555, "0.56"}); + tests.add(new Object[]{0.1, "0.10"}); + tests.add(new Object[]{0.050, "0.050"}); + tests.add(new Object[]{0.010, "0.010"}); + tests.add(new Object[]{0.012, "0.012"}); + tests.add(new Object[]{0.0012, "1.200e-03"}); + tests.add(new Object[]{1.2e-4, "1.200e-04"}); + tests.add(new Object[]{1.21e-4, "1.210e-04"}); + tests.add(new Object[]{1.212e-5, "1.212e-05"}); + tests.add(new Object[]{1.2123e-6, "1.212e-06"}); + tests.add(new Object[]{Double.POSITIVE_INFINITY, "Infinity"}); + tests.add(new Object[]{Double.NEGATIVE_INFINITY, "-Infinity"}); + tests.add(new Object[]{Double.NaN, "NaN"}); + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "VCFWriterDoubleFormatTestData") + public void testVCFWriterDoubleFormatTestData(final double d, final String expected) { + Assert.assertEquals(VCFWriter.formatVCFDouble(d), expected, "Failed to pretty print double in VCFWriter"); + } } +