No more makePrecisionFormatStringFromDenominatorValue
-- As values in VCs are becoming their native Java types the VCFWriter needs to own proper float formating. -- Created a smart float formatter in VCFWriter, with unit tests -- Removed makePrecisionFormatStringFromDenominatorValue and its uses -- Fix broken contracted -- Refactored some code from the encoder to utils in BCF2 -- HaplotypeCaller's GenotypingEngine was using old version of subset to context. Replaced with a faster call that I think is correct. Ryan, please confirm.
This commit is contained in:
parent
a822087f11
commit
51a3b6e25e
|
|
@ -434,7 +434,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
final public int nGenotypeFields;
|
||||
final public byte[] bytes;
|
||||
|
||||
@Requires({"nGenotypeField > 0", "bytes != null"})
|
||||
@Requires({"nGenotypeFields > 0", "bytes != null"})
|
||||
public LazyData(final int nGenotypeFields, final byte[] bytes) {
|
||||
this.nGenotypeFields = nGenotypeFields;
|
||||
this.bytes = bytes;
|
||||
|
|
@ -446,7 +446,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||
}
|
||||
|
||||
@Requires("offset >= dictionary.size()")
|
||||
@Requires("offset < dictionary.size()")
|
||||
@Ensures("result != null")
|
||||
protected final String getDictionaryString(final int offset) {
|
||||
return dictionary.get(offset);
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ public final class BCF2Encoder {
|
|||
encodeStream.write(typeByte);
|
||||
if ( BCF2Utils.willOverflow(size) ) {
|
||||
// write in the overflow size
|
||||
encodeTyped(size, determineIntegerType(size));
|
||||
encodeTyped(size, BCF2Utils.determineIntegerType(size));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -181,42 +181,13 @@ public final class BCF2Encoder {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : BCF2Utils.INTEGER_TYPES_BY_SIZE ) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
public void encodeString(final String s, final int sizeToWrite) throws IOException {
|
||||
final byte[] bytes = s.getBytes();
|
||||
for ( int i = 0; i < sizeToWrite; i++ )
|
||||
if ( i < bytes.length )
|
||||
encodeRawChar(bytes[i]);
|
||||
else
|
||||
encodeRawMissingValue(BCF2Type.CHAR);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -245,7 +216,7 @@ public final class BCF2Encoder {
|
|||
final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
|
||||
|
||||
if ( toType instanceof Integer )
|
||||
return determineIntegerType((Integer)toType);
|
||||
return BCF2Utils.determineIntegerType((Integer) toType);
|
||||
else if ( toType instanceof String )
|
||||
return BCF2Type.CHAR;
|
||||
else if ( toType instanceof Double )
|
||||
|
|
|
|||
|
|
@ -219,4 +219,42 @@ public final class BCF2Utils {
|
|||
else
|
||||
return new File( path + ".bcf" );
|
||||
}
|
||||
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCF2Type type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCF2Type.INT16; break;
|
||||
case INT32: return BCF2Type.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -114,7 +114,5 @@ public final class VCFConstants {
|
|||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final int MAX_GENOTYPE_QUAL = 99;
|
||||
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00";
|
||||
public static final Double VCF_ENCODING_EPSILON = 0.00005; // when we consider fields equal(), used in the Qual compare
|
||||
}
|
||||
|
|
@ -390,7 +390,6 @@ public final class GenotypeBuilder {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Requires("filters != null")
|
||||
public GenotypeBuilder unfiltered() {
|
||||
if ( extendedAttributes != null )
|
||||
extendedAttributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
|
|
|||
|
|
@ -111,8 +111,7 @@ public class VariantContextUtils {
|
|||
if ( AN == 0 ) {
|
||||
alleleFreqs.add(0.0);
|
||||
} else {
|
||||
// todo -- this is a performance problem
|
||||
final Double freq = Double.valueOf(String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes)));
|
||||
final Double freq = (double)foundersAltChromosomes / totalFoundersChromosomes;
|
||||
alleleFreqs.add(freq);
|
||||
}
|
||||
}
|
||||
|
|
@ -155,17 +154,6 @@ public class VariantContextUtils {
|
|||
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
|
||||
}
|
||||
|
||||
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
||||
int precision = 1;
|
||||
|
||||
while ( maxValue > 1 ) {
|
||||
precision++;
|
||||
maxValue /= 10.0;
|
||||
}
|
||||
|
||||
return "%." + precision + "f";
|
||||
}
|
||||
|
||||
public static Genotype removePLs(Genotype g) {
|
||||
if ( g.hasLikelihoods() )
|
||||
return new GenotypeBuilder(g).noPL().make();
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
if ( !vc.hasLog10PError() )
|
||||
mWriter.write(VCFConstants.MISSING_VALUE_v4);
|
||||
else
|
||||
mWriter.write(getQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(formatQualValue(vc.getPhredScaledQual()));
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
||||
// FILTER
|
||||
|
|
@ -277,10 +277,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||
}
|
||||
|
||||
private String getQualValue(double qual) {
|
||||
String s = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(VCFConstants.DOUBLE_PRECISION_INT_SUFFIX) )
|
||||
s = s.substring(0, s.length() - VCFConstants.DOUBLE_PRECISION_INT_SUFFIX.length());
|
||||
private static final String QUAL_FORMAT_STRING = "%.2f";
|
||||
private static final String QUAL_FORMAT_EXTENSION_TO_TRIM = ".00";
|
||||
|
||||
private String formatQualValue(double qual) {
|
||||
String s = String.format(QUAL_FORMAT_STRING, qual);
|
||||
if ( s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM) )
|
||||
s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length());
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
@ -431,12 +434,39 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
mWriter.write(encoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a double value and pretty prints it to a String for display
|
||||
*
|
||||
* Large doubles => gets %.2f style formatting
|
||||
* Doubles < 1 / 10 but > 1/100 </>=> get %.3f style formatting
|
||||
* Double < 1/100 => %.3e formatting
|
||||
* @param d
|
||||
* @return
|
||||
*/
|
||||
public static final String formatVCFDouble(final double d) {
|
||||
String format = "%.2f";
|
||||
if ( d < 0.1 ) {
|
||||
if ( d < 0.01 ) {
|
||||
if ( Math.abs(d) >= 1e-20 )
|
||||
format = "%.3e";
|
||||
else {
|
||||
// return a zero format
|
||||
return "0.00";
|
||||
}
|
||||
} else {
|
||||
format = "%.3f";
|
||||
}
|
||||
}
|
||||
|
||||
return String.format(format, d);
|
||||
}
|
||||
|
||||
public static String formatVCFField(Object val) {
|
||||
String result;
|
||||
if ( val == null )
|
||||
result = VCFConstants.MISSING_VALUE_v4;
|
||||
else if ( val instanceof Double )
|
||||
result = String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, (Double)val);
|
||||
result = formatVCFDouble((Double) val);
|
||||
else if ( val instanceof Boolean )
|
||||
result = (Boolean)val ? "" : null; // empty string for true, null for false
|
||||
else if ( val instanceof List ) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderUnitTest;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
|
|
|||
|
|
@ -317,8 +317,8 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "BestIntTypeTests")
|
||||
public void determineBestEncoding(final List<Integer> ints, final BCF2Type expectedType) throws IOException {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
Assert.assertEquals(encoder.determineIntegerType(ints), expectedType);
|
||||
Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
|
||||
Assert.assertEquals(BCF2Utils.determineIntegerType(ints), expectedType);
|
||||
Assert.assertEquals(BCF2Utils.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.AbstractFeatureReader;
|
||||
|
|
@ -15,10 +15,12 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -147,4 +149,37 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
}
|
||||
Assert.assertEquals(index, additionalColumns.size());
|
||||
}
|
||||
|
||||
@DataProvider(name = "VCFWriterDoubleFormatTestData")
|
||||
public Object[][] makeVCFWriterDoubleFormatTestData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{1.0, "1.00"});
|
||||
tests.add(new Object[]{10.1, "10.10"});
|
||||
tests.add(new Object[]{10.01, "10.01"});
|
||||
tests.add(new Object[]{10.012, "10.01"});
|
||||
tests.add(new Object[]{10.015, "10.02"});
|
||||
tests.add(new Object[]{0.0, "0.00"});
|
||||
tests.add(new Object[]{0.5, "0.50"});
|
||||
tests.add(new Object[]{0.55, "0.55"});
|
||||
tests.add(new Object[]{0.555, "0.56"});
|
||||
tests.add(new Object[]{0.1, "0.10"});
|
||||
tests.add(new Object[]{0.050, "0.050"});
|
||||
tests.add(new Object[]{0.010, "0.010"});
|
||||
tests.add(new Object[]{0.012, "0.012"});
|
||||
tests.add(new Object[]{0.0012, "1.200e-03"});
|
||||
tests.add(new Object[]{1.2e-4, "1.200e-04"});
|
||||
tests.add(new Object[]{1.21e-4, "1.210e-04"});
|
||||
tests.add(new Object[]{1.212e-5, "1.212e-05"});
|
||||
tests.add(new Object[]{1.2123e-6, "1.212e-06"});
|
||||
tests.add(new Object[]{Double.POSITIVE_INFINITY, "Infinity"});
|
||||
tests.add(new Object[]{Double.NEGATIVE_INFINITY, "-Infinity"});
|
||||
tests.add(new Object[]{Double.NaN, "NaN"});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "VCFWriterDoubleFormatTestData")
|
||||
public void testVCFWriterDoubleFormatTestData(final double d, final String expected) {
|
||||
Assert.assertEquals(VCFWriter.formatVCFDouble(d), expected, "Failed to pretty print double in VCFWriter");
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue