From 63cf7ec7ec1e6b50666c1a72cbc82387bfd62628 Mon Sep 17 00:00:00 2001 From: Roger Zurawicki Date: Tue, 27 Mar 2012 16:22:21 -0400 Subject: [PATCH 1/3] Added more primitives to GATK Report Column Type - The Integer column type now accepts byte and shorts - Updated Unit Tests and added a new testParse() test Signed-off-by: Mauricio Carneiro --- .../sting/gatk/report/GATKReportDataType.java | 46 +++++++++---------- .../sting/gatk/report/GATKReportTable.java | 13 ++---- .../sting/gatk/report/GATKReportUnitTest.java | 33 ++++++------- .../sting/queue/pipeline/PipelineTest.scala | 2 +- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index d9bae19c7..6451c5836 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -48,9 +48,9 @@ public enum GATKReportDataType { Boolean("%[Bb]"), /** - * Used for byte and char value. Will display as a char so use printable values! + * Used for char values. Will display as a char so use printable values! */ - Byte("%[Cc]"), + Character("%[Cc]"), /** * Used for float and double values. Will output a decimal with format %.8f unless otherwise specified. @@ -58,7 +58,7 @@ public enum GATKReportDataType { Decimal("%.*[EeFf]"), /** - * Used for int, and long values. Will display the full number by default. + * Used for int, byte, short, and long values. Will display the full number by default. */ Integer("%[Dd]"), @@ -97,17 +97,26 @@ public enum GATKReportDataType { GATKReportDataType value; if (object instanceof Boolean) { value = GATKReportDataType.Boolean; - } else if (object instanceof Byte || object instanceof Character) { - value = GATKReportDataType.Byte; - } else if (object instanceof Float || object instanceof Double) { + + } else if (object instanceof Character) { + value = GATKReportDataType.Character; + + } else if (object instanceof Float || + object instanceof Double) { value = GATKReportDataType.Decimal; - } else if (object instanceof Integer || object instanceof Long) { + + } else if (object instanceof Integer || + object instanceof Long || + object instanceof Short || + object instanceof Byte ) { value = GATKReportDataType.Integer; + } else if (object instanceof String) { value = GATKReportDataType.String; + } else { value = GATKReportDataType.Unknown; - //throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); + //throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); } return value; } @@ -140,8 +149,8 @@ public enum GATKReportDataType { return 0.0D; case Boolean: return false; - case Byte: - return (byte) 0; + case Character: + return '0'; case Integer: return 0L; case String: @@ -166,16 +175,7 @@ public enum GATKReportDataType { case Boolean: case Integer: return a.toString().equals(b.toString()); - case Byte: - // A mess that checks if the bytes and characters contain the same value - if ((a instanceof Character && b instanceof Character) || - (a instanceof Byte && b instanceof Byte)) - return a.toString().equals(b.toString()); - else if (a instanceof Character && b instanceof Byte) { - return ((Character) a).charValue() == ((Byte) b).byteValue(); - } else if (a instanceof Byte && b instanceof Character) { - return ((Byte) a).byteValue() == ((Character) b).charValue(); - } + case Character: case String: default: return a.equals(b); @@ -201,8 +201,8 @@ public enum GATKReportDataType { return Long.parseLong(str); case String: return str; - case Byte: - return (byte) str.toCharArray()[0]; + case Character: + return str.toCharArray()[0]; default: return str; } @@ -225,7 +225,7 @@ public enum GATKReportDataType { return "%d"; case String: return "%s"; - case Byte: + case Character: return "%c"; case Null: default: diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index e0e3ad1fc..1fe67154e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -254,7 +254,7 @@ public class GATKReportTable { * @param dottedColumnValues Period concatenated values. * @return The first primary key matching the column values or throws an exception. */ - public Object getPrimaryKey(String dottedColumnValues) { + public Object getPrimaryKeyByData(String dottedColumnValues) { Object key = findPrimaryKey(dottedColumnValues); if (key == null) throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); @@ -411,9 +411,8 @@ public class GATKReportTable { if (value == null) value = "null"; - // This code is bs. Why am do I have to conform to bad code - // Below is some ode to convert a string into its appropriate type. - // This is just Roger ranting + // This code below is bs. Why am do I have to conform to bad code + // Below is some code to convert a string into its appropriate type. // If we got a string but the column is not a String type Object newValue = null; @@ -431,7 +430,7 @@ public class GATKReportTable { } catch (Exception e) { } } - if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { + if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); } @@ -816,7 +815,7 @@ public class GATKReportTable { out.println(); } - out.println(); + out.println(); } public int getNumRows() { @@ -877,8 +876,6 @@ public class GATKReportTable { this.set(rowKey, columnKey, toAdd.get(rowKey)); //System.out.printf("Putting row with PK: %s \n", rowKey); } else { - - // TODO we should be able to handle combining data by adding, averaging, etc. this.set(rowKey, columnKey, toAdd.get(rowKey)); System.out.printf("OVERWRITING Row with PK: %s \n", rowKey); diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 90c92189e..ec0db12d3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -34,21 +34,22 @@ import java.io.IOException; import java.io.PrintStream; public class GATKReportUnitTest extends BaseTest { - @Test(enabled = false) + @Test public void testParse() throws Exception { String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; GATKReport report = new GATKReport(reportPath); + Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0); + Assert.assertEquals(report.getTables().size(), 5); GATKReportTable countVariants = report.getTable("CountVariants"); - //Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); - Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); + Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0"); + Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06); GATKReportTable validationReport = report.getTable("ValidationReport"); - //Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); - Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); - Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN"); + Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel"); + Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN); } @DataProvider(name = "rightAlignValues") @@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest { report1.addTable("TableName", "Description"); report1.getTable("TableName").addPrimaryKey("id", displayPK); report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report1.getTable("TableName").set(1, "colA", "NotNum"); - report1.getTable("TableName").set(1, "colB", (byte) 64); + report1.getTable("TableName").set(1, "colB", (char) 64); report2 = new GATKReport(); report2.addTable("TableName", "Description"); report2.getTable("TableName").addPrimaryKey("id", displayPK); report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report2.getTable("TableName").set(2, "colA", "df3"); report2.getTable("TableName").set(2, "colB", 'A'); @@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest { report3.addTable("TableName", "Description"); report3.getTable("TableName").addPrimaryKey("id", displayPK); report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report3.getTable("TableName").set(3, "colA", "df5f"); report3.getTable("TableName").set(3, "colB", 'c'); @@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest { table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); table.addColumn("TrueFalse", false, true, "%B"); - table.set("12df", "SomeInt", 34); + table.set("12df", "SomeInt", Byte.MAX_VALUE); table.set("12df", "SomeFloat", 34.0); table.set("12df", "TrueFalse", true); - table.set("5f", "SomeInt", -1); - table.set("5f", "SomeFloat", 0.000003); + table.set("5f", "SomeInt", Short.MAX_VALUE); + table.set("5f", "SomeFloat", Double.MAX_VALUE); table.set("5f", "TrueFalse", false); - table.set("RZ", "SomeInt", 904948230958203958L); + table.set("RZ", "SomeInt", Long.MAX_VALUE); table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index f0feb207b..22f4f6225 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val table = report.getTable(validation.table) - val key = table.getPrimaryKey(validation.key) + val key = table.getPrimaryKeyByData(validation.key) val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " From bb36cd4adfe0110d393e7c890c38877805930aa4 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 27 Mar 2012 22:51:21 -0400 Subject: [PATCH 2/3] Quick fixes to BQSRGatherer and GATKReportTable * when gathering, be aware that some keys will be missing from some tables. * when a gatktable has no elements, it should still output the header so we know it had no records --- .../org/broadinstitute/sting/gatk/report/GATKReport.java | 7 ++----- .../sting/gatk/walkers/bqsr/RecalibrationReport.java | 7 +++++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 8fbfa96e9..f2291e5ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -177,11 +177,8 @@ public class GATKReport { */ public void print(PrintStream out) { out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); - for (GATKReportTable table : tables.values()) { - if (table.getNumRows() > 0) { - table.write(out); - } - } + for (GATKReportTable table : tables.values()) + table.write(out); } public Collection getTables() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index ce00240b8..897e1645d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -94,8 +94,11 @@ public class RecalibrationReport { BitSet key = entry.getKey(); RecalDatum otherDatum = entry.getValue(); RecalDatum thisDatum = thisTable.get(key); - thisDatum.increment(otherDatum); // add the two datum objects into 'this' - thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it + if (thisDatum == null) + thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it! + else + thisDatum.increment(otherDatum); // add the two datum objects into 'this' + thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it } } }