From cbd21c6339f8e65c1aad94a34175d9666b001cb7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 29 Mar 2012 17:49:42 -0400 Subject: [PATCH] Nasty, nasty..... VariantEval is overly abusive of the GATKReport (lack of) spec. 1. It converts numeric values (longs, integers and doubles) to string before sending to the Report, then expects it to decipher that those were actually numbers. 2. Worse, the stratification modules somehow instead of sending the actual values to the report table, sends a string with the value "unknown" and then abuses the GATKReport spec to convert those "unknown" placeholder values with numbers. Then again, it expects the report to know those are numbers, not strings. Now that the GATKReport HAS specs, VariantEval needs to be overhauled to conform with that. In the meantime, I have added special ad-hoc treatment to these wrong contracts. It works, and the integration tests all passed without changing any MD5's, but right after Mark and Ryan commit their VariantEval refactors, I will step in to change the way it interacts with the GATKReport, so we can clean up the GATKReport. No wonder, the printing needed to be O(n^2). --- .../sting/gatk/report/GATKReportColumn.java | 5 +++-- .../sting/gatk/report/GATKReportTable.java | 4 ++++ .../stratifications/IntervalStratification.java | 9 ++++----- .../gatk/walkers/bqsr/BQSRGathererUnitTest.java | 16 ++++++++++++++++ .../varianteval/VariantEvalIntegrationTest.java | 2 +- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 2db22679a..8b54442b0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -132,6 +132,7 @@ public class GATKReportColumn extends LinkedHashMap { private static final Collection RIGHT_ALIGN_STRINGS = Arrays.asList( "null", "NA", + "unknown", String.valueOf(Double.POSITIVE_INFINITY), String.valueOf(Double.NEGATIVE_INFINITY), String.valueOf(Double.NaN)); @@ -144,7 +145,7 @@ public class GATKReportColumn extends LinkedHashMap { * @return true if the value is a right alignable */ protected static boolean isRightAlign(String value) { - return value == null || RIGHT_ALIGN_STRINGS.contains(value) || NumberUtils.isNumber(value); + return value == null || RIGHT_ALIGN_STRINGS.contains(value) || NumberUtils.isNumber(value.trim()); } /** @@ -213,7 +214,7 @@ public class GATKReportColumn extends LinkedHashMap { public Object put(Object key, Object value) { if (value != null) { String formatted = formatValue(value); - if (!formatted.equals("")) { + if (!formatted.equals("") && !formatted.equals("unknown")) { updateMaxWidth(formatted); updateFormat(formatted); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 44d70ac4b..58002bd14 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -60,6 +60,8 @@ public class GATKReportTable { private static final String COULD_NOT_READ_DATA_LINE = "Could not read a data line of this table -- "; private static final String COULD_NOT_READ_EMPTY_LINE = "Could not read the last empty line of this table -- "; private static final String OLD_GATK_TABLE_VERSION = "We no longer support older versions of the GATK Tables"; + + private static final String NUMBER_CONVERSION_EXCEPTION = "String is a number but is not a long or a double: "; public GATKReportTable(BufferedReader reader, GATKReportVersion version) { int counter = 0; @@ -413,6 +415,8 @@ public class GATKReportTable { // This code below is bs. Why am do I have to conform to bad code // Below is some code to convert a string into its appropriate type. + + // I second Roger's rant! // If we got a string but the column is not a String type Object newValue = null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java index d91422a7e..879e6066f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java @@ -26,18 +26,17 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import net.sf.picard.util.IntervalTree; import org.apache.log4j.Logger; -import org.broad.tribble.Feature; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; /** * Stratifies the variants by whether they overlap an interval in the set provided on the command line. diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index fe83dce22..dc6d1c512 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -2,10 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.bqsr; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintStream; import java.util.LinkedList; import java.util.List; @@ -18,6 +21,19 @@ public class BQSRGathererUnitTest { private static File recal = new File("public/testdata/exampleGRP.grp"); + @Test(enabled = true) + public void test(){ + PrintStream out; + try { + File f = new File("foo2.grp"); + out = new PrintStream(f); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("f"); + } + GATKReport report = new GATKReport("foo.grp"); + report.print(out); + } + //todo -- this test doesnt work because the primary keys in different tables are not the same. Need to either implement "sort" for testing purposes on GATKReport or have a sophisticated comparison measure @Test(enabled = false) public void testCombineSimilarFiles() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 610733d9c..128f11a31 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -439,7 +439,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "--dbsnp " + b37dbSNP132, + "--dbsnp " + "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_132_b37.leftAligned.vcf", "--eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants",