Fix for NaNs in the rank sum tests.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4938 348d0f76-0448-11de-a6fe-93d51630548a
2011-01-05 15:21:30 +00:00 · 2011-01-05 15:21:30 +00:00 · 4ac0590744
parent 445ae06a7a
commit 4ac0590744
2 changed files with 8 additions and 2 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@@ -72,7 +72,7 @@ public abstract class RankSumTest implements InfoFieldAnnotation, ExperimentalAn
        }

        final Map<String, Object> map = new HashMap<String, Object>();
-        map.put(getKeyNames().get(0), String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
+        map.put(getKeyNames().get(0), String.format("%.3f", Math.abs(QualityUtils.phredScaleErrorRate(pvalue))));
        return map;
    }

--- a/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java
+++ b/java/src/org/broadinstitute/sting/utils/WilcoxonRankSum.java
@@ -124,8 +124,9 @@ public class WilcoxonRankSum {
    // calculate normal approximation of the p-value
    // returns -1 when unable to calculate it (too few data points)
    public double getPValue(WILCOXON_H0 h0) {
-        if ( observations.size() == 0 )
+        if ( observations.size() == 0 ) {
            return -1.0;
+        }

        // dither to break rank ties
        dither();
@@ -144,6 +145,11 @@ public class WilcoxonRankSum {
        }
        int n2 = observations.size() - n1;

+        if ( n1 == 0 || n2 == 0 ) {
+            // one of the sets is empty so there is no information
+            return -1.0;
+        }
+
        // todo -- these are actually integers
        // we want the smaller of U1 and U2
        double U1 = sum - (n1 * (n1 + 1.0) / 2.0);