Contractified version of MannWhitneyU. Some behavior has been changed:
- Running a test when there are no observations of at least one of the sets now breaks the MWU contract
+ MWU returns Pair(Double.NaN,Double.NaN) in these instances to maintain the contract of never returning null
+ No more Double.Infinity values will appear
- RankSumTests now probe the return values for NaNs, and don't annotate if they appear
- For small sets where the probability is calculated recursively, the z-value is now the inversion of the error function
and not the approximate z-value
- UG and Annotator integration tests updated to reflect changes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5845 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b814f4bbd6
commit
480859db50
|
|
@ -52,7 +52,8 @@ public abstract class RankSumTest implements InfoFieldAnnotation, StandardAnnota
|
|||
final Pair<Double,Double> testResults = mannWhitneyU.runOneSidedTest( MannWhitneyU.USet.SET1 );
|
||||
|
||||
final Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
if ( ! Double.isNaN(testResults.first) )
|
||||
map.put(getKeyNames().get(0), String.format("%.3f", testResults.first));
|
||||
return map;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import cern.jet.random.Normal;
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.distribution.NormalDistribution;
|
||||
import org.apache.commons.math.distribution.NormalDistributionImpl;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
@ -16,6 +21,7 @@ import java.util.TreeSet;
|
|||
public class MannWhitneyU {
|
||||
|
||||
private static Normal STANDARD_NORMAL = new Normal(0.0,1.0,null);
|
||||
private static NormalDistribution APACHE_NORMAL = new NormalDistributionImpl(0.0,1.0,1e-2);
|
||||
|
||||
private TreeSet<Pair<Number,USet>> observations;
|
||||
private int sizeSet1;
|
||||
|
|
@ -58,26 +64,37 @@ public class MannWhitneyU {
|
|||
* @param lessThanOther - either Set1 or Set2
|
||||
* @return - u-based z-approximation, and p-value associated with the test (p-value is exact for small n,m)
|
||||
*/
|
||||
@Requires({"validateObservations(observations)", "lessThanOther != null"})
|
||||
@Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
|
||||
public Pair<Double,Double> runOneSidedTest(USet lessThanOther) {
|
||||
long u = calculateOneSidedU(observations, lessThanOther);
|
||||
int n = lessThanOther == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||
int m = lessThanOther == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||
double pval = calculateP(n,m,u,false);
|
||||
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
|
||||
if ( n == 0 || m == 0 ) {
|
||||
// test is uninformative as one or both sets have no observations
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
|
||||
return calculateP(n, m, u, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the standard two-sided test,
|
||||
* returns the u-based z-approximate and p values.
|
||||
* @Returns a pair holding the u and p-value.
|
||||
* @return a pair holding the u and p-value.
|
||||
*/
|
||||
@Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
|
||||
@Requires({"validateObservations(observations)"})
|
||||
public Pair<Double,Double> runTwoSidedTest() {
|
||||
Pair<Long,USet> uPair = calculateTwoSidedU(observations);
|
||||
long u = uPair.first;
|
||||
int n = uPair.second == USet.SET1 ? sizeSet1 : sizeSet2;
|
||||
int m = uPair.second == USet.SET1 ? sizeSet2 : sizeSet1;
|
||||
double pval = calculateP(n,m,u,true);
|
||||
return new Pair<Double,Double>(getZApprox(n,m,u),pval);
|
||||
if ( n == 0 || m == 0 ) {
|
||||
// test is uninformative as one or both sets have no observations
|
||||
return new Pair<Double,Double>(Double.NaN,Double.NaN);
|
||||
}
|
||||
return calculateP(n, m, u, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -86,35 +103,35 @@ public class MannWhitneyU {
|
|||
* @param m - The number of entries in the stochastically larger (dominated) set
|
||||
* @param u - the Mann-Whitney U value
|
||||
* @param twoSided - is the test twosided
|
||||
* @return the (possibly approximate) p-value associated with the MWU test
|
||||
* @return the (possibly approximate) p-value associated with the MWU test, and the (possibly approximate) z-value associated with it
|
||||
* todo -- there must be an approximation for small m and large n
|
||||
*/
|
||||
public static double calculateP(int n, int m, long u, boolean twoSided) {
|
||||
double pval;
|
||||
if ( m == 0 || n == 0 ) {
|
||||
pval = 1.0;
|
||||
} else if ( n > 8 && m > 8 ) {
|
||||
@Requires({"m > 0","n > 0"})
|
||||
@Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
|
||||
public static Pair<Double,Double> calculateP(int n, int m, long u, boolean twoSided) {
|
||||
Pair<Double,Double> zandP;
|
||||
if ( n > 8 && m > 8 ) {
|
||||
// large m and n - normal approx
|
||||
pval = calculatePNormalApproximation(n,m,u);
|
||||
zandP = calculatePNormalApproximation(n,m,u, twoSided);
|
||||
} else if ( n > 5 && m > 7 ) {
|
||||
// large m, small n - sum uniform approx
|
||||
// todo -- find the appropriate regimes where this approximation is actually better
|
||||
// todo -- find the appropriate regimes where this approximation is actually better enough to merit slowness
|
||||
// pval = calculatePUniformApproximation(n,m,u);
|
||||
pval = calculatePNormalApproximation(n,m,u);
|
||||
zandP = calculatePNormalApproximation(n, m, u, twoSided);
|
||||
} else if ( n > 8 || m > 8 ) {
|
||||
pval = calculatePFromTable(n,m,u);
|
||||
zandP = calculatePFromTable(n, m, u, twoSided);
|
||||
} else {
|
||||
// small m and n - full approx
|
||||
pval = calculatePRecursively(n,m,u);
|
||||
zandP = calculatePRecursively(n,m,u, twoSided);
|
||||
}
|
||||
|
||||
return twoSided ? 2*pval : pval;
|
||||
return zandP;
|
||||
}
|
||||
|
||||
public static double calculatePFromTable(int n, int m, long u) {
|
||||
public static Pair<Double,Double> calculatePFromTable(int n, int m, long u, boolean twoSided) {
|
||||
// todo -- actually use a table for:
|
||||
// todo - n large, m small
|
||||
return calculatePNormalApproximation(n,m,u);
|
||||
return calculatePNormalApproximation(n,m,u, twoSided);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -122,11 +139,18 @@ public class MannWhitneyU {
|
|||
* @param n - The number of entries in the stochastically smaller (dominant) set
|
||||
* @param m - The number of entries in the stochastically larger (dominated) set
|
||||
* @param u - the Mann-Whitney U value
|
||||
* @param twoSided - whether the test should be two sided
|
||||
* @return p-value associated with the normal approximation
|
||||
*/
|
||||
public static double calculatePNormalApproximation(int n,int m,long u) {
|
||||
@Requires({"m > 0","n > 0"})
|
||||
@Ensures({"result != null", "! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
|
||||
public static Pair<Double,Double> calculatePNormalApproximation(int n,int m,long u, boolean twoSided) {
|
||||
double z = getZApprox(n,m,u);
|
||||
return z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z);
|
||||
if ( twoSided ) {
|
||||
return new Pair<Double,Double>(z,2.0*(z < 0 ? STANDARD_NORMAL.cdf(z) : 1.0-STANDARD_NORMAL.cdf(z)));
|
||||
} else {
|
||||
return new Pair<Double,Double>(z,STANDARD_NORMAL.cdf(z));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,8 +158,10 @@ public class MannWhitneyU {
|
|||
* @param n - The number of entries in the stochastically smaller (dominant) set
|
||||
* @param m - The number of entries in the stochastically larger (dominated) set
|
||||
* @param u - the Mann-Whitney U value
|
||||
* @return z-score associated with the normal approximation
|
||||
* @return the asymptotic z-approximation corresponding to the MWU p-value for n < m
|
||||
*/
|
||||
@Requires({"m > 0","n > 0"})
|
||||
@Ensures({"result != null", "! Double.isInfinite(result)"})
|
||||
private static double getZApprox(int n, int m, long u) {
|
||||
double mean = ( ((long)m)*n+1.0)/2;
|
||||
double var = (((long) n)*m*(n+m+1.0))/12;
|
||||
|
|
@ -150,6 +176,8 @@ public class MannWhitneyU {
|
|||
* @param m - The number of entries in the stochastically larger (dominated) set
|
||||
* @param u - mann-whitney u value
|
||||
* @return p-value according to sum of uniform approx
|
||||
* todo -- this is currently not called due to not having a good characterization of where it is significantly more accurate than the
|
||||
* todo -- normal approxmation (e.g. enough to merit the runtime hit)
|
||||
*/
|
||||
public static double calculatePUniformApproximation(int n, int m, long u) {
|
||||
long R = u + (n*(n+1))/2;
|
||||
|
|
@ -190,6 +218,8 @@ public class MannWhitneyU {
|
|||
* @param observed - the observed data
|
||||
* @return the minimum of the U counts (set1 dominates 2, set 2 dominates 1)
|
||||
*/
|
||||
@Requires({"observed != null", "observed.size() > 0", "validateObservations(observed)"})
|
||||
@Ensures({"result != null","result.first > 0"})
|
||||
public static Pair<Long,USet> calculateTwoSidedU(TreeSet<Pair<Number,USet>> observed) {
|
||||
int set1SeenSoFar = 0;
|
||||
int set2SeenSoFar = 0;
|
||||
|
|
@ -225,6 +255,8 @@ public class MannWhitneyU {
|
|||
* @param dominator - the set that is hypothesized to be stochastically dominating
|
||||
* @return the u-statistic associated with the hypothesis
|
||||
*/
|
||||
@Requires({"observed != null","dominator != null","observed.size() > 0","validateObservations(observed)"})
|
||||
@Ensures({"return > 0"})
|
||||
public static long calculateOneSidedU(TreeSet<Pair<Number,USet>> observed,USet dominator) {
|
||||
long otherBeforeDominator = 0l;
|
||||
int otherSeenSoFar = 0;
|
||||
|
|
@ -246,15 +278,31 @@ public class MannWhitneyU {
|
|||
* @param n: number of set-one entries (hypothesis: set one is stochastically less than set two)
|
||||
* @param m: number of set-two entries
|
||||
* @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
|
||||
* @param twoSided: whether the test is two sided or not. The recursive formula is symmetric, multiply by two for two-sidedness.
|
||||
* @return the probability under the hypothesis that all sequences are equally likely of finding a set-two entry preceding a set-one entry "u" times.
|
||||
*/
|
||||
public static double calculatePRecursively(int n, int m, long u) {
|
||||
@Requires({"m > 0","n > 0","u > 0"})
|
||||
@Ensures({"result != null","! Double.isInfinite(result.getFirst())", "! Double.isInfinite(result.getSecond())"})
|
||||
public static Pair<Double,Double> calculatePRecursively(int n, int m, long u, boolean twoSided) {
|
||||
if ( m > 8 && n > 5 ) { throw new StingException(String.format("Please use the appropriate (normal or sum of uniform) approximation. Values n: %d, m: %d",n,m)); }
|
||||
return cpr(n,m,u);
|
||||
double p = cpr(n,m,u);
|
||||
double z;
|
||||
try {
|
||||
z = APACHE_NORMAL.inverseCumulativeProbability(p);
|
||||
} catch (MathException me) {
|
||||
throw new StingException("A math exception occurred in inverting the probability",me);
|
||||
}
|
||||
|
||||
return new Pair<Double,Double>(z,(twoSided ? 2.0*p : p));
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook into CPR with sufficient warning (for testing purposes)
|
||||
* calls into that recursive calculation.
|
||||
* @param n: number of set-one entries (hypothesis: set one is stochastically less than set two)
|
||||
* @param m: number of set-two entries
|
||||
* @param u: number of set-two entries that precede set-one entries (e.g. 0,1,0,1,0 -> 3 )
|
||||
* @return same as cpr
|
||||
* @deprecated - for testing only (really)
|
||||
*/
|
||||
@Deprecated
|
||||
|
|
@ -263,10 +311,11 @@ public class MannWhitneyU {
|
|||
}
|
||||
|
||||
/**
|
||||
* @doc: just a shorter name for calculatePRecursively. See Mann, Whitney, [1947]
|
||||
* @n: number of set-1 entries
|
||||
* @m: number of set-2 entries
|
||||
* @u: number of times a set-2 entry as preceded a set-1 entry
|
||||
* : just a shorter name for calculatePRecursively. See Mann, Whitney, [1947]
|
||||
* @param n: number of set-1 entries
|
||||
* @param m: number of set-2 entries
|
||||
* @param u: number of times a set-2 entry as preceded a set-1 entry
|
||||
* @return recursive p-value
|
||||
*/
|
||||
private static double cpr(int n, int m, long u) {
|
||||
if ( u < 0 || n == 0 && m == 0 ) {
|
||||
|
|
@ -301,6 +350,27 @@ public class MannWhitneyU {
|
|||
return new Pair<Integer,Integer>(sizeSet1,sizeSet2);
|
||||
}
|
||||
|
||||
protected static boolean validateObservations(TreeSet<Pair<Number,USet>> tree) {
|
||||
boolean seen1 = false;
|
||||
boolean seen2 = false;
|
||||
boolean seenInvalid = false;
|
||||
for ( Pair<Number,USet> p : tree) {
|
||||
if ( ! seen1 && p.getSecond() == USet.SET1 ) {
|
||||
seen1 = true;
|
||||
}
|
||||
|
||||
if ( ! seen2 && p.getSecond() == USet.SET2 ) {
|
||||
seen2 = true;
|
||||
}
|
||||
|
||||
if ( Double.isNaN(p.getFirst().doubleValue()) || Double.isInfinite(p.getFirst().doubleValue())) {
|
||||
seenInvalid = true;
|
||||
}
|
||||
|
||||
return ! seenInvalid && seen1 && seen2;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A comparator class which uses dithering on tie-breaking to ensure that the internal treeset drops no values
|
||||
* and to ensure that rank ties are broken at random.
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("4fb98442f2c09247fab296872abb5655"));
|
||||
Arrays.asList("f559fa1754f82a00454894ecd518a3b0"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G \"Standard\" -B:variant,VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("c92e614ec796a5b39c5fdcc426410a9b"));
|
||||
Arrays.asList("709a1f482cce68992c637da3cff824a8 "));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,13 +28,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("a1add49e71306d72252ee3af3c40d2da"));
|
||||
Arrays.asList("14a3814607d8ff18ef40f49c454739f2"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSamplePilot2AndRecallingWithAlleles() {
|
||||
String md5 = "93d2571e686740c5c775b1fb862b62ec";
|
||||
String md5 = "6aa91d95c360693e5c5f459c132bda32";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
|
||||
|
|
@ -58,7 +58,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -B:alleles,vcf " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("0d15caf026c74bf859900e717b7fe996"));
|
||||
Arrays.asList("8af463870c0f66cd3fccc5734ef86cb0"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("66e3fc85a6037a24bc2b4896f79ea263"));
|
||||
Arrays.asList("6276a53a4dbf87c59dc84b356cca332f"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "64fd80527aa1b00fbbb4a77f1d5df48c";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "af45d5a45ad2f030d777926717fc9049";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -137,9 +137,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "f91280e3c12646207b3d06f9a563699b" );
|
||||
e.put( "--min_mapping_quality_score 26", "6f9e99469ec5f5ef333a5075e8149674" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "430cfe737affe6b179725e1eb0b95655" );
|
||||
e.put( "--min_base_quality_score 26", "93ad1ed265be26e0c1fe05975d01edd1" );
|
||||
e.put( "--min_mapping_quality_score 26", "01882d04939a5a6085fc5cadec019d3e" );
|
||||
e.put( "--p_nonref_model GRID_SEARCH", "7b0b7f536c66afe006174ff9a094dd95" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -152,9 +152,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "81e01f685e98d8f18fa288beb1246881" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "3916157145aea448a38e4f12a0c381ae" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "7652e2362e7fc038bc0f41a03df5d9f4" );
|
||||
e.put( "-sites_only", "919d4b034ef0e07c8d50ea05b8fa0725" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "a9bf545b6078eb772b9d01db4eaa4deb" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "ab32c0f15401809d861cd8a0edb6f8ff" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -168,12 +168,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("6f9e99469ec5f5ef333a5075e8149674"));
|
||||
Arrays.asList("01882d04939a5a6085fc5cadec019d3e"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("c2020de023bb3d731e17ed97aaa85c3d"));
|
||||
Arrays.asList("00807e8a5bc9172c34a5b4bd2e8afba7"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -185,8 +185,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "a56d3790bb2e18cec5a3099079431f19" );
|
||||
e.put( 1.0 / 1850, "119dff78b9f3b415ef3ed9d1bc46dd04" );
|
||||
e.put( 0.01, "7db98e357bcebec5d362aacb0b176ca8" );
|
||||
e.put( 1.0 / 1850, "f3a25877f5ec5e0fa436a1d29b6a9b04" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -210,7 +210,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("cd84c86ed02e4b052b96f76270ffd7fc"));
|
||||
Arrays.asList("b58d99ce5a71ed1dbd351cb509a83ac3"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -229,7 +229,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("9b71db6bebce80022c8fde5db09de65f"));
|
||||
Arrays.asList("032e33e33ede77d87c9442fb9895a2f8"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.oneoffprojects.walkers.newassociation.features.ReadFeatureAggregator;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -7,10 +9,6 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -56,14 +54,15 @@ public class RFAUnitTest extends BaseTest {
|
|||
Assert.assertEquals(MannWhitneyU.calculateOneSidedU(mwu2.getObservations(),MannWhitneyU.USet.SET2),30l);
|
||||
Pair<Integer,Integer> sizes = mwu2.getSetSizes();
|
||||
Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.first,sizes.second,10l),0.4180519701814064,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.first,sizes.second,10l),0.021756021756021756,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.first,sizes.second,10l),0.06214143703127617,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.first,sizes.second,10l,false).second,0.021756021756021756,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.first,sizes.second,10l,false).second,0.06214143703127617,1e-14);
|
||||
logger.warn("Testing two-sided");
|
||||
Assert.assertEquals((double)mwu2.runTwoSidedTest().second,2*0.021756021756021756,1e-8);
|
||||
|
||||
// tests using the hypothesis that set 1 dominates set 2 (U value = 30) -- empirical should be identical, normall approx close, uniform way off
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.second,sizes.first,30l),0.08216463976903321,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(sizes.second,sizes.first,30l,true).second,2.0*0.08216463976903321,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(sizes.second,sizes.first,30l),0.0023473625009328147,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,30l),0.021756021756021756,1e-14); // note -- exactly same value as above
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursively(sizes.second,sizes.first,30l,false).second,0.021756021756021756,1e-14); // note -- exactly same value as above
|
||||
|
||||
logger.warn("Set 1");
|
||||
Assert.assertEquals((double)mwu2.runOneSidedTest(MannWhitneyU.USet.SET1).second,0.021756021756021756,1e-8);
|
||||
|
|
@ -80,7 +79,7 @@ public class RFAUnitTest extends BaseTest {
|
|||
long u = MannWhitneyU.calculateOneSidedU(mwu3.getObservations(),MannWhitneyU.USet.SET1);
|
||||
Pair<Integer,Integer> nums = mwu3.getSetSizes();
|
||||
Assert.assertEquals(MannWhitneyU.calculatePRecursivelyDoNotCheckValuesEvenThoughItIsSlow(nums.first,nums.second,u),3.665689149560116E-4,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(nums.first,nums.second,u),0.0032240865760884696,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePNormalApproximation(nums.first,nums.second,u,false).second,0.0032240865760884696,1e-14);
|
||||
Assert.assertEquals(MannWhitneyU.calculatePUniformApproximation(nums.first,nums.second,u),0.0026195003025784036,1e-14);
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue