Fix for int overflow in RankSum calculation

This commit is contained in:
meganshand 2016-06-24 15:31:42 -04:00
parent b14312a14d
commit 556cc69185
6 changed files with 36 additions and 23 deletions

View File

@ -74,12 +74,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c3826794a250e32b0497353ceb1deb26");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "ee5a2f8954f38d6e5d44fe50b22e43a1");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "4eb0d8018da6612cd434491f338ed5a4");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5cb3fe396302f3d4a4a9b7b3cc1877cc");
}
@Test(enabled = true)
@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
//TODO the old MD5 is kept for the record.
//TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach.
// executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "c2fb9b05027c2b0ac9e338d9ddda69b1");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b99416c04ba951577f43fd2d25f46388");
}
}

View File

@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","e22846de4567f576e08e00edda2931d0");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1d27eaa3557dc28c95b9024114d50ad1");
}
@Test(enabled = true)

View File

@ -102,7 +102,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("94ca1e00d4fad9c5279271c2779ff797"));
Arrays.asList("25b710f830749448cd056c9b2e7798ff"));
executeTest("test Multiple SNP alleles", spec);
}
@ -126,7 +126,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("c5aff2572ce09c413e7f5c9e1b3f92d6"));
Arrays.asList("1759c156bc45528504398a7ef4ce5bf8"));
executeTest("test mismatched PLs", spec);
}
}

View File

@ -397,7 +397,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5514cfbcf12954bb12c725b77eaac248"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("22f5a3e9366e611509f03c984f8b4960"));
spec.disableShadowBCF();
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
}

View File

@ -280,23 +280,9 @@ public class MannWhitneyU {
RankedData ranked = calculateRank(series1, series2);
Rank[] ranks = ranked.getRank();
ArrayList<Integer> numOfTies = ranked.getNumOfTies();
int lengthOfRanks = ranks.length;
//Calculate number of ties transformed for formula for Sigma to calculate Z-score
ArrayList<Integer> transformedTies = new ArrayList<>();
for (int count : numOfTies) {
//If every single datapoint is tied then we want to return a p-value of .5 and
//the formula for sigma that includes the number of ties breaks down. Setting
//the number of ties to 0 in this case gets the desired result in the normal
//approximation case.
if (count != ranks.length) {
transformedTies.add((count * count * count) - count);
}
}
double numOfTiesForSigma = 0.0;
for (int count : transformedTies) {
numOfTiesForSigma += count;
}
double numOfTiesForSigma = transformTies(lengthOfRanks, numOfTies);
// Calculate R1 and R2 and U.
float r1 = 0, r2 = 0;
@ -314,6 +300,26 @@ public class MannWhitneyU {
return result;
}
public double transformTies(int numOfRanks, ArrayList<Integer> numOfTies) {
//Calculate number of ties transformed for formula for Sigma to calculate Z-score
ArrayList<Double> transformedTies = new ArrayList<>();
for (int count : numOfTies) {
//If every single datapoint is tied then we want to return a p-value of .5 and
//the formula for sigma that includes the number of ties breaks down. Setting
//the number of ties to 0 in this case gets the desired result in the normal
//approximation case.
if (count != numOfRanks) {
transformedTies.add((Math.pow(count, 3)) - count);
}
}
double numOfTiesForSigma = 0.0;
for (double count : transformedTies) {
numOfTiesForSigma += count;
}
return(numOfTiesForSigma);
}
/**
* Calculates the rank-sum test statisic U (sometimes W) from two sets of input data for a one-sided test
* with an int indicating which group is the dominator. Returns a test statistic object with trueU and number of

View File

@ -30,6 +30,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.ArrayList;
import java.util.Arrays;
/**
@ -139,4 +140,10 @@ public class MWUnitTest extends BaseTest {
System.out.println("1-side p: " + test.getP());
Assert.assertEquals(test.getZ(), Z, DELTA_PRECISION, name);
}
@Test
public void testTooManyTies(){
ArrayList<Integer> listOfNumberOfTies = new ArrayList<>(Arrays.asList(26,3,6,4,13,18,29,36,60,58,87,63,98,125,158,185,193,171,17592,115,100,141,216,298,451,719,1060,1909,3210,5167,7135,10125,11035,3541,732,9));
Assert.assertEquals(rst.transformTies(64890, listOfNumberOfTies), 8.41378729572e+12);
}
}