Fix for int overflow in RankSum calculation
This commit is contained in:
parent
b14312a14d
commit
556cc69185
|
|
@ -74,12 +74,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c3826794a250e32b0497353ceb1deb26");
|
||||
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "ee5a2f8954f38d6e5d44fe50b22e43a1");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "4eb0d8018da6612cd434491f338ed5a4");
|
||||
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "5cb3fe396302f3d4a4a9b7b3cc1877cc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
//TODO the old MD5 is kept for the record.
|
||||
//TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach.
|
||||
// executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "c2fb9b05027c2b0ac9e338d9ddda69b1");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b99416c04ba951577f43fd2d25f46388");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","e22846de4567f576e08e00edda2931d0");
|
||||
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1d27eaa3557dc28c95b9024114d50ad1");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("94ca1e00d4fad9c5279271c2779ff797"));
|
||||
Arrays.asList("25b710f830749448cd056c9b2e7798ff"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -126,7 +126,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("c5aff2572ce09c413e7f5c9e1b3f92d6"));
|
||||
Arrays.asList("1759c156bc45528504398a7ef4ce5bf8"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -397,7 +397,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16",
|
||||
HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5514cfbcf12954bb12c725b77eaac248"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("22f5a3e9366e611509f03c984f8b4960"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -280,23 +280,9 @@ public class MannWhitneyU {
|
|||
RankedData ranked = calculateRank(series1, series2);
|
||||
Rank[] ranks = ranked.getRank();
|
||||
ArrayList<Integer> numOfTies = ranked.getNumOfTies();
|
||||
int lengthOfRanks = ranks.length;
|
||||
|
||||
//Calculate number of ties transformed for formula for Sigma to calculate Z-score
|
||||
ArrayList<Integer> transformedTies = new ArrayList<>();
|
||||
for (int count : numOfTies) {
|
||||
//If every single datapoint is tied then we want to return a p-value of .5 and
|
||||
//the formula for sigma that includes the number of ties breaks down. Setting
|
||||
//the number of ties to 0 in this case gets the desired result in the normal
|
||||
//approximation case.
|
||||
if (count != ranks.length) {
|
||||
transformedTies.add((count * count * count) - count);
|
||||
}
|
||||
}
|
||||
|
||||
double numOfTiesForSigma = 0.0;
|
||||
for (int count : transformedTies) {
|
||||
numOfTiesForSigma += count;
|
||||
}
|
||||
double numOfTiesForSigma = transformTies(lengthOfRanks, numOfTies);
|
||||
|
||||
// Calculate R1 and R2 and U.
|
||||
float r1 = 0, r2 = 0;
|
||||
|
|
@ -314,6 +300,26 @@ public class MannWhitneyU {
|
|||
return result;
|
||||
}
|
||||
|
||||
public double transformTies(int numOfRanks, ArrayList<Integer> numOfTies) {
|
||||
//Calculate number of ties transformed for formula for Sigma to calculate Z-score
|
||||
ArrayList<Double> transformedTies = new ArrayList<>();
|
||||
for (int count : numOfTies) {
|
||||
//If every single datapoint is tied then we want to return a p-value of .5 and
|
||||
//the formula for sigma that includes the number of ties breaks down. Setting
|
||||
//the number of ties to 0 in this case gets the desired result in the normal
|
||||
//approximation case.
|
||||
if (count != numOfRanks) {
|
||||
transformedTies.add((Math.pow(count, 3)) - count);
|
||||
}
|
||||
}
|
||||
|
||||
double numOfTiesForSigma = 0.0;
|
||||
for (double count : transformedTies) {
|
||||
numOfTiesForSigma += count;
|
||||
}
|
||||
|
||||
return(numOfTiesForSigma);
|
||||
}
|
||||
/**
|
||||
* Calculates the rank-sum test statisic U (sometimes W) from two sets of input data for a one-sided test
|
||||
* with an int indicating which group is the dominator. Returns a test statistic object with trueU and number of
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -139,4 +140,10 @@ public class MWUnitTest extends BaseTest {
|
|||
System.out.println("1-side p: " + test.getP());
|
||||
Assert.assertEquals(test.getZ(), Z, DELTA_PRECISION, name);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTooManyTies(){
|
||||
ArrayList<Integer> listOfNumberOfTies = new ArrayList<>(Arrays.asList(26,3,6,4,13,18,29,36,60,58,87,63,98,125,158,185,193,171,17592,115,100,141,216,298,451,719,1060,1909,3210,5167,7135,10125,11035,3541,732,9));
|
||||
Assert.assertEquals(rst.transformTies(64890, listOfNumberOfTies), 8.41378729572e+12);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue