Updating MD5 to reflect canonical ordering of calculation
-- We should no longer have md5s changing because of hashmaps changing their sort order on us -- Added GenotypeLikelihoodsUnitTests -- Refactored ExactAFCaclculation to put the PL -> QUAL calculation in the GenotypeLikelihoods class to avoid the code copy.
This commit is contained in:
parent
73119c8e3c
commit
b7b57ef39a
|
|
@ -29,10 +29,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -354,11 +351,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// and will add no-call genotype to GL's in a second pass
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||
|
||||
double qual = Double.NEGATIVE_INFINITY;
|
||||
double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||
|
||||
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
||||
bestGTguess = Utils.findIndexOfMaxEntry(likelihoods);
|
||||
}
|
||||
else {
|
||||
int newIdx = tracebackArray[k][startIdx];;
|
||||
|
|
@ -366,20 +362,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
startIdx = newIdx;
|
||||
}
|
||||
|
||||
/* System.out.format("Sample: %s GL:",sample);
|
||||
for (int i=0; i < likelihoods.length; i++)
|
||||
System.out.format("%1.4f, ",likelihoods[i]);
|
||||
*/
|
||||
|
||||
for (int i=0; i < likelihoods.length; i++) {
|
||||
if (i==bestGTguess)
|
||||
continue;
|
||||
if (likelihoods[i] >= qual)
|
||||
qual = likelihoods[i];
|
||||
}
|
||||
// qual contains now max(likelihoods[k]) for all k != bestGTguess
|
||||
qual = likelihoods[bestGTguess] - qual;
|
||||
|
||||
// likelihoods are stored row-wise in lower triangular matrix. IE
|
||||
// for 2 alleles they have ordering AA,AB,BB
|
||||
// for 3 alleles they are ordered AA,AB,BB,AC,BC,CC
|
||||
|
|
@ -407,16 +389,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
break;
|
||||
}
|
||||
|
||||
if (qual < 0) {
|
||||
// QUAL can be negative if the chosen genotype is not the most likely one individually.
|
||||
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen on
|
||||
double[] normalized = MathUtils.normalizeFromLog10(likelihoods);
|
||||
double chosenGenotype = normalized[bestGTguess];
|
||||
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||
}
|
||||
final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods);
|
||||
//System.out.println(myAlleles.toString());
|
||||
calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
|
||||
|
||||
}
|
||||
|
||||
for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) {
|
||||
|
|
|
|||
|
|
@ -117,28 +117,34 @@ public class GenotypeLikelihoods {
|
|||
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||
public double getLog10GQ(Genotype.Type genotype){
|
||||
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
|
||||
return getQualFromLikelihoods(genotype.ordinal() - 1 /* NO_CALL IS FIRST */, getAsVector());
|
||||
}
|
||||
|
||||
public static double getQualFromLikelihoods(int iOfChoosenGenotype, double[] likelihoods){
|
||||
if(likelihoods == null)
|
||||
return Double.NEGATIVE_INFINITY;
|
||||
|
||||
double qual = Double.NEGATIVE_INFINITY;
|
||||
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
|
||||
if(likelihood.getKey() == genotype)
|
||||
for (int i=0; i < likelihoods.length; i++) {
|
||||
if (i==iOfChoosenGenotype)
|
||||
continue;
|
||||
if(likelihood.getValue() > qual)
|
||||
qual = likelihood.getValue();
|
||||
if (likelihoods[i] >= qual)
|
||||
qual = likelihoods[i];
|
||||
}
|
||||
|
||||
//Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
|
||||
qual = likelihoods.get(genotype) - qual;
|
||||
// qual contains now max(likelihoods[k]) for all k != bestGTguess
|
||||
qual = likelihoods[iOfChoosenGenotype] - qual;
|
||||
|
||||
//Quality of other genotypes 1-P(G)
|
||||
if (qual < 0) {
|
||||
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
|
||||
double chosenGenotype = normalized[genotype.ordinal()-1];
|
||||
qual = Math.log10(1.0 - chosenGenotype);
|
||||
// QUAL can be negative if the chosen genotype is not the most likely one individually.
|
||||
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen one
|
||||
double[] normalized = MathUtils.normalizeFromLog10(likelihoods);
|
||||
double chosenGenotype = normalized[iOfChoosenGenotype];
|
||||
return Math.log10(1.0 - chosenGenotype);
|
||||
} else {
|
||||
// invert the size, as this is the probability of making an error
|
||||
return -1 * qual;
|
||||
}
|
||||
return -1 * qual;
|
||||
}
|
||||
|
||||
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.testng.annotations.Test;
|
|||
|
||||
import static java.lang.Math.log10;
|
||||
|
||||
public class GenotypeLikelihoodsUnitTest extends BaseTest {
|
||||
public class GenotypePriorsUnitTest extends BaseTest {
|
||||
private final static double DELTA = 1e-8;
|
||||
|
||||
@Test
|
||||
|
|
@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("c93def488de12fb3b8199e001b7a24a8"));
|
||||
Arrays.asList("f5c8bd653aed02059b9f377833eae5bb"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("cef4bd72cbbe72f28e9c72e2818b4708"));
|
||||
Arrays.asList("ea5b5dcea3a6eef7ec60070b551c994e"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("9834f0cef1cd6ba4943a5aaee1ee8be8"));
|
||||
Arrays.asList("030ce4feb4bbcf700caba82a45cc45f2"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("6762b72ae60155ad71738d7c76b80e4b"));
|
||||
Arrays.asList("3ccce5d909f8f128e496f6841836e5f7"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "bc71dba7bbdb23e7d5cc60461fdd897b";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -84,7 +84,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "b9504e446b9313559c3ed97add7e8dc1";
|
||||
String md5 = "95614280c565ad90f8c000376fef822c";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -115,8 +115,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCallingParameters() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--min_base_quality_score 26", "bb3f294eab3e2cf52c70e63b23aac5ee" );
|
||||
e.put( "--computeSLOD", "eb34979efaadba1e34bd82bcacf5c722" );
|
||||
e.put( "--min_base_quality_score 26", "7acb1a5aee5fdadb0cc0ea07a212efc6" );
|
||||
e.put( "--computeSLOD", "e9d23a08472e4e27b4f25e844f5bad57" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -129,9 +129,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-sites_only", "d40114aa201aa33ff5f174f15b6b73af" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "3c681b053fd2280f3c42041d24243752" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "eafa6d71c5ecd64dfee5d7a3f60e392e" );
|
||||
e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
|
||||
e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "94e53320f14c5ff29d62f68d36b46fcd" );
|
||||
e.put( "--output_mode EMIT_ALL_SITES", "73ad1cc41786b12c5f0e6f3e9ec2b728" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -145,12 +145,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("c71ca370947739cb7d87b59452be7a07"));
|
||||
Arrays.asList("902327e8a45fe585c8dfd1a7c4fcf60f"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("1c0a599d475cc7d5e745df6e9b6c0d29"));
|
||||
Arrays.asList("2343ac8113791f4e79643b333b34afc8"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -162,8 +165,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "f84da90c310367bd51f2ab6e346fa3d8" );
|
||||
e.put( 1.0 / 1850, "5791e7fef40d4412b6d8f84e0a809c6c" );
|
||||
e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" );
|
||||
e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -187,7 +190,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("9cc9538ac83770e12bd0830d285bfbd0"));
|
||||
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -206,7 +209,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("eaf8043edb46dfbe9f97ae03baa797ed"));
|
||||
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -225,7 +228,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
|
||||
Arrays.asList("a64d2e65b5927260e4ce0d948760cc5c"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -240,7 +243,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("5fe98ee853586dc9db58f0bc97daea63"));
|
||||
Arrays.asList("2ad52c2e75b3ffbfd8f03237c444e8e6"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -253,7 +256,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
|
||||
Arrays.asList("69107157632714150fc068d412e31939"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -263,7 +266,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("81a1035e59cd883e413e62d34265c1a2"));
|
||||
Arrays.asList("4ffda07590e06d58ed867ae326d74b2d"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -273,7 +276,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("102b7d915f21dff0a9b6ea64c4c7d409"));
|
||||
Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -283,7 +286,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("5900344f97bbac35d147a0a7c2bf1d0c"));
|
||||
Arrays.asList("f93f8a35b47bcf96594ada55e2312c73"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -292,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("45e7bf21cd6358921626404e7ae76c69"));
|
||||
Arrays.asList("1e02f57fafaa41db71c531eb25e148e1"));
|
||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -105,8 +105,8 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
|
||||
|
||||
//GQ for the other genotypes
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF), -1 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR), -1 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF), Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
|
||||
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR), Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
|
||||
|
||||
//Test missing likelihoods
|
||||
gl = new GenotypeLikelihoods(".");
|
||||
|
|
@ -116,6 +116,18 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testgetQualFromLikelihoods(){
|
||||
double[] likelihoods = new double[]{-1, 0, -2};
|
||||
// qual values we expect for each possible "best" genotype
|
||||
double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294};
|
||||
|
||||
for ( int i = 0; i < likelihoods.length; i++ ) {
|
||||
Assert.assertEquals(GenotypeLikelihoods.getQualFromLikelihoods(i, likelihoods), expectedQuals[i], 1e-6,
|
||||
"GQ value for genotype " + i + " was not calculated correctly");
|
||||
}
|
||||
}
|
||||
|
||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||
Assert.assertEquals(v1.length, v2.length);
|
||||
for ( int i = 0; i < v1.length; i++ ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue