Merge pull request #734 from broadinstitute/vrr_fix_mleac_general_ploidy_exact_af_calculator

Fixed MLEAC and QUAL inaccuracy in GeneralPloidyExactAFCalculator.
This commit is contained in:
Valentin Ruano Rubio 2014-09-24 11:52:37 -04:00
commit f7fc9cd839
7 changed files with 20 additions and 19 deletions

View File

@ -152,7 +152,7 @@ public abstract class DiploidExactAFCalculator extends ExactAFCalculator {
final double log10LofK = set.getLog10Likelihoods()[set.getLog10Likelihoods().length-1];
// can we abort early because the log10Likelihoods are so small?
if ( stateTracker.abort(log10LofK, set.getACcounts(), true) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), true, false) ) {
//if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;

View File

@ -266,10 +266,8 @@ public class GeneralPloidyExactAFCalculator extends ExactAFCalculator {
if (!Double.isInfinite(log10LofK))
newPool.add(set);
// TODO -- change false to true this correct line when the implementation of this model is optimized (it's too slow now to handle this fix)
if ( stateTracker.abort(log10LofK, set.getACcounts(), false) ) {
if ( stateTracker.abort(log10LofK, set.getACcounts(), true, true) )
return log10LofK;
}
// iterate over higher frequencies if possible
// by convention, ACcounts contained in set have full vector of possible pool ac counts including ref count.

View File

@ -48,8 +48,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.MathUtils;
import java.util.Arrays;
import java.util.HashMap;
@ -148,11 +148,13 @@ final class StateTracker {
/**
* @return true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
*/
private boolean isLowerAC(final ExactACcounts otherACs) {
private boolean isLowerAC(final ExactACcounts otherACs, final boolean otherACsContainsReference) {
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < otherACcounts.length; i++ ) {
if ( alleleCountsOfMLE[i] > otherACcounts[i] )
final int firstAltAlleleIndex = otherACsContainsReference ? 1 : 0;
for ( int i = firstAltAlleleIndex; i < otherACcounts.length; i++ ) {
if ( alleleCountsOfMLE[i - firstAltAlleleIndex] > otherACcounts[i] )
return false;
}
@ -164,10 +166,11 @@ final class StateTracker {
*
* @param log10LofK the log10LofK of these ACs
* @param ACs the ACs of this state
* @param exactACcountsContainReference whether the {@code ACs} contains the reference allele count (index == 0) beside all other alternative alleles.
* @return return true if there's no reason to continue with subpaths of AC, or false otherwise
*/
protected boolean abort( final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs ) {
return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs));
protected boolean abort(final double log10LofK, final ExactACcounts ACs, final boolean enforceLowerACs, final boolean exactACcountsContainReference) {
return tooLowLikelihood(log10LofK) && (!enforceLowerACs || isLowerAC(ACs,exactACcountsContainReference));
}
@Ensures("result != null")

View File

@ -69,7 +69,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "c731cd0ddb0fa153bffa7bc19586bfa6");
executor.PC_LSV_Test(String.format("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "40d60700aae1a316620fd96064080dcd");
}
@Test(enabled = true)
@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "54d718401c413fdbc1f801061bf8e7d1");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "d5047d65422426cf3ddba07768d8f6af");
}
}

View File

@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1426e311501a7bd96ecb75ed839f7c64");
executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1e22151ac0c61860b06d1e5406dd09a6");
}
@Test(enabled = true)

View File

@ -107,9 +107,9 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "df5b803750e604bedd6d713c59f46c33"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "8e06e6502d958cab79b9275460c06389"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "f32fb9a2485c583bc22ff4f09217318c"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "7db552463cf779644335bfa09fcddf82"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90600d209cf778fdfca6a844b8ee4acb"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "db3c99786c5726b20dbfe47e31e50d60"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "eac793500fbc7de46259000dbbcdd27d"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "4a431e0e387de3f791318f67d8855b0b"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "1058d3fe6553e07f002f994759c9647d"});

View File

@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
HCTest(CEUTRIO_BAM,
"-ploidy 1", "c93650f842aa4dfa4ef2b5f1b908a576");
"-ploidy 1", "5046d3f77a56fcc4ccc8a216670effac");
}
@Test
@ -131,7 +131,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "0962d7723ef1e5e20fb42e869f12e1da");
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "1425b46f3cd50040a1272c8775672fc0");
}
@Test
@ -160,7 +160,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"874f5fc9d7eed8894f72b8d2587de4b6");
"38ae7a6d082fc02f865f7a69a9c871d9");
}
@Test