From 85905dba9238a0f558d939d66e23eaca758ccf1d Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 23 May 2013 15:15:56 -0400 Subject: [PATCH] Bugfix for GGA mode in UG silently ignoring indels -- Started by Mark. Finished up by Ryan. -- GGA mode still respected glm argument for SNP and INDEL models, so that you would silently fail to genotype indels at all if the -glm INDEL wasn't provided, but you'd still emit the sites, so you'd see records in the VCF but all alleles would be no calls. -- https://www.pivotaltracker.com/story/show/48924339 for more information -- [resolves #48924339] --- .../genotyper/UnifiedGenotyperEngine.java | 38 +++++++++---------- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3380efcc9..fc11706e5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -83,6 +83,9 @@ public class UnifiedGenotyperEngine { public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3; public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4; + private static final int SNP_MODEL = 0; + private static final int INDEL_MODEL = 1; + public enum OUTPUT_MODE { /** produces calls only at variant sites */ EMIT_VARIANTS_ONLY, @@ -693,13 +696,13 @@ public class UnifiedGenotyperEngine { } private void determineGLModelsToUse() { - String modelPrefix = ""; if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY ) modelPrefix = GPSTRING; - if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { - modelPrefix += UAC.GLmodel.name().toUpperCase().replaceAll("BOTH",""); + // GGA mode => must initialize both the SNP and indel models + if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || + UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP")); modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL")); } @@ -712,31 +715,24 @@ public class UnifiedGenotyperEngine { private List getGLModelsToUse(final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext rawContext) { - if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) return modelsToUse; + if ( modelsToUse.size() != 2 ) + throw new IllegalStateException("GGA mode assumes that we have initialized both the SNP and indel models but found " + modelsToUse); + // if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested - final List GGAmodel = new ArrayList(1); final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); - if ( vcInput == null ) - return GGAmodel; // no work to be done - if ( vcInput.isSNP() ) { - // use the SNP model unless the user chose INDEL mode only - if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") ) - GGAmodel.add(modelsToUse.get(0)); + if ( vcInput == null ) { + return Collections.emptyList(); // no work to be done + } else if ( vcInput.isSNP() ) { + return Collections.singletonList(modelsToUse.get(SNP_MODEL)); + } else if ( vcInput.isIndel() || vcInput.isMixed() ) { + return Collections.singletonList(modelsToUse.get(INDEL_MODEL)); + } else { + return Collections.emptyList(); // No support for other types yet } - else if ( vcInput.isIndel() || vcInput.isMixed() ) { - // use the INDEL model unless the user chose SNP mode only - if ( modelsToUse.size() == 2 ) - GGAmodel.add(modelsToUse.get(1)); - else if ( modelsToUse.get(0).name().endsWith("INDEL") ) - GGAmodel.add(modelsToUse.get(0)); - } - // No support for other types yet - - return GGAmodel; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 88506fda3..1cfc41a27 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -74,7 +74,7 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "3f7d763c654f1d708323f369ea4a099b"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "ceb105e3db0f2b993e3d725b0d60b6a3"); } @Test(enabled = true)