From bf630abe887e1d86dfdcc01c8fd8f964218fd447 Mon Sep 17 00:00:00 2001 From: Valentin Ruano-Rubio Date: Fri, 7 Feb 2014 15:24:49 -0500 Subject: [PATCH] Fixed nocall (./.) without PLs bug in GVCF output Story: https://www.pivotaltracker.com/story/show/65388246 Additional changes and notes: 1. The fix consist in forcing the output of all PLs by setting the standard flag for that '-allSitePLs'. 2. BP_RESOLUTION was handled differently to GVCF in some aspect that should be common. That has been fixed. --- .../haplotypecaller/GenotypingEngine.java | 14 +++++++------- .../haplotypecaller/HaplotypeCaller.java | 10 +++++++--- .../HaplotypeCallerGVCFIntegrationTest.java | 18 +++++++++++++++--- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index b47c49f14..26e4edc1a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -134,7 +134,7 @@ public class GenotypingEngine { * @param activeRegionWindow Active window * @param genomeLocParser GenomeLocParser * @param activeAllelesToGenotype Alleles to genotype - * @param addNonRef whether we should add a <NON_REF> alternative allele to the result variation contexts. + * @param emitReferenceConfidence whether we should add a <NON_REF> alternative allele to the result variation contexts. * * @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes * @@ -152,7 +152,7 @@ public class GenotypingEngine { final GenomeLocParser genomeLocParser, final RefMetaDataTracker tracker, final List activeAllelesToGenotype, - final boolean addNonRef) { + final boolean emitReferenceConfidence) { // sanity check input arguments if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine); if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes); @@ -195,7 +195,7 @@ public class GenotypingEngine { final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL; - if (addNonRef) { + if (emitReferenceConfidence) { final List alleleList = new ArrayList<>(); alleleList.addAll(mergedVC.getAlleles()); alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE); @@ -217,19 +217,19 @@ public class GenotypingEngine { final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() ); - if (addNonRef) addMiscellaneousAllele(alleleReadMap); + if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); - final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel); + VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel); if( call != null ) { final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) ); - if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations); + if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); - if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! + if( !emitReferenceConfidence && call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 2db37fc03..7499dc482 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -545,10 +545,9 @@ public class HaplotypeCaller extends ActiveRegionWalker, In if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF) throw new UserException("You cannot request gVCF output and do not genotype at the same time"); - if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) { + if ( emitReferenceConfidence() ) { SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; - logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output"); // also, we don't need to output several of the annotations annotationsToExclude.add("ChromosomeCounts"); @@ -557,6 +556,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // but we definitely want certain other ones annotationsToUse.add("StrandBiasBySample"); + logger.info("Standard Emitting and Calling confidence set to 0.0 for reference-model confidence output"); } if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) @@ -572,6 +572,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In ? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); + if (emitReferenceConfidence() && !UG_engine.getUAC().annotateAllSitesWithPLs) { + UG_engine.getUAC().annotateAllSitesWithPLs = true; + logger.info("All sites annotated with PLs force to true for reference-model confidence output"); + } // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC); simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; @@ -1154,4 +1158,4 @@ public class HaplotypeCaller extends ActiveRegionWalker, In FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair); } } -} \ No newline at end of file +} diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 88589f216..9f24eb22a 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -68,10 +68,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { // this functionality can be adapted to provide input data for whatever you might want in your data tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"}); + tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); return tests.toArray(new Object[][]{}); @@ -141,4 +141,16 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testMissingGVCFIndexingStrategyException", spec); } + + private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list"; + private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam"; + + @Test + public void testNoCallGVCFMissingPLsBugFix() { + final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", + b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65")); + spec.disableShadowBCF(); + executeTest("testNoCallGVCFMissingPLsBugFix", spec); + } }