Fixed nocall (./.) without PLs bug in GVCF output

Story:

https://www.pivotaltracker.com/story/show/65388246

Additional changes and notes:

1. The fix consist in forcing the output of all PLs by setting the standard flag for that '-allSitePLs'.

2. BP_RESOLUTION was handled differently to GVCF in some aspect that should be common. That has been fixed.
This commit is contained in:
Valentin Ruano-Rubio 2014-02-07 15:24:49 -05:00
parent 8c922be684
commit bf630abe88
3 changed files with 29 additions and 13 deletions

View File

@ -134,7 +134,7 @@ public class GenotypingEngine {
* @param activeRegionWindow Active window
* @param genomeLocParser GenomeLocParser
* @param activeAllelesToGenotype Alleles to genotype
* @param addNonRef whether we should add a <NON_REF> alternative allele to the result variation contexts.
* @param emitReferenceConfidence whether we should add a <NON_REF> alternative allele to the result variation contexts.
*
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
*
@ -152,7 +152,7 @@ public class GenotypingEngine {
final GenomeLocParser genomeLocParser,
final RefMetaDataTracker tracker,
final List<VariantContext> activeAllelesToGenotype,
final boolean addNonRef) {
final boolean emitReferenceConfidence) {
// sanity check input arguments
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
@ -195,7 +195,7 @@ public class GenotypingEngine {
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
if (addNonRef) {
if (emitReferenceConfidence) {
final List<Allele> alleleList = new ArrayList<>();
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
@ -217,19 +217,19 @@ public class GenotypingEngine {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
if (addNonRef) addMiscellaneousAllele(alleleReadMap);
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap);
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations);
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations);
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
if( !emitReferenceConfidence && call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
}

View File

@ -545,10 +545,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF)
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
if ( emitReferenceConfidence() ) {
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
// also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts");
@ -557,6 +556,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// but we definitely want certain other ones
annotationsToUse.add("StrandBiasBySample");
logger.info("Standard Emitting and Calling confidence set to 0.0 for reference-model confidence output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
@ -572,6 +572,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
if (emitReferenceConfidence() && !UG_engine.getUAC().annotateAllSitesWithPLs) {
UG_engine.getUAC().annotateAllSitesWithPLs = true;
logger.info("All sites annotated with PLs force to true for reference-model confidence output");
}
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
@ -1154,4 +1158,4 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
}
}
}
}

View File

@ -68,10 +68,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"});
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
return tests.toArray(new Object[][]{});
@ -141,4 +141,16 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
}
private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list";
private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam";
@Test
public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65"));
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}
}