Fixed nocall (./.) without PLs bug in GVCF output
Story: https://www.pivotaltracker.com/story/show/65388246 Additional changes and notes: 1. The fix consist in forcing the output of all PLs by setting the standard flag for that '-allSitePLs'. 2. BP_RESOLUTION was handled differently to GVCF in some aspect that should be common. That has been fixed.
This commit is contained in:
parent
8c922be684
commit
bf630abe88
|
|
@ -134,7 +134,7 @@ public class GenotypingEngine {
|
|||
* @param activeRegionWindow Active window
|
||||
* @param genomeLocParser GenomeLocParser
|
||||
* @param activeAllelesToGenotype Alleles to genotype
|
||||
* @param addNonRef whether we should add a <NON_REF> alternative allele to the result variation contexts.
|
||||
* @param emitReferenceConfidence whether we should add a <NON_REF> alternative allele to the result variation contexts.
|
||||
*
|
||||
* @return A CalledHaplotypes object containing a list of VC's with genotyped events and called haplotypes
|
||||
*
|
||||
|
|
@ -152,7 +152,7 @@ public class GenotypingEngine {
|
|||
final GenomeLocParser genomeLocParser,
|
||||
final RefMetaDataTracker tracker,
|
||||
final List<VariantContext> activeAllelesToGenotype,
|
||||
final boolean addNonRef) {
|
||||
final boolean emitReferenceConfidence) {
|
||||
// sanity check input arguments
|
||||
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||
if (haplotypes == null || haplotypes.isEmpty()) throw new IllegalArgumentException("haplotypes input should be non-empty and non-null, got "+haplotypes);
|
||||
|
|
@ -195,7 +195,7 @@ public class GenotypingEngine {
|
|||
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
|
||||
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
|
||||
if (addNonRef) {
|
||||
if (emitReferenceConfidence) {
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
alleleList.addAll(mergedVC.getAlleles());
|
||||
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
|
||||
|
|
@ -217,19 +217,19 @@ public class GenotypingEngine {
|
|||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
|
||||
|
||||
if (addNonRef) addMiscellaneousAllele(alleleReadMap);
|
||||
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap);
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
|
||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
|
||||
VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
|
||||
if( call != null ) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
|
||||
if (addNonRef) addMiscellaneousAllele(alleleReadMap_annotations);
|
||||
if (emitReferenceConfidence) addMiscellaneousAllele(alleleReadMap_annotations);
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
|
||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
|
||||
|
||||
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
if( !emitReferenceConfidence && call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -545,10 +545,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
if (dontGenotype && emitReferenceConfidence == ReferenceConfidenceMode.GVCF)
|
||||
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
|
||||
|
||||
if ( emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
|
||||
if ( emitReferenceConfidence() ) {
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
|
||||
logger.info("Standard Emitting and Calling confidence set to 0.0 for gVCF output");
|
||||
|
||||
// also, we don't need to output several of the annotations
|
||||
annotationsToExclude.add("ChromosomeCounts");
|
||||
|
|
@ -557,6 +556,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
// but we definitely want certain other ones
|
||||
annotationsToUse.add("StrandBiasBySample");
|
||||
logger.info("Standard Emitting and Calling confidence set to 0.0 for reference-model confidence output");
|
||||
}
|
||||
|
||||
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
|
||||
|
|
@ -572,6 +572,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
if (emitReferenceConfidence() && !UG_engine.getUAC().annotateAllSitesWithPLs) {
|
||||
UG_engine.getUAC().annotateAllSitesWithPLs = true;
|
||||
logger.info("All sites annotated with PLs force to true for reference-model confidence output");
|
||||
}
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
||||
simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
|
|
@ -1154,4 +1158,4 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,10 +68,10 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "53aa13711a1ceec1453f21c705723f04"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7735be71f57e62929947c289cd48bb9c"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "1b5697be7ae90723368677d4d66a440a"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "61c70b7b6d03930420b015958df6b5a5"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "6fd946c4c8c9fd05ea921513e4523a4b"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "aa7c0e3bec4ac307068f85f58d48625f"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
@ -141,4 +141,16 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
executeTest("testMissingGVCFIndexingStrategyException", spec);
|
||||
}
|
||||
|
||||
private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list";
|
||||
private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam";
|
||||
|
||||
@Test
|
||||
public void testNoCallGVCFMissingPLsBugFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
|
||||
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("4fe4a9bfbbcc98d1158cd0c164b9cc65"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue