Merge pull request #552 from broadinstitute/rp_HaplotypeCaller_1kg_consensus_mode

Added the consensus mode used for the 1000 Genomes Project to the Haplot...
This commit is contained in:
Ryan Poplin 2014-03-11 11:17:30 -04:00
commit 35aa24ab54
2 changed files with 24 additions and 2 deletions

View File

@ -289,6 +289,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
protected boolean dontRecoverDanglingTails = false;
@Advanced
@Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
protected boolean consensusMode = false;
// -----------------------------------------------------------------------------------------------
// general advanced arguments to control haplotype caller behavior
// -----------------------------------------------------------------------------------------------
@ -575,7 +579,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
// HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine
UAC.OutputMode = SCAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
UAC.OutputMode = SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
@ -598,6 +602,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
}
if( SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) && consensusMode ) {
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
}
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -878,7 +886,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
activeAllelesToGenotype, emitReferenceConfidence() );
( consensusMode ? Collections.<VariantContext>emptyList() : activeAllelesToGenotype ),
emitReferenceConfidence() );
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {

View File

@ -96,4 +96,17 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"f50e0b35e2240b19b1b8b6dfa0cf9796");
}
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testHaplotypeCallerComplexConsensusMode: args=" + args, spec);
}
@Test
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
"21e521d51b826450d348e5201684ffe4");
}
}