Merge pull request #707 from broadinstitute/eb_improve_physical_phasing

Updated the physical phasing in the Haplotype Caller to address requests...
This commit is contained in:
Eric Banks 2014-08-18 16:27:40 -04:00
commit 1af78f707e
4 changed files with 173 additions and 92 deletions

View File

@ -497,10 +497,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
protected boolean mergeVariantsViaLD = false; protected boolean mergeVariantsViaLD = false;
@Advanced @Advanced
@Argument(fullName="tryPhysicalPhasing", shortName="tryPhysicalPhasing", doc="If specified, we will add physical (read-based) phasing information", required = false) @Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="If specified, we will not try to add physical (read-based) phasing information", required = false)
protected boolean tryPhysicalPhasing = false; protected boolean doNotRunPhysicalPhasing = false;
public static final String HAPLOTYPE_CALLER_PHASING_KEY = "HCP"; public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
// ----------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------
// arguments for debugging / developing the haplotype caller // arguments for debugging / developing the haplotype caller
@ -634,12 +635,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if ( emitReferenceConfidence() ) { if ( emitReferenceConfidence() ) {
if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time"); throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and GENOTYPE_GIVEN_ALLELES at the same time");
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
// also, we don't need to output several of the annotations // also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts"); annotationsToExclude.add("ChromosomeCounts");
annotationsToExclude.add("FisherStrand"); annotationsToExclude.add("FisherStrand");
@ -651,6 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if (!SCAC.annotateAllSitesWithPLs) if (!SCAC.annotateAllSitesWithPLs)
logger.info("All sites annotated with PLs forced to true for reference-model confidence output"); logger.info("All sites annotated with PLs forced to true for reference-model confidence output");
SCAC.annotateAllSitesWithPLs = true; SCAC.annotateAllSitesWithPLs = true;
} else if ( ! doNotRunPhysicalPhasing ) {
doNotRunPhysicalPhasing = true;
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
} }
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY ) if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
@ -678,7 +681,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode ) if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, tryPhysicalPhasing); genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
// initialize the output VCF header // initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -699,8 +702,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
VCFConstants.DEPTH_KEY, VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY); VCFConstants.GENOTYPE_PL_KEY);
if ( tryPhysicalPhasing ) if ( ! doNotRunPhysicalPhasing ) {
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Physical phasing information, each unique ID within a given sample (but not across samples) connects alternate alleles as occurring on the same haplotype")); headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used // where the filters are used. For example, in emitting all sites the lowQual field is used

View File

@ -57,6 +57,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.EventMap; import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -77,16 +78,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
private final boolean tryPhysicalPhasing; private final boolean doPhysicalPhasing;
/** /**
* {@inheritDoc} * {@inheritDoc}
* @param toolkit {@inheritDoc} * @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc} * @param configuration {@inheritDoc}
*/ */
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean tryPhysicalPhasing) { public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean doPhysicalPhasing) {
super(toolkit,configuration); super(toolkit,configuration);
this.tryPhysicalPhasing = tryPhysicalPhasing; this.doPhysicalPhasing = doPhysicalPhasing;
} }
/** /**
@ -97,7 +98,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/ */
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) { public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames); super(toolkit,configuration,sampleNames);
tryPhysicalPhasing = false; doPhysicalPhasing = true;
} }
@ -254,7 +255,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
} }
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper,genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC),ALLELE_EXTENSION)); ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
if (configuration.isSampleContaminationPresent()) if (configuration.isSampleContaminationPresent())
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination()); readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
@ -286,7 +287,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
} }
} }
final List<VariantContext> phasedCalls = tryPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls; final List<VariantContext> phasedCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
return new CalledHaplotypes(phasedCalls, calledHaplotypes); return new CalledHaplotypes(phasedCalls, calledHaplotypes);
} }
@ -303,8 +304,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final Map<VariantContext, Set<Haplotype>> haplotypeMap = constructHaplotypeMapping(calls, calledHaplotypes); final Map<VariantContext, Set<Haplotype>> haplotypeMap = constructHaplotypeMapping(calls, calledHaplotypes);
// construct a mapping from call to phase set ID // construct a mapping from call to phase set ID
final Map<VariantContext, Integer> phaseSetMapping = new HashMap<>(); final Map<VariantContext, Pair<Integer, String>> phaseSetMapping = new HashMap<>();
final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, phaseSetMapping); final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, calledHaplotypes.size() - 1, phaseSetMapping);
// we want to establish (potential) *groups* of phased variants, so we need to be smart when looking at pairwise phasing partners // we want to establish (potential) *groups* of phased variants, so we need to be smart when looking at pairwise phasing partners
return constructPhaseGroups(calls, phaseSetMapping, uniqueCounterEndValue); return constructPhaseGroups(calls, phaseSetMapping, uniqueCounterEndValue);
@ -349,17 +350,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* *
* @param originalCalls the original unphased calls * @param originalCalls the original unphased calls
* @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele * @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele
* @param totalAvailableHaplotypes the total number of possible haplotypes used in calling
* @param phaseSetMapping the map to populate in this method * @param phaseSetMapping the map to populate in this method
* @return the next incremental unique index * @return the next incremental unique index
*/ */
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls, protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap, final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final Map<VariantContext, Integer> phaseSetMapping) { final int totalAvailableHaplotypes,
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping) {
final int numCalls = originalCalls.size(); final int numCalls = originalCalls.size();
int uniqueCounter = 0; int uniqueCounter = 0;
// use the haplotype mapping to connect variants that are always present on the same haplotypes // use the haplotype mapping to connect variants that are always/never present on the same haplotypes
for ( int i = 0; i < numCalls - 1; i++ ) { for ( int i = 0; i < numCalls - 1; i++ ) {
final VariantContext call = originalCalls.get(i); final VariantContext call = originalCalls.get(i);
final Set<Haplotype> haplotypesWithCall = haplotypeMap.get(call); final Set<Haplotype> haplotypesWithCall = haplotypeMap.get(call);
@ -369,18 +372,46 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
for ( int j = i+1; j < numCalls; j++ ) { for ( int j = i+1; j < numCalls; j++ ) {
final VariantContext comp = originalCalls.get(j); final VariantContext comp = originalCalls.get(j);
final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp); final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp);
if ( haplotypesWithComp.isEmpty() )
continue;
// if the variants are in phase, record that fact // if the variants are together on all haplotypes, record that fact.
if ( haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp) ) { // another possibility is that one of the variants is on all possible haplotypes (i.e. it is homozygous).
// if it's part of an existing group, use that group's unique ID final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
if ( phaseSetMapping.containsKey(call) ) { final boolean compIsOnAllHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
phaseSetMapping.put(comp, phaseSetMapping.get(call)); if ( (haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp)) || callIsOnAllHaps || compIsOnAllHaps ) {
// otherwise, create a new group
} else { // create a new group if these are the first entries
phaseSetMapping.put(call, uniqueCounter); if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(comp, uniqueCounter); phaseSetMapping.put(call, new Pair<>(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1"));
uniqueCounter++; uniqueCounter++;
} }
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ) {
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, compIsOnAllHaps ? "1|1" : callPhase.second));
}
}
// if the variants are apart on *all* haplotypes, record that fact
else if ( haplotypesWithCall.size() + haplotypesWithComp.size() == totalAvailableHaplotypes ) {
final Set<Haplotype> intersection = new HashSet<>();
intersection.addAll(haplotypesWithCall);
intersection.retainAll(haplotypesWithComp);
if ( intersection.isEmpty() ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0"));
uniqueCounter++;
}
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ){
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals("0|1") ? "1|0" : "0|1"));
}
}
} }
} }
} }
@ -397,7 +428,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @return a non-null list which represents the possibly phased version of the calls * @return a non-null list which represents the possibly phased version of the calls
*/ */
protected static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls, protected static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls,
final Map<VariantContext, Integer> phaseSetMapping, final Map<VariantContext, Pair<Integer, String>> phaseSetMapping,
final int indexTo) { final int indexTo) {
final List<VariantContext> phasedCalls = new ArrayList<>(originalCalls); final List<VariantContext> phasedCalls = new ArrayList<>(originalCalls);
@ -407,7 +438,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final List<Integer> indexes = new ArrayList<>(); final List<Integer> indexes = new ArrayList<>();
for ( int index = 0; index < originalCalls.size(); index++ ) { for ( int index = 0; index < originalCalls.size(); index++ ) {
final VariantContext call = originalCalls.get(index); final VariantContext call = originalCalls.get(index);
if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call) == count ) if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call).first == count )
indexes.add(index); indexes.add(index);
} }
if ( indexes.size() < 2 ) if ( indexes.size() < 2 )
@ -418,7 +449,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// update the VCs // update the VCs
for ( final int index : indexes ) { for ( final int index : indexes ) {
final VariantContext phasedCall = phaseVC(originalCalls.get(index), uniqueID); final VariantContext originalCall = originalCalls.get(index);
final VariantContext phasedCall = phaseVC(originalCall, uniqueID, phaseSetMapping.get(originalCall).second);
phasedCalls.set(index, phasedCall); phasedCalls.set(index, phasedCall);
} }
} }
@ -452,12 +484,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* *
* @param vc the variant context * @param vc the variant context
* @param ID the ID to use * @param ID the ID to use
* @param phaseGT the phase GT string to use
* @return phased non-null variant context * @return phased non-null variant context
*/ */
private static VariantContext phaseVC(final VariantContext vc, final String ID) { private static VariantContext phaseVC(final VariantContext vc, final String ID, final String phaseGT) {
final List<Genotype> phasedGenotypes = new ArrayList<>(); final List<Genotype> phasedGenotypes = new ArrayList<>();
for ( final Genotype g : vc.getGenotypes() ) for ( final Genotype g : vc.getGenotypes() )
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY, ID).make()); phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make());
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make(); return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
} }

View File

@ -68,11 +68,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data // this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9db87ae56df22456f3c08024384f3e5e"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9db87ae56df22456f3c08024384f3e5e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4a006f9e39cacc3ae84384ad56ca22a2"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90dc8950e28e042097817d785022482e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fe014592c9fa8d442508a96eb09e0c93"}); tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "b01a2c222a3f00a675f5534c3b449919"}); tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() { public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d595dae0e3f993047c22c6e520e190aa")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6a4e571abb59b925d59a38d244f0abe"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec); executeTest("testMissingGVCFIndexingStrategyException", spec);
} }
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testNoCallGVCFMissingPLsBugFix() { public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("2a91768430a84dbae8d40242dee8c9b8")); final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("187e43b1034478edced1eb5c664bac34"));
spec.disableShadowBCF(); spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec); executeTest("testNoCallGVCFMissingPLsBugFix", spec);
} }

View File

@ -56,6 +56,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.haplotype.EventMap; import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype; import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -416,11 +417,15 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final Allele altC = Allele.create("C", false); final Allele altC = Allele.create("C", false);
final Allele altT = Allele.create("T", false); final Allele altT = Allele.create("T", false);
final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make(); final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make(); final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make(); final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4); final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
pos1.getEventMap().put(1, vc1);
final Haplotype pos2 = new Haplotype("ACAAA".getBytes()); final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
pos2.setEventMap(new EventMap(Arrays.asList(vc2))); pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
pos2.getEventMap().put(2, vc2); pos2.getEventMap().put(2, vc2);
@ -440,44 +445,69 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
pos234.getEventMap().put(3, vc3); pos234.getEventMap().put(3, vc3);
pos234.getEventMap().put(4, vc4); pos234.getEventMap().put(4, vc4);
// test no phased variants final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
final Set<Haplotype> haplotypes2NoPhase = new HashSet<>();
haplotypes2NoPhase.add(pos2);
haplotypes2NoPhase.add(pos24);
final Set<Haplotype> haplotypes3NoPhase = new HashSet<>();
haplotypes3NoPhase.add(pos3);
final Set<Haplotype> haplotypes4NoPhase = new HashSet<>();
haplotypes4NoPhase.add(pos4);
haplotypes4NoPhase.add(pos24);
final Map<VariantContext, Set<Haplotype>> haplotypeMapNoPhase = new HashMap<>();
haplotypeMapNoPhase.put(vc2, haplotypes2NoPhase);
haplotypeMapNoPhase.put(vc3, haplotypes3NoPhase);
haplotypeMapNoPhase.put(vc4, haplotypes4NoPhase);
tests.add(new Object[]{calls, haplotypeMapNoPhase, 0, 0});
// test 2 phased variants // test no phased variants #1
final Set<Haplotype> haplotypes2SomePhase = new HashSet<>(); final Set<Haplotype> haplotypes2 = new HashSet<>();
haplotypes2SomePhase.add(pos24); haplotypes2.add(pos2);
final Set<Haplotype> haplotypes4SomePhase = new HashSet<>(); haplotypeMap.put(vc2, haplotypes2);
haplotypes4SomePhase.add(pos24); tests.add(new Object[]{Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0});
final Map<VariantContext, Set<Haplotype>> haplotypeMapSomePhase = new HashMap<>();
haplotypeMapSomePhase.put(vc2, haplotypes2SomePhase);
haplotypeMapSomePhase.put(vc3, haplotypes3NoPhase);
haplotypeMapSomePhase.put(vc4, haplotypes4SomePhase);
tests.add(new Object[]{calls, haplotypeMapSomePhase, 2, 1});
// test all phased variants // test no phased variants #2
final Set<Haplotype> haplotypes2AllPhase = new HashSet<>(); final Set<Haplotype> haplotypes3 = new HashSet<>();
haplotypes2AllPhase.add(pos234); haplotypes3.add(pos3);
final Set<Haplotype> haplotypes3AllPhase = new HashSet<>(); haplotypeMap.put(vc3, haplotypes3);
haplotypes3AllPhase.add(pos234); tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
final Set<Haplotype> haplotypes4AllPhase = new HashSet<>();
haplotypes4AllPhase.add(pos234); // test opposite phase
final Map<VariantContext, Set<Haplotype>> haplotypeMapAllPhase = new HashMap<>(); tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1});
haplotypeMapAllPhase.put(vc2, haplotypes2AllPhase);
haplotypeMapAllPhase.put(vc3, haplotypes3AllPhase); // test no phased variants #3
haplotypeMapAllPhase.put(vc4, haplotypes4AllPhase); final Set<Haplotype> haplotypes4 = new HashSet<>();
tests.add(new Object[]{calls, haplotypeMapAllPhase, 3, 1}); haplotypes4.add(pos4);
haplotypeMap.put(vc4, haplotypes4);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
// test mixture
final Set<Haplotype> haplotypes24 = new HashSet<>();
haplotypes24.add(pos24);
haplotypeMap.put(vc2, haplotypes24);
haplotypeMap.put(vc4, haplotypes24);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1});
// test 2 with opposite phase
final Set<Haplotype> haplotypes1 = new HashSet<>();
haplotypes1.add(pos1);
haplotypeMap.put(vc1, haplotypes1);
haplotypeMap.remove(vc3);
tests.add(new Object[]{Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2});
// test homs around a het
final Set<Haplotype> haplotypes2hom = new HashSet<>();
haplotypes2hom.add(pos24);
haplotypes2hom.add(pos234);
final Set<Haplotype> haplotypes4hom = new HashSet<>();
haplotypes4hom.add(pos24);
haplotypes4hom.add(pos234);
final Set<Haplotype> haplotypes3het = new HashSet<>();
haplotypes3het.add(pos234);
haplotypeMap.put(vc2, haplotypes2hom);
haplotypeMap.put(vc3, haplotypes3het);
haplotypeMap.put(vc4, haplotypes4hom);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 1, 0});
// test hets around a hom
final Set<Haplotype> haplotypes2het = new HashSet<>();
haplotypes2het.add(pos234);
final Set<Haplotype> haplotypes4het = new HashSet<>();
haplotypes4het.add(pos234);
final Set<Haplotype> haplotypes3hom = new HashSet<>();
haplotypes3hom.add(pos3);
haplotypes3hom.add(pos234);
haplotypeMap.put(vc2, haplotypes2het);
haplotypeMap.put(vc3, haplotypes3hom);
haplotypeMap.put(vc4, haplotypes4het);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@ -485,12 +515,25 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseSetMappingProvider") @Test(dataProvider="ConstructPhaseSetMappingProvider")
public void testConstructPhaseSetMapping(final List<VariantContext> calls, public void testConstructPhaseSetMapping(final List<VariantContext> calls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap, final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final int totalHaplotypes,
final int expectedMapSize, final int expectedMapSize,
final int expectedNumGroups) { final int expectedNumGroups,
final Map<VariantContext, Integer> actualPhaseSetMapping = new HashMap<>(); final int expectedNum01,
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, actualPhaseSetMapping); final int expectedNum10) {
final Map<VariantContext, Pair<Integer, String>> actualPhaseSetMapping = new HashMap<>();
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, totalHaplotypes, actualPhaseSetMapping);
Assert.assertEquals(actualNumGroups, expectedNumGroups); Assert.assertEquals(actualNumGroups, expectedNumGroups);
Assert.assertEquals(actualPhaseSetMapping.size(), expectedMapSize); Assert.assertEquals(actualPhaseSetMapping.size(), expectedMapSize);
int num01 = 0, num10 = 0;
for ( final Pair<Integer, String> phase : actualPhaseSetMapping.values() ) {
if ( phase.second.equals("0|1") )
num01++;
else if ( phase.second.equals("1|0") )
num10++;
}
Assert.assertEquals(num01, expectedNum01);
Assert.assertEquals(num10, expectedNum10);
} }
@DataProvider(name = "ConstructPhaseGroupsProvider") @DataProvider(name = "ConstructPhaseGroupsProvider")
@ -509,27 +552,27 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final List<VariantContext> calls = Arrays.asList(vc1, vc2, vc3); final List<VariantContext> calls = Arrays.asList(vc1, vc2, vc3);
// test no phased variants, empty map // test no phased variants, empty map
final Map<VariantContext, Integer> nonePhased1 = new HashMap<>(); final Map<VariantContext, Pair<Integer, String>> nonePhased1 = new HashMap<>();
tests.add(new Object[]{calls, nonePhased1, 0, 0, 0}); tests.add(new Object[]{calls, nonePhased1, 0, 0, 0});
// test no phased variants, full map, exception expected // test no phased variants, full map, exception expected
final Map<VariantContext, Integer> nonePhased2 = new HashMap<>(); final Map<VariantContext, Pair<Integer, String>> nonePhased2 = new HashMap<>();
nonePhased2.put(vc1, 0); nonePhased2.put(vc1, new Pair<>(0, "0/1"));
nonePhased2.put(vc2, 1); nonePhased2.put(vc2, new Pair<>(1, "0/1"));
nonePhased2.put(vc3, 2); nonePhased2.put(vc3, new Pair<>(2, "0/1"));
tests.add(new Object[]{calls, nonePhased2, 3, -1, -1}); tests.add(new Object[]{calls, nonePhased2, 3, -1, -1});
// test 2 phased variants // test 2 phased variants
final Map<VariantContext, Integer> twoPhased = new HashMap<>(); final Map<VariantContext, Pair<Integer, String>> twoPhased = new HashMap<>();
twoPhased.put(vc1, 0); twoPhased.put(vc1, new Pair<>(0, "0/1"));
twoPhased.put(vc2, 0); twoPhased.put(vc2, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, twoPhased, 1, 1, 2}); tests.add(new Object[]{calls, twoPhased, 1, 1, 2});
// test all phased variants // test all phased variants
final Map<VariantContext, Integer> allPhased = new HashMap<>(); final Map<VariantContext, Pair<Integer, String>> allPhased = new HashMap<>();
allPhased.put(vc1, 0); allPhased.put(vc1, new Pair<>(0, "0/1"));
allPhased.put(vc2, 0); allPhased.put(vc2, new Pair<>(0, "0/1"));
allPhased.put(vc3, 0); allPhased.put(vc3, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, allPhased, 1, 1, 3}); tests.add(new Object[]{calls, allPhased, 1, 1, 3});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
@ -537,7 +580,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseGroupsProvider") @Test(dataProvider="ConstructPhaseGroupsProvider")
public void testConstructPhaseGroups(final List<VariantContext> calls, public void testConstructPhaseGroups(final List<VariantContext> calls,
final Map<VariantContext, Integer> phaseMap, final Map<VariantContext, Pair<Integer, String>> phaseMap,
final int endIndex, final int endIndex,
final int expectedNumGroups, final int expectedNumGroups,
final int expectedGroupSize) { final int expectedGroupSize) {
@ -553,8 +596,8 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
int counter = 0; int counter = 0;
for ( final VariantContext call : actualPhasedCalls ) { for ( final VariantContext call : actualPhasedCalls ) {
for ( final Genotype g : call.getGenotypes() ) { for ( final Genotype g : call.getGenotypes() ) {
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY) ) { if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) {
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY).toString()); uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString());
counter++; counter++;
} }
} }