Merge pull request #707 from broadinstitute/eb_improve_physical_phasing

Updated the physical phasing in the Haplotype Caller to address requests...
This commit is contained in:
Eric Banks 2014-08-18 16:27:40 -04:00
commit 1af78f707e
4 changed files with 173 additions and 92 deletions

View File

@ -497,10 +497,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
protected boolean mergeVariantsViaLD = false;
@Advanced
@Argument(fullName="tryPhysicalPhasing", shortName="tryPhysicalPhasing", doc="If specified, we will add physical (read-based) phasing information", required = false)
protected boolean tryPhysicalPhasing = false;
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="If specified, we will not try to add physical (read-based) phasing information", required = false)
protected boolean doNotRunPhysicalPhasing = false;
public static final String HAPLOTYPE_CALLER_PHASING_KEY = "HCP";
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
// -----------------------------------------------------------------------------------------------
// arguments for debugging / developing the haplotype caller
@ -634,12 +635,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if ( emitReferenceConfidence() ) {
if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time");
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and GENOTYPE_GIVEN_ALLELES at the same time");
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
// also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts");
annotationsToExclude.add("FisherStrand");
@ -651,6 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if (!SCAC.annotateAllSitesWithPLs)
logger.info("All sites annotated with PLs forced to true for reference-model confidence output");
SCAC.annotateAllSitesWithPLs = true;
} else if ( ! doNotRunPhysicalPhasing ) {
doNotRunPhysicalPhasing = true;
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
@ -678,7 +681,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, tryPhysicalPhasing);
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -699,8 +702,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY);
if ( tryPhysicalPhasing )
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Physical phasing information, each unique ID within a given sample (but not across samples) connects alternate alleles as occurring on the same haplotype"));
if ( ! doNotRunPhysicalPhasing ) {
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used

View File

@ -57,6 +57,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -77,16 +78,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
private final boolean tryPhysicalPhasing;
private final boolean doPhysicalPhasing;
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean tryPhysicalPhasing) {
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean doPhysicalPhasing) {
super(toolkit,configuration);
this.tryPhysicalPhasing = tryPhysicalPhasing;
this.doPhysicalPhasing = doPhysicalPhasing;
}
/**
@ -97,7 +98,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
tryPhysicalPhasing = false;
doPhysicalPhasing = true;
}
@ -254,7 +255,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
}
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper,genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC),ALLELE_EXTENSION));
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
if (configuration.isSampleContaminationPresent())
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
@ -286,7 +287,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
}
}
final List<VariantContext> phasedCalls = tryPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
final List<VariantContext> phasedCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
return new CalledHaplotypes(phasedCalls, calledHaplotypes);
}
@ -303,8 +304,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final Map<VariantContext, Set<Haplotype>> haplotypeMap = constructHaplotypeMapping(calls, calledHaplotypes);
// construct a mapping from call to phase set ID
final Map<VariantContext, Integer> phaseSetMapping = new HashMap<>();
final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, phaseSetMapping);
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping = new HashMap<>();
final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, calledHaplotypes.size() - 1, phaseSetMapping);
// we want to establish (potential) *groups* of phased variants, so we need to be smart when looking at pairwise phasing partners
return constructPhaseGroups(calls, phaseSetMapping, uniqueCounterEndValue);
@ -349,17 +350,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*
* @param originalCalls the original unphased calls
* @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele
* @param totalAvailableHaplotypes the total number of possible haplotypes used in calling
* @param phaseSetMapping the map to populate in this method
* @return the next incremental unique index
*/
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final Map<VariantContext, Integer> phaseSetMapping) {
final int totalAvailableHaplotypes,
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping) {
final int numCalls = originalCalls.size();
int uniqueCounter = 0;
// use the haplotype mapping to connect variants that are always present on the same haplotypes
// use the haplotype mapping to connect variants that are always/never present on the same haplotypes
for ( int i = 0; i < numCalls - 1; i++ ) {
final VariantContext call = originalCalls.get(i);
final Set<Haplotype> haplotypesWithCall = haplotypeMap.get(call);
@ -369,18 +372,46 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
for ( int j = i+1; j < numCalls; j++ ) {
final VariantContext comp = originalCalls.get(j);
final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp);
if ( haplotypesWithComp.isEmpty() )
continue;
// if the variants are in phase, record that fact
if ( haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp) ) {
// if it's part of an existing group, use that group's unique ID
if ( phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(comp, phaseSetMapping.get(call));
// otherwise, create a new group
} else {
phaseSetMapping.put(call, uniqueCounter);
phaseSetMapping.put(comp, uniqueCounter);
// if the variants are together on all haplotypes, record that fact.
// another possibility is that one of the variants is on all possible haplotypes (i.e. it is homozygous).
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
final boolean compIsOnAllHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
if ( (haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp)) || callIsOnAllHaps || compIsOnAllHaps ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(call, new Pair<>(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1"));
uniqueCounter++;
}
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ) {
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, compIsOnAllHaps ? "1|1" : callPhase.second));
}
}
// if the variants are apart on *all* haplotypes, record that fact
else if ( haplotypesWithCall.size() + haplotypesWithComp.size() == totalAvailableHaplotypes ) {
final Set<Haplotype> intersection = new HashSet<>();
intersection.addAll(haplotypesWithCall);
intersection.retainAll(haplotypesWithComp);
if ( intersection.isEmpty() ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0"));
uniqueCounter++;
}
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ){
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals("0|1") ? "1|0" : "0|1"));
}
}
}
}
}
@ -397,7 +428,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @return a non-null list which represents the possibly phased version of the calls
*/
protected static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls,
final Map<VariantContext, Integer> phaseSetMapping,
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping,
final int indexTo) {
final List<VariantContext> phasedCalls = new ArrayList<>(originalCalls);
@ -407,7 +438,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final List<Integer> indexes = new ArrayList<>();
for ( int index = 0; index < originalCalls.size(); index++ ) {
final VariantContext call = originalCalls.get(index);
if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call) == count )
if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call).first == count )
indexes.add(index);
}
if ( indexes.size() < 2 )
@ -418,7 +449,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// update the VCs
for ( final int index : indexes ) {
final VariantContext phasedCall = phaseVC(originalCalls.get(index), uniqueID);
final VariantContext originalCall = originalCalls.get(index);
final VariantContext phasedCall = phaseVC(originalCall, uniqueID, phaseSetMapping.get(originalCall).second);
phasedCalls.set(index, phasedCall);
}
}
@ -452,12 +484,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*
* @param vc the variant context
* @param ID the ID to use
* @param phaseGT the phase GT string to use
* @return phased non-null variant context
*/
private static VariantContext phaseVC(final VariantContext vc, final String ID) {
private static VariantContext phaseVC(final VariantContext vc, final String ID, final String phaseGT) {
final List<Genotype> phasedGenotypes = new ArrayList<>();
for ( final Genotype g : vc.getGenotypes() )
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY, ID).make());
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make());
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
}

View File

@ -68,11 +68,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9db87ae56df22456f3c08024384f3e5e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4a006f9e39cacc3ae84384ad56ca22a2"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fe014592c9fa8d442508a96eb09e0c93"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90dc8950e28e042097817d785022482e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "b01a2c222a3f00a675f5534c3b449919"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
return tests.toArray(new Object[][]{});
}
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d595dae0e3f993047c22c6e520e190aa"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6a4e571abb59b925d59a38d244f0abe"));
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
}
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("2a91768430a84dbae8d40242dee8c9b8"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("187e43b1034478edced1eb5c664bac34"));
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}

View File

@ -56,6 +56,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -416,11 +417,15 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final Allele altC = Allele.create("C", false);
final Allele altT = Allele.create("T", false);
final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
pos1.getEventMap().put(1, vc1);
final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
pos2.getEventMap().put(2, vc2);
@ -440,44 +445,69 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
pos234.getEventMap().put(3, vc3);
pos234.getEventMap().put(4, vc4);
// test no phased variants
final Set<Haplotype> haplotypes2NoPhase = new HashSet<>();
haplotypes2NoPhase.add(pos2);
haplotypes2NoPhase.add(pos24);
final Set<Haplotype> haplotypes3NoPhase = new HashSet<>();
haplotypes3NoPhase.add(pos3);
final Set<Haplotype> haplotypes4NoPhase = new HashSet<>();
haplotypes4NoPhase.add(pos4);
haplotypes4NoPhase.add(pos24);
final Map<VariantContext, Set<Haplotype>> haplotypeMapNoPhase = new HashMap<>();
haplotypeMapNoPhase.put(vc2, haplotypes2NoPhase);
haplotypeMapNoPhase.put(vc3, haplotypes3NoPhase);
haplotypeMapNoPhase.put(vc4, haplotypes4NoPhase);
tests.add(new Object[]{calls, haplotypeMapNoPhase, 0, 0});
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
// test 2 phased variants
final Set<Haplotype> haplotypes2SomePhase = new HashSet<>();
haplotypes2SomePhase.add(pos24);
final Set<Haplotype> haplotypes4SomePhase = new HashSet<>();
haplotypes4SomePhase.add(pos24);
final Map<VariantContext, Set<Haplotype>> haplotypeMapSomePhase = new HashMap<>();
haplotypeMapSomePhase.put(vc2, haplotypes2SomePhase);
haplotypeMapSomePhase.put(vc3, haplotypes3NoPhase);
haplotypeMapSomePhase.put(vc4, haplotypes4SomePhase);
tests.add(new Object[]{calls, haplotypeMapSomePhase, 2, 1});
// test no phased variants #1
final Set<Haplotype> haplotypes2 = new HashSet<>();
haplotypes2.add(pos2);
haplotypeMap.put(vc2, haplotypes2);
tests.add(new Object[]{Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0});
// test all phased variants
final Set<Haplotype> haplotypes2AllPhase = new HashSet<>();
haplotypes2AllPhase.add(pos234);
final Set<Haplotype> haplotypes3AllPhase = new HashSet<>();
haplotypes3AllPhase.add(pos234);
final Set<Haplotype> haplotypes4AllPhase = new HashSet<>();
haplotypes4AllPhase.add(pos234);
final Map<VariantContext, Set<Haplotype>> haplotypeMapAllPhase = new HashMap<>();
haplotypeMapAllPhase.put(vc2, haplotypes2AllPhase);
haplotypeMapAllPhase.put(vc3, haplotypes3AllPhase);
haplotypeMapAllPhase.put(vc4, haplotypes4AllPhase);
tests.add(new Object[]{calls, haplotypeMapAllPhase, 3, 1});
// test no phased variants #2
final Set<Haplotype> haplotypes3 = new HashSet<>();
haplotypes3.add(pos3);
haplotypeMap.put(vc3, haplotypes3);
tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
// test opposite phase
tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1});
// test no phased variants #3
final Set<Haplotype> haplotypes4 = new HashSet<>();
haplotypes4.add(pos4);
haplotypeMap.put(vc4, haplotypes4);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
// test mixture
final Set<Haplotype> haplotypes24 = new HashSet<>();
haplotypes24.add(pos24);
haplotypeMap.put(vc2, haplotypes24);
haplotypeMap.put(vc4, haplotypes24);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1});
// test 2 with opposite phase
final Set<Haplotype> haplotypes1 = new HashSet<>();
haplotypes1.add(pos1);
haplotypeMap.put(vc1, haplotypes1);
haplotypeMap.remove(vc3);
tests.add(new Object[]{Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2});
// test homs around a het
final Set<Haplotype> haplotypes2hom = new HashSet<>();
haplotypes2hom.add(pos24);
haplotypes2hom.add(pos234);
final Set<Haplotype> haplotypes4hom = new HashSet<>();
haplotypes4hom.add(pos24);
haplotypes4hom.add(pos234);
final Set<Haplotype> haplotypes3het = new HashSet<>();
haplotypes3het.add(pos234);
haplotypeMap.put(vc2, haplotypes2hom);
haplotypeMap.put(vc3, haplotypes3het);
haplotypeMap.put(vc4, haplotypes4hom);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 1, 0});
// test hets around a hom
final Set<Haplotype> haplotypes2het = new HashSet<>();
haplotypes2het.add(pos234);
final Set<Haplotype> haplotypes4het = new HashSet<>();
haplotypes4het.add(pos234);
final Set<Haplotype> haplotypes3hom = new HashSet<>();
haplotypes3hom.add(pos3);
haplotypes3hom.add(pos234);
haplotypeMap.put(vc2, haplotypes2het);
haplotypeMap.put(vc3, haplotypes3hom);
haplotypeMap.put(vc4, haplotypes4het);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0});
return tests.toArray(new Object[][]{});
}
@ -485,12 +515,25 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseSetMappingProvider")
public void testConstructPhaseSetMapping(final List<VariantContext> calls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final int totalHaplotypes,
final int expectedMapSize,
final int expectedNumGroups) {
final Map<VariantContext, Integer> actualPhaseSetMapping = new HashMap<>();
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, actualPhaseSetMapping);
final int expectedNumGroups,
final int expectedNum01,
final int expectedNum10) {
final Map<VariantContext, Pair<Integer, String>> actualPhaseSetMapping = new HashMap<>();
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, totalHaplotypes, actualPhaseSetMapping);
Assert.assertEquals(actualNumGroups, expectedNumGroups);
Assert.assertEquals(actualPhaseSetMapping.size(), expectedMapSize);
int num01 = 0, num10 = 0;
for ( final Pair<Integer, String> phase : actualPhaseSetMapping.values() ) {
if ( phase.second.equals("0|1") )
num01++;
else if ( phase.second.equals("1|0") )
num10++;
}
Assert.assertEquals(num01, expectedNum01);
Assert.assertEquals(num10, expectedNum10);
}
@DataProvider(name = "ConstructPhaseGroupsProvider")
@ -509,27 +552,27 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final List<VariantContext> calls = Arrays.asList(vc1, vc2, vc3);
// test no phased variants, empty map
final Map<VariantContext, Integer> nonePhased1 = new HashMap<>();
final Map<VariantContext, Pair<Integer, String>> nonePhased1 = new HashMap<>();
tests.add(new Object[]{calls, nonePhased1, 0, 0, 0});
// test no phased variants, full map, exception expected
final Map<VariantContext, Integer> nonePhased2 = new HashMap<>();
nonePhased2.put(vc1, 0);
nonePhased2.put(vc2, 1);
nonePhased2.put(vc3, 2);
final Map<VariantContext, Pair<Integer, String>> nonePhased2 = new HashMap<>();
nonePhased2.put(vc1, new Pair<>(0, "0/1"));
nonePhased2.put(vc2, new Pair<>(1, "0/1"));
nonePhased2.put(vc3, new Pair<>(2, "0/1"));
tests.add(new Object[]{calls, nonePhased2, 3, -1, -1});
// test 2 phased variants
final Map<VariantContext, Integer> twoPhased = new HashMap<>();
twoPhased.put(vc1, 0);
twoPhased.put(vc2, 0);
final Map<VariantContext, Pair<Integer, String>> twoPhased = new HashMap<>();
twoPhased.put(vc1, new Pair<>(0, "0/1"));
twoPhased.put(vc2, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, twoPhased, 1, 1, 2});
// test all phased variants
final Map<VariantContext, Integer> allPhased = new HashMap<>();
allPhased.put(vc1, 0);
allPhased.put(vc2, 0);
allPhased.put(vc3, 0);
final Map<VariantContext, Pair<Integer, String>> allPhased = new HashMap<>();
allPhased.put(vc1, new Pair<>(0, "0/1"));
allPhased.put(vc2, new Pair<>(0, "0/1"));
allPhased.put(vc3, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, allPhased, 1, 1, 3});
return tests.toArray(new Object[][]{});
@ -537,7 +580,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseGroupsProvider")
public void testConstructPhaseGroups(final List<VariantContext> calls,
final Map<VariantContext, Integer> phaseMap,
final Map<VariantContext, Pair<Integer, String>> phaseMap,
final int endIndex,
final int expectedNumGroups,
final int expectedGroupSize) {
@ -553,8 +596,8 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
int counter = 0;
for ( final VariantContext call : actualPhasedCalls ) {
for ( final Genotype g : call.getGenotypes() ) {
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY) ) {
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY).toString());
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) {
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString());
counter++;
}
}