Updated the physical phasing in the Haplotype Caller to address requests from ATGU.

1. It is now turned on by default
2. It now phases homozygous variants
3. Most importantly, it also phases variants that are always on opposite haplotypes

Changed the INFO keys to be PID and PGT, as described in the header.
This commit is contained in:
Eric Banks 2014-08-15 17:47:32 -04:00
parent 7e0c326e1c
commit d3f06024f8
4 changed files with 173 additions and 92 deletions

View File

@ -497,10 +497,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
protected boolean mergeVariantsViaLD = false;
@Advanced
@Argument(fullName="tryPhysicalPhasing", shortName="tryPhysicalPhasing", doc="If specified, we will add physical (read-based) phasing information", required = false)
protected boolean tryPhysicalPhasing = false;
@Argument(fullName="doNotRunPhysicalPhasing", shortName="doNotRunPhysicalPhasing", doc="If specified, we will not try to add physical (read-based) phasing information", required = false)
protected boolean doNotRunPhysicalPhasing = false;
public static final String HAPLOTYPE_CALLER_PHASING_KEY = "HCP";
public static final String HAPLOTYPE_CALLER_PHASING_ID_KEY = "PID";
public static final String HAPLOTYPE_CALLER_PHASING_GT_KEY = "PGT";
// -----------------------------------------------------------------------------------------------
// arguments for debugging / developing the haplotype caller
@ -634,12 +635,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if ( emitReferenceConfidence() ) {
if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time");
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and GENOTYPE_GIVEN_ALLELES at the same time");
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
SCAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0;
// also, we don't need to output several of the annotations
annotationsToExclude.add("ChromosomeCounts");
annotationsToExclude.add("FisherStrand");
@ -651,6 +651,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if (!SCAC.annotateAllSitesWithPLs)
logger.info("All sites annotated with PLs forced to true for reference-model confidence output");
SCAC.annotateAllSitesWithPLs = true;
} else if ( ! doNotRunPhysicalPhasing ) {
doNotRunPhysicalPhasing = true;
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
@ -678,7 +681,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, tryPhysicalPhasing);
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@ -699,8 +702,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_PL_KEY);
if ( tryPhysicalPhasing )
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Physical phasing information, each unique ID within a given sample (but not across samples) connects alternate alleles as occurring on the same haplotype"));
if ( ! doNotRunPhysicalPhasing ) {
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_ID_KEY, 1, VCFHeaderLineType.String, "Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"));
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used

View File

@ -57,6 +57,7 @@ import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -77,16 +78,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
private final boolean tryPhysicalPhasing;
private final boolean doPhysicalPhasing;
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean tryPhysicalPhasing) {
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean doPhysicalPhasing) {
super(toolkit,configuration);
this.tryPhysicalPhasing = tryPhysicalPhasing;
this.doPhysicalPhasing = doPhysicalPhasing;
}
/**
@ -97,7 +98,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
tryPhysicalPhasing = false;
doPhysicalPhasing = true;
}
@ -254,7 +255,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
}
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper,genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC),ALLELE_EXTENSION));
ReadLikelihoods<Allele> readAlleleLikelihoods = readLikelihoods.marginalize(alleleMapper, genomeLocParser.createPaddedGenomeLoc(genomeLocParser.createGenomeLoc(mergedVC), ALLELE_EXTENSION));
if (configuration.isSampleContaminationPresent())
readAlleleLikelihoods.contaminationDownsampling(configuration.getSampleContamination());
@ -286,7 +287,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
}
}
final List<VariantContext> phasedCalls = tryPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
final List<VariantContext> phasedCalls = doPhysicalPhasing ? phaseCalls(returnCalls, calledHaplotypes) : returnCalls;
return new CalledHaplotypes(phasedCalls, calledHaplotypes);
}
@ -303,8 +304,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final Map<VariantContext, Set<Haplotype>> haplotypeMap = constructHaplotypeMapping(calls, calledHaplotypes);
// construct a mapping from call to phase set ID
final Map<VariantContext, Integer> phaseSetMapping = new HashMap<>();
final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, phaseSetMapping);
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping = new HashMap<>();
final int uniqueCounterEndValue = constructPhaseSetMapping(calls, haplotypeMap, calledHaplotypes.size() - 1, phaseSetMapping);
// we want to establish (potential) *groups* of phased variants, so we need to be smart when looking at pairwise phasing partners
return constructPhaseGroups(calls, phaseSetMapping, uniqueCounterEndValue);
@ -349,17 +350,19 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*
* @param originalCalls the original unphased calls
* @param haplotypeMap mapping from alternate allele to the set of haplotypes that contain that allele
* @param totalAvailableHaplotypes the total number of possible haplotypes used in calling
* @param phaseSetMapping the map to populate in this method
* @return the next incremental unique index
*/
protected static int constructPhaseSetMapping(final List<VariantContext> originalCalls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final Map<VariantContext, Integer> phaseSetMapping) {
final int totalAvailableHaplotypes,
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping) {
final int numCalls = originalCalls.size();
int uniqueCounter = 0;
// use the haplotype mapping to connect variants that are always present on the same haplotypes
// use the haplotype mapping to connect variants that are always/never present on the same haplotypes
for ( int i = 0; i < numCalls - 1; i++ ) {
final VariantContext call = originalCalls.get(i);
final Set<Haplotype> haplotypesWithCall = haplotypeMap.get(call);
@ -369,18 +372,46 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
for ( int j = i+1; j < numCalls; j++ ) {
final VariantContext comp = originalCalls.get(j);
final Set<Haplotype> haplotypesWithComp = haplotypeMap.get(comp);
if ( haplotypesWithComp.isEmpty() )
continue;
// if the variants are in phase, record that fact
if ( haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp) ) {
// if it's part of an existing group, use that group's unique ID
if ( phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(comp, phaseSetMapping.get(call));
// otherwise, create a new group
} else {
phaseSetMapping.put(call, uniqueCounter);
phaseSetMapping.put(comp, uniqueCounter);
// if the variants are together on all haplotypes, record that fact.
// another possibility is that one of the variants is on all possible haplotypes (i.e. it is homozygous).
final boolean callIsOnAllHaps = haplotypesWithCall.size() == totalAvailableHaplotypes;
final boolean compIsOnAllHaps = haplotypesWithComp.size() == totalAvailableHaplotypes;
if ( (haplotypesWithCall.size() == haplotypesWithComp.size() && haplotypesWithCall.containsAll(haplotypesWithComp)) || callIsOnAllHaps || compIsOnAllHaps ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(call, new Pair<>(uniqueCounter, callIsOnAllHaps ? "1|1" : "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, compIsOnAllHaps ? "1|1" : "0|1"));
uniqueCounter++;
}
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ) {
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, compIsOnAllHaps ? "1|1" : callPhase.second));
}
}
// if the variants are apart on *all* haplotypes, record that fact
else if ( haplotypesWithCall.size() + haplotypesWithComp.size() == totalAvailableHaplotypes ) {
final Set<Haplotype> intersection = new HashSet<>();
intersection.addAll(haplotypesWithCall);
intersection.retainAll(haplotypesWithComp);
if ( intersection.isEmpty() ) {
// create a new group if these are the first entries
if ( ! phaseSetMapping.containsKey(call) ) {
phaseSetMapping.put(call, new Pair<>(uniqueCounter, "0|1"));
phaseSetMapping.put(comp, new Pair<>(uniqueCounter, "1|0"));
uniqueCounter++;
}
// otherwise it's part of an existing group so use that group's unique ID
else if ( ! phaseSetMapping.containsKey(comp) ){
final Pair<Integer, String> callPhase = phaseSetMapping.get(call);
phaseSetMapping.put(comp, new Pair<>(callPhase.first, callPhase.second.equals("0|1") ? "1|0" : "0|1"));
}
}
}
}
}
@ -397,7 +428,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
* @return a non-null list which represents the possibly phased version of the calls
*/
protected static List<VariantContext> constructPhaseGroups(final List<VariantContext> originalCalls,
final Map<VariantContext, Integer> phaseSetMapping,
final Map<VariantContext, Pair<Integer, String>> phaseSetMapping,
final int indexTo) {
final List<VariantContext> phasedCalls = new ArrayList<>(originalCalls);
@ -407,7 +438,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final List<Integer> indexes = new ArrayList<>();
for ( int index = 0; index < originalCalls.size(); index++ ) {
final VariantContext call = originalCalls.get(index);
if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call) == count )
if ( phaseSetMapping.containsKey(call) && phaseSetMapping.get(call).first == count )
indexes.add(index);
}
if ( indexes.size() < 2 )
@ -418,7 +449,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
// update the VCs
for ( final int index : indexes ) {
final VariantContext phasedCall = phaseVC(originalCalls.get(index), uniqueID);
final VariantContext originalCall = originalCalls.get(index);
final VariantContext phasedCall = phaseVC(originalCall, uniqueID, phaseSetMapping.get(originalCall).second);
phasedCalls.set(index, phasedCall);
}
}
@ -452,12 +484,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
*
* @param vc the variant context
* @param ID the ID to use
* @param phaseGT the phase GT string to use
* @return phased non-null variant context
*/
private static VariantContext phaseVC(final VariantContext vc, final String ID) {
private static VariantContext phaseVC(final VariantContext vc, final String ID, final String phaseGT) {
final List<Genotype> phasedGenotypes = new ArrayList<>();
for ( final Genotype g : vc.getGenotypes() )
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY, ID).make());
phasedGenotypes.add(new GenotypeBuilder(g).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY, ID).attribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_GT_KEY, phaseGT).make());
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
}

View File

@ -68,11 +68,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
// this functionality can be adapted to provide input data for whatever you might want in your data
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "9db87ae56df22456f3c08024384f3e5e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "4a006f9e39cacc3ae84384ad56ca22a2"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fe014592c9fa8d442508a96eb09e0c93"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "90dc8950e28e042097817d785022482e"});
tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a64e2d8e60a1be9cb0b4306c85e96393"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "d5c07fa3edca496a84fd17cecad06230"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "330685c734e277d70a44637de85ad54d"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "b01a2c222a3f00a675f5534c3b449919"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "66019a0914f905522da6bd3b557a57d1"});
tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "de763f6c9a5aec4586a2671941e4c96d"});
return tests.toArray(new Object[][]{});
}
@ -137,7 +137,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testWrongGVCFNonVariantRecordOrderBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, WRONG_GVCF_RECORD_ORDER_BUGFIX_BAM, WRONG_GVCF_RECORD_ORDER_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("d595dae0e3f993047c22c6e520e190aa"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e6a4e571abb59b925d59a38d244f0abe"));
spec.disableShadowBCF();
executeTest("testMissingGVCFIndexingStrategyException", spec);
}
@ -149,7 +149,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
public void testNoCallGVCFMissingPLsBugFix() {
final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
b37KGReference, NOCALL_GVCF_BUGFIX_BAM, NOCALL_GVCF_BUGFIX_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("2a91768430a84dbae8d40242dee8c9b8"));
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("187e43b1034478edced1eb5c664bac34"));
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}

View File

@ -56,6 +56,7 @@ import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.gatk.utils.haplotype.EventMap;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -416,11 +417,15 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final Allele altC = Allele.create("C", false);
final Allele altT = Allele.create("T", false);
final VariantContext vc1 = new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc2 = new VariantContextBuilder().chr("20").start(2).stop(2).alleles(Arrays.asList(ref, altC)).make();
final VariantContext vc3 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altT)).make();
final VariantContext vc4 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altC)).make();
final List<VariantContext> calls = Arrays.asList(vc2, vc3, vc4);
final Haplotype pos1 = new Haplotype("CAAAA".getBytes());
pos1.setEventMap(new EventMap(Arrays.asList(vc1)));
pos1.getEventMap().put(1, vc1);
final Haplotype pos2 = new Haplotype("ACAAA".getBytes());
pos2.setEventMap(new EventMap(Arrays.asList(vc2)));
pos2.getEventMap().put(2, vc2);
@ -440,44 +445,69 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
pos234.getEventMap().put(3, vc3);
pos234.getEventMap().put(4, vc4);
// test no phased variants
final Set<Haplotype> haplotypes2NoPhase = new HashSet<>();
haplotypes2NoPhase.add(pos2);
haplotypes2NoPhase.add(pos24);
final Set<Haplotype> haplotypes3NoPhase = new HashSet<>();
haplotypes3NoPhase.add(pos3);
final Set<Haplotype> haplotypes4NoPhase = new HashSet<>();
haplotypes4NoPhase.add(pos4);
haplotypes4NoPhase.add(pos24);
final Map<VariantContext, Set<Haplotype>> haplotypeMapNoPhase = new HashMap<>();
haplotypeMapNoPhase.put(vc2, haplotypes2NoPhase);
haplotypeMapNoPhase.put(vc3, haplotypes3NoPhase);
haplotypeMapNoPhase.put(vc4, haplotypes4NoPhase);
tests.add(new Object[]{calls, haplotypeMapNoPhase, 0, 0});
final Map<VariantContext, Set<Haplotype>> haplotypeMap = new HashMap<>();
// test 2 phased variants
final Set<Haplotype> haplotypes2SomePhase = new HashSet<>();
haplotypes2SomePhase.add(pos24);
final Set<Haplotype> haplotypes4SomePhase = new HashSet<>();
haplotypes4SomePhase.add(pos24);
final Map<VariantContext, Set<Haplotype>> haplotypeMapSomePhase = new HashMap<>();
haplotypeMapSomePhase.put(vc2, haplotypes2SomePhase);
haplotypeMapSomePhase.put(vc3, haplotypes3NoPhase);
haplotypeMapSomePhase.put(vc4, haplotypes4SomePhase);
tests.add(new Object[]{calls, haplotypeMapSomePhase, 2, 1});
// test no phased variants #1
final Set<Haplotype> haplotypes2 = new HashSet<>();
haplotypes2.add(pos2);
haplotypeMap.put(vc2, haplotypes2);
tests.add(new Object[]{Arrays.asList(vc2), new HashMap<>(haplotypeMap), 2, 0, 0, 0, 0});
// test all phased variants
final Set<Haplotype> haplotypes2AllPhase = new HashSet<>();
haplotypes2AllPhase.add(pos234);
final Set<Haplotype> haplotypes3AllPhase = new HashSet<>();
haplotypes3AllPhase.add(pos234);
final Set<Haplotype> haplotypes4AllPhase = new HashSet<>();
haplotypes4AllPhase.add(pos234);
final Map<VariantContext, Set<Haplotype>> haplotypeMapAllPhase = new HashMap<>();
haplotypeMapAllPhase.put(vc2, haplotypes2AllPhase);
haplotypeMapAllPhase.put(vc3, haplotypes3AllPhase);
haplotypeMapAllPhase.put(vc4, haplotypes4AllPhase);
tests.add(new Object[]{calls, haplotypeMapAllPhase, 3, 1});
// test no phased variants #2
final Set<Haplotype> haplotypes3 = new HashSet<>();
haplotypes3.add(pos3);
haplotypeMap.put(vc3, haplotypes3);
tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
// test opposite phase
tests.add(new Object[]{Arrays.asList(vc2, vc3), new HashMap<>(haplotypeMap), 2, 2, 1, 1, 1});
// test no phased variants #3
final Set<Haplotype> haplotypes4 = new HashSet<>();
haplotypes4.add(pos4);
haplotypeMap.put(vc4, haplotypes4);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 3, 0, 0, 0, 0});
// test mixture
final Set<Haplotype> haplotypes24 = new HashSet<>();
haplotypes24.add(pos24);
haplotypeMap.put(vc2, haplotypes24);
haplotypeMap.put(vc4, haplotypes24);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 1});
// test 2 with opposite phase
final Set<Haplotype> haplotypes1 = new HashSet<>();
haplotypes1.add(pos1);
haplotypeMap.put(vc1, haplotypes1);
haplotypeMap.remove(vc3);
tests.add(new Object[]{Arrays.asList(vc1, vc2, vc4), new HashMap<>(haplotypeMap), 2, 3, 1, 1, 2});
// test homs around a het
final Set<Haplotype> haplotypes2hom = new HashSet<>();
haplotypes2hom.add(pos24);
haplotypes2hom.add(pos234);
final Set<Haplotype> haplotypes4hom = new HashSet<>();
haplotypes4hom.add(pos24);
haplotypes4hom.add(pos234);
final Set<Haplotype> haplotypes3het = new HashSet<>();
haplotypes3het.add(pos234);
haplotypeMap.put(vc2, haplotypes2hom);
haplotypeMap.put(vc3, haplotypes3het);
haplotypeMap.put(vc4, haplotypes4hom);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 1, 0});
// test hets around a hom
final Set<Haplotype> haplotypes2het = new HashSet<>();
haplotypes2het.add(pos234);
final Set<Haplotype> haplotypes4het = new HashSet<>();
haplotypes4het.add(pos234);
final Set<Haplotype> haplotypes3hom = new HashSet<>();
haplotypes3hom.add(pos3);
haplotypes3hom.add(pos234);
haplotypeMap.put(vc2, haplotypes2het);
haplotypeMap.put(vc3, haplotypes3hom);
haplotypeMap.put(vc4, haplotypes4het);
tests.add(new Object[]{calls, new HashMap<>(haplotypeMap), 2, 3, 1, 2, 0});
return tests.toArray(new Object[][]{});
}
@ -485,12 +515,25 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseSetMappingProvider")
public void testConstructPhaseSetMapping(final List<VariantContext> calls,
final Map<VariantContext, Set<Haplotype>> haplotypeMap,
final int totalHaplotypes,
final int expectedMapSize,
final int expectedNumGroups) {
final Map<VariantContext, Integer> actualPhaseSetMapping = new HashMap<>();
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, actualPhaseSetMapping);
final int expectedNumGroups,
final int expectedNum01,
final int expectedNum10) {
final Map<VariantContext, Pair<Integer, String>> actualPhaseSetMapping = new HashMap<>();
final int actualNumGroups = HaplotypeCallerGenotypingEngine.constructPhaseSetMapping(calls, haplotypeMap, totalHaplotypes, actualPhaseSetMapping);
Assert.assertEquals(actualNumGroups, expectedNumGroups);
Assert.assertEquals(actualPhaseSetMapping.size(), expectedMapSize);
int num01 = 0, num10 = 0;
for ( final Pair<Integer, String> phase : actualPhaseSetMapping.values() ) {
if ( phase.second.equals("0|1") )
num01++;
else if ( phase.second.equals("1|0") )
num10++;
}
Assert.assertEquals(num01, expectedNum01);
Assert.assertEquals(num10, expectedNum10);
}
@DataProvider(name = "ConstructPhaseGroupsProvider")
@ -509,27 +552,27 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
final List<VariantContext> calls = Arrays.asList(vc1, vc2, vc3);
// test no phased variants, empty map
final Map<VariantContext, Integer> nonePhased1 = new HashMap<>();
final Map<VariantContext, Pair<Integer, String>> nonePhased1 = new HashMap<>();
tests.add(new Object[]{calls, nonePhased1, 0, 0, 0});
// test no phased variants, full map, exception expected
final Map<VariantContext, Integer> nonePhased2 = new HashMap<>();
nonePhased2.put(vc1, 0);
nonePhased2.put(vc2, 1);
nonePhased2.put(vc3, 2);
final Map<VariantContext, Pair<Integer, String>> nonePhased2 = new HashMap<>();
nonePhased2.put(vc1, new Pair<>(0, "0/1"));
nonePhased2.put(vc2, new Pair<>(1, "0/1"));
nonePhased2.put(vc3, new Pair<>(2, "0/1"));
tests.add(new Object[]{calls, nonePhased2, 3, -1, -1});
// test 2 phased variants
final Map<VariantContext, Integer> twoPhased = new HashMap<>();
twoPhased.put(vc1, 0);
twoPhased.put(vc2, 0);
final Map<VariantContext, Pair<Integer, String>> twoPhased = new HashMap<>();
twoPhased.put(vc1, new Pair<>(0, "0/1"));
twoPhased.put(vc2, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, twoPhased, 1, 1, 2});
// test all phased variants
final Map<VariantContext, Integer> allPhased = new HashMap<>();
allPhased.put(vc1, 0);
allPhased.put(vc2, 0);
allPhased.put(vc3, 0);
final Map<VariantContext, Pair<Integer, String>> allPhased = new HashMap<>();
allPhased.put(vc1, new Pair<>(0, "0/1"));
allPhased.put(vc2, new Pair<>(0, "0/1"));
allPhased.put(vc3, new Pair<>(0, "0/1"));
tests.add(new Object[]{calls, allPhased, 1, 1, 3});
return tests.toArray(new Object[][]{});
@ -537,7 +580,7 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
@Test(dataProvider="ConstructPhaseGroupsProvider")
public void testConstructPhaseGroups(final List<VariantContext> calls,
final Map<VariantContext, Integer> phaseMap,
final Map<VariantContext, Pair<Integer, String>> phaseMap,
final int endIndex,
final int expectedNumGroups,
final int expectedGroupSize) {
@ -553,8 +596,8 @@ public class HaplotypeCallerGenotypingEngineUnitTest extends BaseTest {
int counter = 0;
for ( final VariantContext call : actualPhasedCalls ) {
for ( final Genotype g : call.getGenotypes() ) {
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY) ) {
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_KEY).toString());
if ( g.hasExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY) ) {
uniqueGroups.add(g.getExtendedAttribute(HaplotypeCaller.HAPLOTYPE_CALLER_PHASING_ID_KEY).toString());
counter++;
}
}