From 9a5da41dd473b989661a86db1659c2599abf6ffc Mon Sep 17 00:00:00 2001
From: Laura Gauthier
Date: Mon, 21 Jul 2014 15:17:01 -0400
Subject: [PATCH] Add bells and whistles for Genotype Refinement Pipeline New
annotation for low= and high-confidence de novos (only annotates biallelics)
FamilyLikelihoodsUtils now add joint likelihood and joint posterior
annotations Restrict population priors based on discovered allele count to be
valid for 10 or more samples.
---
.../walkers/annotator/PossibleDeNovo.java | 43 +-
.../CalculateGenotypePosteriors.java | 32 +-
.../variantutils/FamilyLikelihoodsUtils.java | 628 ++++++------------
.../PosteriorLikelihoodsUtils.java | 10 +-
...lateGenotypePosteriorsIntegrationTest.java | 10 +-
.../PosteriorLikelihoodsUtilsUnitTest.java | 69 +-
6 files changed, 314 insertions(+), 478 deletions(-)
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/PossibleDeNovo.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/PossibleDeNovo.java
index d89e171dd..24b632e30 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/PossibleDeNovo.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/PossibleDeNovo.java
@@ -94,7 +94,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
private MendelianViolation mendelianViolation = null;
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
- private final int GQ_threshold = 20;
+ private final int hi_GQ_threshold = 20;
+ private final int lo_GQ_threshold = 10;
+ private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
+ private final int flatNumberOfSamplesCutoff = 4;
private Set trios;
public Map annotate(final RefMetaDataTracker tracker,
@@ -109,7 +112,7 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
}
else {
- throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
+ throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
}
}
@@ -119,38 +122,38 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
final List highConfDeNovoChildren = new ArrayList();
final List lowConfDeNovoChildren = new ArrayList();
for ( final Trio trio : trios ) {
- if ( contextHasTrioLikelihoods(vc,trio) ) {
- if (mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc)) {
- if (mendelianViolation.getParentsRefRefChildHet() > 0) {
- if (vc.getGenotype(trio.getChildID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getMaternalID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getPaternalID()).getGQ() > GQ_threshold) {
+ if (vc.isBiallelic() && contextHasTrioLikelihoods(vc,trio) && mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc) )
+ {
+ if (mendelianViolation.getParentsRefRefChildHet() > 0) {
+ if ((vc.getGenotype(trio.getChildID()).getGQ() >= hi_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) >= hi_GQ_threshold && (vc.getGenotype(trio.getPaternalID()).getGQ() >= hi_GQ_threshold))
+ {
highConfDeNovoChildren.add(trio.getChildID());
- isHighConfDeNovo = true;
+ isHighConfDeNovo = true;
}
- else {
+ else if ((vc.getGenotype(trio.getChildID()).getGQ() >= lo_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) > 0 && (vc.getGenotype(trio.getPaternalID()).getGQ() > 0))
+ {
lowConfDeNovoChildren.add(trio.getChildID());
isLowConfDeNovo = true;
}
- }
- }
+ }
}
}
- if ( isHighConfDeNovo || isLowConfDeNovo ) {
- for(final String child : highConfDeNovoChildren) {
- attributeMap.put(HI_CONF_DENOVO_KEY,child);
- }
- for(final String child : lowConfDeNovoChildren) {
- attributeMap.put(LO_CONF_DENOVO_KEY,child);
- }
- }
+ final double percentNumberOfSamplesCutoff = vc.getNSamples()*percentOfSamplesCutoff;
+ final double AFcutoff = Math.max(flatNumberOfSamplesCutoff,percentNumberOfSamplesCutoff);
+ final int deNovoAlleleCount = vc.getCalledChrCount(vc.getAlternateAllele(0)); //we assume we're biallelic above so use the first alt
+ if ( isHighConfDeNovo && deNovoAlleleCount < AFcutoff )
+ attributeMap.put(HI_CONF_DENOVO_KEY,highConfDeNovoChildren);
+ if ( isLowConfDeNovo && deNovoAlleleCount < AFcutoff )
+ attributeMap.put(LO_CONF_DENOVO_KEY,lowConfDeNovoChildren);
return attributeMap;
}
// return the descriptions used for the VCF INFO meta field
public List getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ > "+GQ_threshold+"): sample name"),
- new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation: sample name")); }
+ public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
+ new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
index 9d2b5b71a..cd102fdf6 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriors.java
@@ -113,15 +113,19 @@ import java.util.*;
* 1) Genotype posteriors added to the genotype fields ("PP")
* 2) Genotypes and GQ assigned according to these posteriors
* 3) Per-site genotype priors added to the INFO field ("PG")
- * 4) (Optional) Per-site, per-trio transmission probabilities given as Phred-scaled probability of all genotypes in the trio being correct, added to the genotype fields ("TP")
+ * 4) (Optional) Per-site, per-trio joint likelihoods (JL) and joint posteriors (JL) given as Phred-scaled probability
+ * of all genotypes in the trio being correct based on the PLs for JL and the PPs for JP. These annotations are added to
+ * the genotype fields.
*
*
* Notes
*
- * Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
- * matching site in the priors VCF unless the --calculateMissingPriors flag is used).
- * If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
- * sites in the input callset.
+ * Using the default behavior, priors will only be applied for each variants (provided each variant has at least 10
+ * called samples.) SNP sites in the input callset that have a SNP at the matching site in the supporting VCF will have
+ * priors applied based on the AC from the supporting samples and the input callset (unless the --ignoreInputSamples
+ * flag is used). If the site is not called in the supporting VCF, priors will be applied using the discovered AC from
+ * the input samples (unless the --discoveredACpriorsOff flag is used). Flat priors are applied for any non-SNP sites in
+ * the input callset.
*
*
* Examples
@@ -234,8 +238,9 @@ public class CalculateGenotypePosteriors extends RodWalker {
/**
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
*/
- @Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
- public boolean calcMissing = false;
+ @Argument(fullName="discoveredACpriorsOff",shortName="useACoff",doc="Do not use discovered allele count in the input callset " +
+ "for variants that do not appear in the external callset. ", required=false)
+ public boolean useACoff = false;
/**
* Skip application of population-based priors
@@ -252,7 +257,8 @@ public class CalculateGenotypePosteriors extends RodWalker {
@Output(doc="File to which variants should be written")
protected VariantContextWriter vcfWriter = null;
- private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
+ private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
+ private final String JOINT_POSTERIOR_TAG_NAME = "JP";
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
private FamilyLikelihoodsUtils famUtils = new FamilyLikelihoodsUtils();
@@ -298,8 +304,10 @@ public class CalculateGenotypePosteriors extends RodWalker {
final Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
headerLines.add(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
headerLines.add(new VCFInfoHeaderLine("PG", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
- if (!skipFamilyPriors)
- headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
+ if (!skipFamilyPriors) {
+ headerLines.add(new VCFFormatHeaderLine(JOINT_LIKELIHOOD_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint likelihood of the genotype combination (before applying family priors)"));
+ headerLines.add(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
+ }
headerLines.add(new VCFHeaderLine("source", "CalculateGenotypePosteriors"));
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
@@ -331,12 +339,14 @@ public class CalculateGenotypePosteriors extends RodWalker {
GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
builder.genotypes(gc);
}
+ VariantContextUtils.calculateChromosomeCounts(builder, false);
vc_familyPriors = builder.make();
if (!skipPopulationPriors)
- vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing);
+ vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
else {
final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
+ VariantContextUtils.calculateChromosomeCounts(builder, false);
vc_bothPriors = builder2.make();
}
vcfWriter.add(vc_bothPriors);
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/FamilyLikelihoodsUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/FamilyLikelihoodsUtils.java
index 9db9a443b..b4bc9d8dc 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/FamilyLikelihoodsUtils.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/FamilyLikelihoodsUtils.java
@@ -47,7 +47,6 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.apache.log4j.Logger;
-import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.samples.Sample;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
@@ -57,6 +56,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.utils.GeneralUtils;
import htsjdk.variant.variantcontext.*;
+import java.util.Arrays;
import java.util.*;
/**
@@ -70,239 +70,180 @@ public class FamilyLikelihoodsUtils {
final private EnumMap>> mvCountMatrix =
new EnumMap>>(GenotypeType.class);
- //Matrix of allele transmission
- final private EnumMap>> transmissionMatrix =
- new EnumMap>>(GenotypeType.class);
+ final int NUM_CALLED_GENOTYPETYPES = 3; //HOM_REF, HET, and HOM_VAR
- final double[] configurationLikelihoodsMatrix = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
- 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}; //27 is # of trio genotype combos, initialize to zero
+ double[] configurationLikelihoodsMatrix = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
ArrayList trios = new ArrayList();
- //Random number generator
- final private Random rand = new GenomeAnalysisEngine().getRandomGenerator();
-
- private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
+ private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
+ private final String JOINT_POSTERIOR_TAG_NAME = "JP";
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
- public final double NO_TRANSMISSION_PROB = -1.0;
+ public final double NO_JOINT_VALUE = -1.0;
private double deNovoPrior = 1e-8;
+ private final double ONE_THIRD = 0.333333333333333333;
+ private final double LOG10_OF_ONE_THIRD = -0.4771213;
+
private enum FamilyMember {
MOTHER,
FATHER,
CHILD
}
- //Stores a conceptual trio or parent/child pair genotype combination
- //This combination can then be "applied" to a given trio or pair using the getUpdatedGenotypes method.
- private class TrioGenotypes {
+ /**
+ * Applies the trio genotype combination to the given trio.
+ * @param motherGenotype: Original genotype of the mother
+ * @param fatherGenotype: Original genotype of the father
+ * @param childGenotype: Original genotype of the child
+ * @param updatedGenotypes: An ArrayList to which the newly updated genotypes are added in the following order: Mother, Father, Child
+ */
+ public void getUpdatedGenotypes(final VariantContext vc, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final ArrayList updatedGenotypes){
+ //genotypes here can be no call
+ boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
+ boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
+ boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
- //Create 2 fake alleles
- //The actual bases will never be used but the Genotypes created using the alleles will be.
- private final Allele REF = Allele.create("A",true);
- private final Allele VAR = Allele.create("A",false);
- private final Allele NO_CALL = Allele.create(".",false);
- private final String DUMMY_NAME = "DummySample";
- private EnumMap>> MVcountMatrix;
+ //default to posteriors equal to likelihoods (flat priors) in case input genotypes are not called
+ double[] uninformativeLikelihoods = {ONE_THIRD, ONE_THIRD, ONE_THIRD};
- private final EnumMap familyGenotypes = new EnumMap(FamilyMember.class);
+ double[] motherLikelihoods = motherIsCalled? GeneralUtils.normalizeFromLog10(motherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
+ double[] fatherLikelihoods = fatherIsCalled? GeneralUtils.normalizeFromLog10(fatherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
+ double[] childLikelihoods = childIsCalled? GeneralUtils.normalizeFromLog10(childGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
- /* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
- */
- public TrioGenotypes(GenotypeType mother, GenotypeType father, GenotypeType child){
- familyGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother));
- familyGenotypes.put(FamilyMember.FATHER, makeGenotype(father));
- familyGenotypes.put(FamilyMember.CHILD, makeGenotype(child));
- }
-
- private ArrayList getAlleles(GenotypeType genotype){
- final ArrayList alleles = new ArrayList(2);
- if(genotype == GenotypeType.HOM_REF){
- alleles.add(REF);
- alleles.add(REF);
- }
- else if(genotype == GenotypeType.HET){
- alleles.add(REF);
- alleles.add(VAR);
- }
- else if(genotype == GenotypeType.HOM_VAR){
- alleles.add(VAR);
- alleles.add(VAR);
- }
- else{
- return null;
- }
- return alleles;
- }
-
- public void setMVcountMatrix(EnumMap>> inputMat) {
- MVcountMatrix = inputMat;
- }
-
- private boolean hasCalledGT(GenotypeType genotype){
- return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
- }
-
- //TODO: this was stupid stuff for phasing -- let's take it out
- private Genotype makeGenotype(final GenotypeType type) {
- return makeGenotype(getAlleles(type));
- }
-
- private Genotype makeGenotype(final List alleles) {
- final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
- return gb.make();
- }
-
- /**
- * Applies the trio genotype combination to the given trio.
- * @param ref: Reference allele
- * @param alt: Alternate allele
- * @param motherGenotype: Genotype of the mother in this trio genotype combination
- * @param fatherGenotype: Genotype of the father in this trio genotype combination
- * @param childGenotype: Genotype of the child in this trio genotype combination
- * @param transmissionProb: Probability for this trio genotype combination to be correct (pass NO_TRANSMISSION_PROB if unavailable)
- * @param configurationLikelihoodsMatrix: (Non-normalized) likelihoods for each trio genotype combination, for use in generating new PLs
- * @param updatedGenotypes: An ArrayList to which the newly updated genotypes are added in the following order: Mother, Father, Child
- */
- public void getUpdatedGenotypes(final Allele ref, final Allele alt, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final double transmissionProb, double[] configurationLikelihoodsMatrix, final ArrayList updatedGenotypes){
- //default to flat priors in case input genotypes are not called
- double[] motherPosteriors = {1,1,1};
- double[] fatherPosteriors = {1,1,1};
- double[] childPosteriors = {1,1,1};
-
- //genotypes here can be no call
- boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
- boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
- boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
-
- if (fatherIsCalled && childIsCalled) {
- motherPosteriors = getPosteriors(FamilyMember.MOTHER);
- }
- updatedGenotypes.add(getUpdatedGenotype(ref, alt, motherGenotype, transmissionProb, motherPosteriors));
-
- if (motherIsCalled && childIsCalled) {
- fatherPosteriors = getPosteriors(FamilyMember.FATHER);
- }
- updatedGenotypes.add(getUpdatedGenotype(ref, alt, fatherGenotype, transmissionProb, fatherPosteriors));
-
- if (motherIsCalled && fatherIsCalled) {
- childPosteriors = getPosteriors(FamilyMember.CHILD);
- }
- updatedGenotypes.add(getUpdatedGenotype(ref, alt, childGenotype, transmissionProb, childPosteriors));
- }
-
- private Genotype getUpdatedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, double[] normalizedPosteriors){
-
- int phredScoreTransmission = -1;
- if(transmissionProb != NO_TRANSMISSION_PROB){
- double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
- phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
- }
- //Handle null, missing and unavailable genotypes
- //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
- //genotype so it is safe to return the original genotype in this case.
- //In addition, if the genotype configuration confidence is 0, then return the original genotypes.
- if(phredScoreTransmission ==0 || genotype == null || !hasCalledGT(genotype.getType()))
- return genotype;
-
- //Add the transmission probability
- final Map genotypeAttributes = new HashMap();
- genotypeAttributes.putAll(genotype.getExtendedAttributes());
- if(transmissionProb>NO_TRANSMISSION_PROB)
- genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
-
- final ArrayList usedAlleles = new ArrayList(2);
- usedAlleles.add(refAllele);
- usedAlleles.add(altAllele);
-
- final GenotypeBuilder builder = new GenotypeBuilder(genotype);
-
- final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
-
- //note that there will there be times when posteriors don't agree with genotype predicted by configuration likelihoods
- GATKVariantContextUtils.updateGenotypeAfterSubsetting(usedAlleles, builder,
- GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, usedAlleles);
-
-
-
- builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
- Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
- builder.attributes(genotypeAttributes);
- return builder.make();
- }
-
- //marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
- private double[] getPosteriors(FamilyMember recalcInd) {
- double marginalOverChangedHR, marginalOverChangedHET, marginalOverChangedHV;
- marginalOverChangedHR = marginalOverChangedHET = marginalOverChangedHV = 0;
- final double[] recalcPosteriors = new double[3];
-
- GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
-
- switch (recalcInd) {
- case MOTHER:
- for(final GenotypeType father : calledTypes) {
- for(final GenotypeType child : calledTypes) {
- GenotypeType mother;
- mother = GenotypeType.HOM_REF;
- marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- mother = GenotypeType.HET;
- marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- mother = GenotypeType.HOM_VAR;
- marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- }
- }
- break;
- case FATHER:
- for(final GenotypeType mother : calledTypes){
- for (final GenotypeType child : calledTypes){
- GenotypeType father;
- father = GenotypeType.HOM_REF;
- marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- father = GenotypeType.HET;
- marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- father = GenotypeType.HOM_VAR;
- marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- }
- }
- break;
- case CHILD:
- for(final GenotypeType mother : calledTypes){
- for (final GenotypeType father: calledTypes){
- GenotypeType child;
- child = GenotypeType.HOM_REF;
- marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- child = GenotypeType.HET;
- marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- child = GenotypeType.HOM_VAR;
- marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
- }
- }
- break;
- default:
- throw new UserException(String.format("%d does not indicate a valid trio individual -- use 0 for mother, 1 for father, 2 for child",recalcInd));
- }
- recalcPosteriors[0] = marginalOverChangedHR;
- recalcPosteriors[1] = marginalOverChangedHET;
- recalcPosteriors[2] = marginalOverChangedHV;
-
- final double[] normalizedPosteriors = MathUtils.normalizeFromRealSpace(recalcPosteriors);
-
- return normalizedPosteriors;
+ //these are also in log10 space
+ double[] motherLog10Posteriors = getPosteriors(FamilyMember.MOTHER);
+ double[] fatherLog10Posteriors = getPosteriors(FamilyMember.FATHER);
+ double[] childLog10Posteriors = getPosteriors(FamilyMember.CHILD);
+
+ double[] motherPosteriors = GeneralUtils.normalizeFromLog10(motherLog10Posteriors);
+ double[] fatherPosteriors = GeneralUtils.normalizeFromLog10(fatherLog10Posteriors);
+ double[] childPosteriors = GeneralUtils.normalizeFromLog10(childLog10Posteriors);
+
+
+ double jointPosteriorProbability = -1;
+ //jointTrioLikelihood is combined likelihoods (before prior) of best configuration after applying prior
+ double jointTrioLikelihood = -1;
+ if(childIsCalled && motherIsCalled && fatherIsCalled) {
+ jointTrioLikelihood = motherLikelihoods[MathUtils.maxElementIndex(motherPosteriors)]*fatherLikelihoods[MathUtils.maxElementIndex(fatherPosteriors)]*childLikelihoods[MathUtils.maxElementIndex(childPosteriors)];
+ jointPosteriorProbability = MathUtils.arrayMax(motherPosteriors)*MathUtils.arrayMax(fatherPosteriors)*MathUtils.arrayMax(childPosteriors);
}
+ updatedGenotypes.add(getUpdatedGenotype(vc, motherGenotype, jointTrioLikelihood, jointPosteriorProbability, motherLog10Posteriors));
+ updatedGenotypes.add(getUpdatedGenotype(vc, fatherGenotype, jointTrioLikelihood, jointPosteriorProbability, fatherLog10Posteriors));
+ updatedGenotypes.add(getUpdatedGenotype(vc, childGenotype, jointTrioLikelihood, jointPosteriorProbability, childLog10Posteriors));
}
- public void initialize(double DNprior, Set vcfSamples, Map> families){
+ private Genotype getUpdatedGenotype(final VariantContext vc, final Genotype genotype, final double jointLikelihood, final double jointPosteriorProb, final double[] log10Posteriors){
+ //Don't update null, missing or unavailable genotypes
+ if(genotype == null || !hasCalledGT(genotype.getType()))
+ return genotype;
+
+ int phredScaledJL = -1;
+ int phredScaledJP = -1;
+ if(jointLikelihood != NO_JOINT_VALUE){
+ double dphredScaledJL = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointLikelihood));
+ phredScaledJL = dphredScaledJL < Byte.MAX_VALUE ? (byte)dphredScaledJL : Byte.MAX_VALUE;
+ }
+ if(jointPosteriorProb != NO_JOINT_VALUE){
+ double dphredScaledJP = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointPosteriorProb));
+ phredScaledJP = dphredScaledJP < Byte.MAX_VALUE ? (byte)dphredScaledJP : Byte.MAX_VALUE;
+ }
+
+ //Add the joint trio calculations
+ final Map genotypeAttributes = new HashMap();
+ genotypeAttributes.putAll(genotype.getExtendedAttributes());
+ genotypeAttributes.put(JOINT_LIKELIHOOD_TAG_NAME, phredScaledJL);
+ genotypeAttributes.put(JOINT_POSTERIOR_TAG_NAME, phredScaledJP);
+
+ final GenotypeBuilder builder = new GenotypeBuilder(genotype);
+
+ //final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
+
+ //update genotype types based on posteriors
+ GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder,
+ GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
+
+ builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
+ Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
+ builder.attributes(genotypeAttributes);
+ return builder.make();
+ }
+
+ //marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
+ private double[] getPosteriors(final FamilyMember recalcInd) {
+ double[] marginalOverChangedHR = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
+ double[] marginalOverChangedHET = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
+ double[] marginalOverChangedHV = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
+ final double[] recalcPosteriors = new double[NUM_CALLED_GENOTYPETYPES];
+
+ final GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
+ int counter = 0;
+
+ switch (recalcInd) {
+ case MOTHER:
+ for(final GenotypeType father : calledTypes) {
+ for(final GenotypeType child : calledTypes) {
+ GenotypeType mother;
+ mother = GenotypeType.HOM_REF;
+ marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ mother = GenotypeType.HET;
+ marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ mother = GenotypeType.HOM_VAR;
+ marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ counter++;
+ }
+ }
+ break;
+ case FATHER:
+ for(final GenotypeType mother : calledTypes){
+ for (final GenotypeType child : calledTypes){
+ GenotypeType father;
+ father = GenotypeType.HOM_REF;
+ marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ father = GenotypeType.HET;
+ marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ father = GenotypeType.HOM_VAR;
+ marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ counter++;
+ }
+ }
+ break;
+ case CHILD:
+ for(final GenotypeType mother : calledTypes){
+ for (final GenotypeType father: calledTypes){
+ GenotypeType child;
+ child = GenotypeType.HOM_REF;
+ marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ child = GenotypeType.HET;
+ marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ child = GenotypeType.HOM_VAR;
+ marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
+ counter++;
+ }
+ }
+ break;
+ default:
+ throw new UserException(String.format("%d does not indicate a valid trio FamilyMember -- use 0 for mother, 1 for father, 2 for child",recalcInd));
+ }
+
+ recalcPosteriors[0] = MathUtils.log10sumLog10(marginalOverChangedHR,0);
+ recalcPosteriors[1] = MathUtils.log10sumLog10(marginalOverChangedHET,0);
+ recalcPosteriors[2] = MathUtils.log10sumLog10(marginalOverChangedHV,0);
+
+ return MathUtils.normalizeFromLog10(recalcPosteriors,true,true);
+ }
+
+ public void initialize(final double DNprior, final Set vcfSamples, final Map> families){
this.deNovoPrior = DNprior;
+ Arrays.fill(configurationLikelihoodsMatrix,0);
buildMatrices();
trios = setTrios(vcfSamples, families);
}
- public GenotypesContext calculatePosteriorGLs(VariantContext vc){
+ public GenotypesContext calculatePosteriorGLs(final VariantContext vc){
final GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes());
for (final Sample sample : trios) {
@@ -312,30 +253,28 @@ public class FamilyLikelihoodsUtils {
//Keep only trios and parent/child pairs
if(mother == null && father == null || child == null) {
- logger.warn("null genos in var "+vc.toStringDecodeGenotypes());
+ logger.warn("Null genotypes in variant: "+vc.toStringDecodeGenotypes());
continue;
}
final ArrayList trioGenotypes = new ArrayList(3);
- final int mvCount = updateFamilyGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child, trioGenotypes);
+ updateFamilyGenotypes(vc, mother, father, child, trioGenotypes);
- Genotype updatedMother = trioGenotypes.get(0);
- Genotype updatedFather = trioGenotypes.get(1);
- Genotype updatedChild = trioGenotypes.get(2);
-
- genotypesContext.replace(updatedChild);
- genotypesContext.replace(updatedFather);
- genotypesContext.replace(updatedMother);
+ //replace uses sample names to match genotypes, so order doesn't matter
+ if (trioGenotypes.size() > 0) {
+ genotypesContext.replace(trioGenotypes.get(0));
+ genotypesContext.replace(trioGenotypes.get(1));
+ genotypesContext.replace(trioGenotypes.get(2));
+ }
}
- return genotypesContext;
+ return genotypesContext;
}
/**
* Select trios and parent/child pairs only
*/
private ArrayList setTrios(Set vcfSamples, Map> families){
-
Set family;
ArrayList parents;
final ArrayList trios = new ArrayList();
@@ -365,18 +304,14 @@ public class FamilyLikelihoodsUtils {
return trios;
}
- //Create the transmission matrices
- //TODO: pass in the real genotypes so we have that info
+ //Create a lookup matrix to find the number of MVs for each family genotype combination
private void buildMatrices(){
for(final GenotypeType mother : GenotypeType.values()){
mvCountMatrix.put(mother,new EnumMap>(GenotypeType.class));
- transmissionMatrix.put(mother,new EnumMap>(GenotypeType.class));
for(final GenotypeType father : GenotypeType.values()){
mvCountMatrix.get(mother).put(father,new EnumMap(GenotypeType.class));
- transmissionMatrix.get(mother).put(father,new EnumMap(GenotypeType.class));
for(final GenotypeType child : GenotypeType.values()){
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
- transmissionMatrix.get(mother).get(father).put(child,new TrioGenotypes(mother,father,child));
}
}
}
@@ -442,179 +377,54 @@ public class FamilyLikelihoodsUtils {
/**
* Updates the genotypes of the given trio. If one of the parents is null, it is considered a parent/child pair.
- * @param ref: Reference allele
- * @param alt: Alternative allele
+ * @param vc: Input variant context
* @param mother: Mother's genotype from vc input
* @param father: Father's genotype from vc input
* @param child: Child's genotype from vc input
* @param finalGenotypes: An ArrayList containing the updated genotypes
- * @return
*/
- private int updateFamilyGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child, ArrayList finalGenotypes) {
+ private void updateFamilyGenotypes(VariantContext vc, Genotype mother, Genotype father, Genotype child, ArrayList finalGenotypes) {
- //Check whether it is a pair or trio
- //Always assign the first parent as the parent having genotype information in pairs
- //Always assign the mother as the first parent in trios
- int parentsCalled = 0;
- Map firstParentLikelihoods;
- Map secondParentLikelihoods;
- final ArrayList bestFirstParentGenotype = new ArrayList();
- final ArrayList bestSecondParentGenotype = new ArrayList();
- final ArrayList bestChildGenotype = new ArrayList();
- GenotypeType pairSecondParentGenotype = null;
- boolean parentsAreFlipped = false; //usually mother comes first, like for indexing of transmissionMatrix
- final int INVALID_INDEX = -1;
-
- //if only one parent is called, make uncalled parent the secondParent
- if(mother == null || !mother.isCalled()){
- firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
- secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
- bestFirstParentGenotype.add(getTypeSafeNull(father));
- bestSecondParentGenotype.add(getTypeSafeNull(mother));
- pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
- parentsAreFlipped = true;
- if(father != null && father.isCalled())
- parentsCalled = 1;
- }
- else{
- firstParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
- secondParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
- bestFirstParentGenotype.add(getTypeSafeNull(mother));
- bestSecondParentGenotype.add(getTypeSafeNull(father));
- if(father == null || !father.isCalled()){
- parentsCalled = 1;
- pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
- }else{
- parentsCalled = 2;
- }
- }
+ //If one of the parents is not called, fill in with uninformative likelihoods
+ Map motherLikelihoods = getLikelihoodsAsMapSafeNull(mother);
+ Map fatherLikelihoods = getLikelihoodsAsMapSafeNull(father);
Map childLikelihoods = getLikelihoodsAsMapSafeNull(child);
- bestChildGenotype.add(getTypeSafeNull(child));
- //Prior vars
- double bestConfigurationLikelihood = 0.0;
- double norm = 0.0;
- int configuration_index =0;
- final ArrayList bestMVCount = new ArrayList();
- bestMVCount.add(0);
+ //if the child isn't called or neither parent is called, there's no extra inheritance information in that trio so return
+ if (!hasCalledGT(child.getType()) || (!hasCalledGT(mother.getType()) && !hasCalledGT(father.getType())))
+ return;
- //Get the most likely combination
- //Only check for most likely combination if at least a parent and the child have genotypes
+ //Fill the configurationLikelihoodsMatrix for each genotype combination
int matInd;
- if(child.isCalled() && parentsCalled > 0){
- int mvCount;
- int cumulativeMVCount = 0;
- double configurationLikelihood = 0;
- for(final Map.Entry childGenotype :
- childLikelihoods.entrySet()){
- for(final Map.Entry firstParentGenotype :
- firstParentLikelihoods.entrySet()){
- for(final Map.Entry secondParentGenotype :
- secondParentLikelihoods.entrySet()){
- mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
- //For parent/child pairs, sum over the possible genotype configurations of the missing parent
- if(parentsCalled<2){
- cumulativeMVCount += mvCount;
- configurationLikelihood += mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
- }
- //Evaluate configurations of trios
- else{
- configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
- norm += configurationLikelihood;
- matInd = getLikelihoodIndex(firstParentGenotype.getKey(), secondParentGenotype.getKey(),childGenotype.getKey(),parentsAreFlipped);
- if (matInd > INVALID_INDEX) //still a slim chance of a MIXED GT
- configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
- //Keep this combination if
- //It has a better likelihood
- //Or it has the same likelihood but requires less changes from original genotypes
- if (configurationLikelihood > bestConfigurationLikelihood){
- bestConfigurationLikelihood = configurationLikelihood;
- bestMVCount.clear();
- bestMVCount.add(mvCount);
- bestFirstParentGenotype.clear();
- bestFirstParentGenotype.add(firstParentGenotype.getKey());
- bestSecondParentGenotype.clear();
- bestSecondParentGenotype.add(secondParentGenotype.getKey());
- bestChildGenotype.clear();
- bestChildGenotype.add(childGenotype.getKey());
- }
- else if(configurationLikelihood == bestConfigurationLikelihood) {
- bestFirstParentGenotype.add(firstParentGenotype.getKey());
- bestSecondParentGenotype.add(secondParentGenotype.getKey());
- bestChildGenotype.add(childGenotype.getKey());
- bestMVCount.add(mvCount);
- }
- }
- }
- //Evaluate configurations of parent/child pairs
- if(parentsCalled<2){
- norm += configurationLikelihood;
- matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_REF,childGenotype.getKey(), parentsAreFlipped);
- if (matInd > INVALID_INDEX)
- configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
- matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HET,childGenotype.getKey(),parentsAreFlipped);
- if (matInd > INVALID_INDEX)
- configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
- matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_VAR,childGenotype.getKey(),parentsAreFlipped);
- if (matInd > INVALID_INDEX)
- configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
-
- //Keep this combination if
- //It has a better likelihood
- //Or it has the same likelihood but requires less changes from original genotypes
- if (configurationLikelihood > bestConfigurationLikelihood){
- bestConfigurationLikelihood = configurationLikelihood;
- bestMVCount.clear();
- bestMVCount.add(cumulativeMVCount/3);
- bestChildGenotype.clear();
- bestFirstParentGenotype.clear();
- bestSecondParentGenotype.clear();
- bestChildGenotype.add(childGenotype.getKey());
- bestFirstParentGenotype.add(firstParentGenotype.getKey());
- bestSecondParentGenotype.add(pairSecondParentGenotype);
- }
- else if(configurationLikelihood == bestConfigurationLikelihood) {
- bestFirstParentGenotype.add(firstParentGenotype.getKey());
- bestSecondParentGenotype.add(pairSecondParentGenotype);
- bestChildGenotype.add(childGenotype.getKey());
- bestMVCount.add(cumulativeMVCount/3);
- }
- configurationLikelihood = 0;
- }
+ int mvCount;
+ double jointLikelihood;
+ double mvCoeff;
+ double configurationLikelihood;
+ for(final Map.Entry childGenotype :
+ childLikelihoods.entrySet()){
+ for(final Map.Entry motherGenotype :
+ motherLikelihoods.entrySet()){
+ for(final Map.Entry fatherGenotype :
+ fatherLikelihoods.entrySet()){
+ mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey());
+ jointLikelihood = motherGenotype.getValue()+fatherGenotype.getValue()+childGenotype.getValue();
+ mvCoeff = mvCount>0 ? Math.pow(deNovoPrior,mvCount) : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior);
+ configurationLikelihood = Math.log10(mvCoeff) + jointLikelihood;
+ matInd = getLikelihoodMatrixIndex(motherGenotype.getKey(), fatherGenotype.getKey(), childGenotype.getKey());
+ configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
}
}
-
- //normalize the best configuration probability
- bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
-
- //In case of multiple equally likely combinations, take a random one
- if(bestFirstParentGenotype.size()>1){
- configuration_index = rand.nextInt(bestFirstParentGenotype.size()-1);
- }
-
- }
- else{
- bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
}
- TrioGenotypes updatedTrioGenotypes;
- if(parentsCalled < 2 && mother == null || !mother.isCalled())
- updatedTrioGenotypes = transmissionMatrix.get(bestSecondParentGenotype.get(configuration_index)).get(bestFirstParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
- else
- updatedTrioGenotypes = transmissionMatrix.get(bestFirstParentGenotype.get(configuration_index)).get(bestSecondParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
-
- //Return the updated genotypes
- updatedTrioGenotypes.setMVcountMatrix(mvCountMatrix);
- updatedTrioGenotypes.getUpdatedGenotypes(ref, alt, mother, father, child, bestConfigurationLikelihood, configurationLikelihoodsMatrix, finalGenotypes);
- return bestMVCount.get(configuration_index);
-
+ getUpdatedGenotypes(vc, mother, father, child, finalGenotypes);
}
- //Get a Map of genotype likelihoods, normalized from log10-space.
- //In case of null, unavailable or no call, all likelihoods are 1/3.
+ //Get a Map of genotype (log10)likelihoods
private EnumMap getLikelihoodsAsMapSafeNull(Genotype genotype){
- if (genotype != null && genotype.isCalled() && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
- final EnumMap likelihoodsMap = new EnumMap(GenotypeType.class);
+ final EnumMap likelihoodsMap = new EnumMap(GenotypeType.class);
+ double[] likelihoods;
+
+ if (genotype != null && hasCalledGT(genotype.getType()) && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
Object GPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
//parse the GPs into a vector of probabilities
final String[] likelihoodsAsStringVector = ((String)GPfromVCF).split(",");
@@ -622,46 +432,35 @@ public class FamilyLikelihoodsUtils {
for ( int i = 0; i < likelihoodsAsStringVector.length; i++ ) {
likelihoodsAsVector[i] = Double.parseDouble(likelihoodsAsStringVector[i]) / -10.0;
}
- double[] likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector);
- likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
- likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
- likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
- return likelihoodsMap;
+ //keep in log10 space for large GQs
+ likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector, true, true);
}
- if(genotype == null || !genotype.isCalled() || genotype.getLikelihoods() == null){
- final EnumMap likelihoods = new EnumMap(GenotypeType.class);
- likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
- likelihoods.put(GenotypeType.HET,1.0/3.0);
- likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
- return likelihoods;
+ //In case of null, unavailable or no call, all likelihoods are log10(1/3)
+ else if(genotype == null || !hasCalledGT(genotype.getType()) || genotype.getLikelihoods() == null){
+ likelihoods = new double[3];
+ likelihoods[0] = LOG10_OF_ONE_THIRD;
+ likelihoods[1] = LOG10_OF_ONE_THIRD;
+ likelihoods[2] = LOG10_OF_ONE_THIRD;
}
- return genotype.getLikelihoods().getAsMap(true);
+
+ //No posteriors in VC, use PLs
+ else
+ likelihoods = GeneralUtils.normalizeFromLog10(genotype.getLikelihoods().getAsVector(),true,true);
+
+ likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[genotypeTypeToValue(GenotypeType.HOM_REF)]);
+ likelihoodsMap.put(GenotypeType.HET,likelihoods[genotypeTypeToValue(GenotypeType.HET)]);
+ likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[genotypeTypeToValue(GenotypeType.HOM_VAR)]);
+ return likelihoodsMap;
}
- //Returns the GenotypeType; returns UNAVAILABLE if given null
- private GenotypeType getTypeSafeNull(Genotype genotype){
- if(genotype == null)
- return GenotypeType.UNAVAILABLE;
- return genotype.getType();
- }
-
- private int getLikelihoodIndex(GenotypeType firstParent, GenotypeType secondParent, GenotypeType child, boolean parentsAreFlipped){
- int childInd = genotypeTypeValue(child);
+ private int getLikelihoodMatrixIndex(GenotypeType mother, GenotypeType father, GenotypeType child){
+ int childInd = genotypeTypeToValue(child);
int motherInd;
int fatherInd;
- final int NUM_CALLED_GENOTYPETYPES = 3;
final int INVALID = -1;
- if (parentsAreFlipped)
- {
- motherInd = genotypeTypeValue(secondParent);
- fatherInd = genotypeTypeValue(firstParent);
- }
- else {
- motherInd = genotypeTypeValue(firstParent);
- fatherInd = genotypeTypeValue(secondParent);
- }
-
+ motherInd = genotypeTypeToValue(mother);
+ fatherInd = genotypeTypeToValue(father);
if (childInd == INVALID || motherInd == INVALID || fatherInd == INVALID) //any of the genotypes are NO_CALL, UNAVAILABLE or MIXED
return INVALID;
@@ -670,11 +469,16 @@ public class FamilyLikelihoodsUtils {
return motherInd*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES + fatherInd*NUM_CALLED_GENOTYPETYPES + childInd;
}
- private int genotypeTypeValue(GenotypeType input){
+ private int genotypeTypeToValue(GenotypeType input){
if (input == GenotypeType.HOM_REF) return 0;
if (input == GenotypeType.HET) return 1;
if (input == GenotypeType.HOM_VAR) return 2;
return -1;
}
+ //this excludes mixed genotypes, whereas the htsjdk Genotype.isCalled() will return true if the GenotypeType is mixed
+ private boolean hasCalledGT(GenotypeType genotype){
+ return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
+ }
+
}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtils.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtils.java
index d8db1036c..60a6c9da2 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtils.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtils.java
@@ -65,15 +65,17 @@ public class PosteriorLikelihoodsUtils {
final double globalFrequencyPriorDirichlet,
final boolean useInputSamples,
final boolean useAC,
- final boolean calcMissing) {
+ final boolean useACoff) {
final Map totalAlleleCounts = new HashMap<>();
boolean nonSNPprior = false;
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
final boolean nonSNPeval = !vc1.isSNP();
final double[] alleleCounts = new double[vc1.getNAlleles()];
+ //only use discovered allele count if there are at least 10 samples
+ final boolean useDiscoveredAC = !useACoff && vc1.getNSamples() >= 10;
- if(!nonSNPeval)
+ if(vc1.isSNP())
{
//store the allele counts for each allele in the variant priors
for ( final VariantContext resource : resources ) {
@@ -111,7 +113,7 @@ public class PosteriorLikelihoodsUtils {
//parse the PPs into a vector of probabilities
if (PPfromVCF instanceof String) {
final String PPstring = (String)PPfromVCF;
- if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." after family priors are applied
+ if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." if family priors are applied
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
else {
final String[] likelihoodsAsStringVector = PPstring.split(",");
@@ -135,7 +137,7 @@ public class PosteriorLikelihoodsUtils {
}
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
- final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
+ final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !useDiscoveredAC);
final List posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java
index c5d354356..459afd804 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CalculateGenotypePosteriorsIntegrationTest.java
@@ -60,7 +60,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testUsingDiscoveredAF() {
WalkerTestSpec spec = new WalkerTestSpec(
- "-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
+ "-T CalculateGenotypePosteriors --no_cmdline_in_header" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
@@ -73,7 +73,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testMissingPriors() {
WalkerTestSpec spec = new WalkerTestSpec(
- "-T CalculateGenotypePosteriors --no_cmdline_in_header" +
+ "-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
@@ -86,7 +86,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testInputINDELs() {
WalkerTestSpec spec = new WalkerTestSpec(
- "-T CalculateGenotypePosteriors --no_cmdline_in_header" +
+ "-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
" -o %s" +
" -R " + b37KGReference +
" -L 20:10,000,000-10,100,000" +
@@ -100,14 +100,14 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testFamilyPriors() {
WalkerTestSpec spec = new WalkerTestSpec(
- "-T CalculateGenotypePosteriors --no_cmdline_in_header" +
+ "-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
" -o %s" +
" -R " + b37KGReference +
" -ped " + CEUtrioFamilyFile +
" -V " + CEUtrioTest +
" -supporting " + CEUtrioPopPriorsTest,
1,
- Arrays.asList("a22c81f0609c9f43578054661797395b"));
+ Arrays.asList("781f85f56dac9074c96ace31b09e0f59"));
executeTest("testFamilyPriors", spec);
}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java
index 977f0e290..c415fcf0d 100644
--- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/PosteriorLikelihoodsUtilsUnitTest.java
@@ -136,13 +136,22 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
private void testCalculatePosteriorNoExternalData() {
VariantContext test1 = makeVC("1",Arrays.asList(Aref,T), makeG("s1",Aref,T,20,0,10),
makeG("s2",T,T,60,40,0),
- makeG("s3",Aref,Aref,0,30,90));
- test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList(), 0, 0.001, true, false, true);
- Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
+ makeG("s3",Aref,Aref,0,30,90),
+ makeG("s4",Aref,T,20,0,10),
+ makeG("s5",T,T,60,40,0),
+ makeG("s6",Aref,Aref,0,30,90),
+ makeG("s7",Aref,T,20,0,10),
+ makeG("s8",T,T,60,40,0),
+ makeG("s9",Aref,Aref,0,30,90),
+ makeG("s10",Aref,T,20,0,10),
+ makeG("s11",T,T,60,40,0),
+ makeG("s12",Aref,Aref,0,30,90));
+ test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,12).make();
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList(), 0, 0.001, true, false, false);
+ Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.26110257, -0.02700903, -1.26110257});
Assert.assertTrue(test1exp1.hasPL());
- Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
- Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0006510083, -2.824524, -9.000651});
+ Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000075e+00, -3.765981e+00, -7.488009e-05});
+ Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0007438855, -2.7666503408, -9.0007438855});
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List)test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
@@ -154,13 +163,21 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s1",Aref,T,30,10,60,0,15,90),
makeG("s2",Aref,C,40,0,10,30,40,80),
makeG("s3",Aref,Aref,0,5,8,15,20,40),
- makeG("s4",C,T,80,40,12,20,0,10));
- test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList(Arrays.asList(2,2))).make();
- VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList(),5,0.001,true,false, true);
- Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
- Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
- Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
- Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.016534, -3.4143, -1.392355, -1.4143, -0.06734388, -1.192355});
+ makeG("s4",C,T,80,40,12,20,0,10),
+ makeG("s5",Aref,T,30,10,60,0,15,90),
+ makeG("s6",Aref,C,40,0,10,30,40,80),
+ makeG("s7",Aref,Aref,0,5,8,15,20,40),
+ makeG("s8",C,T,80,40,12,20,0,10),
+ makeG("s9",Aref,T,30,10,60,0,15,90),
+ makeG("s10",Aref,C,40,0,10,30,40,80),
+ makeG("s11",Aref,Aref,0,5,8,15,20,40),
+ makeG("s12",C,T,80,40,12,20,0,10));
+ test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList(Arrays.asList(6,6))).make();
+ VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList(),5,0.001,true,false, false);
+ Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.823957, -1.000000, -6.686344, 0.000000, -1.952251, -9.686344});
+ Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.823957, 0.000000, -1.686344, -3.000000, -4.452251, -8.686344});
+ Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {0.000000, -0.676043, -1.662387, -1.676043, -2.628294, -4.862387});
+ Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.371706, -3.547749, -1.434094, -1.547749, 0.000000, -1.234094});
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List)test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List)test2result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List)test2result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
@@ -178,7 +195,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
makeG("s_1",T,T),
makeG("s_2",Aref,T)));
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, false);
// the counts here are ref=30, alt=14
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
@@ -189,7 +206,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
List other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
- VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
+ VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List) test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
}
@@ -199,7 +216,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
Assert.assertTrue(GP[2] > GP[1]);
@@ -210,7 +227,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
Assert.assertTrue(GP[2] < GP[1]);
@@ -221,7 +238,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
Assert.assertTrue(GP[0] > GP[1]);
@@ -232,7 +249,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
int[] GP = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
Assert.assertTrue(GP[0] < GP[1]);
@@ -245,7 +262,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test (expectedExceptions = {UserException.class})
@@ -256,7 +273,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test (expectedExceptions = {UserException.class})
@@ -266,7 +283,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test
@@ -276,7 +293,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test
@@ -286,7 +303,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
}
@Test
@@ -296,7 +313,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,ATC,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
System.out.println(test1result);
int[] GPs = _mleparse( (List)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
@@ -311,7 +328,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
makeG("s3",Aref,T,22,0,12));
List supplTest1 = new ArrayList<>(1);
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
- VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
+ VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
System.out.println(test1result);