Merge pull request #711 from broadinstitute/ldg_deNovoAnnotation
Add bells and whistles for Genotype Refinement Pipeline
This commit is contained in:
commit
e60dd77362
|
|
@ -94,7 +94,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
private MendelianViolation mendelianViolation = null;
|
||||
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
||||
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
||||
private final int GQ_threshold = 20;
|
||||
private final int hi_GQ_threshold = 20;
|
||||
private final int lo_GQ_threshold = 10;
|
||||
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
|
||||
private final int flatNumberOfSamplesCutoff = 4;
|
||||
private Set<Trio> trios;
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
|
|
@ -109,7 +112,7 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||
}
|
||||
else {
|
||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
||||
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -119,38 +122,38 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
|||
final List<String> highConfDeNovoChildren = new ArrayList<String>();
|
||||
final List<String> lowConfDeNovoChildren = new ArrayList<String>();
|
||||
for ( final Trio trio : trios ) {
|
||||
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
||||
if (mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc)) {
|
||||
if (mendelianViolation.getParentsRefRefChildHet() > 0) {
|
||||
if (vc.getGenotype(trio.getChildID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getMaternalID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getPaternalID()).getGQ() > GQ_threshold) {
|
||||
if (vc.isBiallelic() && contextHasTrioLikelihoods(vc,trio) && mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc) )
|
||||
{
|
||||
if (mendelianViolation.getParentsRefRefChildHet() > 0) {
|
||||
if ((vc.getGenotype(trio.getChildID()).getGQ() >= hi_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) >= hi_GQ_threshold && (vc.getGenotype(trio.getPaternalID()).getGQ() >= hi_GQ_threshold))
|
||||
{
|
||||
highConfDeNovoChildren.add(trio.getChildID());
|
||||
isHighConfDeNovo = true;
|
||||
isHighConfDeNovo = true;
|
||||
}
|
||||
else {
|
||||
else if ((vc.getGenotype(trio.getChildID()).getGQ() >= lo_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) > 0 && (vc.getGenotype(trio.getPaternalID()).getGQ() > 0))
|
||||
{
|
||||
lowConfDeNovoChildren.add(trio.getChildID());
|
||||
isLowConfDeNovo = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( isHighConfDeNovo || isLowConfDeNovo ) {
|
||||
for(final String child : highConfDeNovoChildren) {
|
||||
attributeMap.put(HI_CONF_DENOVO_KEY,child);
|
||||
}
|
||||
for(final String child : lowConfDeNovoChildren) {
|
||||
attributeMap.put(LO_CONF_DENOVO_KEY,child);
|
||||
}
|
||||
}
|
||||
final double percentNumberOfSamplesCutoff = vc.getNSamples()*percentOfSamplesCutoff;
|
||||
final double AFcutoff = Math.max(flatNumberOfSamplesCutoff,percentNumberOfSamplesCutoff);
|
||||
final int deNovoAlleleCount = vc.getCalledChrCount(vc.getAlternateAllele(0)); //we assume we're biallelic above so use the first alt
|
||||
if ( isHighConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||
attributeMap.put(HI_CONF_DENOVO_KEY,highConfDeNovoChildren);
|
||||
if ( isLowConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||
attributeMap.put(LO_CONF_DENOVO_KEY,lowConfDeNovoChildren);
|
||||
return attributeMap;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ > "+GQ_threshold+"): sample name"),
|
||||
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation: sample name")); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
|
||||
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
|
||||
|
||||
|
||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||
|
|
|
|||
|
|
@ -113,15 +113,19 @@ import java.util.*;
|
|||
* 1) Genotype posteriors added to the genotype fields ("PP")
|
||||
* 2) Genotypes and GQ assigned according to these posteriors
|
||||
* 3) Per-site genotype priors added to the INFO field ("PG")
|
||||
* 4) (Optional) Per-site, per-trio transmission probabilities given as Phred-scaled probability of all genotypes in the trio being correct, added to the genotype fields ("TP")
|
||||
* 4) (Optional) Per-site, per-trio joint likelihoods (JL) and joint posteriors (JL) given as Phred-scaled probability
|
||||
* of all genotypes in the trio being correct based on the PLs for JL and the PPs for JP. These annotations are added to
|
||||
* the genotype fields.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Notes</h3>
|
||||
* <p>
|
||||
* Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
|
||||
* matching site in the priors VCF unless the --calculateMissingPriors flag is used).
|
||||
* If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
|
||||
* sites in the input callset.
|
||||
* Using the default behavior, priors will only be applied for each variants (provided each variant has at least 10
|
||||
* called samples.) SNP sites in the input callset that have a SNP at the matching site in the supporting VCF will have
|
||||
* priors applied based on the AC from the supporting samples and the input callset (unless the --ignoreInputSamples
|
||||
* flag is used). If the site is not called in the supporting VCF, priors will be applied using the discovered AC from
|
||||
* the input samples (unless the --discoveredACpriorsOff flag is used). Flat priors are applied for any non-SNP sites in
|
||||
* the input callset.
|
||||
* </p>
|
||||
*
|
||||
* <h3>Examples</h3>
|
||||
|
|
@ -234,8 +238,9 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
/**
|
||||
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
|
||||
*/
|
||||
@Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
|
||||
public boolean calcMissing = false;
|
||||
@Argument(fullName="discoveredACpriorsOff",shortName="useACoff",doc="Do not use discovered allele count in the input callset " +
|
||||
"for variants that do not appear in the external callset. ", required=false)
|
||||
public boolean useACoff = false;
|
||||
|
||||
/**
|
||||
* Skip application of population-based priors
|
||||
|
|
@ -252,7 +257,8 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
@Output(doc="File to which variants should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
|
||||
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
|
||||
private FamilyLikelihoodsUtils famUtils = new FamilyLikelihoodsUtils();
|
||||
|
|
@ -298,8 +304,10 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.add(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
|
||||
headerLines.add(new VCFInfoHeaderLine("PG", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
|
||||
if (!skipFamilyPriors)
|
||||
headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
|
||||
if (!skipFamilyPriors) {
|
||||
headerLines.add(new VCFFormatHeaderLine(JOINT_LIKELIHOOD_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint likelihood of the genotype combination (before applying family priors)"));
|
||||
headerLines.add(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
|
||||
}
|
||||
headerLines.add(new VCFHeaderLine("source", "CalculateGenotypePosteriors"));
|
||||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
|
||||
|
|
@ -331,12 +339,14 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
|
||||
builder.genotypes(gc);
|
||||
}
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
vc_familyPriors = builder.make();
|
||||
|
||||
if (!skipPopulationPriors)
|
||||
vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing);
|
||||
vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
|
||||
else {
|
||||
final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
vc_bothPriors = builder2.make();
|
||||
}
|
||||
vcfWriter.add(vc_bothPriors);
|
||||
|
|
|
|||
|
|
@ -47,7 +47,6 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.samples.Sample;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
|
|
@ -57,6 +56,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
|||
import htsjdk.variant.utils.GeneralUtils;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -70,239 +70,180 @@ public class FamilyLikelihoodsUtils {
|
|||
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix =
|
||||
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||
|
||||
//Matrix of allele transmission
|
||||
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>> transmissionMatrix =
|
||||
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>>(GenotypeType.class);
|
||||
final int NUM_CALLED_GENOTYPETYPES = 3; //HOM_REF, HET, and HOM_VAR
|
||||
|
||||
final double[] configurationLikelihoodsMatrix = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
|
||||
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
|
||||
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}; //27 is # of trio genotype combos, initialize to zero
|
||||
double[] configurationLikelihoodsMatrix = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||
|
||||
ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
||||
//Random number generator
|
||||
final private Random rand = new GenomeAnalysisEngine().getRandomGenerator();
|
||||
|
||||
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
|
||||
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||
|
||||
public final double NO_TRANSMISSION_PROB = -1.0;
|
||||
public final double NO_JOINT_VALUE = -1.0;
|
||||
|
||||
private double deNovoPrior = 1e-8;
|
||||
|
||||
private final double ONE_THIRD = 0.333333333333333333;
|
||||
private final double LOG10_OF_ONE_THIRD = -0.4771213;
|
||||
|
||||
private enum FamilyMember {
|
||||
MOTHER,
|
||||
FATHER,
|
||||
CHILD
|
||||
}
|
||||
|
||||
//Stores a conceptual trio or parent/child pair genotype combination
|
||||
//This combination can then be "applied" to a given trio or pair using the getUpdatedGenotypes method.
|
||||
private class TrioGenotypes {
|
||||
/**
|
||||
* Applies the trio genotype combination to the given trio.
|
||||
* @param motherGenotype: Original genotype of the mother
|
||||
* @param fatherGenotype: Original genotype of the father
|
||||
* @param childGenotype: Original genotype of the child
|
||||
* @param updatedGenotypes: An ArrayList<Genotype> to which the newly updated genotypes are added in the following order: Mother, Father, Child
|
||||
*/
|
||||
public void getUpdatedGenotypes(final VariantContext vc, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final ArrayList<Genotype> updatedGenotypes){
|
||||
//genotypes here can be no call
|
||||
boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
|
||||
boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
|
||||
boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
|
||||
|
||||
//Create 2 fake alleles
|
||||
//The actual bases will never be used but the Genotypes created using the alleles will be.
|
||||
private final Allele REF = Allele.create("A",true);
|
||||
private final Allele VAR = Allele.create("A",false);
|
||||
private final Allele NO_CALL = Allele.create(".",false);
|
||||
private final String DUMMY_NAME = "DummySample";
|
||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> MVcountMatrix;
|
||||
//default to posteriors equal to likelihoods (flat priors) in case input genotypes are not called
|
||||
double[] uninformativeLikelihoods = {ONE_THIRD, ONE_THIRD, ONE_THIRD};
|
||||
|
||||
private final EnumMap<FamilyMember,Genotype> familyGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
|
||||
double[] motherLikelihoods = motherIsCalled? GeneralUtils.normalizeFromLog10(motherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||
double[] fatherLikelihoods = fatherIsCalled? GeneralUtils.normalizeFromLog10(fatherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||
double[] childLikelihoods = childIsCalled? GeneralUtils.normalizeFromLog10(childGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||
|
||||
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
|
||||
*/
|
||||
public TrioGenotypes(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
familyGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother));
|
||||
familyGenotypes.put(FamilyMember.FATHER, makeGenotype(father));
|
||||
familyGenotypes.put(FamilyMember.CHILD, makeGenotype(child));
|
||||
}
|
||||
|
||||
private ArrayList<Allele> getAlleles(GenotypeType genotype){
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
if(genotype == GenotypeType.HOM_REF){
|
||||
alleles.add(REF);
|
||||
alleles.add(REF);
|
||||
}
|
||||
else if(genotype == GenotypeType.HET){
|
||||
alleles.add(REF);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
else if(genotype == GenotypeType.HOM_VAR){
|
||||
alleles.add(VAR);
|
||||
alleles.add(VAR);
|
||||
}
|
||||
else{
|
||||
return null;
|
||||
}
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public void setMVcountMatrix(EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inputMat) {
|
||||
MVcountMatrix = inputMat;
|
||||
}
|
||||
|
||||
private boolean hasCalledGT(GenotypeType genotype){
|
||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
//TODO: this was stupid stuff for phasing -- let's take it out
|
||||
private Genotype makeGenotype(final GenotypeType type) {
|
||||
return makeGenotype(getAlleles(type));
|
||||
}
|
||||
|
||||
private Genotype makeGenotype(final List<Allele> alleles) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
|
||||
return gb.make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the trio genotype combination to the given trio.
|
||||
* @param ref: Reference allele
|
||||
* @param alt: Alternate allele
|
||||
* @param motherGenotype: Genotype of the mother in this trio genotype combination
|
||||
* @param fatherGenotype: Genotype of the father in this trio genotype combination
|
||||
* @param childGenotype: Genotype of the child in this trio genotype combination
|
||||
* @param transmissionProb: Probability for this trio genotype combination to be correct (pass NO_TRANSMISSION_PROB if unavailable)
|
||||
* @param configurationLikelihoodsMatrix: (Non-normalized) likelihoods for each trio genotype combination, for use in generating new PLs
|
||||
* @param updatedGenotypes: An ArrayList<Genotype> to which the newly updated genotypes are added in the following order: Mother, Father, Child
|
||||
*/
|
||||
public void getUpdatedGenotypes(final Allele ref, final Allele alt, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final double transmissionProb, double[] configurationLikelihoodsMatrix, final ArrayList<Genotype> updatedGenotypes){
|
||||
//default to flat priors in case input genotypes are not called
|
||||
double[] motherPosteriors = {1,1,1};
|
||||
double[] fatherPosteriors = {1,1,1};
|
||||
double[] childPosteriors = {1,1,1};
|
||||
|
||||
//genotypes here can be no call
|
||||
boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
|
||||
boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
|
||||
boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
|
||||
|
||||
if (fatherIsCalled && childIsCalled) {
|
||||
motherPosteriors = getPosteriors(FamilyMember.MOTHER);
|
||||
}
|
||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, motherGenotype, transmissionProb, motherPosteriors));
|
||||
|
||||
if (motherIsCalled && childIsCalled) {
|
||||
fatherPosteriors = getPosteriors(FamilyMember.FATHER);
|
||||
}
|
||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, fatherGenotype, transmissionProb, fatherPosteriors));
|
||||
|
||||
if (motherIsCalled && fatherIsCalled) {
|
||||
childPosteriors = getPosteriors(FamilyMember.CHILD);
|
||||
}
|
||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, childGenotype, transmissionProb, childPosteriors));
|
||||
}
|
||||
|
||||
private Genotype getUpdatedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, double[] normalizedPosteriors){
|
||||
|
||||
int phredScoreTransmission = -1;
|
||||
if(transmissionProb != NO_TRANSMISSION_PROB){
|
||||
double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
|
||||
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
|
||||
}
|
||||
//Handle null, missing and unavailable genotypes
|
||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
||||
//genotype so it is safe to return the original genotype in this case.
|
||||
//In addition, if the genotype configuration confidence is 0, then return the original genotypes.
|
||||
if(phredScoreTransmission ==0 || genotype == null || !hasCalledGT(genotype.getType()))
|
||||
return genotype;
|
||||
|
||||
//Add the transmission probability
|
||||
final Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
||||
|
||||
final ArrayList<Allele> usedAlleles = new ArrayList<Allele>(2);
|
||||
usedAlleles.add(refAllele);
|
||||
usedAlleles.add(altAllele);
|
||||
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||
|
||||
final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
||||
|
||||
//note that there will there be times when posteriors don't agree with genotype predicted by configuration likelihoods
|
||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(usedAlleles, builder,
|
||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, usedAlleles);
|
||||
|
||||
|
||||
|
||||
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
|
||||
builder.attributes(genotypeAttributes);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
//marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
|
||||
private double[] getPosteriors(FamilyMember recalcInd) {
|
||||
double marginalOverChangedHR, marginalOverChangedHET, marginalOverChangedHV;
|
||||
marginalOverChangedHR = marginalOverChangedHET = marginalOverChangedHV = 0;
|
||||
final double[] recalcPosteriors = new double[3];
|
||||
|
||||
GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
|
||||
|
||||
switch (recalcInd) {
|
||||
case MOTHER:
|
||||
for(final GenotypeType father : calledTypes) {
|
||||
for(final GenotypeType child : calledTypes) {
|
||||
GenotypeType mother;
|
||||
mother = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
mother = GenotypeType.HET;
|
||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
mother = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
}
|
||||
}
|
||||
break;
|
||||
case FATHER:
|
||||
for(final GenotypeType mother : calledTypes){
|
||||
for (final GenotypeType child : calledTypes){
|
||||
GenotypeType father;
|
||||
father = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
father = GenotypeType.HET;
|
||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
father = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CHILD:
|
||||
for(final GenotypeType mother : calledTypes){
|
||||
for (final GenotypeType father: calledTypes){
|
||||
GenotypeType child;
|
||||
child = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
child = GenotypeType.HET;
|
||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
child = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new UserException(String.format("%d does not indicate a valid trio individual -- use 0 for mother, 1 for father, 2 for child",recalcInd));
|
||||
}
|
||||
recalcPosteriors[0] = marginalOverChangedHR;
|
||||
recalcPosteriors[1] = marginalOverChangedHET;
|
||||
recalcPosteriors[2] = marginalOverChangedHV;
|
||||
|
||||
final double[] normalizedPosteriors = MathUtils.normalizeFromRealSpace(recalcPosteriors);
|
||||
|
||||
return normalizedPosteriors;
|
||||
//these are also in log10 space
|
||||
double[] motherLog10Posteriors = getPosteriors(FamilyMember.MOTHER);
|
||||
double[] fatherLog10Posteriors = getPosteriors(FamilyMember.FATHER);
|
||||
double[] childLog10Posteriors = getPosteriors(FamilyMember.CHILD);
|
||||
|
||||
double[] motherPosteriors = GeneralUtils.normalizeFromLog10(motherLog10Posteriors);
|
||||
double[] fatherPosteriors = GeneralUtils.normalizeFromLog10(fatherLog10Posteriors);
|
||||
double[] childPosteriors = GeneralUtils.normalizeFromLog10(childLog10Posteriors);
|
||||
|
||||
|
||||
double jointPosteriorProbability = -1;
|
||||
//jointTrioLikelihood is combined likelihoods (before prior) of best configuration after applying prior
|
||||
double jointTrioLikelihood = -1;
|
||||
if(childIsCalled && motherIsCalled && fatherIsCalled) {
|
||||
jointTrioLikelihood = motherLikelihoods[MathUtils.maxElementIndex(motherPosteriors)]*fatherLikelihoods[MathUtils.maxElementIndex(fatherPosteriors)]*childLikelihoods[MathUtils.maxElementIndex(childPosteriors)];
|
||||
jointPosteriorProbability = MathUtils.arrayMax(motherPosteriors)*MathUtils.arrayMax(fatherPosteriors)*MathUtils.arrayMax(childPosteriors);
|
||||
}
|
||||
|
||||
updatedGenotypes.add(getUpdatedGenotype(vc, motherGenotype, jointTrioLikelihood, jointPosteriorProbability, motherLog10Posteriors));
|
||||
updatedGenotypes.add(getUpdatedGenotype(vc, fatherGenotype, jointTrioLikelihood, jointPosteriorProbability, fatherLog10Posteriors));
|
||||
updatedGenotypes.add(getUpdatedGenotype(vc, childGenotype, jointTrioLikelihood, jointPosteriorProbability, childLog10Posteriors));
|
||||
}
|
||||
|
||||
public void initialize(double DNprior, Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
||||
private Genotype getUpdatedGenotype(final VariantContext vc, final Genotype genotype, final double jointLikelihood, final double jointPosteriorProb, final double[] log10Posteriors){
|
||||
//Don't update null, missing or unavailable genotypes
|
||||
if(genotype == null || !hasCalledGT(genotype.getType()))
|
||||
return genotype;
|
||||
|
||||
int phredScaledJL = -1;
|
||||
int phredScaledJP = -1;
|
||||
if(jointLikelihood != NO_JOINT_VALUE){
|
||||
double dphredScaledJL = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointLikelihood));
|
||||
phredScaledJL = dphredScaledJL < Byte.MAX_VALUE ? (byte)dphredScaledJL : Byte.MAX_VALUE;
|
||||
}
|
||||
if(jointPosteriorProb != NO_JOINT_VALUE){
|
||||
double dphredScaledJP = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointPosteriorProb));
|
||||
phredScaledJP = dphredScaledJP < Byte.MAX_VALUE ? (byte)dphredScaledJP : Byte.MAX_VALUE;
|
||||
}
|
||||
|
||||
//Add the joint trio calculations
|
||||
final Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||
genotypeAttributes.put(JOINT_LIKELIHOOD_TAG_NAME, phredScaledJL);
|
||||
genotypeAttributes.put(JOINT_POSTERIOR_TAG_NAME, phredScaledJP);
|
||||
|
||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||
|
||||
//final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
||||
|
||||
//update genotype types based on posteriors
|
||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder,
|
||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
|
||||
|
||||
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
|
||||
builder.attributes(genotypeAttributes);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
//marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
|
||||
private double[] getPosteriors(final FamilyMember recalcInd) {
|
||||
double[] marginalOverChangedHR = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||
double[] marginalOverChangedHET = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||
double[] marginalOverChangedHV = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||
final double[] recalcPosteriors = new double[NUM_CALLED_GENOTYPETYPES];
|
||||
|
||||
final GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
|
||||
int counter = 0;
|
||||
|
||||
switch (recalcInd) {
|
||||
case MOTHER:
|
||||
for(final GenotypeType father : calledTypes) {
|
||||
for(final GenotypeType child : calledTypes) {
|
||||
GenotypeType mother;
|
||||
mother = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
mother = GenotypeType.HET;
|
||||
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
mother = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case FATHER:
|
||||
for(final GenotypeType mother : calledTypes){
|
||||
for (final GenotypeType child : calledTypes){
|
||||
GenotypeType father;
|
||||
father = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
father = GenotypeType.HET;
|
||||
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
father = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case CHILD:
|
||||
for(final GenotypeType mother : calledTypes){
|
||||
for (final GenotypeType father: calledTypes){
|
||||
GenotypeType child;
|
||||
child = GenotypeType.HOM_REF;
|
||||
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
child = GenotypeType.HET;
|
||||
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
child = GenotypeType.HOM_VAR;
|
||||
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||
counter++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new UserException(String.format("%d does not indicate a valid trio FamilyMember -- use 0 for mother, 1 for father, 2 for child",recalcInd));
|
||||
}
|
||||
|
||||
recalcPosteriors[0] = MathUtils.log10sumLog10(marginalOverChangedHR,0);
|
||||
recalcPosteriors[1] = MathUtils.log10sumLog10(marginalOverChangedHET,0);
|
||||
recalcPosteriors[2] = MathUtils.log10sumLog10(marginalOverChangedHV,0);
|
||||
|
||||
return MathUtils.normalizeFromLog10(recalcPosteriors,true,true);
|
||||
}
|
||||
|
||||
public void initialize(final double DNprior, final Set<String> vcfSamples, final Map<String,Set<Sample>> families){
|
||||
this.deNovoPrior = DNprior;
|
||||
Arrays.fill(configurationLikelihoodsMatrix,0);
|
||||
buildMatrices();
|
||||
trios = setTrios(vcfSamples, families);
|
||||
}
|
||||
|
||||
public GenotypesContext calculatePosteriorGLs(VariantContext vc){
|
||||
public GenotypesContext calculatePosteriorGLs(final VariantContext vc){
|
||||
final GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes());
|
||||
|
||||
for (final Sample sample : trios) {
|
||||
|
|
@ -312,30 +253,28 @@ public class FamilyLikelihoodsUtils {
|
|||
|
||||
//Keep only trios and parent/child pairs
|
||||
if(mother == null && father == null || child == null) {
|
||||
logger.warn("null genos in var "+vc.toStringDecodeGenotypes());
|
||||
logger.warn("Null genotypes in variant: "+vc.toStringDecodeGenotypes());
|
||||
continue;
|
||||
}
|
||||
|
||||
final ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
||||
final int mvCount = updateFamilyGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child, trioGenotypes);
|
||||
updateFamilyGenotypes(vc, mother, father, child, trioGenotypes);
|
||||
|
||||
Genotype updatedMother = trioGenotypes.get(0);
|
||||
Genotype updatedFather = trioGenotypes.get(1);
|
||||
Genotype updatedChild = trioGenotypes.get(2);
|
||||
|
||||
genotypesContext.replace(updatedChild);
|
||||
genotypesContext.replace(updatedFather);
|
||||
genotypesContext.replace(updatedMother);
|
||||
//replace uses sample names to match genotypes, so order doesn't matter
|
||||
if (trioGenotypes.size() > 0) {
|
||||
genotypesContext.replace(trioGenotypes.get(0));
|
||||
genotypesContext.replace(trioGenotypes.get(1));
|
||||
genotypesContext.replace(trioGenotypes.get(2));
|
||||
}
|
||||
}
|
||||
|
||||
return genotypesContext;
|
||||
return genotypesContext;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select trios and parent/child pairs only
|
||||
*/
|
||||
private ArrayList<Sample> setTrios(Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
||||
|
||||
Set<Sample> family;
|
||||
ArrayList<Sample> parents;
|
||||
final ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||
|
|
@ -365,18 +304,14 @@ public class FamilyLikelihoodsUtils {
|
|||
return trios;
|
||||
}
|
||||
|
||||
//Create the transmission matrices
|
||||
//TODO: pass in the real genotypes so we have that info
|
||||
//Create a lookup matrix to find the number of MVs for each family genotype combination
|
||||
private void buildMatrices(){
|
||||
for(final GenotypeType mother : GenotypeType.values()){
|
||||
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>(GenotypeType.class));
|
||||
for(final GenotypeType father : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
||||
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioGenotypes>(GenotypeType.class));
|
||||
for(final GenotypeType child : GenotypeType.values()){
|
||||
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
||||
transmissionMatrix.get(mother).get(father).put(child,new TrioGenotypes(mother,father,child));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -442,179 +377,54 @@ public class FamilyLikelihoodsUtils {
|
|||
|
||||
/**
|
||||
* Updates the genotypes of the given trio. If one of the parents is null, it is considered a parent/child pair.
|
||||
* @param ref: Reference allele
|
||||
* @param alt: Alternative allele
|
||||
* @param vc: Input variant context
|
||||
* @param mother: Mother's genotype from vc input
|
||||
* @param father: Father's genotype from vc input
|
||||
* @param child: Child's genotype from vc input
|
||||
* @param finalGenotypes: An ArrayList<Genotype> containing the updated genotypes
|
||||
* @return
|
||||
*/
|
||||
private int updateFamilyGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child, ArrayList<Genotype> finalGenotypes) {
|
||||
private void updateFamilyGenotypes(VariantContext vc, Genotype mother, Genotype father, Genotype child, ArrayList<Genotype> finalGenotypes) {
|
||||
|
||||
//Check whether it is a pair or trio
|
||||
//Always assign the first parent as the parent having genotype information in pairs
|
||||
//Always assign the mother as the first parent in trios
|
||||
int parentsCalled = 0;
|
||||
Map<GenotypeType,Double> firstParentLikelihoods;
|
||||
Map<GenotypeType,Double> secondParentLikelihoods;
|
||||
final ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
|
||||
final ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
|
||||
final ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
|
||||
GenotypeType pairSecondParentGenotype = null;
|
||||
boolean parentsAreFlipped = false; //usually mother comes first, like for indexing of transmissionMatrix
|
||||
final int INVALID_INDEX = -1;
|
||||
|
||||
//if only one parent is called, make uncalled parent the secondParent
|
||||
if(mother == null || !mother.isCalled()){
|
||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
bestFirstParentGenotype.add(getTypeSafeNull(father));
|
||||
bestSecondParentGenotype.add(getTypeSafeNull(mother));
|
||||
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
|
||||
parentsAreFlipped = true;
|
||||
if(father != null && father.isCalled())
|
||||
parentsCalled = 1;
|
||||
}
|
||||
else{
|
||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
bestFirstParentGenotype.add(getTypeSafeNull(mother));
|
||||
bestSecondParentGenotype.add(getTypeSafeNull(father));
|
||||
if(father == null || !father.isCalled()){
|
||||
parentsCalled = 1;
|
||||
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
|
||||
}else{
|
||||
parentsCalled = 2;
|
||||
}
|
||||
}
|
||||
//If one of the parents is not called, fill in with uninformative likelihoods
|
||||
Map<GenotypeType,Double> motherLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||
Map<GenotypeType,Double> fatherLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||
bestChildGenotype.add(getTypeSafeNull(child));
|
||||
|
||||
//Prior vars
|
||||
double bestConfigurationLikelihood = 0.0;
|
||||
double norm = 0.0;
|
||||
int configuration_index =0;
|
||||
final ArrayList<Integer> bestMVCount = new ArrayList<Integer>();
|
||||
bestMVCount.add(0);
|
||||
//if the child isn't called or neither parent is called, there's no extra inheritance information in that trio so return
|
||||
if (!hasCalledGT(child.getType()) || (!hasCalledGT(mother.getType()) && !hasCalledGT(father.getType())))
|
||||
return;
|
||||
|
||||
//Get the most likely combination
|
||||
//Only check for most likely combination if at least a parent and the child have genotypes
|
||||
//Fill the configurationLikelihoodsMatrix for each genotype combination
|
||||
int matInd;
|
||||
if(child.isCalled() && parentsCalled > 0){
|
||||
int mvCount;
|
||||
int cumulativeMVCount = 0;
|
||||
double configurationLikelihood = 0;
|
||||
for(final Map.Entry<GenotypeType,Double> childGenotype :
|
||||
childLikelihoods.entrySet()){
|
||||
for(final Map.Entry<GenotypeType,Double> firstParentGenotype :
|
||||
firstParentLikelihoods.entrySet()){
|
||||
for(final Map.Entry<GenotypeType,Double> secondParentGenotype :
|
||||
secondParentLikelihoods.entrySet()){
|
||||
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
|
||||
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
|
||||
if(parentsCalled<2){
|
||||
cumulativeMVCount += mvCount;
|
||||
configurationLikelihood += mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
|
||||
}
|
||||
//Evaluate configurations of trios
|
||||
else{
|
||||
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
|
||||
norm += configurationLikelihood;
|
||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), secondParentGenotype.getKey(),childGenotype.getKey(),parentsAreFlipped);
|
||||
if (matInd > INVALID_INDEX) //still a slim chance of a MIXED GT
|
||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||
//Keep this combination if
|
||||
//It has a better likelihood
|
||||
//Or it has the same likelihood but requires less changes from original genotypes
|
||||
if (configurationLikelihood > bestConfigurationLikelihood){
|
||||
bestConfigurationLikelihood = configurationLikelihood;
|
||||
bestMVCount.clear();
|
||||
bestMVCount.add(mvCount);
|
||||
bestFirstParentGenotype.clear();
|
||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
||||
bestSecondParentGenotype.clear();
|
||||
bestSecondParentGenotype.add(secondParentGenotype.getKey());
|
||||
bestChildGenotype.clear();
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
}
|
||||
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
||||
bestSecondParentGenotype.add(secondParentGenotype.getKey());
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
bestMVCount.add(mvCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
//Evaluate configurations of parent/child pairs
|
||||
if(parentsCalled<2){
|
||||
norm += configurationLikelihood;
|
||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_REF,childGenotype.getKey(), parentsAreFlipped);
|
||||
if (matInd > INVALID_INDEX)
|
||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HET,childGenotype.getKey(),parentsAreFlipped);
|
||||
if (matInd > INVALID_INDEX)
|
||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_VAR,childGenotype.getKey(),parentsAreFlipped);
|
||||
if (matInd > INVALID_INDEX)
|
||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||
|
||||
//Keep this combination if
|
||||
//It has a better likelihood
|
||||
//Or it has the same likelihood but requires less changes from original genotypes
|
||||
if (configurationLikelihood > bestConfigurationLikelihood){
|
||||
bestConfigurationLikelihood = configurationLikelihood;
|
||||
bestMVCount.clear();
|
||||
bestMVCount.add(cumulativeMVCount/3);
|
||||
bestChildGenotype.clear();
|
||||
bestFirstParentGenotype.clear();
|
||||
bestSecondParentGenotype.clear();
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
||||
bestSecondParentGenotype.add(pairSecondParentGenotype);
|
||||
}
|
||||
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
||||
bestSecondParentGenotype.add(pairSecondParentGenotype);
|
||||
bestChildGenotype.add(childGenotype.getKey());
|
||||
bestMVCount.add(cumulativeMVCount/3);
|
||||
}
|
||||
configurationLikelihood = 0;
|
||||
}
|
||||
int mvCount;
|
||||
double jointLikelihood;
|
||||
double mvCoeff;
|
||||
double configurationLikelihood;
|
||||
for(final Map.Entry<GenotypeType,Double> childGenotype :
|
||||
childLikelihoods.entrySet()){
|
||||
for(final Map.Entry<GenotypeType,Double> motherGenotype :
|
||||
motherLikelihoods.entrySet()){
|
||||
for(final Map.Entry<GenotypeType,Double> fatherGenotype :
|
||||
fatherLikelihoods.entrySet()){
|
||||
mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey());
|
||||
jointLikelihood = motherGenotype.getValue()+fatherGenotype.getValue()+childGenotype.getValue();
|
||||
mvCoeff = mvCount>0 ? Math.pow(deNovoPrior,mvCount) : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior);
|
||||
configurationLikelihood = Math.log10(mvCoeff) + jointLikelihood;
|
||||
matInd = getLikelihoodMatrixIndex(motherGenotype.getKey(), fatherGenotype.getKey(), childGenotype.getKey());
|
||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||
}
|
||||
}
|
||||
|
||||
//normalize the best configuration probability
|
||||
bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
|
||||
|
||||
//In case of multiple equally likely combinations, take a random one
|
||||
if(bestFirstParentGenotype.size()>1){
|
||||
configuration_index = rand.nextInt(bestFirstParentGenotype.size()-1);
|
||||
}
|
||||
|
||||
}
|
||||
else{
|
||||
bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
|
||||
}
|
||||
|
||||
TrioGenotypes updatedTrioGenotypes;
|
||||
if(parentsCalled < 2 && mother == null || !mother.isCalled())
|
||||
updatedTrioGenotypes = transmissionMatrix.get(bestSecondParentGenotype.get(configuration_index)).get(bestFirstParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
|
||||
else
|
||||
updatedTrioGenotypes = transmissionMatrix.get(bestFirstParentGenotype.get(configuration_index)).get(bestSecondParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
|
||||
|
||||
//Return the updated genotypes
|
||||
updatedTrioGenotypes.setMVcountMatrix(mvCountMatrix);
|
||||
updatedTrioGenotypes.getUpdatedGenotypes(ref, alt, mother, father, child, bestConfigurationLikelihood, configurationLikelihoodsMatrix, finalGenotypes);
|
||||
return bestMVCount.get(configuration_index);
|
||||
|
||||
getUpdatedGenotypes(vc, mother, father, child, finalGenotypes);
|
||||
}
|
||||
|
||||
//Get a Map of genotype likelihoods, normalized from log10-space.
|
||||
//In case of null, unavailable or no call, all likelihoods are 1/3.
|
||||
//Get a Map of genotype (log10)likelihoods
|
||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||
if (genotype != null && genotype.isCalled() && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
|
||||
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
double[] likelihoods;
|
||||
|
||||
if (genotype != null && hasCalledGT(genotype.getType()) && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
|
||||
Object GPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
|
||||
//parse the GPs into a vector of probabilities
|
||||
final String[] likelihoodsAsStringVector = ((String)GPfromVCF).split(",");
|
||||
|
|
@ -622,46 +432,35 @@ public class FamilyLikelihoodsUtils {
|
|||
for ( int i = 0; i < likelihoodsAsStringVector.length; i++ ) {
|
||||
likelihoodsAsVector[i] = Double.parseDouble(likelihoodsAsStringVector[i]) / -10.0;
|
||||
}
|
||||
double[] likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector);
|
||||
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
|
||||
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
|
||||
return likelihoodsMap;
|
||||
//keep in log10 space for large GQs
|
||||
likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector, true, true);
|
||||
}
|
||||
|
||||
if(genotype == null || !genotype.isCalled() || genotype.getLikelihoods() == null){
|
||||
final EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HET,1.0/3.0);
|
||||
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
|
||||
return likelihoods;
|
||||
//In case of null, unavailable or no call, all likelihoods are log10(1/3)
|
||||
else if(genotype == null || !hasCalledGT(genotype.getType()) || genotype.getLikelihoods() == null){
|
||||
likelihoods = new double[3];
|
||||
likelihoods[0] = LOG10_OF_ONE_THIRD;
|
||||
likelihoods[1] = LOG10_OF_ONE_THIRD;
|
||||
likelihoods[2] = LOG10_OF_ONE_THIRD;
|
||||
}
|
||||
return genotype.getLikelihoods().getAsMap(true);
|
||||
|
||||
//No posteriors in VC, use PLs
|
||||
else
|
||||
likelihoods = GeneralUtils.normalizeFromLog10(genotype.getLikelihoods().getAsVector(),true,true);
|
||||
|
||||
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[genotypeTypeToValue(GenotypeType.HOM_REF)]);
|
||||
likelihoodsMap.put(GenotypeType.HET,likelihoods[genotypeTypeToValue(GenotypeType.HET)]);
|
||||
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[genotypeTypeToValue(GenotypeType.HOM_VAR)]);
|
||||
return likelihoodsMap;
|
||||
}
|
||||
|
||||
//Returns the GenotypeType; returns UNAVAILABLE if given null
|
||||
private GenotypeType getTypeSafeNull(Genotype genotype){
|
||||
if(genotype == null)
|
||||
return GenotypeType.UNAVAILABLE;
|
||||
return genotype.getType();
|
||||
}
|
||||
|
||||
private int getLikelihoodIndex(GenotypeType firstParent, GenotypeType secondParent, GenotypeType child, boolean parentsAreFlipped){
|
||||
int childInd = genotypeTypeValue(child);
|
||||
private int getLikelihoodMatrixIndex(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||
int childInd = genotypeTypeToValue(child);
|
||||
int motherInd;
|
||||
int fatherInd;
|
||||
final int NUM_CALLED_GENOTYPETYPES = 3;
|
||||
final int INVALID = -1;
|
||||
if (parentsAreFlipped)
|
||||
{
|
||||
motherInd = genotypeTypeValue(secondParent);
|
||||
fatherInd = genotypeTypeValue(firstParent);
|
||||
}
|
||||
else {
|
||||
motherInd = genotypeTypeValue(firstParent);
|
||||
fatherInd = genotypeTypeValue(secondParent);
|
||||
}
|
||||
|
||||
motherInd = genotypeTypeToValue(mother);
|
||||
fatherInd = genotypeTypeToValue(father);
|
||||
|
||||
if (childInd == INVALID || motherInd == INVALID || fatherInd == INVALID) //any of the genotypes are NO_CALL, UNAVAILABLE or MIXED
|
||||
return INVALID;
|
||||
|
|
@ -670,11 +469,16 @@ public class FamilyLikelihoodsUtils {
|
|||
return motherInd*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES + fatherInd*NUM_CALLED_GENOTYPETYPES + childInd;
|
||||
}
|
||||
|
||||
private int genotypeTypeValue(GenotypeType input){
|
||||
private int genotypeTypeToValue(GenotypeType input){
|
||||
if (input == GenotypeType.HOM_REF) return 0;
|
||||
if (input == GenotypeType.HET) return 1;
|
||||
if (input == GenotypeType.HOM_VAR) return 2;
|
||||
return -1;
|
||||
}
|
||||
|
||||
//this excludes mixed genotypes, whereas the htsjdk Genotype.isCalled() will return true if the GenotypeType is mixed
|
||||
private boolean hasCalledGT(GenotypeType genotype){
|
||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -65,15 +65,17 @@ public class PosteriorLikelihoodsUtils {
|
|||
final double globalFrequencyPriorDirichlet,
|
||||
final boolean useInputSamples,
|
||||
final boolean useAC,
|
||||
final boolean calcMissing) {
|
||||
final boolean useACoff) {
|
||||
|
||||
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
||||
boolean nonSNPprior = false;
|
||||
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
|
||||
final boolean nonSNPeval = !vc1.isSNP();
|
||||
final double[] alleleCounts = new double[vc1.getNAlleles()];
|
||||
//only use discovered allele count if there are at least 10 samples
|
||||
final boolean useDiscoveredAC = !useACoff && vc1.getNSamples() >= 10;
|
||||
|
||||
if(!nonSNPeval)
|
||||
if(vc1.isSNP())
|
||||
{
|
||||
//store the allele counts for each allele in the variant priors
|
||||
for ( final VariantContext resource : resources ) {
|
||||
|
|
@ -111,7 +113,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
//parse the PPs into a vector of probabilities
|
||||
if (PPfromVCF instanceof String) {
|
||||
final String PPstring = (String)PPfromVCF;
|
||||
if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." after family priors are applied
|
||||
if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." if family priors are applied
|
||||
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
||||
else {
|
||||
final String[] likelihoodsAsStringVector = PPstring.split(",");
|
||||
|
|
@ -135,7 +137,7 @@ public class PosteriorLikelihoodsUtils {
|
|||
}
|
||||
|
||||
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
|
||||
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
|
||||
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !useDiscoveredAC);
|
||||
|
||||
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testUsingDiscoveredAF() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
||||
" -o %s" +
|
||||
" -R " + b37KGReference +
|
||||
" -L 20:10,000,000-10,100,000" +
|
||||
|
|
@ -73,7 +73,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testMissingPriors() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||
" -o %s" +
|
||||
" -R " + b37KGReference +
|
||||
" -L 20:10,000,000-10,100,000" +
|
||||
|
|
@ -86,7 +86,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testInputINDELs() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||
" -o %s" +
|
||||
" -R " + b37KGReference +
|
||||
" -L 20:10,000,000-10,100,000" +
|
||||
|
|
@ -100,14 +100,14 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
|||
@Test(enabled = true)
|
||||
public void testFamilyPriors() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||
" -o %s" +
|
||||
" -R " + b37KGReference +
|
||||
" -ped " + CEUtrioFamilyFile +
|
||||
" -V " + CEUtrioTest +
|
||||
" -supporting " + CEUtrioPopPriorsTest,
|
||||
1,
|
||||
Arrays.asList("a22c81f0609c9f43578054661797395b"));
|
||||
Arrays.asList("781f85f56dac9074c96ace31b09e0f59"));
|
||||
executeTest("testFamilyPriors", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -136,13 +136,22 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
private void testCalculatePosteriorNoExternalData() {
|
||||
VariantContext test1 = makeVC("1",Arrays.asList(Aref,T), makeG("s1",Aref,T,20,0,10),
|
||||
makeG("s2",T,T,60,40,0),
|
||||
makeG("s3",Aref,Aref,0,30,90));
|
||||
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, true);
|
||||
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
|
||||
makeG("s3",Aref,Aref,0,30,90),
|
||||
makeG("s4",Aref,T,20,0,10),
|
||||
makeG("s5",T,T,60,40,0),
|
||||
makeG("s6",Aref,Aref,0,30,90),
|
||||
makeG("s7",Aref,T,20,0,10),
|
||||
makeG("s8",T,T,60,40,0),
|
||||
makeG("s9",Aref,Aref,0,30,90),
|
||||
makeG("s10",Aref,T,20,0,10),
|
||||
makeG("s11",T,T,60,40,0),
|
||||
makeG("s12",Aref,Aref,0,30,90));
|
||||
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,12).make();
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false);
|
||||
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.26110257, -0.02700903, -1.26110257});
|
||||
Assert.assertTrue(test1exp1.hasPL());
|
||||
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
|
||||
Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0006510083, -2.824524, -9.000651});
|
||||
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000075e+00, -3.765981e+00, -7.488009e-05});
|
||||
Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0007438855, -2.7666503408, -9.0007438855});
|
||||
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
|
||||
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>)test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
|
|
@ -154,13 +163,21 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s1",Aref,T,30,10,60,0,15,90),
|
||||
makeG("s2",Aref,C,40,0,10,30,40,80),
|
||||
makeG("s3",Aref,Aref,0,5,8,15,20,40),
|
||||
makeG("s4",C,T,80,40,12,20,0,10));
|
||||
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make();
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, true);
|
||||
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
|
||||
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
|
||||
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
|
||||
Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.016534, -3.4143, -1.392355, -1.4143, -0.06734388, -1.192355});
|
||||
makeG("s4",C,T,80,40,12,20,0,10),
|
||||
makeG("s5",Aref,T,30,10,60,0,15,90),
|
||||
makeG("s6",Aref,C,40,0,10,30,40,80),
|
||||
makeG("s7",Aref,Aref,0,5,8,15,20,40),
|
||||
makeG("s8",C,T,80,40,12,20,0,10),
|
||||
makeG("s9",Aref,T,30,10,60,0,15,90),
|
||||
makeG("s10",Aref,C,40,0,10,30,40,80),
|
||||
makeG("s11",Aref,Aref,0,5,8,15,20,40),
|
||||
makeG("s12",C,T,80,40,12,20,0,10));
|
||||
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(6,6))).make();
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, false);
|
||||
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.823957, -1.000000, -6.686344, 0.000000, -1.952251, -9.686344});
|
||||
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.823957, 0.000000, -1.686344, -3.000000, -4.452251, -8.686344});
|
||||
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {0.000000, -0.676043, -1.662387, -1.676043, -2.628294, -4.862387});
|
||||
Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.371706, -3.547749, -1.434094, -1.547749, 0.000000, -1.234094});
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
|
|
@ -178,7 +195,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
|
||||
makeG("s_1",T,T),
|
||||
makeG("s_2",Aref,T)));
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, false);
|
||||
// the counts here are ref=30, alt=14
|
||||
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
|
||||
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
|
||||
|
|
@ -189,7 +206,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
|
||||
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
|
||||
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
|
||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
|
||||
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
|
||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||
}
|
||||
|
|
@ -199,7 +216,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[2] > GP[1]);
|
||||
|
|
@ -210,7 +227,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[2] < GP[1]);
|
||||
|
|
@ -221,7 +238,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[0] > GP[1]);
|
||||
|
|
@ -232,7 +249,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
Assert.assertTrue(GP[0] < GP[1]);
|
||||
|
|
@ -245,7 +262,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = {UserException.class})
|
||||
|
|
@ -256,7 +273,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test (expectedExceptions = {UserException.class})
|
||||
|
|
@ -266,7 +283,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -276,7 +293,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -286,7 +303,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -296,7 +313,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,ATC,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
System.out.println(test1result);
|
||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||
|
|
@ -311,7 +328,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
|||
makeG("s3",Aref,T,22,0,12));
|
||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
|
||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||
|
||||
|
||||
System.out.println(test1result);
|
||||
|
|
|
|||
Loading…
Reference in New Issue