Merge pull request #711 from broadinstitute/ldg_deNovoAnnotation
Add bells and whistles for Genotype Refinement Pipeline
This commit is contained in:
commit
e60dd77362
|
|
@ -94,7 +94,10 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
private MendelianViolation mendelianViolation = null;
|
private MendelianViolation mendelianViolation = null;
|
||||||
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
|
||||||
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
public static final String LO_CONF_DENOVO_KEY = "loConfDeNovo";
|
||||||
private final int GQ_threshold = 20;
|
private final int hi_GQ_threshold = 20;
|
||||||
|
private final int lo_GQ_threshold = 10;
|
||||||
|
private final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
|
||||||
|
private final int flatNumberOfSamplesCutoff = 4;
|
||||||
private Set<Trio> trios;
|
private Set<Trio> trios;
|
||||||
|
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
|
|
@ -109,7 +112,7 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP );
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
throw new UserException("Possible de novos annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -119,38 +122,38 @@ public class PossibleDeNovo extends InfoFieldAnnotation implements RodRequiringA
|
||||||
final List<String> highConfDeNovoChildren = new ArrayList<String>();
|
final List<String> highConfDeNovoChildren = new ArrayList<String>();
|
||||||
final List<String> lowConfDeNovoChildren = new ArrayList<String>();
|
final List<String> lowConfDeNovoChildren = new ArrayList<String>();
|
||||||
for ( final Trio trio : trios ) {
|
for ( final Trio trio : trios ) {
|
||||||
if ( contextHasTrioLikelihoods(vc,trio) ) {
|
if (vc.isBiallelic() && contextHasTrioLikelihoods(vc,trio) && mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc) )
|
||||||
if (mendelianViolation.isViolation(trio.getMother(),trio.getFather(),trio.getChild(),vc)) {
|
{
|
||||||
if (mendelianViolation.getParentsRefRefChildHet() > 0) {
|
if (mendelianViolation.getParentsRefRefChildHet() > 0) {
|
||||||
if (vc.getGenotype(trio.getChildID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getMaternalID()).getGQ() > GQ_threshold && vc.getGenotype(trio.getPaternalID()).getGQ() > GQ_threshold) {
|
if ((vc.getGenotype(trio.getChildID()).getGQ() >= hi_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) >= hi_GQ_threshold && (vc.getGenotype(trio.getPaternalID()).getGQ() >= hi_GQ_threshold))
|
||||||
|
{
|
||||||
highConfDeNovoChildren.add(trio.getChildID());
|
highConfDeNovoChildren.add(trio.getChildID());
|
||||||
isHighConfDeNovo = true;
|
isHighConfDeNovo = true;
|
||||||
}
|
}
|
||||||
else {
|
else if ((vc.getGenotype(trio.getChildID()).getGQ() >= lo_GQ_threshold) && (vc.getGenotype(trio.getMaternalID()).getGQ()) > 0 && (vc.getGenotype(trio.getPaternalID()).getGQ() > 0))
|
||||||
|
{
|
||||||
lowConfDeNovoChildren.add(trio.getChildID());
|
lowConfDeNovoChildren.add(trio.getChildID());
|
||||||
isLowConfDeNovo = true;
|
isLowConfDeNovo = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( isHighConfDeNovo || isLowConfDeNovo ) {
|
final double percentNumberOfSamplesCutoff = vc.getNSamples()*percentOfSamplesCutoff;
|
||||||
for(final String child : highConfDeNovoChildren) {
|
final double AFcutoff = Math.max(flatNumberOfSamplesCutoff,percentNumberOfSamplesCutoff);
|
||||||
attributeMap.put(HI_CONF_DENOVO_KEY,child);
|
final int deNovoAlleleCount = vc.getCalledChrCount(vc.getAlternateAllele(0)); //we assume we're biallelic above so use the first alt
|
||||||
}
|
if ( isHighConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||||
for(final String child : lowConfDeNovoChildren) {
|
attributeMap.put(HI_CONF_DENOVO_KEY,highConfDeNovoChildren);
|
||||||
attributeMap.put(LO_CONF_DENOVO_KEY,child);
|
if ( isLowConfDeNovo && deNovoAlleleCount < AFcutoff )
|
||||||
}
|
attributeMap.put(LO_CONF_DENOVO_KEY,lowConfDeNovoChildren);
|
||||||
}
|
|
||||||
return attributeMap;
|
return attributeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return the descriptions used for the VCF INFO meta field
|
// return the descriptions used for the VCF INFO meta field
|
||||||
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
public List<String> getKeyNames() { return Arrays.asList(HI_CONF_DENOVO_KEY,LO_CONF_DENOVO_KEY); }
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ > "+GQ_threshold+"): sample name"),
|
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= "+hi_GQ_threshold+" for all trio members)=[comma-delimited list of child samples]"),
|
||||||
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation: sample name")); }
|
new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= "+lo_GQ_threshold+" for child, GQ > 0 for parents)=[comma-delimited list of child samples]")); }
|
||||||
|
|
||||||
|
|
||||||
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
private boolean contextHasTrioLikelihoods(VariantContext context, Trio trio) {
|
||||||
|
|
|
||||||
|
|
@ -113,15 +113,19 @@ import java.util.*;
|
||||||
* 1) Genotype posteriors added to the genotype fields ("PP")
|
* 1) Genotype posteriors added to the genotype fields ("PP")
|
||||||
* 2) Genotypes and GQ assigned according to these posteriors
|
* 2) Genotypes and GQ assigned according to these posteriors
|
||||||
* 3) Per-site genotype priors added to the INFO field ("PG")
|
* 3) Per-site genotype priors added to the INFO field ("PG")
|
||||||
* 4) (Optional) Per-site, per-trio transmission probabilities given as Phred-scaled probability of all genotypes in the trio being correct, added to the genotype fields ("TP")
|
* 4) (Optional) Per-site, per-trio joint likelihoods (JL) and joint posteriors (JP) given as Phred-scaled probability
|
||||||
|
* of all genotypes in the trio being correct based on the PLs for JL and the PPs for JP. These annotations are added to
|
||||||
|
* the genotype fields.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h3>Notes</h3>
|
* <h3>Notes</h3>
|
||||||
* <p>
|
* <p>
|
||||||
* Currently, priors will only be applied for SNP sites in the input callset (and only those that have a SNP at the
|
* Using the default behavior, priors will be applied for each variant (provided each variant has at least 10
|
||||||
* matching site in the priors VCF unless the --calculateMissingPriors flag is used).
|
* called samples). SNP sites in the input callset that have a SNP at the matching site in the supporting VCF will have
|
||||||
* If the site is not called in the priors, flat priors will be applied. Flat priors are also applied for any non-SNP
|
* priors applied based on the AC from the supporting samples and the input callset (unless the --ignoreInputSamples
|
||||||
* sites in the input callset.
|
* flag is used). If the site is not called in the supporting VCF, priors will be applied using the discovered AC from
|
||||||
|
* the input samples (unless the --discoveredACpriorsOff flag is used). Flat priors are applied for any non-SNP sites in
|
||||||
|
* the input callset.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h3>Examples</h3>
|
* <h3>Examples</h3>
|
||||||
|
|
@ -234,8 +238,9 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
||||||
/**
|
/**
|
||||||
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
|
* Calculate priors for missing external variants from sample data -- default behavior is to apply flat priors
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="calculateMissingPriors",shortName="calcMissing",doc="Use discovered allele frequency in the callset for variants that do no appear in the external callset", required=false)
|
@Argument(fullName="discoveredACpriorsOff",shortName="useACoff",doc="Do not use discovered allele count in the input callset " +
|
||||||
public boolean calcMissing = false;
|
"for variants that do not appear in the external callset. ", required=false)
|
||||||
|
public boolean useACoff = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Skip application of population-based priors
|
* Skip application of population-based priors
|
||||||
|
|
@ -252,7 +257,8 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
||||||
@Output(doc="File to which variants should be written")
|
@Output(doc="File to which variants should be written")
|
||||||
protected VariantContextWriter vcfWriter = null;
|
protected VariantContextWriter vcfWriter = null;
|
||||||
|
|
||||||
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
|
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||||
|
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||||
|
|
||||||
private FamilyLikelihoodsUtils famUtils = new FamilyLikelihoodsUtils();
|
private FamilyLikelihoodsUtils famUtils = new FamilyLikelihoodsUtils();
|
||||||
|
|
@ -298,8 +304,10 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
||||||
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||||
headerLines.add(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
|
headerLines.add(new VCFFormatHeaderLine(PHRED_SCALED_POSTERIORS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Phred-scaled Posterior Genotype Probabilities"));
|
||||||
headerLines.add(new VCFInfoHeaderLine("PG", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
|
headerLines.add(new VCFInfoHeaderLine("PG", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Genotype Likelihood Prior"));
|
||||||
if (!skipFamilyPriors)
|
if (!skipFamilyPriors) {
|
||||||
headerLines.add(new VCFFormatHeaderLine(TRANSMISSION_PROBABILITY_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred score of the genotype combination and phase given that the genotypes are correct"));
|
headerLines.add(new VCFFormatHeaderLine(JOINT_LIKELIHOOD_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint likelihood of the genotype combination (before applying family priors)"));
|
||||||
|
headerLines.add(new VCFFormatHeaderLine(JOINT_POSTERIOR_TAG_NAME, 1, VCFHeaderLineType.Integer, "Phred-scaled joint posterior probability of the genotype combination (after applying family priors)"));
|
||||||
|
}
|
||||||
headerLines.add(new VCFHeaderLine("source", "CalculateGenotypePosteriors"));
|
headerLines.add(new VCFHeaderLine("source", "CalculateGenotypePosteriors"));
|
||||||
|
|
||||||
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
|
vcfWriter.writeHeader(new VCFHeader(headerLines, vcfSamples));
|
||||||
|
|
@ -331,12 +339,14 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
||||||
GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
|
GenotypesContext gc = famUtils.calculatePosteriorGLs(vc);
|
||||||
builder.genotypes(gc);
|
builder.genotypes(gc);
|
||||||
}
|
}
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||||
vc_familyPriors = builder.make();
|
vc_familyPriors = builder.make();
|
||||||
|
|
||||||
if (!skipPopulationPriors)
|
if (!skipPopulationPriors)
|
||||||
vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, calcMissing);
|
vc_bothPriors = PosteriorLikelihoodsUtils.calculatePosteriorGLs(vc_familyPriors, otherVCs, missing * numRefIfMissing, globalPrior, !ignoreInputSamples, defaultToAC, useACoff);
|
||||||
else {
|
else {
|
||||||
final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
|
final VariantContextBuilder builder2 = new VariantContextBuilder(vc_familyPriors);
|
||||||
|
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||||
vc_bothPriors = builder2.make();
|
vc_bothPriors = builder2.make();
|
||||||
}
|
}
|
||||||
vcfWriter.add(vc_bothPriors);
|
vcfWriter.add(vc_bothPriors);
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,6 @@
|
||||||
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
package org.broadinstitute.gatk.tools.walkers.variantutils;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.gatk.engine.samples.Sample;
|
import org.broadinstitute.gatk.engine.samples.Sample;
|
||||||
import org.broadinstitute.gatk.utils.MathUtils;
|
import org.broadinstitute.gatk.utils.MathUtils;
|
||||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||||
|
|
@ -57,6 +56,7 @@ import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||||
import htsjdk.variant.utils.GeneralUtils;
|
import htsjdk.variant.utils.GeneralUtils;
|
||||||
import htsjdk.variant.variantcontext.*;
|
import htsjdk.variant.variantcontext.*;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -70,239 +70,180 @@ public class FamilyLikelihoodsUtils {
|
||||||
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix =
|
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> mvCountMatrix =
|
||||||
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class);
|
||||||
|
|
||||||
//Matrix of allele transmission
|
final int NUM_CALLED_GENOTYPETYPES = 3; //HOM_REF, HET, and HOM_VAR
|
||||||
final private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>> transmissionMatrix =
|
|
||||||
new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>>(GenotypeType.class);
|
|
||||||
|
|
||||||
final double[] configurationLikelihoodsMatrix = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
|
double[] configurationLikelihoodsMatrix = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||||
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
|
|
||||||
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}; //27 is # of trio genotype combos, initialize to zero
|
|
||||||
|
|
||||||
ArrayList<Sample> trios = new ArrayList<Sample>();
|
ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||||
|
|
||||||
//Random number generator
|
private final String JOINT_LIKELIHOOD_TAG_NAME = "JL";
|
||||||
final private Random rand = new GenomeAnalysisEngine().getRandomGenerator();
|
private final String JOINT_POSTERIOR_TAG_NAME = "JP";
|
||||||
|
|
||||||
private final String TRANSMISSION_PROBABILITY_TAG_NAME = "TP";
|
|
||||||
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
private final String PHRED_SCALED_POSTERIORS_KEY = "PP";
|
||||||
|
|
||||||
public final double NO_TRANSMISSION_PROB = -1.0;
|
public final double NO_JOINT_VALUE = -1.0;
|
||||||
|
|
||||||
private double deNovoPrior = 1e-8;
|
private double deNovoPrior = 1e-8;
|
||||||
|
|
||||||
|
private final double ONE_THIRD = 0.333333333333333333;
|
||||||
|
private final double LOG10_OF_ONE_THIRD = -0.4771213;
|
||||||
|
|
||||||
private enum FamilyMember {
|
private enum FamilyMember {
|
||||||
MOTHER,
|
MOTHER,
|
||||||
FATHER,
|
FATHER,
|
||||||
CHILD
|
CHILD
|
||||||
}
|
}
|
||||||
|
|
||||||
//Stores a conceptual trio or parent/child pair genotype combination
|
/**
|
||||||
//This combination can then be "applied" to a given trio or pair using the getUpdatedGenotypes method.
|
* Applies the trio genotype combination to the given trio.
|
||||||
private class TrioGenotypes {
|
* @param motherGenotype: Original genotype of the mother
|
||||||
|
* @param fatherGenotype: Original genotype of the father
|
||||||
|
* @param childGenotype: Original genotype of the child
|
||||||
|
* @param updatedGenotypes: An ArrayList<Genotype> to which the newly updated genotypes are added in the following order: Mother, Father, Child
|
||||||
|
*/
|
||||||
|
public void getUpdatedGenotypes(final VariantContext vc, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final ArrayList<Genotype> updatedGenotypes){
|
||||||
|
//genotypes here can be no call
|
||||||
|
boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
|
||||||
|
boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
|
||||||
|
boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
|
||||||
|
|
||||||
//Create 2 fake alleles
|
//default to posteriors equal to likelihoods (flat priors) in case input genotypes are not called
|
||||||
//The actual bases will never be used but the Genotypes created using the alleles will be.
|
double[] uninformativeLikelihoods = {ONE_THIRD, ONE_THIRD, ONE_THIRD};
|
||||||
private final Allele REF = Allele.create("A",true);
|
|
||||||
private final Allele VAR = Allele.create("A",false);
|
|
||||||
private final Allele NO_CALL = Allele.create(".",false);
|
|
||||||
private final String DUMMY_NAME = "DummySample";
|
|
||||||
private EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> MVcountMatrix;
|
|
||||||
|
|
||||||
private final EnumMap<FamilyMember,Genotype> familyGenotypes = new EnumMap<FamilyMember, Genotype>(FamilyMember.class);
|
double[] motherLikelihoods = motherIsCalled? GeneralUtils.normalizeFromLog10(motherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||||
|
double[] fatherLikelihoods = fatherIsCalled? GeneralUtils.normalizeFromLog10(fatherGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||||
|
double[] childLikelihoods = childIsCalled? GeneralUtils.normalizeFromLog10(childGenotype.getLikelihoods().getAsVector()) : uninformativeLikelihoods;
|
||||||
|
|
||||||
/* Constructor: Creates a conceptual trio genotype combination from the given genotypes.
|
//these are also in log10 space
|
||||||
*/
|
double[] motherLog10Posteriors = getPosteriors(FamilyMember.MOTHER);
|
||||||
public TrioGenotypes(GenotypeType mother, GenotypeType father, GenotypeType child){
|
double[] fatherLog10Posteriors = getPosteriors(FamilyMember.FATHER);
|
||||||
familyGenotypes.put(FamilyMember.MOTHER, makeGenotype(mother));
|
double[] childLog10Posteriors = getPosteriors(FamilyMember.CHILD);
|
||||||
familyGenotypes.put(FamilyMember.FATHER, makeGenotype(father));
|
|
||||||
familyGenotypes.put(FamilyMember.CHILD, makeGenotype(child));
|
double[] motherPosteriors = GeneralUtils.normalizeFromLog10(motherLog10Posteriors);
|
||||||
}
|
double[] fatherPosteriors = GeneralUtils.normalizeFromLog10(fatherLog10Posteriors);
|
||||||
|
double[] childPosteriors = GeneralUtils.normalizeFromLog10(childLog10Posteriors);
|
||||||
private ArrayList<Allele> getAlleles(GenotypeType genotype){
|
|
||||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
|
||||||
if(genotype == GenotypeType.HOM_REF){
|
double jointPosteriorProbability = -1;
|
||||||
alleles.add(REF);
|
//jointTrioLikelihood is the product of the trio members' likelihoods (before priors) for the configuration that is most probable after applying priors
|
||||||
alleles.add(REF);
|
double jointTrioLikelihood = -1;
|
||||||
}
|
if(childIsCalled && motherIsCalled && fatherIsCalled) {
|
||||||
else if(genotype == GenotypeType.HET){
|
jointTrioLikelihood = motherLikelihoods[MathUtils.maxElementIndex(motherPosteriors)]*fatherLikelihoods[MathUtils.maxElementIndex(fatherPosteriors)]*childLikelihoods[MathUtils.maxElementIndex(childPosteriors)];
|
||||||
alleles.add(REF);
|
jointPosteriorProbability = MathUtils.arrayMax(motherPosteriors)*MathUtils.arrayMax(fatherPosteriors)*MathUtils.arrayMax(childPosteriors);
|
||||||
alleles.add(VAR);
|
|
||||||
}
|
|
||||||
else if(genotype == GenotypeType.HOM_VAR){
|
|
||||||
alleles.add(VAR);
|
|
||||||
alleles.add(VAR);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return alleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setMVcountMatrix(EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>> inputMat) {
|
|
||||||
MVcountMatrix = inputMat;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean hasCalledGT(GenotypeType genotype){
|
|
||||||
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO: this was stupid stuff for phasing -- let's take it out
|
|
||||||
private Genotype makeGenotype(final GenotypeType type) {
|
|
||||||
return makeGenotype(getAlleles(type));
|
|
||||||
}
|
|
||||||
|
|
||||||
private Genotype makeGenotype(final List<Allele> alleles) {
|
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_NAME, alleles);
|
|
||||||
return gb.make();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Applies the trio genotype combination to the given trio.
|
|
||||||
* @param ref: Reference allele
|
|
||||||
* @param alt: Alternate allele
|
|
||||||
* @param motherGenotype: Genotype of the mother in this trio genotype combination
|
|
||||||
* @param fatherGenotype: Genotype of the father in this trio genotype combination
|
|
||||||
* @param childGenotype: Genotype of the child in this trio genotype combination
|
|
||||||
* @param transmissionProb: Probability for this trio genotype combination to be correct (pass NO_TRANSMISSION_PROB if unavailable)
|
|
||||||
* @param configurationLikelihoodsMatrix: (Non-normalized) likelihoods for each trio genotype combination, for use in generating new PLs
|
|
||||||
* @param updatedGenotypes: An ArrayList<Genotype> to which the newly updated genotypes are added in the following order: Mother, Father, Child
|
|
||||||
*/
|
|
||||||
public void getUpdatedGenotypes(final Allele ref, final Allele alt, final Genotype motherGenotype, final Genotype fatherGenotype, final Genotype childGenotype, final double transmissionProb, double[] configurationLikelihoodsMatrix, final ArrayList<Genotype> updatedGenotypes){
|
|
||||||
//default to flat priors in case input genotypes are not called
|
|
||||||
double[] motherPosteriors = {1,1,1};
|
|
||||||
double[] fatherPosteriors = {1,1,1};
|
|
||||||
double[] childPosteriors = {1,1,1};
|
|
||||||
|
|
||||||
//genotypes here can be no call
|
|
||||||
boolean fatherIsCalled = fatherGenotype != null && hasCalledGT(fatherGenotype.getType());
|
|
||||||
boolean motherIsCalled = motherGenotype != null && hasCalledGT(motherGenotype.getType());
|
|
||||||
boolean childIsCalled = childGenotype != null && hasCalledGT(childGenotype.getType());
|
|
||||||
|
|
||||||
if (fatherIsCalled && childIsCalled) {
|
|
||||||
motherPosteriors = getPosteriors(FamilyMember.MOTHER);
|
|
||||||
}
|
|
||||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, motherGenotype, transmissionProb, motherPosteriors));
|
|
||||||
|
|
||||||
if (motherIsCalled && childIsCalled) {
|
|
||||||
fatherPosteriors = getPosteriors(FamilyMember.FATHER);
|
|
||||||
}
|
|
||||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, fatherGenotype, transmissionProb, fatherPosteriors));
|
|
||||||
|
|
||||||
if (motherIsCalled && fatherIsCalled) {
|
|
||||||
childPosteriors = getPosteriors(FamilyMember.CHILD);
|
|
||||||
}
|
|
||||||
updatedGenotypes.add(getUpdatedGenotype(ref, alt, childGenotype, transmissionProb, childPosteriors));
|
|
||||||
}
|
|
||||||
|
|
||||||
private Genotype getUpdatedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, double[] normalizedPosteriors){
|
|
||||||
|
|
||||||
int phredScoreTransmission = -1;
|
|
||||||
if(transmissionProb != NO_TRANSMISSION_PROB){
|
|
||||||
double dphredScoreTransmission = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1 - (transmissionProb)));
|
|
||||||
phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE;
|
|
||||||
}
|
|
||||||
//Handle null, missing and unavailable genotypes
|
|
||||||
//Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable
|
|
||||||
//genotype so it is safe to return the original genotype in this case.
|
|
||||||
//In addition, if the genotype configuration confidence is 0, then return the original genotypes.
|
|
||||||
if(phredScoreTransmission ==0 || genotype == null || !hasCalledGT(genotype.getType()))
|
|
||||||
return genotype;
|
|
||||||
|
|
||||||
//Add the transmission probability
|
|
||||||
final Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
|
||||||
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
|
||||||
if(transmissionProb>NO_TRANSMISSION_PROB)
|
|
||||||
genotypeAttributes.put(TRANSMISSION_PROBABILITY_TAG_NAME, phredScoreTransmission);
|
|
||||||
|
|
||||||
final ArrayList<Allele> usedAlleles = new ArrayList<Allele>(2);
|
|
||||||
usedAlleles.add(refAllele);
|
|
||||||
usedAlleles.add(altAllele);
|
|
||||||
|
|
||||||
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
|
||||||
|
|
||||||
final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
|
||||||
|
|
||||||
//note that there will there be times when posteriors don't agree with genotype predicted by configuration likelihoods
|
|
||||||
GATKVariantContextUtils.updateGenotypeAfterSubsetting(usedAlleles, builder,
|
|
||||||
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, usedAlleles);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
|
||||||
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
|
|
||||||
builder.attributes(genotypeAttributes);
|
|
||||||
return builder.make();
|
|
||||||
}
|
|
||||||
|
|
||||||
//marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
|
|
||||||
private double[] getPosteriors(FamilyMember recalcInd) {
|
|
||||||
double marginalOverChangedHR, marginalOverChangedHET, marginalOverChangedHV;
|
|
||||||
marginalOverChangedHR = marginalOverChangedHET = marginalOverChangedHV = 0;
|
|
||||||
final double[] recalcPosteriors = new double[3];
|
|
||||||
|
|
||||||
GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
|
|
||||||
|
|
||||||
switch (recalcInd) {
|
|
||||||
case MOTHER:
|
|
||||||
for(final GenotypeType father : calledTypes) {
|
|
||||||
for(final GenotypeType child : calledTypes) {
|
|
||||||
GenotypeType mother;
|
|
||||||
mother = GenotypeType.HOM_REF;
|
|
||||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
mother = GenotypeType.HET;
|
|
||||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
mother = GenotypeType.HOM_VAR;
|
|
||||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case FATHER:
|
|
||||||
for(final GenotypeType mother : calledTypes){
|
|
||||||
for (final GenotypeType child : calledTypes){
|
|
||||||
GenotypeType father;
|
|
||||||
father = GenotypeType.HOM_REF;
|
|
||||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
father = GenotypeType.HET;
|
|
||||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
father = GenotypeType.HOM_VAR;
|
|
||||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case CHILD:
|
|
||||||
for(final GenotypeType mother : calledTypes){
|
|
||||||
for (final GenotypeType father: calledTypes){
|
|
||||||
GenotypeType child;
|
|
||||||
child = GenotypeType.HOM_REF;
|
|
||||||
marginalOverChangedHR += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
child = GenotypeType.HET;
|
|
||||||
marginalOverChangedHET += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
child = GenotypeType.HOM_VAR;
|
|
||||||
marginalOverChangedHV += configurationLikelihoodsMatrix[getLikelihoodIndex(mother,father,child,false)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new UserException(String.format("%d does not indicate a valid trio individual -- use 0 for mother, 1 for father, 2 for child",recalcInd));
|
|
||||||
}
|
|
||||||
recalcPosteriors[0] = marginalOverChangedHR;
|
|
||||||
recalcPosteriors[1] = marginalOverChangedHET;
|
|
||||||
recalcPosteriors[2] = marginalOverChangedHV;
|
|
||||||
|
|
||||||
final double[] normalizedPosteriors = MathUtils.normalizeFromRealSpace(recalcPosteriors);
|
|
||||||
|
|
||||||
return normalizedPosteriors;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
updatedGenotypes.add(getUpdatedGenotype(vc, motherGenotype, jointTrioLikelihood, jointPosteriorProbability, motherLog10Posteriors));
|
||||||
|
updatedGenotypes.add(getUpdatedGenotype(vc, fatherGenotype, jointTrioLikelihood, jointPosteriorProbability, fatherLog10Posteriors));
|
||||||
|
updatedGenotypes.add(getUpdatedGenotype(vc, childGenotype, jointTrioLikelihood, jointPosteriorProbability, childLog10Posteriors));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initialize(double DNprior, Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
private Genotype getUpdatedGenotype(final VariantContext vc, final Genotype genotype, final double jointLikelihood, final double jointPosteriorProb, final double[] log10Posteriors){
|
||||||
|
//Don't update null, missing or unavailable genotypes
|
||||||
|
if(genotype == null || !hasCalledGT(genotype.getType()))
|
||||||
|
return genotype;
|
||||||
|
|
||||||
|
int phredScaledJL = -1;
|
||||||
|
int phredScaledJP = -1;
|
||||||
|
if(jointLikelihood != NO_JOINT_VALUE){
|
||||||
|
double dphredScaledJL = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointLikelihood));
|
||||||
|
phredScaledJL = dphredScaledJL < Byte.MAX_VALUE ? (byte)dphredScaledJL : Byte.MAX_VALUE;
|
||||||
|
}
|
||||||
|
if(jointPosteriorProb != NO_JOINT_VALUE){
|
||||||
|
double dphredScaledJP = QualityUtils.phredScaleLog10ErrorRate(Math.log10(1-jointPosteriorProb));
|
||||||
|
phredScaledJP = dphredScaledJP < Byte.MAX_VALUE ? (byte)dphredScaledJP : Byte.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Add the joint trio calculations
|
||||||
|
final Map<String, Object> genotypeAttributes = new HashMap<String, Object>();
|
||||||
|
genotypeAttributes.putAll(genotype.getExtendedAttributes());
|
||||||
|
genotypeAttributes.put(JOINT_LIKELIHOOD_TAG_NAME, phredScaledJL);
|
||||||
|
genotypeAttributes.put(JOINT_POSTERIOR_TAG_NAME, phredScaledJP);
|
||||||
|
|
||||||
|
final GenotypeBuilder builder = new GenotypeBuilder(genotype);
|
||||||
|
|
||||||
|
//final double[] log10Posteriors = MathUtils.toLog10(normalizedPosteriors);
|
||||||
|
|
||||||
|
//update genotype types based on posteriors
|
||||||
|
GATKVariantContextUtils.updateGenotypeAfterSubsetting(vc.getAlleles(), builder,
|
||||||
|
GATKVariantContextUtils.GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, log10Posteriors, vc.getAlleles());
|
||||||
|
|
||||||
|
builder.attribute(PHRED_SCALED_POSTERIORS_KEY,
|
||||||
|
Utils.listFromPrimitives(GenotypeLikelihoods.fromLog10Likelihoods(log10Posteriors).getAsPLs()));
|
||||||
|
builder.attributes(genotypeAttributes);
|
||||||
|
return builder.make();
|
||||||
|
}
|
||||||
|
|
||||||
|
//marginalize over the configurationLikelihoodsMatrix and normalize to get the posteriors
|
||||||
|
private double[] getPosteriors(final FamilyMember recalcInd) {
|
||||||
|
double[] marginalOverChangedHR = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||||
|
double[] marginalOverChangedHET = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||||
|
double[] marginalOverChangedHV = new double[NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES];
|
||||||
|
final double[] recalcPosteriors = new double[NUM_CALLED_GENOTYPETYPES];
|
||||||
|
|
||||||
|
final GenotypeType[] calledTypes = {GenotypeType.HOM_REF, GenotypeType.HET, GenotypeType.HOM_VAR};
|
||||||
|
int counter = 0;
|
||||||
|
|
||||||
|
switch (recalcInd) {
|
||||||
|
case MOTHER:
|
||||||
|
for(final GenotypeType father : calledTypes) {
|
||||||
|
for(final GenotypeType child : calledTypes) {
|
||||||
|
GenotypeType mother;
|
||||||
|
mother = GenotypeType.HOM_REF;
|
||||||
|
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
mother = GenotypeType.HET;
|
||||||
|
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
mother = GenotypeType.HOM_VAR;
|
||||||
|
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case FATHER:
|
||||||
|
for(final GenotypeType mother : calledTypes){
|
||||||
|
for (final GenotypeType child : calledTypes){
|
||||||
|
GenotypeType father;
|
||||||
|
father = GenotypeType.HOM_REF;
|
||||||
|
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
father = GenotypeType.HET;
|
||||||
|
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
father = GenotypeType.HOM_VAR;
|
||||||
|
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case CHILD:
|
||||||
|
for(final GenotypeType mother : calledTypes){
|
||||||
|
for (final GenotypeType father: calledTypes){
|
||||||
|
GenotypeType child;
|
||||||
|
child = GenotypeType.HOM_REF;
|
||||||
|
marginalOverChangedHR[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
child = GenotypeType.HET;
|
||||||
|
marginalOverChangedHET[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
child = GenotypeType.HOM_VAR;
|
||||||
|
marginalOverChangedHV[counter] = configurationLikelihoodsMatrix[getLikelihoodMatrixIndex(mother, father, child)];
|
||||||
|
counter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new UserException(String.format("%d does not indicate a valid trio FamilyMember -- use 0 for mother, 1 for father, 2 for child",recalcInd));
|
||||||
|
}
|
||||||
|
|
||||||
|
recalcPosteriors[0] = MathUtils.log10sumLog10(marginalOverChangedHR,0);
|
||||||
|
recalcPosteriors[1] = MathUtils.log10sumLog10(marginalOverChangedHET,0);
|
||||||
|
recalcPosteriors[2] = MathUtils.log10sumLog10(marginalOverChangedHV,0);
|
||||||
|
|
||||||
|
return MathUtils.normalizeFromLog10(recalcPosteriors,true,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(final double DNprior, final Set<String> vcfSamples, final Map<String,Set<Sample>> families){
|
||||||
this.deNovoPrior = DNprior;
|
this.deNovoPrior = DNprior;
|
||||||
|
Arrays.fill(configurationLikelihoodsMatrix,0);
|
||||||
buildMatrices();
|
buildMatrices();
|
||||||
trios = setTrios(vcfSamples, families);
|
trios = setTrios(vcfSamples, families);
|
||||||
}
|
}
|
||||||
|
|
||||||
public GenotypesContext calculatePosteriorGLs(VariantContext vc){
|
public GenotypesContext calculatePosteriorGLs(final VariantContext vc){
|
||||||
final GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes());
|
final GenotypesContext genotypesContext = GenotypesContext.copy(vc.getGenotypes());
|
||||||
|
|
||||||
for (final Sample sample : trios) {
|
for (final Sample sample : trios) {
|
||||||
|
|
@ -312,30 +253,28 @@ public class FamilyLikelihoodsUtils {
|
||||||
|
|
||||||
//Keep only trios and parent/child pairs
|
//Keep only trios and parent/child pairs
|
||||||
if(mother == null && father == null || child == null) {
|
if(mother == null && father == null || child == null) {
|
||||||
logger.warn("null genos in var "+vc.toStringDecodeGenotypes());
|
logger.warn("Null genotypes in variant: "+vc.toStringDecodeGenotypes());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
final ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
final ArrayList<Genotype> trioGenotypes = new ArrayList<Genotype>(3);
|
||||||
final int mvCount = updateFamilyGenotypes(vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), mother, father, child, trioGenotypes);
|
updateFamilyGenotypes(vc, mother, father, child, trioGenotypes);
|
||||||
|
|
||||||
Genotype updatedMother = trioGenotypes.get(0);
|
//replace uses sample names to match genotypes, so order doesn't matter
|
||||||
Genotype updatedFather = trioGenotypes.get(1);
|
if (trioGenotypes.size() > 0) {
|
||||||
Genotype updatedChild = trioGenotypes.get(2);
|
genotypesContext.replace(trioGenotypes.get(0));
|
||||||
|
genotypesContext.replace(trioGenotypes.get(1));
|
||||||
genotypesContext.replace(updatedChild);
|
genotypesContext.replace(trioGenotypes.get(2));
|
||||||
genotypesContext.replace(updatedFather);
|
}
|
||||||
genotypesContext.replace(updatedMother);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return genotypesContext;
|
return genotypesContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Select trios and parent/child pairs only
|
* Select trios and parent/child pairs only
|
||||||
*/
|
*/
|
||||||
private ArrayList<Sample> setTrios(Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
private ArrayList<Sample> setTrios(Set<String> vcfSamples, Map<String,Set<Sample>> families){
|
||||||
|
|
||||||
Set<Sample> family;
|
Set<Sample> family;
|
||||||
ArrayList<Sample> parents;
|
ArrayList<Sample> parents;
|
||||||
final ArrayList<Sample> trios = new ArrayList<Sample>();
|
final ArrayList<Sample> trios = new ArrayList<Sample>();
|
||||||
|
|
@ -365,18 +304,14 @@ public class FamilyLikelihoodsUtils {
|
||||||
return trios;
|
return trios;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Create the transmission matrices
|
//Create a lookup matrix to find the number of MVs for each family genotype combination
|
||||||
//TODO: pass in the real genotypes so we have that info
|
|
||||||
private void buildMatrices(){
|
private void buildMatrices(){
|
||||||
for(final GenotypeType mother : GenotypeType.values()){
|
for(final GenotypeType mother : GenotypeType.values()){
|
||||||
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class));
|
||||||
transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioGenotypes>>(GenotypeType.class));
|
|
||||||
for(final GenotypeType father : GenotypeType.values()){
|
for(final GenotypeType father : GenotypeType.values()){
|
||||||
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class));
|
||||||
transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioGenotypes>(GenotypeType.class));
|
|
||||||
for(final GenotypeType child : GenotypeType.values()){
|
for(final GenotypeType child : GenotypeType.values()){
|
||||||
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child));
|
||||||
transmissionMatrix.get(mother).get(father).put(child,new TrioGenotypes(mother,father,child));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -442,179 +377,54 @@ public class FamilyLikelihoodsUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the genotypes of the given trio. If one of the parents is null, it is considered a parent/child pair.
|
* Updates the genotypes of the given trio. If one of the parents is null, it is considered a parent/child pair.
|
||||||
* @param ref: Reference allele
|
* @param vc: Input variant context
|
||||||
* @param alt: Alternative allele
|
|
||||||
* @param mother: Mother's genotype from vc input
|
* @param mother: Mother's genotype from vc input
|
||||||
* @param father: Father's genotype from vc input
|
* @param father: Father's genotype from vc input
|
||||||
* @param child: Child's genotype from vc input
|
* @param child: Child's genotype from vc input
|
||||||
* @param finalGenotypes: An ArrayList<Genotype> containing the updated genotypes
|
* @param finalGenotypes: An ArrayList<Genotype> containing the updated genotypes
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
private int updateFamilyGenotypes(Allele ref, Allele alt, Genotype mother, Genotype father, Genotype child, ArrayList<Genotype> finalGenotypes) {
|
private void updateFamilyGenotypes(VariantContext vc, Genotype mother, Genotype father, Genotype child, ArrayList<Genotype> finalGenotypes) {
|
||||||
|
|
||||||
//Check whether it is a pair or trio
|
//If one of the parents is not called, fill in with uninformative likelihoods
|
||||||
//Always assign the first parent as the parent having genotype information in pairs
|
Map<GenotypeType,Double> motherLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
||||||
//Always assign the mother as the first parent in trios
|
Map<GenotypeType,Double> fatherLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
||||||
int parentsCalled = 0;
|
|
||||||
Map<GenotypeType,Double> firstParentLikelihoods;
|
|
||||||
Map<GenotypeType,Double> secondParentLikelihoods;
|
|
||||||
final ArrayList<GenotypeType> bestFirstParentGenotype = new ArrayList<GenotypeType>();
|
|
||||||
final ArrayList<GenotypeType> bestSecondParentGenotype = new ArrayList<GenotypeType>();
|
|
||||||
final ArrayList<GenotypeType> bestChildGenotype = new ArrayList<GenotypeType>();
|
|
||||||
GenotypeType pairSecondParentGenotype = null;
|
|
||||||
boolean parentsAreFlipped = false; //usually mother comes first, like for indexing of transmissionMatrix
|
|
||||||
final int INVALID_INDEX = -1;
|
|
||||||
|
|
||||||
//if only one parent is called, make uncalled parent the secondParent
|
|
||||||
if(mother == null || !mother.isCalled()){
|
|
||||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
|
||||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
|
||||||
bestFirstParentGenotype.add(getTypeSafeNull(father));
|
|
||||||
bestSecondParentGenotype.add(getTypeSafeNull(mother));
|
|
||||||
pairSecondParentGenotype = mother == null ? GenotypeType.UNAVAILABLE : mother.getType();
|
|
||||||
parentsAreFlipped = true;
|
|
||||||
if(father != null && father.isCalled())
|
|
||||||
parentsCalled = 1;
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
firstParentLikelihoods = getLikelihoodsAsMapSafeNull(mother);
|
|
||||||
secondParentLikelihoods = getLikelihoodsAsMapSafeNull(father);
|
|
||||||
bestFirstParentGenotype.add(getTypeSafeNull(mother));
|
|
||||||
bestSecondParentGenotype.add(getTypeSafeNull(father));
|
|
||||||
if(father == null || !father.isCalled()){
|
|
||||||
parentsCalled = 1;
|
|
||||||
pairSecondParentGenotype = father == null ? GenotypeType.UNAVAILABLE : father.getType();
|
|
||||||
}else{
|
|
||||||
parentsCalled = 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
Map<GenotypeType,Double> childLikelihoods = getLikelihoodsAsMapSafeNull(child);
|
||||||
bestChildGenotype.add(getTypeSafeNull(child));
|
|
||||||
|
|
||||||
//Prior vars
|
//if the child isn't called or neither parent is called, there's no extra inheritance information in that trio so return
|
||||||
double bestConfigurationLikelihood = 0.0;
|
if (!hasCalledGT(child.getType()) || (!hasCalledGT(mother.getType()) && !hasCalledGT(father.getType())))
|
||||||
double norm = 0.0;
|
return;
|
||||||
int configuration_index =0;
|
|
||||||
final ArrayList<Integer> bestMVCount = new ArrayList<Integer>();
|
|
||||||
bestMVCount.add(0);
|
|
||||||
|
|
||||||
//Get the most likely combination
|
//Fill the configurationLikelihoodsMatrix for each genotype combination
|
||||||
//Only check for most likely combination if at least a parent and the child have genotypes
|
|
||||||
int matInd;
|
int matInd;
|
||||||
if(child.isCalled() && parentsCalled > 0){
|
int mvCount;
|
||||||
int mvCount;
|
double jointLikelihood;
|
||||||
int cumulativeMVCount = 0;
|
double mvCoeff;
|
||||||
double configurationLikelihood = 0;
|
double configurationLikelihood;
|
||||||
for(final Map.Entry<GenotypeType,Double> childGenotype :
|
for(final Map.Entry<GenotypeType,Double> childGenotype :
|
||||||
childLikelihoods.entrySet()){
|
childLikelihoods.entrySet()){
|
||||||
for(final Map.Entry<GenotypeType,Double> firstParentGenotype :
|
for(final Map.Entry<GenotypeType,Double> motherGenotype :
|
||||||
firstParentLikelihoods.entrySet()){
|
motherLikelihoods.entrySet()){
|
||||||
for(final Map.Entry<GenotypeType,Double> secondParentGenotype :
|
for(final Map.Entry<GenotypeType,Double> fatherGenotype :
|
||||||
secondParentLikelihoods.entrySet()){
|
fatherLikelihoods.entrySet()){
|
||||||
mvCount = mvCountMatrix.get(firstParentGenotype.getKey()).get(secondParentGenotype.getKey()).get(childGenotype.getKey());
|
mvCount = mvCountMatrix.get(motherGenotype.getKey()).get(fatherGenotype.getKey()).get(childGenotype.getKey());
|
||||||
//For parent/child pairs, sum over the possible genotype configurations of the missing parent
|
jointLikelihood = motherGenotype.getValue()+fatherGenotype.getValue()+childGenotype.getValue();
|
||||||
if(parentsCalled<2){
|
mvCoeff = mvCount>0 ? Math.pow(deNovoPrior,mvCount) : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior);
|
||||||
cumulativeMVCount += mvCount;
|
configurationLikelihood = Math.log10(mvCoeff) + jointLikelihood;
|
||||||
configurationLikelihood += mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
|
matInd = getLikelihoodMatrixIndex(motherGenotype.getKey(), fatherGenotype.getKey(), childGenotype.getKey());
|
||||||
}
|
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
||||||
//Evaluate configurations of trios
|
|
||||||
else{
|
|
||||||
configurationLikelihood = mvCount>0 ? Math.pow(deNovoPrior,mvCount)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue() : (1.0-10*deNovoPrior-deNovoPrior*deNovoPrior)*firstParentGenotype.getValue()*secondParentGenotype.getValue()*childGenotype.getValue();
|
|
||||||
norm += configurationLikelihood;
|
|
||||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), secondParentGenotype.getKey(),childGenotype.getKey(),parentsAreFlipped);
|
|
||||||
if (matInd > INVALID_INDEX) //still a slim chance of a MIXED GT
|
|
||||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
|
||||||
//Keep this combination if
|
|
||||||
//It has a better likelihood
|
|
||||||
//Or it has the same likelihood but requires less changes from original genotypes
|
|
||||||
if (configurationLikelihood > bestConfigurationLikelihood){
|
|
||||||
bestConfigurationLikelihood = configurationLikelihood;
|
|
||||||
bestMVCount.clear();
|
|
||||||
bestMVCount.add(mvCount);
|
|
||||||
bestFirstParentGenotype.clear();
|
|
||||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
|
||||||
bestSecondParentGenotype.clear();
|
|
||||||
bestSecondParentGenotype.add(secondParentGenotype.getKey());
|
|
||||||
bestChildGenotype.clear();
|
|
||||||
bestChildGenotype.add(childGenotype.getKey());
|
|
||||||
}
|
|
||||||
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
|
||||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
|
||||||
bestSecondParentGenotype.add(secondParentGenotype.getKey());
|
|
||||||
bestChildGenotype.add(childGenotype.getKey());
|
|
||||||
bestMVCount.add(mvCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//Evaluate configurations of parent/child pairs
|
|
||||||
if(parentsCalled<2){
|
|
||||||
norm += configurationLikelihood;
|
|
||||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_REF,childGenotype.getKey(), parentsAreFlipped);
|
|
||||||
if (matInd > INVALID_INDEX)
|
|
||||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
|
||||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HET,childGenotype.getKey(),parentsAreFlipped);
|
|
||||||
if (matInd > INVALID_INDEX)
|
|
||||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
|
||||||
matInd = getLikelihoodIndex(firstParentGenotype.getKey(), GenotypeType.HOM_VAR,childGenotype.getKey(),parentsAreFlipped);
|
|
||||||
if (matInd > INVALID_INDEX)
|
|
||||||
configurationLikelihoodsMatrix[matInd] = configurationLikelihood;
|
|
||||||
|
|
||||||
//Keep this combination if
|
|
||||||
//It has a better likelihood
|
|
||||||
//Or it has the same likelihood but requires less changes from original genotypes
|
|
||||||
if (configurationLikelihood > bestConfigurationLikelihood){
|
|
||||||
bestConfigurationLikelihood = configurationLikelihood;
|
|
||||||
bestMVCount.clear();
|
|
||||||
bestMVCount.add(cumulativeMVCount/3);
|
|
||||||
bestChildGenotype.clear();
|
|
||||||
bestFirstParentGenotype.clear();
|
|
||||||
bestSecondParentGenotype.clear();
|
|
||||||
bestChildGenotype.add(childGenotype.getKey());
|
|
||||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
|
||||||
bestSecondParentGenotype.add(pairSecondParentGenotype);
|
|
||||||
}
|
|
||||||
else if(configurationLikelihood == bestConfigurationLikelihood) {
|
|
||||||
bestFirstParentGenotype.add(firstParentGenotype.getKey());
|
|
||||||
bestSecondParentGenotype.add(pairSecondParentGenotype);
|
|
||||||
bestChildGenotype.add(childGenotype.getKey());
|
|
||||||
bestMVCount.add(cumulativeMVCount/3);
|
|
||||||
}
|
|
||||||
configurationLikelihood = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//normalize the best configuration probability
|
|
||||||
bestConfigurationLikelihood = bestConfigurationLikelihood / norm;
|
|
||||||
|
|
||||||
//In case of multiple equally likely combinations, take a random one
|
|
||||||
if(bestFirstParentGenotype.size()>1){
|
|
||||||
configuration_index = rand.nextInt(bestFirstParentGenotype.size()-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
bestConfigurationLikelihood = NO_TRANSMISSION_PROB;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TrioGenotypes updatedTrioGenotypes;
|
getUpdatedGenotypes(vc, mother, father, child, finalGenotypes);
|
||||||
if(parentsCalled < 2 && mother == null || !mother.isCalled())
|
|
||||||
updatedTrioGenotypes = transmissionMatrix.get(bestSecondParentGenotype.get(configuration_index)).get(bestFirstParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
|
|
||||||
else
|
|
||||||
updatedTrioGenotypes = transmissionMatrix.get(bestFirstParentGenotype.get(configuration_index)).get(bestSecondParentGenotype.get(configuration_index)).get(bestChildGenotype.get(configuration_index));
|
|
||||||
|
|
||||||
//Return the updated genotypes
|
|
||||||
updatedTrioGenotypes.setMVcountMatrix(mvCountMatrix);
|
|
||||||
updatedTrioGenotypes.getUpdatedGenotypes(ref, alt, mother, father, child, bestConfigurationLikelihood, configurationLikelihoodsMatrix, finalGenotypes);
|
|
||||||
return bestMVCount.get(configuration_index);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Get a Map of genotype likelihoods, normalized from log10-space.
|
//Get a Map of genotype (log10)likelihoods
|
||||||
//In case of null, unavailable or no call, all likelihoods are 1/3.
|
|
||||||
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
|
||||||
if (genotype != null && genotype.isCalled() && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
|
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||||
final EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
double[] likelihoods;
|
||||||
|
|
||||||
|
if (genotype != null && hasCalledGT(genotype.getType()) && genotype.hasExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY)) {
|
||||||
Object GPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
|
Object GPfromVCF = genotype.getExtendedAttribute(PHRED_SCALED_POSTERIORS_KEY);
|
||||||
//parse the GPs into a vector of probabilities
|
//parse the GPs into a vector of probabilities
|
||||||
final String[] likelihoodsAsStringVector = ((String)GPfromVCF).split(",");
|
final String[] likelihoodsAsStringVector = ((String)GPfromVCF).split(",");
|
||||||
|
|
@ -622,46 +432,35 @@ public class FamilyLikelihoodsUtils {
|
||||||
for ( int i = 0; i < likelihoodsAsStringVector.length; i++ ) {
|
for ( int i = 0; i < likelihoodsAsStringVector.length; i++ ) {
|
||||||
likelihoodsAsVector[i] = Double.parseDouble(likelihoodsAsStringVector[i]) / -10.0;
|
likelihoodsAsVector[i] = Double.parseDouble(likelihoodsAsStringVector[i]) / -10.0;
|
||||||
}
|
}
|
||||||
double[] likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector);
|
//keep in log10 space for large GQs
|
||||||
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[GenotypeType.HOM_REF.ordinal()-1]);
|
likelihoods = GeneralUtils.normalizeFromLog10(likelihoodsAsVector, true, true);
|
||||||
likelihoodsMap.put(GenotypeType.HET,likelihoods[GenotypeType.HET.ordinal()-1]);
|
|
||||||
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[GenotypeType.HOM_VAR.ordinal() - 1]);
|
|
||||||
return likelihoodsMap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(genotype == null || !genotype.isCalled() || genotype.getLikelihoods() == null){
|
//In case of null, unavailable or no call, all likelihoods are log10(1/3)
|
||||||
final EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
else if(genotype == null || !hasCalledGT(genotype.getType()) || genotype.getLikelihoods() == null){
|
||||||
likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
|
likelihoods = new double[3];
|
||||||
likelihoods.put(GenotypeType.HET,1.0/3.0);
|
likelihoods[0] = LOG10_OF_ONE_THIRD;
|
||||||
likelihoods.put(GenotypeType.HOM_VAR,1.0/3.0);
|
likelihoods[1] = LOG10_OF_ONE_THIRD;
|
||||||
return likelihoods;
|
likelihoods[2] = LOG10_OF_ONE_THIRD;
|
||||||
}
|
}
|
||||||
return genotype.getLikelihoods().getAsMap(true);
|
|
||||||
|
//No posteriors in VC, use PLs
|
||||||
|
else
|
||||||
|
likelihoods = GeneralUtils.normalizeFromLog10(genotype.getLikelihoods().getAsVector(),true,true);
|
||||||
|
|
||||||
|
likelihoodsMap.put(GenotypeType.HOM_REF,likelihoods[genotypeTypeToValue(GenotypeType.HOM_REF)]);
|
||||||
|
likelihoodsMap.put(GenotypeType.HET,likelihoods[genotypeTypeToValue(GenotypeType.HET)]);
|
||||||
|
likelihoodsMap.put(GenotypeType.HOM_VAR, likelihoods[genotypeTypeToValue(GenotypeType.HOM_VAR)]);
|
||||||
|
return likelihoodsMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Returns the GenotypeType; returns UNAVAILABLE if given null
|
private int getLikelihoodMatrixIndex(GenotypeType mother, GenotypeType father, GenotypeType child){
|
||||||
private GenotypeType getTypeSafeNull(Genotype genotype){
|
int childInd = genotypeTypeToValue(child);
|
||||||
if(genotype == null)
|
|
||||||
return GenotypeType.UNAVAILABLE;
|
|
||||||
return genotype.getType();
|
|
||||||
}
|
|
||||||
|
|
||||||
private int getLikelihoodIndex(GenotypeType firstParent, GenotypeType secondParent, GenotypeType child, boolean parentsAreFlipped){
|
|
||||||
int childInd = genotypeTypeValue(child);
|
|
||||||
int motherInd;
|
int motherInd;
|
||||||
int fatherInd;
|
int fatherInd;
|
||||||
final int NUM_CALLED_GENOTYPETYPES = 3;
|
|
||||||
final int INVALID = -1;
|
final int INVALID = -1;
|
||||||
if (parentsAreFlipped)
|
motherInd = genotypeTypeToValue(mother);
|
||||||
{
|
fatherInd = genotypeTypeToValue(father);
|
||||||
motherInd = genotypeTypeValue(secondParent);
|
|
||||||
fatherInd = genotypeTypeValue(firstParent);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
motherInd = genotypeTypeValue(firstParent);
|
|
||||||
fatherInd = genotypeTypeValue(secondParent);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (childInd == INVALID || motherInd == INVALID || fatherInd == INVALID) //any of the genotypes are NO_CALL, UNAVAILABLE or MIXED
|
if (childInd == INVALID || motherInd == INVALID || fatherInd == INVALID) //any of the genotypes are NO_CALL, UNAVAILABLE or MIXED
|
||||||
return INVALID;
|
return INVALID;
|
||||||
|
|
@ -670,11 +469,16 @@ public class FamilyLikelihoodsUtils {
|
||||||
return motherInd*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES + fatherInd*NUM_CALLED_GENOTYPETYPES + childInd;
|
return motherInd*NUM_CALLED_GENOTYPETYPES*NUM_CALLED_GENOTYPETYPES + fatherInd*NUM_CALLED_GENOTYPETYPES + childInd;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int genotypeTypeValue(GenotypeType input){
|
private int genotypeTypeToValue(GenotypeType input){
|
||||||
if (input == GenotypeType.HOM_REF) return 0;
|
if (input == GenotypeType.HOM_REF) return 0;
|
||||||
if (input == GenotypeType.HET) return 1;
|
if (input == GenotypeType.HET) return 1;
|
||||||
if (input == GenotypeType.HOM_VAR) return 2;
|
if (input == GenotypeType.HOM_VAR) return 2;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//this excludes mixed genotypes, whereas the htsjdk Genotype.isCalled() will return true if the GenotypeType is mixed
|
||||||
|
private boolean hasCalledGT(GenotypeType genotype){
|
||||||
|
return genotype == GenotypeType.HOM_REF || genotype == GenotypeType.HET || genotype == GenotypeType.HOM_VAR;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -65,15 +65,17 @@ public class PosteriorLikelihoodsUtils {
|
||||||
final double globalFrequencyPriorDirichlet,
|
final double globalFrequencyPriorDirichlet,
|
||||||
final boolean useInputSamples,
|
final boolean useInputSamples,
|
||||||
final boolean useAC,
|
final boolean useAC,
|
||||||
final boolean calcMissing) {
|
final boolean useACoff) {
|
||||||
|
|
||||||
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
final Map<Allele,Integer> totalAlleleCounts = new HashMap<>();
|
||||||
boolean nonSNPprior = false;
|
boolean nonSNPprior = false;
|
||||||
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
|
if (vc1 == null) throw new IllegalArgumentException("VariantContext vc1 is null");
|
||||||
final boolean nonSNPeval = !vc1.isSNP();
|
final boolean nonSNPeval = !vc1.isSNP();
|
||||||
final double[] alleleCounts = new double[vc1.getNAlleles()];
|
final double[] alleleCounts = new double[vc1.getNAlleles()];
|
||||||
|
//only use discovered allele count if there are at least 10 samples
|
||||||
|
final boolean useDiscoveredAC = !useACoff && vc1.getNSamples() >= 10;
|
||||||
|
|
||||||
if(!nonSNPeval)
|
if(vc1.isSNP())
|
||||||
{
|
{
|
||||||
//store the allele counts for each allele in the variant priors
|
//store the allele counts for each allele in the variant priors
|
||||||
for ( final VariantContext resource : resources ) {
|
for ( final VariantContext resource : resources ) {
|
||||||
|
|
@ -111,7 +113,7 @@ public class PosteriorLikelihoodsUtils {
|
||||||
//parse the PPs into a vector of probabilities
|
//parse the PPs into a vector of probabilities
|
||||||
if (PPfromVCF instanceof String) {
|
if (PPfromVCF instanceof String) {
|
||||||
final String PPstring = (String)PPfromVCF;
|
final String PPstring = (String)PPfromVCF;
|
||||||
if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." after family priors are applied
|
if (PPstring.charAt(0)=='.') //samples not in trios will have PP tag like ".,.,." if family priors are applied
|
||||||
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
likelihoods.add(genotype.hasLikelihoods() ? genotype.getLikelihoods().getAsVector() : null );
|
||||||
else {
|
else {
|
||||||
final String[] likelihoodsAsStringVector = PPstring.split(",");
|
final String[] likelihoodsAsStringVector = PPstring.split(",");
|
||||||
|
|
@ -135,7 +137,7 @@ public class PosteriorLikelihoodsUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
|
//TODO: for now just use priors that are SNPs because indel priors will bias SNP calls
|
||||||
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !calcMissing);
|
final boolean useFlatPriors = nonSNPeval || nonSNPprior || (resources.isEmpty() && !useDiscoveredAC);
|
||||||
|
|
||||||
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
|
final List<double[]> posteriors = calculatePosteriorGLs(likelihoods,alleleCounts,vc1.getMaxPloidy(2), useFlatPriors);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testUsingDiscoveredAF() {
|
public void testUsingDiscoveredAF() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header -calcMissing" +
|
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -R " + b37KGReference +
|
" -R " + b37KGReference +
|
||||||
" -L 20:10,000,000-10,100,000" +
|
" -L 20:10,000,000-10,100,000" +
|
||||||
|
|
@ -73,7 +73,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMissingPriors() {
|
public void testMissingPriors() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -R " + b37KGReference +
|
" -R " + b37KGReference +
|
||||||
" -L 20:10,000,000-10,100,000" +
|
" -L 20:10,000,000-10,100,000" +
|
||||||
|
|
@ -86,7 +86,7 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testInputINDELs() {
|
public void testInputINDELs() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -R " + b37KGReference +
|
" -R " + b37KGReference +
|
||||||
" -L 20:10,000,000-10,100,000" +
|
" -L 20:10,000,000-10,100,000" +
|
||||||
|
|
@ -100,14 +100,14 @@ public class CalculateGenotypePosteriorsIntegrationTest extends WalkerTest {
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testFamilyPriors() {
|
public void testFamilyPriors() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CalculateGenotypePosteriors --no_cmdline_in_header" +
|
"-T CalculateGenotypePosteriors --no_cmdline_in_header -useACoff" +
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -R " + b37KGReference +
|
" -R " + b37KGReference +
|
||||||
" -ped " + CEUtrioFamilyFile +
|
" -ped " + CEUtrioFamilyFile +
|
||||||
" -V " + CEUtrioTest +
|
" -V " + CEUtrioTest +
|
||||||
" -supporting " + CEUtrioPopPriorsTest,
|
" -supporting " + CEUtrioPopPriorsTest,
|
||||||
1,
|
1,
|
||||||
Arrays.asList("a22c81f0609c9f43578054661797395b"));
|
Arrays.asList("781f85f56dac9074c96ace31b09e0f59"));
|
||||||
executeTest("testFamilyPriors", spec);
|
executeTest("testFamilyPriors", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -136,13 +136,22 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
private void testCalculatePosteriorNoExternalData() {
|
private void testCalculatePosteriorNoExternalData() {
|
||||||
VariantContext test1 = makeVC("1",Arrays.asList(Aref,T), makeG("s1",Aref,T,20,0,10),
|
VariantContext test1 = makeVC("1",Arrays.asList(Aref,T), makeG("s1",Aref,T,20,0,10),
|
||||||
makeG("s2",T,T,60,40,0),
|
makeG("s2",T,T,60,40,0),
|
||||||
makeG("s3",Aref,Aref,0,30,90));
|
makeG("s3",Aref,Aref,0,30,90),
|
||||||
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,3).make();
|
makeG("s4",Aref,T,20,0,10),
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, true);
|
makeG("s5",T,T,60,40,0),
|
||||||
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.20686, -0.03073215, -1.20686});
|
makeG("s6",Aref,Aref,0,30,90),
|
||||||
|
makeG("s7",Aref,T,20,0,10),
|
||||||
|
makeG("s8",T,T,60,40,0),
|
||||||
|
makeG("s9",Aref,Aref,0,30,90),
|
||||||
|
makeG("s10",Aref,T,20,0,10),
|
||||||
|
makeG("s11",T,T,60,40,0),
|
||||||
|
makeG("s12",Aref,Aref,0,30,90));
|
||||||
|
test1 = new VariantContextBuilder(test1).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,12).make();
|
||||||
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test1, new ArrayList<VariantContext>(), 0, 0.001, true, false, false);
|
||||||
|
Genotype test1exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.26110257, -0.02700903, -1.26110257});
|
||||||
Assert.assertTrue(test1exp1.hasPL());
|
Assert.assertTrue(test1exp1.hasPL());
|
||||||
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000066, -3.823938, -6.557894e-05});
|
Genotype test1exp2 = makeGwithPLs("s2",T,T,new double[]{-6.000075e+00, -3.765981e+00, -7.488009e-05});
|
||||||
Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0006510083, -2.824524, -9.000651});
|
Genotype test1exp3 = makeGwithPLs("s3",Aref,Aref,new double[]{-0.0007438855, -2.7666503408, -9.0007438855});
|
||||||
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
|
Assert.assertEquals("java.util.ArrayList",test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY).getClass().getCanonicalName());
|
||||||
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test1exp1.getPL(), _mleparse((List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>)test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test1exp2.getPL(),_mleparse((List<Integer>)test1result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
|
|
@ -154,13 +163,21 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s1",Aref,T,30,10,60,0,15,90),
|
makeG("s1",Aref,T,30,10,60,0,15,90),
|
||||||
makeG("s2",Aref,C,40,0,10,30,40,80),
|
makeG("s2",Aref,C,40,0,10,30,40,80),
|
||||||
makeG("s3",Aref,Aref,0,5,8,15,20,40),
|
makeG("s3",Aref,Aref,0,5,8,15,20,40),
|
||||||
makeG("s4",C,T,80,40,12,20,0,10));
|
makeG("s4",C,T,80,40,12,20,0,10),
|
||||||
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(2,2))).make();
|
makeG("s5",Aref,T,30,10,60,0,15,90),
|
||||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, true);
|
makeG("s6",Aref,C,40,0,10,30,40,80),
|
||||||
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.647372, -1.045139, -6.823193, -0.04513873, -2.198182, -9.823193});
|
makeG("s7",Aref,Aref,0,5,8,15,20,40),
|
||||||
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.609957, -0.007723248, -1.785778, -3.007723, -4.660767, -8.785778});
|
makeG("s8",C,T,80,40,12,20,0,10),
|
||||||
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {-0.06094877, -0.9587151, -2.03677,-1.958715, -3.111759, -5.23677});
|
makeG("s9",Aref,T,30,10,60,0,15,90),
|
||||||
Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.016534, -3.4143, -1.392355, -1.4143, -0.06734388, -1.192355});
|
makeG("s10",Aref,C,40,0,10,30,40,80),
|
||||||
|
makeG("s11",Aref,Aref,0,5,8,15,20,40),
|
||||||
|
makeG("s12",C,T,80,40,12,20,0,10));
|
||||||
|
test2 = new VariantContextBuilder(test2).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,new ArrayList<Integer>(Arrays.asList(6,6))).make();
|
||||||
|
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(test2,new ArrayList<VariantContext>(),5,0.001,true,false, false);
|
||||||
|
Genotype test2exp1 = makeGwithPLs("s1",Aref,T,new double[]{-2.823957, -1.000000, -6.686344, 0.000000, -1.952251, -9.686344});
|
||||||
|
Genotype test2exp2 = makeGwithPLs("s2",Aref,C,new double[]{-3.823957, 0.000000, -1.686344, -3.000000, -4.452251, -8.686344});
|
||||||
|
Genotype test2exp3 = makeGwithPLs("s3",Aref,Aref,new double[] {0.000000, -0.676043, -1.662387, -1.676043, -2.628294, -4.862387});
|
||||||
|
Genotype test2exp4 = makeGwithPLs("s4",C,T,new double[]{-7.371706, -3.547749, -1.434094, -1.547749, 0.000000, -1.234094});
|
||||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test2exp1.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test2exp2.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(1).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test2exp3.getPL(),(int[]) _mleparse((List<Integer>)test2result.getGenotype(2).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
|
|
@ -178,7 +195,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
|
supplTest1.add(makeVC("4",Arrays.asList(Aref,T),
|
||||||
makeG("s_1",T,T),
|
makeG("s_1",T,T),
|
||||||
makeG("s_2",Aref,T)));
|
makeG("s_2",Aref,T)));
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false, false);
|
||||||
// the counts here are ref=30, alt=14
|
// the counts here are ref=30, alt=14
|
||||||
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
|
Genotype test1exp1 = makeGwithPLs("t1",T,T,new double[]{-3.370985, -1.415172, -0.01721766});
|
||||||
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
|
Genotype test1exp2 = makeGwithPLs("t2",Aref,T,new double[]{-1.763792, -0.007978791, -3.010024});
|
||||||
|
|
@ -189,7 +206,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
|
VariantContext testNonOverlapping = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,3,1,0));
|
||||||
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
|
List<VariantContext> other = Arrays.asList(makeVC("2",Arrays.asList(Aref,C),makeG("s2",C,C,10,2,0)));
|
||||||
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,true);
|
VariantContext test2result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testNonOverlapping,other,0,0.001,true,false,false);
|
||||||
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
|
Genotype test2exp1 = makeGwithPLs("SGV",T,T,new double[]{-4.078345, -3.276502, -0.0002661066});
|
||||||
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
Assert.assertEquals(arraysEq(test2exp1.getPL(),_mleparse((List<Integer>) test2result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY))), "");
|
||||||
}
|
}
|
||||||
|
|
@ -199,7 +216,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
|
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,1,0));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||||
Assert.assertTrue(GP[2] > GP[1]);
|
Assert.assertTrue(GP[2] > GP[1]);
|
||||||
|
|
@ -210,7 +227,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
|
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,40,0,1));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,900).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||||
Assert.assertTrue(GP[2] < GP[1]);
|
Assert.assertTrue(GP[2] < GP[1]);
|
||||||
|
|
@ -221,7 +238,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
|
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,0,1,40));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,500).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||||
Assert.assertTrue(GP[0] > GP[1]);
|
Assert.assertTrue(GP[0] > GP[1]);
|
||||||
|
|
@ -232,7 +249,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
|
VariantContext testOverlappingBase = makeVC("1", Arrays.asList(Aref,T), makeG("s1",T,T,1,0,40));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,100).attribute(VCFConstants.ALLELE_NUMBER_KEY,1000).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
int[] GP = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||||
Assert.assertTrue(GP[0] < GP[1]);
|
Assert.assertTrue(GP[0] < GP[1]);
|
||||||
|
|
@ -245,7 +262,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,T,22,0,12));
|
makeG("s3",Aref,T,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,11).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (expectedExceptions = {UserException.class})
|
@Test (expectedExceptions = {UserException.class})
|
||||||
|
|
@ -256,7 +273,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
|
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test (expectedExceptions = {UserException.class})
|
@Test (expectedExceptions = {UserException.class})
|
||||||
|
|
@ -266,7 +283,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,T,22,0,12));
|
makeG("s3",Aref,T,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,5).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -276,7 +293,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,T,22,0,12));
|
makeG("s3",Aref,T,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -286,7 +303,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,T,22,0,12));
|
makeG("s3",Aref,T,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(testOverlappingBase,supplTest1,0,0.001,true,false,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -296,7 +313,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,ATC,22,0,12));
|
makeG("s3",Aref,ATC,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,T,C))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
System.out.println(test1result);
|
System.out.println(test1result);
|
||||||
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
int[] GPs = _mleparse( (List<Integer>)test1result.getGenotype(0).getAnyAttribute(PHRED_SCALED_POSTERIORS_KEY));
|
||||||
|
|
@ -311,7 +328,7 @@ public class PosteriorLikelihoodsUtilsUnitTest extends BaseTest {
|
||||||
makeG("s3",Aref,T,22,0,12));
|
makeG("s3",Aref,T,22,0,12));
|
||||||
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
List<VariantContext> supplTest1 = new ArrayList<>(1);
|
||||||
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
supplTest1.add(new VariantContextBuilder(makeVC("2",Arrays.asList(Aref,ATC,ATCATC))).attribute(VCFConstants.MLE_ALLELE_COUNT_KEY,Arrays.asList(5,4)).attribute(VCFConstants.ALLELE_NUMBER_KEY,10).make());
|
||||||
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,true);
|
VariantContext test1result = PosteriorLikelihoodsUtils.calculatePosteriorGLs(inputIndel,supplTest1,0,0.001,true,false,false);
|
||||||
|
|
||||||
|
|
||||||
System.out.println(test1result);
|
System.out.println(test1result);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue