More pool caller cleanups: move code duplicated between the Pool and Exact AF calculation models up to the superclass. TMP: have pool genotypes include the GT field, mostly because without genotypes we can't get the site-wide AF and AC annotations; this is unwieldy because it makes the genotype columns very long. Final implementation TBD.
This commit is contained in:
parent
15e26fec04
commit
820216dc68
|
|
@ -26,11 +26,14 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -63,6 +66,42 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrapper class that compares two likelihoods associated with two alleles
|
||||||
|
*/
|
||||||
|
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||||
|
public double sum = 0.0;
|
||||||
|
public Allele allele;
|
||||||
|
|
||||||
|
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||||
|
|
||||||
|
public int compareTo(LikelihoodSum other) {
|
||||||
|
final double diff = sum - other.sum;
|
||||||
|
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unpack a GenotypesContext into an ArrayList of double[] GL vectors
|
||||||
|
* @param GLs Input genotype context
|
||||||
|
* @return ArrayList of doubles corresponding to GL vectors
|
||||||
|
*/
|
||||||
|
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||||
|
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||||
|
|
||||||
|
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||||
|
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||||
|
if ( sample.hasLikelihoods() ) {
|
||||||
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
|
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||||
|
genotypeLikelihoods.add(gls);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return genotypeLikelihoods;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Must be overridden by concrete subclasses
|
* Must be overridden by concrete subclasses
|
||||||
* @param vc variant context with alleles and genotype likelihoods
|
* @param vc variant context with alleles and genotype likelihoods
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
||||||
alleles.add(vc.getReference());
|
alleles.add(vc.getReference());
|
||||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
||||||
GLs = VariantContextUtils.subsetAlleles(vc, alleles, false);
|
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
||||||
|
|
@ -64,17 +64,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return alleles;
|
return alleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
|
||||||
public double sum = 0.0;
|
|
||||||
public Allele allele;
|
|
||||||
|
|
||||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
|
||||||
|
|
||||||
public int compareTo(LikelihoodSum other) {
|
|
||||||
final double diff = sum - other.sum;
|
|
||||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||||
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||||
|
|
@ -112,22 +101,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return orderedBestAlleles;
|
return orderedBestAlleles;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
|
||||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
|
||||||
|
|
||||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
|
||||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
|
||||||
if ( sample.hasLikelihoods() ) {
|
|
||||||
double[] gls = sample.getLikelihoods().getAsVector();
|
|
||||||
|
|
||||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
|
||||||
genotypeLikelihoods.add(gls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return genotypeLikelihoods;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Multi-allelic implementation.
|
// Multi-allelic implementation.
|
||||||
|
|
@ -450,7 +423,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final boolean assignGenotypes,
|
final boolean assignGenotypes,
|
||||||
final int ploidy) {
|
final int ploidy) {
|
||||||
return VariantContextUtils.subsetAlleles(vc, allelesToUse, assignGenotypes);
|
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1076,8 +1076,8 @@ public class VariantContextUtils {
|
||||||
* @param vc variant context with genotype likelihoods
|
* @param vc variant context with genotype likelihoods
|
||||||
* @return genotypes context
|
* @return genotypes context
|
||||||
*/
|
*/
|
||||||
public static GenotypesContext assignGenotypes(final VariantContext vc) {
|
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||||
return subsetAlleles(vc, vc.getAlleles(), true);
|
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
@ -1091,7 +1091,7 @@ public class VariantContextUtils {
|
||||||
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
|
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
|
||||||
* @return genotypes
|
* @return genotypes
|
||||||
*/
|
*/
|
||||||
public static GenotypesContext subsetAlleles(final VariantContext vc,
|
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
|
||||||
final List<Allele> allelesToUse,
|
final List<Allele> allelesToUse,
|
||||||
final boolean assignGenotypes) {
|
final boolean assignGenotypes) {
|
||||||
|
|
||||||
|
|
@ -1170,7 +1170,7 @@ public class VariantContextUtils {
|
||||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
||||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
||||||
else
|
else
|
||||||
newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, attrs));
|
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1187,7 +1187,7 @@ public class VariantContextUtils {
|
||||||
*
|
*
|
||||||
* @return genotype
|
* @return genotype
|
||||||
*/
|
*/
|
||||||
private static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
||||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||||
|
|
||||||
// find the genotype with maximum likelihoods
|
// find the genotype with maximum likelihoods
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue