More pool caller cleanups: Move common duplicated code between Pool and Exact AF calculation models up to super-class to avoid duplication. TMP: Have pool genotypes include the GT field. Mostly because without genotypes we can't get the site-wide AF, AC annotations, but it's unwieldy because it makes the genotype columns very long, TBD final implementation.
This commit is contained in:
parent
15e26fec04
commit
820216dc68
|
|
@ -26,11 +26,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
|
|
@ -63,6 +66,42 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
this.verboseWriter = verboseWriter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper class that compares two likelihoods associated with two alleles
|
||||
*/
|
||||
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||
public double sum = 0.0;
|
||||
public Allele allele;
|
||||
|
||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||
|
||||
public int compareTo(LikelihoodSum other) {
|
||||
final double diff = sum - other.sum;
|
||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpack a GenotypesContext into an ArrayList of double[] likelihood vectors
|
||||
* @param GLs Input genotype context
|
||||
* @return ArrayList of doubles corresponding to GL vectors
|
||||
*/
|
||||
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
||||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
* @param vc variant context with alleles and genotype likelihoods
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
|
||||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
|
||||
GLs = VariantContextUtils.subsetAlleles(vc, alleles, false);
|
||||
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
|
||||
}
|
||||
|
||||
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
|
||||
|
|
@ -64,17 +64,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
return alleles;
|
||||
}
|
||||
|
||||
private static final class LikelihoodSum implements Comparable<LikelihoodSum> {
|
||||
public double sum = 0.0;
|
||||
public Allele allele;
|
||||
|
||||
public LikelihoodSum(Allele allele) { this.allele = allele; }
|
||||
|
||||
public int compareTo(LikelihoodSum other) {
|
||||
final double diff = sum - other.sum;
|
||||
return ( diff < 0.0 ) ? 1 : (diff > 0.0 ) ? -1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
|
|
@ -112,22 +101,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
return orderedBestAlleles;
|
||||
}
|
||||
|
||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());
|
||||
|
||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||
for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
|
||||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
||||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Multi-allelic implementation.
|
||||
|
|
@ -450,7 +423,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return VariantContextUtils.subsetAlleles(vc, allelesToUse, assignGenotypes);
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1076,8 +1076,8 @@ public class VariantContextUtils {
|
|||
* @param vc variant context with genotype likelihoods
|
||||
* @return genotypes context
|
||||
*/
|
||||
public static GenotypesContext assignGenotypes(final VariantContext vc) {
|
||||
return subsetAlleles(vc, vc.getAlleles(), true);
|
||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
|
||||
}
|
||||
|
||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
|
@ -1091,7 +1091,7 @@ public class VariantContextUtils {
|
|||
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
|
||||
* @return genotypes
|
||||
*/
|
||||
public static GenotypesContext subsetAlleles(final VariantContext vc,
|
||||
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes) {
|
||||
|
||||
|
|
@ -1170,7 +1170,7 @@ public class VariantContextUtils {
|
|||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
||||
else
|
||||
newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, attrs));
|
||||
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1187,7 +1187,7 @@ public class VariantContextUtils {
|
|||
*
|
||||
* @return genotype
|
||||
*/
|
||||
private static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
||||
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
|
||||
// find the genotype with maximum likelihoods
|
||||
|
|
|
|||
Loading…
Reference in New Issue