More pool caller cleanups: move common duplicated code between the Pool and Exact AF calculation models up to the super-class to avoid duplication. TMP: Have pool genotypes include the GT field, mostly because without genotypes we can't get the site-wide AF, AC annotations; however, it's unwieldy because it makes the genotype columns very long. Final implementation TBD.

This commit is contained in:
Guillermo del Angel 2012-04-04 16:23:10 -04:00
parent 15e26fec04
commit 820216dc68
3 changed files with 46 additions and 34 deletions

View File

@ -26,11 +26,14 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
@ -63,6 +66,42 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
this.verboseWriter = verboseWriter;
}
/**
 * Wrapper that associates an allele with a likelihood sum and makes such
 * pairs comparable. The natural ordering is descending by {@code sum}:
 * the instance holding the larger sum compares as "less than" the other.
 */
protected static final class LikelihoodSum implements Comparable<LikelihoodSum> {
    // value the ordering is based on; starts at 0.0
    public double sum = 0.0;
    public Allele allele;

    public LikelihoodSum(Allele allele) {
        this.allele = allele;
    }

    /**
     * @param other the instance to compare against
     * @return -1 if this sum is larger than other's, 1 if it is smaller, 0 otherwise
     */
    public int compareTo(LikelihoodSum other) {
        if ( sum > other.sum )
            return -1;
        if ( sum < other.sum )
            return 1;
        return 0;
    }
}
/**
 * Unpack a GenotypesContext into an ArrayList of double[] likelihood vectors.
 * Samples are visited in sample-name order. A sample contributes its vector only
 * if it has likelihoods and the sum of that vector is below
 * UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL.
 * The first element of the returned list is always a dummy {0.0, 0.0, 0.0} vector.
 *
 * @param GLs Input genotype context
 * @return ArrayList of doubles corresponding to GL vectors, preceded by the dummy entry
 */
protected static ArrayList<double[]> getGLs(GenotypesContext GLs) {
    final ArrayList<double[]> vectors = new ArrayList<double[]>(GLs.size());
    // dummy entry occupying index 0
    vectors.add(new double[]{0.0,0.0,0.0});

    for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
        if ( !sample.hasLikelihoods() )
            continue;

        final double[] likelihoodVector = sample.getLikelihoods().getAsVector();
        // keep only vectors whose total is below the no-call threshold
        if ( MathUtils.sum(likelihoodVector) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
            vectors.add(likelihoodVector);
    }

    return vectors;
}
/**
* Must be overridden by concrete subclasses
* @param vc variant context with alleles and genotype likelihoods

View File

@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
alleles = new ArrayList<Allele>(MAX_ALTERNATE_ALLELES_TO_GENOTYPE + 1);
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, MAX_ALTERNATE_ALLELES_TO_GENOTYPE));
GLs = VariantContextUtils.subsetAlleles(vc, alleles, false);
GLs = VariantContextUtils.subsetDiploidAlleles(vc, alleles, false);
}
linearExactMultiAllelic(GLs, alleles.size() - 1, log10AlleleFrequencyPriors, result);
@ -64,17 +64,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return alleles;
}
/**
 * Holds an allele together with a likelihood sum; instances order themselves
 * so that the larger {@code sum} comes first.
 */
private static final class LikelihoodSum implements Comparable<LikelihoodSum> {
    public double sum = 0.0;
    public Allele allele;

    public LikelihoodSum(Allele allele) {
        this.allele = allele;
    }

    /**
     * @param other the instance to compare against
     * @return 1 if this sum is smaller than other's, -1 if it is larger, 0 otherwise
     */
    public int compareTo(LikelihoodSum other) {
        final double delta = sum - other.sum;
        if ( delta < 0.0 ) {
            return 1;
        }
        if ( delta > 0.0 ) {
            return -1;
        }
        return 0;
    }
}
private static final int PL_INDEX_OF_HOM_REF = 0;
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
@ -112,22 +101,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return orderedBestAlleles;
}
/**
 * Collects the likelihood vectors of the given genotypes into a list.
 * Index 0 of the result is a dummy {0.0, 0.0, 0.0} vector; after it come, in
 * sample-name order, the vectors of every sample that has likelihoods and whose
 * vector sum is below UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL.
 *
 * @param GLs genotypes to unpack
 * @return list of likelihood vectors, starting with the dummy entry
 */
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
    final ArrayList<double[]> result = new ArrayList<double[]>(GLs.size());
    result.add(new double[]{0.0,0.0,0.0}); // dummy

    for ( Genotype genotype : GLs.iterateInSampleNameOrder() ) {
        if ( !genotype.hasLikelihoods() ) {
            continue;
        }
        final double[] vector = genotype.getLikelihoods().getAsVector();
        final boolean belowNoCallThreshold = MathUtils.sum(vector) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL;
        if ( belowNoCallThreshold ) {
            result.add(vector);
        }
    }

    return result;
}
// -------------------------------------------------------------------------------------
//
// Multi-allelic implementation.
@ -450,7 +423,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
return VariantContextUtils.subsetAlleles(vc, allelesToUse, assignGenotypes);
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
}
// -------------------------------------------------------------------------------------

View File

@ -1076,8 +1076,8 @@ public class VariantContextUtils {
* @param vc variant context with genotype likelihoods
* @return genotypes context
*/
public static GenotypesContext assignGenotypes(final VariantContext vc) {
return subsetAlleles(vc, vc.getAlleles(), true);
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
}
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
@ -1091,7 +1091,7 @@ public class VariantContextUtils {
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
* @return genotypes
*/
public static GenotypesContext subsetAlleles(final VariantContext vc,
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final boolean assignGenotypes) {
@ -1170,7 +1170,7 @@ public class VariantContextUtils {
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
else
newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, attrs));
newGTs.add(assignDiploidGenotype(g, newLikelihoods, allelesToUse, attrs));
}
}
@ -1187,7 +1187,7 @@ public class VariantContextUtils {
*
* @return genotype
*/
private static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
private static Genotype assignDiploidGenotype(final Genotype originalGT, final double[] newLikelihoods, final List<Allele> allelesToUse, final Map<String, Object> attrs) {
final int numNewAltAlleles = allelesToUse.size() - 1;
// find the genotype with maximum likelihoods