Major refactor/completion of new Pool Caller under UnifiedGenotyper framework. PoolAFCalculationModel implements new math to combine pools - correct, but still O(N^2) and not complete yet for multiallelics. Pool likelihoods are better encapsulated and kept in an internal hashmap from int[] -> double for space efficiency (likelihoods can be big for pool calls when in initial discovery mode with 4 alleles). Maybe need several iterations of optimization to make it runnable at large scale. Still need to correct function chooseMostLikelyAlternateAlleles before full runs can be produced.
This commit is contained in:
parent
4075bc7d3d
commit
9e11b4f9a7
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -72,4 +73,17 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
|
protected abstract List<Allele> getLog10PNonRef(final VariantContext vc,
|
||||||
final double[] log10AlleleFrequencyPriors,
|
final double[] log10AlleleFrequencyPriors,
|
||||||
final AlleleFrequencyCalculationResult result);
|
final AlleleFrequencyCalculationResult result);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Must be overridden by concrete subclasses
|
||||||
|
* @param vc variant context with alleles and genotype likelihoods
|
||||||
|
* @param allelesToUse alleles to subset
|
||||||
|
* @param assignGenotypes
|
||||||
|
* @param ploidy
|
||||||
|
* @return GenotypesContext object
|
||||||
|
*/
|
||||||
|
protected abstract GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
|
final List<Allele> allelesToUse,
|
||||||
|
final boolean assignGenotypes,
|
||||||
|
final int ploidy);
|
||||||
}
|
}
|
||||||
|
|
@ -446,6 +446,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return coeff;
|
return coeff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GenotypesContext subsetAlleles(final VariantContext vc,
|
||||||
|
final List<Allele> allelesToUse,
|
||||||
|
final boolean assignGenotypes,
|
||||||
|
final int ploidy) {
|
||||||
|
return VariantContextUtils.subsetAlleles(vc, allelesToUse, assignGenotypes);
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -357,7 +357,7 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// create the genotypes
|
// create the genotypes
|
||||||
final GenotypesContext genotypes = VariantContextUtils.subsetAlleles(vc, myAlleles, true);
|
final GenotypesContext genotypes = afcm.get().subsetAlleles(vc, myAlleles, true,ploidy);
|
||||||
|
|
||||||
// print out stats if we have a writer
|
// print out stats if we have a writer
|
||||||
if ( verboseWriter != null && !limitedContext )
|
if ( verboseWriter != null && !limitedContext )
|
||||||
|
|
|
||||||
|
|
@ -231,7 +231,7 @@ public class GenotypeLikelihoods {
|
||||||
private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALLELES_THAT_CAN_BE_GENOTYPED); // start with data for 10 alternate alleles
|
private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALLELES_THAT_CAN_BE_GENOTYPED); // start with data for 10 alternate alleles
|
||||||
|
|
||||||
private static GenotypeLikelihoodsAllelePair[] calculatePLcache(final int altAlleles) {
|
private static GenotypeLikelihoodsAllelePair[] calculatePLcache(final int altAlleles) {
|
||||||
final int numLikelihoods = calculateNumLikelihoods(altAlleles);
|
final int numLikelihoods = calculateNumLikelihoods(1+altAlleles, 2);
|
||||||
final GenotypeLikelihoodsAllelePair[] cache = new GenotypeLikelihoodsAllelePair[numLikelihoods];
|
final GenotypeLikelihoodsAllelePair[] cache = new GenotypeLikelihoodsAllelePair[numLikelihoods];
|
||||||
|
|
||||||
// for all possible combinations of 2 alleles
|
// for all possible combinations of 2 alleles
|
||||||
|
|
@ -251,11 +251,22 @@ public class GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
|
|
||||||
// how many likelihoods are associated with the given number of alternate alleles?
|
// how many likelihoods are associated with the given number of alternate alleles?
|
||||||
public static int calculateNumLikelihoods(final int numAltAlleles) {
|
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
||||||
int numLikelihoods = 1;
|
|
||||||
|
if (numAlleles == 1)
|
||||||
|
return 1;
|
||||||
|
else if (ploidy == 1)
|
||||||
|
return numAlleles;
|
||||||
|
|
||||||
|
int acc =0;
|
||||||
|
for (int k=0; k <= ploidy; k++ )
|
||||||
|
acc += calculateNumLikelihoods(numAlleles-1, ploidy-k);
|
||||||
|
|
||||||
|
return acc;
|
||||||
|
/* int numLikelihoods = 1;
|
||||||
for ( int i = 1; i <= numAltAlleles; i++ )
|
for ( int i = 1; i <= numAltAlleles; i++ )
|
||||||
numLikelihoods += i + 1;
|
numLikelihoods += i + 1;
|
||||||
return numLikelihoods;
|
return numLikelihoods; */
|
||||||
}
|
}
|
||||||
|
|
||||||
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.apache.commons.jexl2.JexlEngine;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
@ -119,7 +120,7 @@ public class VariantContextUtils {
|
||||||
builder.attributes(attrs);
|
builder.attributes(attrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
||||||
int precision = 1;
|
int precision = 1;
|
||||||
|
|
||||||
while ( maxValue > 1 ) {
|
while ( maxValue > 1 ) {
|
||||||
|
|
@ -1116,7 +1117,7 @@ public class VariantContextUtils {
|
||||||
altAlleleIndexToUse[i] = true;
|
altAlleleIndexToUse[i] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles);
|
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||||
// consider this entry only if both of the alleles are good
|
// consider this entry only if both of the alleles are good
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ public abstract class BaseTest {
|
||||||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||||
|
|
||||||
public static final boolean REQUIRE_NETWORK_CONNECTION = true;
|
public static final boolean REQUIRE_NETWORK_CONNECTION = false;
|
||||||
public static final String networkTempDir;
|
public static final String networkTempDir;
|
||||||
public static final File networkTempDirFile;
|
public static final File networkTempDirFile;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue