Simple cleanup of pool caller code - since usage is much more general than just calling pools, AF calculation models and GL calculation models are renamed from Pool -> GeneralPloidy. Also, don't have users specify special arguments for -glm and -pnrm. Instead, when running UG with sample ploidy != 2, the correct general ploidy modules are automatically detected and loaded. -glm now reverts to old [SNP|INDEL|BOTH] usage

This commit is contained in:
Guillermo del Angel 2012-07-31 16:34:20 -04:00
parent f7133ffc31
commit 4a23f3cd11
14 changed files with 93 additions and 90 deletions

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalculationModel {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
final protected UnifiedArgumentCollection UAC;
@ -42,7 +42,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final static boolean VERBOSE = false;
protected PoolAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
protected GeneralPloidyExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
ploidy = UAC.samplePloidy;
this.UAC = UAC;
@ -140,7 +140,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
for ( final double[] likelihoods : GLs ) {
final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods);
final int[] acCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(1+numOriginalAltAlleles,ploidy,PLindexOfBestGL);
final int[] acCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(1 + numOriginalAltAlleles, ploidy, PLindexOfBestGL);
// by convention, first count coming from getAlleleCountFromPLIndex comes from reference allele
for (int k=1; k < acCount.length;k++) {
if (acCount[k] > 0)
@ -238,7 +238,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
return newPool;
}
// todo - refactor, function almost identical except for log10LofK computation in PoolGenotypeLikelihoods
// todo - refactor, function almost identical except for log10LofK computation in GeneralPloidyGenotypeLikelihoods
/**
*
* @param set ExactACset holding conformation to be computed
@ -301,7 +301,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
PoolGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
GeneralPloidyGenotypeLikelihoods.updateACset(ACcountsClone, ACqueue, indexesToACset);
}
@ -341,14 +341,14 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// Say L1(K) = Pr(D|AC1=K) * choose(m1,K)
// and L2(K) = Pr(D|AC2=K) * choose(m2,K)
PoolGenotypeLikelihoods.SumIterator firstIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
GeneralPloidyGenotypeLikelihoods.SumIterator firstIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy1);
final double[] x = originalPool.getLikelihoodsAsVector(true);
while(firstIterator.hasNext()) {
x[firstIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy1,firstIterator.getCurrentVector());
firstIterator.next();
}
PoolGenotypeLikelihoods.SumIterator secondIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
GeneralPloidyGenotypeLikelihoods.SumIterator secondIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final double[] y = yy.clone();
while(secondIterator.hasNext()) {
y[secondIterator.getLinearIndex()] += MathUtils.log10MultinomialCoefficient(ploidy2,secondIterator.getCurrentVector());
@ -357,7 +357,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// initialize output to -log10(choose(m1+m2,[k1 k2...])
final int outputDim = GenotypeLikelihoods.numLikelihoods(numAlleles, newPloidy);
final PoolGenotypeLikelihoods.SumIterator outputIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
final GeneralPloidyGenotypeLikelihoods.SumIterator outputIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,newPloidy);
// Now, result(K) = logSum_G (L1(G)+L2(K-G)) where G are all possible vectors that sum UP to K
@ -419,7 +419,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
double denom = -MathUtils.log10MultinomialCoefficient(newPloidy, currentCount);
// for current conformation, get all possible ways to break vector K into two components G1 and G2
final PoolGenotypeLikelihoods.SumIterator innerIterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
final GeneralPloidyGenotypeLikelihoods.SumIterator innerIterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles,ploidy2);
set.log10Likelihoods[0] = Double.NEGATIVE_INFINITY;
while (innerIterator.hasNext()) {
// check if breaking current conformation into g1 and g2 is feasible.
@ -617,7 +617,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( numOriginalAltAlleles == numNewAltAlleles) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(),allelesToUse);
newLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(originalLikelihoods, ploidy, vc.getAlleles(), allelesToUse);
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
@ -668,7 +668,7 @@ public class PoolAFCalculationModel extends AlleleFrequencyCalculationModel {
// find the genotype with maximum likelihoods
final int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
final int[] mlAlleleCount = PoolGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final int[] mlAlleleCount = GeneralPloidyGenotypeLikelihoods.getAlleleCountFromPLIndex(allelesToUse.size(), numChromosomes, PLindex);
final ArrayList<Double> alleleFreqs = new ArrayList<Double>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import java.util.*;
public abstract class PoolGenotypeLikelihoods {
public abstract class GeneralPloidyGenotypeLikelihoods {
protected final int numChromosomes;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@ -67,8 +67,8 @@ public abstract class PoolGenotypeLikelihoods {
private static final boolean FAST_GL_COMPUTATION = true;
// constructor with given logPL elements
public PoolGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
public GeneralPloidyGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean ignoreLaneInformation) {
this.alleles = alleles;
this.nAlleles = alleles.size();
numChromosomes = ploidy;
@ -101,7 +101,7 @@ public abstract class PoolGenotypeLikelihoods {
Arrays.fill(log10Likelihoods, MIN_LIKELIHOOD);
} else {
if (logLikelihoods.length != likelihoodDim)
throw new ReviewedStingException("BUG: inconsistent parameters when creating PoolGenotypeLikelihoods object");
throw new ReviewedStingException("BUG: inconsistent parameters when creating GeneralPloidyGenotypeLikelihoods object");
log10Likelihoods = logLikelihoods; //.clone(); // is clone needed?
}
@ -174,7 +174,7 @@ public abstract class PoolGenotypeLikelihoods {
final int numAlleles = currentState.length;
final int ploidy = restrictSumTo;
linearIndex = PoolGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
linearIndex = GeneralPloidyGenotypeLikelihoods.getLinearIndex(stateVector, numAlleles, ploidy);
}
else
throw new ReviewedStingException("BUG: Not supported");
@ -308,7 +308,7 @@ public abstract class PoolGenotypeLikelihoods {
public static double[] subsetToAlleles(final double[] oldLikelihoods, final int numChromosomes,
final List<Allele> originalAlleles, final List<Allele> allelesToSubset) {
int newPLSize = PoolGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
int newPLSize = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(allelesToSubset.size(), numChromosomes);
double[] newPLs = new double[newPLSize];
@ -357,7 +357,7 @@ public abstract class PoolGenotypeLikelihoods {
newCount[idx] = pVec[permutationKey[idx]];
// get corresponding index from new count
int outputIdx = PoolGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
int outputIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(newCount, allelesToSubset.size(), numChromosomes);
newPLs[outputIdx] = pl;
if (VERBOSE) {
System.out.println("Old Key:"+Arrays.toString(pVec));

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel {
//protected Set<String> laneIDs;
public enum Model {
@ -52,7 +52,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
final protected UnifiedArgumentCollection UAC;
protected PoolGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC,logger);
this.UAC = UAC;
@ -137,11 +137,11 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
protected static class PoolGenotypeData {
public final String name;
public final PoolGenotypeLikelihoods GL;
public final GeneralPloidyGenotypeLikelihoods GL;
public final int depth;
public final List<Allele> alleles;
public PoolGenotypeData(final String name, final PoolGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
public PoolGenotypeData(final String name, final GeneralPloidyGenotypeLikelihoods GL, final int depth, final List<Allele> alleles) {
this.name = name;
this.GL = GL;
this.depth = depth;
@ -236,7 +236,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
// create the GenotypeLikelihoods object
final PoolGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO);
// actually compute likelihoods
final int nGoodBases = GL.add(pileup, UAC);
if ( nGoodBases > 0 )
@ -268,7 +268,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
for ( PoolGenotypeData sampleData : GLs ) {
// extract from multidimensional array
final double[] myLikelihoods = PoolGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(),sampleData.GL.numChromosomes,
final double[] myLikelihoods = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(sampleData.GL.getLikelihoods(), sampleData.GL.numChromosomes,
allAlleles, alleles);
// normalize in log space so that max element is zero.
@ -327,7 +327,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
Abstract methods - must be implemented in derived classes
*/
protected abstract PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected abstract GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,

View File

@ -18,7 +18,7 @@ import java.util.*;
* Time: 10:06 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods {
final PairHMMIndelErrorModel pairModel;
final LinkedHashMap<Allele, Haplotype> haplotypeMap;
final ReferenceContext refContext;
@ -27,14 +27,14 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
final byte refBase;
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
public GeneralPloidyIndelGenotypeLikelihoods(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean ignoreLaneInformation,
final PairHMMIndelErrorModel pairModel,
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
final ReferenceContext referenceContext) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.pairModel = pairModel;
this.haplotypeMap = haplotypeMap;

View File

@ -32,13 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
private static final int MAX_NUM_ALLELES_TO_GENOTYPE = 4;
private PairHMMIndelErrorModel pairModel;
@ -59,7 +57,7 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
*/
protected PoolIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
protected GeneralPloidyIndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC, final Logger logger) {
super(UAC, logger);
@ -69,14 +67,14 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation){
return new PoolIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -23,7 +23,7 @@ import static java.lang.Math.pow;
* and posteriors given a pile of bases and quality scores
*
*/
public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implements Cloneable*/ {
public class GeneralPloidySNPGenotypeLikelihoods extends GeneralPloidyGenotypeLikelihoods/* implements Cloneable*/ {
final List<Allele> myAlleles;
final int[] alleleIndices;
@ -42,8 +42,8 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
* @param useBQAedPileup Use BAQed pileup
* @param ignoreLaneInformation If true, lane info is ignored
*/
public PoolSNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup,final boolean ignoreLaneInformation) {
public GeneralPloidySNPGenotypeLikelihoods(final List<Allele> alleles, final double[] logLikelihoods, final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels, final boolean useBQAedPileup, final boolean ignoreLaneInformation) {
super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation);
this.useBAQedPileup = useBQAedPileup;
@ -52,7 +52,7 @@ public class PoolSNPGenotypeLikelihoods extends PoolGenotypeLikelihoods/* implem
Allele refAllele = alleles.get(0);
//sanity check: by construction, first allele should ALWAYS be the reference alleles
if (!refAllele.isReference())
throw new ReviewedStingException("BUG: First allele in list passed to PoolSNPGenotypeLikelihoods should be reference!");
throw new ReviewedStingException("BUG: First allele in list passed to GeneralPloidySNPGenotypeLikelihoods should be reference!");
refByte = refAllele.getBases()[0]; // by construction, first allele in list is always ref!

View File

@ -35,22 +35,22 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.util.*;
public class PoolSNPGenotypeLikelihoodsCalculationModel extends PoolGenotypeLikelihoodsCalculationModel {
public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends GeneralPloidyGenotypeLikelihoodsCalculationModel {
protected PoolSNPGenotypeLikelihoodsCalculationModel( UnifiedArgumentCollection UAC, Logger logger) {
protected GeneralPloidySNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
}
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
protected GeneralPloidyGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
final double[] logLikelihoods,
final int ploidy,
final HashMap<String, ErrorModel> perLaneErrorModels,
final boolean useBQAedPileup,
final ReferenceContext ref,
final boolean ignoreLaneInformation) {
return new PoolSNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
return new GeneralPloidySNPGenotypeLikelihoods(alleles, null, UAC.samplePloidy, perLaneErrorModels, useBQAedPileup, UAC.IGNORE_LANE_INFO);
}
protected List<Allele> getInitialAllelesToUse(final RefMetaDataTracker tracker,

View File

@ -19,7 +19,7 @@ import java.util.Arrays;
* Time: 7:44 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolAFCalculationModelUnitTest extends BaseTest {
public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
static double[] AA1, AB1, BB1;
static double[] AA2, AB2, AC2, BB2, BC2, CC2;
@ -138,10 +138,10 @@ public class PoolAFCalculationModelUnitTest extends BaseTest {
public void testGLs(GetGLsTest cfg) {
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(cfg.numAltAlleles);
final int len = PoolGenotypeLikelihoods.getNumLikelihoodElements(1+cfg.numAltAlleles,cfg.ploidy*cfg.GLs.size());
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
double[] priors = new double[len]; // flat priors
PoolAFCalculationModel.combineSinglePools(cfg.GLs, 1+cfg.numAltAlleles, cfg.ploidy, priors, result);
GeneralPloidyExactAFCalculationModel.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors, result);
int nameIndex = 1;
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
@ -42,7 +41,7 @@ import java.io.PrintStream;
import java.util.*;
public class PoolGenotypeLikelihoodsUnitTest {
public class GeneralPloidyGenotypeLikelihoodsUnitTest {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
final Logger logger = Logger.getLogger(Walker.class);
@ -61,7 +60,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
public void testStoringLikelihoodElements() {
// basic test storing a given PL vector in a PoolGenotypeLikelihoods object and then retrieving it back
// basic test storing a given PL vector in a GeneralPloidyGenotypeLikelihoods object and then retrieving it back
int ploidy = 20;
int numAlleles = 4;
@ -79,7 +78,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int k=0; k < gls.length; k++)
gls[k]= (double)k;
PoolGenotypeLikelihoods gl = new PoolSNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
GeneralPloidyGenotypeLikelihoods gl = new GeneralPloidySNPGenotypeLikelihoods(alleles, gls,ploidy, null, false,true);
double[] glnew = gl.getLikelihoods();
Assert.assertEquals(gls, glnew);
@ -91,7 +90,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
for (int ploidy = 2; ploidy < 10; ploidy++) {
for (int nAlleles = 2; nAlleles < 10; nAlleles++)
Assert.assertEquals(PoolGenotypeLikelihoods.getNumLikelihoodElements(nAlleles,ploidy),
Assert.assertEquals(GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(nAlleles, ploidy),
GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy));
}
@ -103,7 +102,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create iterator, compare linear index given by iterator with closed form function
int numAlleles = 4;
int ploidy = 2;
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(numAlleles, ploidy);
while(iterator.hasNext()) {
System.out.format("\n%d:",iterator.getLinearIndex());
@ -112,7 +111,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
System.out.format("%d ",aa);
int computedIdx = PoolGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
int computedIdx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, numAlleles, ploidy);
System.out.format("Computed idx = %d\n",computedIdx);
iterator.next();
}
@ -141,7 +140,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
allelesToSubset.add(Allele.create("A",false));
allelesToSubset.add(Allele.create("C",false));
double[] newGLs = PoolGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
double[] newGLs = GeneralPloidyGenotypeLikelihoods.subsetToAlleles(oldLikelihoods, ploidy,
originalAlleles, allelesToSubset);
@ -171,7 +170,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
@Test
public void testIndexIterator() {
int[] seed = new int[]{1,2,3,4};
PoolGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = runIterator(seed,-1);
// Assert.assertTrue(compareIntArrays(iterator.getCurrentVector(), seed));
Assert.assertEquals(iterator.getLinearIndex(),prod(seed)-1);
@ -229,12 +228,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
}
private PoolGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
PoolGenotypeLikelihoods.SumIterator iterator = new PoolGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
private GeneralPloidyGenotypeLikelihoods.SumIterator runIterator(int[] seed, int restrictSumTo) {
GeneralPloidyGenotypeLikelihoods.SumIterator iterator = new GeneralPloidyGenotypeLikelihoods.SumIterator(seed, restrictSumTo);
while(iterator.hasNext()) {
int[] a = iterator.getCurrentVector();
int idx = PoolGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
int idx = GeneralPloidyGenotypeLikelihoods.getLinearIndex(a, a.length, restrictSumTo);
if (VERBOSE) {
System.out.format("%d:",iterator.getLinearIndex());
for (int i=0; i < seed.length; i++)
@ -454,7 +453,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods GL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final GeneralPloidySNPGenotypeLikelihoods GL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noiselessErrorModels, false, true);
final int nGoodBases = GL.add(alignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
if (VERBOSE) {
System.out.format("Depth:%d, AC:%d, altDepth:%d, samplesPerPool:%d\nGLs:", depth,ac,altDepth, nSamplesPerPool);
@ -483,7 +482,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
// get now likelihoods for this
final PoolSNPGenotypeLikelihoods noisyGL = new PoolSNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
final GeneralPloidySNPGenotypeLikelihoods noisyGL = new GeneralPloidySNPGenotypeLikelihoods(allAlleles, null, nSamplesPerPool*2, noisyErrorModels, false,true);
noisyGL.add(noisyAlignmentContextMap.get("sample0000").getBasePileup(), true, false, UAC.MIN_BASE_QUALTY_SCORE);
mlPair = noisyGL.getMostLikelyACCount();

View File

@ -12,7 +12,7 @@ import org.testng.annotations.Test;
* Time: 11:28 AM
* To change this template use File | Settings | File Templates.
*/
public class PoolCallerIntegrationTest extends WalkerTest {
public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
final static String REF = b37KGReference;
final String CEUTRIO_BAM = "/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list";
final String LSV_BAM = validationDataLocation +"93pools_NA12878_ref_chr20_40m_41m.bam";
@ -24,21 +24,21 @@ public class PoolCallerIntegrationTest extends WalkerTest {
final String NA12878_WG_CALLS = comparisonDataLocation + "Unvalidated/NA12878/CEUTrio.HiSeq.WGS.b37_decoy.recal.ts_95.snp_indel_combined.vcf";
final String LSV_ALLELES = validationDataLocation + "ALL.chr20_40m_41m.largeScaleValidationSites.vcf";
private void PC_MT_Test(String bam, String args, String name, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm POOLSNP -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -ignoreLane ",
REF, bam, MTINTERVALS, REFSAMPLE_MT_CALLS, REFSAMPLE_NAME) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s --reference_sample_calls %s -refsample %s -glm %s -ignoreLane ",
REF, LSV_BAM, LSVINTERVALS, NA12878_WG_CALLS, REFSAMPLE_NAME, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
}
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
final String base = String.format("-T UnifiedGenotyper -dcov 10000 -R %s -I %s -L %s -glm %s -ignoreLane",
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
executeTest("testPoolCaller:"+name+" args=" + args, spec);
@ -46,22 +46,22 @@ public class PoolCallerIntegrationTest extends WalkerTest {
@Test
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","d76e3b910259da819f1e1b2adc68ba8d");
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d76e3b910259da819f1e1b2adc68ba8d");
}
@Test
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","ffadcdaee613dab975197bed0fc78da3");
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ffadcdaee613dab975197bed0fc78da3");
}
@Test
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","96087fe9240e3656cc2a4e0ff0174d5b");
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","96087fe9240e3656cc2a4e0ff0174d5b");
}
@Test
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","6fdae7093831ecfc82a06dd707d62fe9");
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","6fdae7093831ecfc82a06dd707d62fe9");
}
@Test

View File

@ -46,8 +46,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
EXACT,
POOL
EXACT
}
protected int N;

View File

@ -59,10 +59,9 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
BOTH,
POOLSNP,
POOLINDEL,
POOLBOTH
GeneralPloidySNP,
GeneralPloidyINDEL,
BOTH
}
public enum GENOTYPING_MODE {

View File

@ -241,7 +241,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
} else {
// in full mode: check for consistency in ploidy/pool calling arguments
// check for correct calculation models
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
/* if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
// polyploidy requires POOL GL and AF calculation models to be specified right now
if (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLSNP && UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLINDEL
&& UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.POOLBOTH) {
@ -252,6 +252,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
throw new UserException("Incorrect AF Calculation model. Only POOL model supported if sample ploidy != 2");
}
*/
// get all of the unique sample names
if (UAC.TREAT_ALL_READS_AS_SINGLE_POOL) {
samples.clear();

View File

@ -50,6 +50,7 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GeneralPloidy";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
@ -273,7 +274,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -640,6 +641,9 @@ public class UnifiedGenotyperEngine {
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
modelPrefix = GPSTRING + modelPrefix;
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
@ -648,17 +652,13 @@ public class UnifiedGenotyperEngine {
if ( vcInput.isSNP() ) {
// ignore SNPs if the user chose INDEL mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(UAC.GLmodel);
}
}
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
models.add(UAC.GLmodel);
}
// No support for other types yet
}
@ -668,7 +668,7 @@ public class UnifiedGenotyperEngine {
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
else {
models.add(UAC.GLmodel);
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
}
@ -730,12 +730,19 @@ public class UnifiedGenotyperEngine {
}
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
// user-specified name
String afModelName = UAC.AFmodel.name();
if (!afModelName.contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
afModelName = GPSTRING + afModelName;
for (int i = 0; i < afClasses.size(); i++) {
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
if (afModelName.equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);