Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable

This commit is contained in:
Christopher Hartl 2012-09-20 12:56:07 -04:00
commit c492185be6
6 changed files with 69 additions and 17 deletions

View File

@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset);
maxLog10L = Math.max(maxLog10L, log10LofKs);
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
// adjust max likelihood seen if needed
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
// clean up memory
indexesToACset.remove(ACset.ACcounts);
if ( VERBOSE )
@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
* @param originalPloidy Total ploidy of original combined pool
* @param newGLPloidy Ploidy of GL vector
* @param result AFResult object
* @param maxLog10L max likelihood observed so far
* @param maxLikelihoodSeen max likelihood observed so far
* @param ACqueue Queue of conformations to compute
* @param indexesToACset AC indices of objects in queue
* @return max log likelihood
@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
final int originalPloidy,
final int newGLPloidy,
final AlleleFrequencyCalculationResult result,
final double maxLog10L,
final MaxLikelihoodSeen maxLikelihoodSeen,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
if (!Double.isInfinite(log10LofK))
newPool.add(set);
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
if ( VERBOSE )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
return log10LofK;
}

View File

@ -22,7 +22,7 @@ import java.util.*;
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
* the strict 1-(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
* the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
*/
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {

View File

@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
}
}
protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null;
public MaxLikelihoodSeen() {}
public void update(final double maxLog10L, final ExactACcounts ACs) {
this.maxLog10L = maxLog10L;
this.ACs = ACs;
}
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
public boolean isLowerAC(final ExactACcounts otherACs) {
final int[] myACcounts = this.ACs.getCounts();
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < myACcounts.length; i++ ) {
if ( myACcounts[i] > otherACcounts[i] )
return false;
}
return true;
}
}
}

View File

@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static final int PL_INDEX_OF_HOM_REF = 0;
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
// adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs);
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
// clean up memory
indexesToACset.remove(set.ACcounts);
@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L,
final MaxLikelihoodSeen maxLikelihoodSeen,
final int numChr,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
//if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;

View File

@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
Assert.assertEquals(calculatedAlleleCount, 6);
}
@Test
public void testMismatchedGLs() {
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0};
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
}
}

View File

@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99"));
Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284"));
executeTest("test Multiple SNP alleles", spec);
}
@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("8a4ad38ec8015eea3461295148143428"));
Arrays.asList("00f54a0097e710c0f7b001444c237e32"));
executeTest("test reverse trim", spec);
}
@Test
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("b3fae6bf4c620458f4259dbc93125e37"));
executeTest("test mismatched PLs", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing compressed output
@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("c3f786a5228346b43a80aa80d22b1490"));
Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(