Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable

This commit is contained in:
Christopher Hartl 2012-09-20 12:56:07 -04:00
commit c492185be6
6 changed files with 69 additions and 17 deletions

View File

@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
indexesToACset.put(zeroSet.ACcounts, zeroSet); indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated // keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY; MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset ACset = ACqueue.remove(); final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset); final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
maxLog10L = Math.max(maxLog10L, log10LofKs);
// adjust max likelihood seen if needed
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
// clean up memory // clean up memory
indexesToACset.remove(ACset.ACcounts); indexesToACset.remove(ACset.ACcounts);
if ( VERBOSE ) if ( VERBOSE )
@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
* @param originalPloidy Total ploidy of original combined pool * @param originalPloidy Total ploidy of original combined pool
* @param newGLPloidy Ploidy of GL vector * @param newGLPloidy Ploidy of GL vector
* @param result AFResult object * @param result AFResult object
* @param maxLog10L max likelihood observed so far * @param maxLikelihoodSeen max likelihood observed so far
* @param ACqueue Queue of conformations to compute * @param ACqueue Queue of conformations to compute
* @param indexesToACset AC indices of objects in queue * @param indexesToACset AC indices of objects in queue
* @return max log likelihood * @return max log likelihood
@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
final int originalPloidy, final int originalPloidy,
final int newGLPloidy, final int newGLPloidy,
final AlleleFrequencyCalculationResult result, final AlleleFrequencyCalculationResult result,
final double maxLog10L, final MaxLikelihoodSeen maxLikelihoodSeen,
final LinkedList<ExactACset> ACqueue, final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) { final HashMap<ExactACcounts, ExactACset> indexesToACset) {
@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
if (!Double.isInfinite(log10LofK)) if (!Double.isInfinite(log10LofK))
newPool.add(set); newPool.add(set);
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
if ( VERBOSE ) if ( VERBOSE )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
return log10LofK; return log10LofK;
} }

View File

@ -22,7 +22,7 @@ import java.util.*;
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
* the strict 1-(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios. * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
*/ */
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation { public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {

View File

@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
} }
} }
protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null;
public MaxLikelihoodSeen() {}
public void update(final double maxLog10L, final ExactACcounts ACs) {
this.maxLog10L = maxLog10L;
this.ACs = ACs;
}
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
public boolean isLowerAC(final ExactACcounts otherACs) {
final int[] myACcounts = this.ACs.getCounts();
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < myACcounts.length; i++ ) {
if ( myACcounts[i] > otherACcounts[i] )
return false;
}
return true;
}
}
} }

View File

@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static final int PL_INDEX_OF_HOM_REF = 0; private static final int PL_INDEX_OF_HOM_REF = 0;
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) { private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles]; final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ ) for ( int i = 0; i < numOriginalAltAlleles; i++ )
@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
indexesToACset.put(zeroSet.ACcounts, zeroSet); indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated // keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY; MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) { while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods // compute log10Likelihoods
final ExactACset set = ACqueue.remove(); final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result); final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
// adjust max likelihood seen if needed // adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs); if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
// clean up memory // clean up memory
indexesToACset.remove(set.ACcounts); indexesToACset.remove(set.ACcounts);
@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static double calculateAlleleCountConformation(final ExactACset set, private static double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods, final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L, final MaxLikelihoodSeen maxLikelihoodSeen,
final int numChr, final int numChr,
final LinkedList<ExactACset> ACqueue, final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset, final HashMap<ExactACcounts, ExactACset> indexesToACset,
@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// can we abort early because the log10Likelihoods are so small? // can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
//if ( DEBUG ) //if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK; return log10LofK;

View File

@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0]; int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
Assert.assertEquals(calculatedAlleleCount, 6); Assert.assertEquals(calculatedAlleleCount, 6);
} }
@Test
public void testMismatchedGLs() {
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0};
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
}
} }

View File

@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() { public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99")); Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284"));
executeTest("test Multiple SNP alleles", spec); executeTest("test Multiple SNP alleles", spec);
} }
@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() { public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("8a4ad38ec8015eea3461295148143428")); Arrays.asList("00f54a0097e710c0f7b001444c237e32"));
executeTest("test reverse trim", spec); executeTest("test reverse trim", spec);
} }
@Test
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("b3fae6bf4c620458f4259dbc93125e37"));
executeTest("test mismatched PLs", spec);
}
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// testing compressed output // testing compressed output
@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() { public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("c3f786a5228346b43a80aa80d22b1490")); Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(