Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
c492185be6
|
|
@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset ACset = ACqueue.remove();
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset);
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(ACset.ACcounts);
|
||||
if ( VERBOSE )
|
||||
|
|
@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
* @param originalPloidy Total ploidy of original combined pool
|
||||
* @param newGLPloidy Ploidy of GL vector
|
||||
* @param result AFResult object
|
||||
* @param maxLog10L max likelihood observed so far
|
||||
* @param maxLikelihoodSeen max likelihood observed so far
|
||||
* @param ACqueue Queue of conformations to compute
|
||||
* @param indexesToACset AC indices of objects in queue
|
||||
* @return max log likelihood
|
||||
|
|
@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
final int originalPloidy,
|
||||
final int newGLPloidy,
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final double maxLog10L,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||
|
||||
|
|
@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
|||
if (!Double.isInfinite(log10LofK))
|
||||
newPool.add(set);
|
||||
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||
if ( VERBOSE )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
|
||||
return log10LofK;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import java.util.*;
|
|||
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
|
||||
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
|
||||
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
|
||||
* the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
|
||||
* the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
|
||||
*/
|
||||
|
||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||
|
|
|
|||
|
|
@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
protected static final class MaxLikelihoodSeen {
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
ExactACcounts ACs = null;
|
||||
|
||||
public MaxLikelihoodSeen() {}
|
||||
|
||||
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||
this.maxLog10L = maxLog10L;
|
||||
this.ACs = ACs;
|
||||
}
|
||||
|
||||
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||
final int[] myACcounts = this.ACs.getCounts();
|
||||
final int[] otherACcounts = otherACs.getCounts();
|
||||
|
||||
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||
if ( myACcounts[i] > otherACcounts[i] )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
|
||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||
|
|
@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
||||
|
||||
// clean up memory
|
||||
indexesToACset.remove(set.ACcounts);
|
||||
|
|
@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
private static double calculateAlleleCountConformation(final ExactACset set,
|
||||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final double maxLog10L,
|
||||
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||
final int numChr,
|
||||
final LinkedList<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
|
|
@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
return log10LofK;
|
||||
|
|
|
|||
|
|
@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
||||
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedGLs() {
|
||||
|
||||
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0};
|
||||
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
|
||||
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
|
||||
|
||||
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
|
||||
|
||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
|
||||
|
||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
||||
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99"));
|
||||
Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("8a4ad38ec8015eea3461295148143428"));
|
||||
Arrays.asList("00f54a0097e710c0f7b001444c237e32"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("b3fae6bf4c620458f4259dbc93125e37"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing compressed output
|
||||
|
|
@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("c3f786a5228346b43a80aa80d22b1490"));
|
||||
Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
Loading…
Reference in New Issue