Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable
This commit is contained in:
commit
c492185be6
|
|
@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||||
|
|
||||||
// keep processing while we have AC conformations that need to be calculated
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final ExactACset ACset = ACqueue.remove();
|
final ExactACset ACset = ACqueue.remove();
|
||||||
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset);
|
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
|
||||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
|
||||||
|
// adjust max likelihood seen if needed
|
||||||
|
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||||
|
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
|
||||||
|
|
||||||
// clean up memory
|
// clean up memory
|
||||||
indexesToACset.remove(ACset.ACcounts);
|
indexesToACset.remove(ACset.ACcounts);
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
|
|
@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
||||||
* @param originalPloidy Total ploidy of original combined pool
|
* @param originalPloidy Total ploidy of original combined pool
|
||||||
* @param newGLPloidy Ploidy of GL vector
|
* @param newGLPloidy Ploidy of GL vector
|
||||||
* @param result AFResult object
|
* @param result AFResult object
|
||||||
* @param maxLog10L max likelihood observed so far
|
* @param maxLikelihoodSeen max likelihood observed so far
|
||||||
* @param ACqueue Queue of conformations to compute
|
* @param ACqueue Queue of conformations to compute
|
||||||
* @param indexesToACset AC indices of objects in queue
|
* @param indexesToACset AC indices of objects in queue
|
||||||
* @return max log likelihood
|
* @return max log likelihood
|
||||||
|
|
@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
||||||
final int originalPloidy,
|
final int originalPloidy,
|
||||||
final int newGLPloidy,
|
final int newGLPloidy,
|
||||||
final AlleleFrequencyCalculationResult result,
|
final AlleleFrequencyCalculationResult result,
|
||||||
final double maxLog10L,
|
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||||
final LinkedList<ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
|
||||||
|
|
||||||
|
|
@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
|
||||||
if (!Double.isInfinite(log10LofK))
|
if (!Double.isInfinite(log10LofK))
|
||||||
newPool.add(set);
|
newPool.add(set);
|
||||||
|
|
||||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||||
if ( VERBOSE )
|
if ( VERBOSE )
|
||||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ import java.util.*;
|
||||||
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
|
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
|
||||||
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
|
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
|
||||||
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
|
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
|
||||||
* the strict 1-∏(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
|
* the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
|
||||||
|
|
|
||||||
|
|
@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
||||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static final class MaxLikelihoodSeen {
|
||||||
|
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||||
|
ExactACcounts ACs = null;
|
||||||
|
|
||||||
|
public MaxLikelihoodSeen() {}
|
||||||
|
|
||||||
|
public void update(final double maxLog10L, final ExactACcounts ACs) {
|
||||||
|
this.maxLog10L = maxLog10L;
|
||||||
|
this.ACs = ACs;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
|
||||||
|
public boolean isLowerAC(final ExactACcounts otherACs) {
|
||||||
|
final int[] myACcounts = this.ACs.getCounts();
|
||||||
|
final int[] otherACcounts = otherACs.getCounts();
|
||||||
|
|
||||||
|
for ( int i = 0; i < myACcounts.length; i++ ) {
|
||||||
|
if ( myACcounts[i] > otherACcounts[i] )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
|
|
||||||
private static final int PL_INDEX_OF_HOM_REF = 0;
|
private static final int PL_INDEX_OF_HOM_REF = 0;
|
||||||
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
|
||||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||||
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
|
||||||
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
for ( int i = 0; i < numOriginalAltAlleles; i++ )
|
||||||
|
|
@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||||
|
|
||||||
// keep processing while we have AC conformations that need to be calculated
|
// keep processing while we have AC conformations that need to be calculated
|
||||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
|
||||||
while ( !ACqueue.isEmpty() ) {
|
while ( !ACqueue.isEmpty() ) {
|
||||||
// compute log10Likelihoods
|
// compute log10Likelihoods
|
||||||
final ExactACset set = ACqueue.remove();
|
final ExactACset set = ACqueue.remove();
|
||||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||||
|
|
||||||
// adjust max likelihood seen if needed
|
// adjust max likelihood seen if needed
|
||||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
|
||||||
|
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
|
||||||
|
|
||||||
// clean up memory
|
// clean up memory
|
||||||
indexesToACset.remove(set.ACcounts);
|
indexesToACset.remove(set.ACcounts);
|
||||||
|
|
@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
private static double calculateAlleleCountConformation(final ExactACset set,
|
private static double calculateAlleleCountConformation(final ExactACset set,
|
||||||
final ArrayList<double[]> genotypeLikelihoods,
|
final ArrayList<double[]> genotypeLikelihoods,
|
||||||
final double maxLog10L,
|
final MaxLikelihoodSeen maxLikelihoodSeen,
|
||||||
final int numChr,
|
final int numChr,
|
||||||
final LinkedList<ExactACset> ACqueue,
|
final LinkedList<ExactACset> ACqueue,
|
||||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||||
|
|
@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||||
|
|
||||||
// can we abort early because the log10Likelihoods are so small?
|
// can we abort early because the log10Likelihoods are so small?
|
||||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
|
||||||
//if ( DEBUG )
|
//if ( DEBUG )
|
||||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
|
|
|
||||||
|
|
@ -109,4 +109,19 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
||||||
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
int calculatedAlleleCount = result.getAlleleCountsOfMAP()[0];
|
||||||
Assert.assertEquals(calculatedAlleleCount, 6);
|
Assert.assertEquals(calculatedAlleleCount, 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMismatchedGLs() {
|
||||||
|
|
||||||
|
final double[] AB = new double[]{-2000.0, 0.0, -2000.0, -2000.0, -2000.0, -2000.0};
|
||||||
|
final double[] AC = new double[]{-100.0, -100.0, -100.0, 0.0, -100.0, -100.0};
|
||||||
|
GetGLsTest cfg = new GetGLsTest("B1C1", 2, createGenotype("1", AC), createGenotype("2", AB));
|
||||||
|
|
||||||
|
final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2);
|
||||||
|
|
||||||
|
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result);
|
||||||
|
|
||||||
|
Assert.assertEquals(result.getAlleleCountsOfMAP()[0], 1);
|
||||||
|
Assert.assertEquals(result.getAlleleCountsOfMAP()[1], 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultipleSNPAlleles() {
|
public void testMultipleSNPAlleles() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||||
Arrays.asList("8472b1ad2fe1060e732da9e29d10cf99"));
|
Arrays.asList("cceb34ffbd2dbc45b8821f86ea255284"));
|
||||||
executeTest("test Multiple SNP alleles", spec);
|
executeTest("test Multiple SNP alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,10 +76,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testReverseTrim() {
|
public void testReverseTrim() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||||
Arrays.asList("8a4ad38ec8015eea3461295148143428"));
|
Arrays.asList("00f54a0097e710c0f7b001444c237e32"));
|
||||||
executeTest("test reverse trim", spec);
|
executeTest("test reverse trim", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMismatchedPLs() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||||
|
Arrays.asList("b3fae6bf4c620458f4259dbc93125e37"));
|
||||||
|
executeTest("test mismatched PLs", spec);
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// testing compressed output
|
// testing compressed output
|
||||||
|
|
@ -335,7 +343,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSampleIndels1() {
|
public void testMultiSampleIndels1() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("c3f786a5228346b43a80aa80d22b1490"));
|
Arrays.asList("af04b81f0548ca22b8d1f6bf223b336e"));
|
||||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue