Several bug fixes to new genotyping strategy. Update integration tests for multi-allelic indels accordingly.

This commit is contained in:
Eric Banks 2012-02-09 16:14:22 -05:00
parent 0f728a0604
commit 7a937dd1eb
3 changed files with 23 additions and 15 deletions

View File

@ -47,7 +47,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final double[][] log10AlleleFrequencyPriors,
final AlleleFrequencyCalculationResult result) {
final GenotypesContext GLs = vc.getGenotypes();
GenotypesContext GLs = vc.getGenotypes();
List<Allele> alleles = vc.getAlleles();
// don't try to genotype too many alternate alleles
@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
alleles.add(vc.getReference());
for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
alleles.add(vc.getAlternateAllele(i));
UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
}
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);

View File

@ -795,6 +795,10 @@ public class UnifiedGenotyperEngine {
// then we can keep the PLs as is; otherwise, we determine which ones to keep
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
likelihoodIndexesToUse = new ArrayList<Integer>(30);
// make sure that we've cached enough data
if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 )
calculatePLcache(numOriginalAltAlleles);
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
@ -834,20 +838,29 @@ public class UnifiedGenotyperEngine {
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
continue;
}
else {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
if ( numNewAltAlleles == 0 )
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
else
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles);
newGTs.add(newGT);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
else
newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles, attrs));
}
}
return newGTs;
}
protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles) {
protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles, Map<String, Object> attrs) {
// find the genotype with maximum likelihoods
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
@ -857,11 +870,6 @@ public class UnifiedGenotyperEngine {
myAlleles.add(allelesToUse.get(alleles[1]));
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
Map<String, Object> attrs = new HashMap<String, Object>(originalGT.getAttributes());
if ( numNewAltAlleles == 0 )
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
else
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
}
}

View File

@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
Arrays.asList("e4d2904b406f37d99fbe8f52ae75254f"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("1d1956fd7b0f0d30935674b2f5019860"));
Arrays.asList("21f7b6c8b7eaccad1754a832bac79a65"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}