Several bug fixes to new genotyping strategy. Update integration tests for multi-allelic indels accordingly.
This commit is contained in:
parent
0f728a0604
commit
7a937dd1eb
|
|
@ -47,7 +47,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
|
||||
final GenotypesContext GLs = vc.getGenotypes();
|
||||
GenotypesContext GLs = vc.getGenotypes();
|
||||
List<Allele> alleles = vc.getAlleles();
|
||||
|
||||
// don't try to genotype too many alternate alleles
|
||||
|
|
@ -58,7 +58,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
alleles.add(vc.getReference());
|
||||
for ( int i = 0; i < MAX_ALTERNATE_ALLELES_TO_GENOTYPE; i++ )
|
||||
alleles.add(vc.getAlternateAllele(i));
|
||||
UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
|
||||
GLs = UnifiedGenotyperEngine.subsetAlleles(vc, alleles, false);
|
||||
}
|
||||
|
||||
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
|
|
|
|||
|
|
@ -795,6 +795,10 @@ public class UnifiedGenotyperEngine {
|
|||
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
||||
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
|
||||
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
||||
|
||||
// make sure that we've cached enough data
|
||||
if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 )
|
||||
calculatePLcache(numOriginalAltAlleles);
|
||||
final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
|
||||
|
||||
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
|
||||
|
|
@ -834,20 +838,29 @@ public class UnifiedGenotyperEngine {
|
|||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||
}
|
||||
|
||||
// if there is no mass on the (new) likelihoods or we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||
if ( numNewAltAlleles == 0 )
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
else
|
||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
||||
|
||||
final Genotype newGT = assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles);
|
||||
newGTs.add(newGT);
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL )
|
||||
newGTs.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, attrs, false));
|
||||
else
|
||||
newGTs.add(assignGenotype(g, newLikelihoods, allelesToUse, numNewAltAlleles, attrs));
|
||||
}
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
}
|
||||
|
||||
protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles) {
|
||||
protected static Genotype assignGenotype(Genotype originalGT, double[] newLikelihoods, List<Allele> allelesToUse, int numNewAltAlleles, Map<String, Object> attrs) {
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];
|
||||
|
|
@ -857,11 +870,6 @@ public class UnifiedGenotyperEngine {
|
|||
myAlleles.add(allelesToUse.get(alleles[1]));
|
||||
|
||||
final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
|
||||
Map<String, Object> attrs = new HashMap<String, Object>(originalGT.getAttributes());
|
||||
if ( numNewAltAlleles == 0 )
|
||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||
else
|
||||
attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
|
||||
return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,7 +300,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
|
||||
Arrays.asList("e4d2904b406f37d99fbe8f52ae75254f"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -309,7 +309,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("1d1956fd7b0f0d30935674b2f5019860"));
|
||||
Arrays.asList("21f7b6c8b7eaccad1754a832bac79a65"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue