Bugfixes for AssessNA12878

-- Refactor initialization routine into BadSitesWriter.  This now adds the GQ and DP genotype header lines which are necessarily if the input VCF doesn't have proper headers
-- GATKVariantContextUtils subset to biallelics now tolerates samples with bad GL values for multi-allelics, where it just removes the PLs and issues a warning.
This commit is contained in:
Mark DePristo 2013-02-27 15:38:17 -05:00
parent 92d6a4f441
commit 4095a9ef32
1 changed files with 6 additions and 2 deletions

View File

@ -405,6 +405,7 @@ public class GATKVariantContextUtils {
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2);
final int numNewAltAlleles = allelesToUse.size() - 1; final int numNewAltAlleles = allelesToUse.size() - 1;
// which PLs should be carried forward? // which PLs should be carried forward?
@ -444,6 +445,9 @@ public class GATKVariantContextUtils {
double[] newLikelihoods; double[] newLikelihoods;
if ( likelihoodIndexesToUse == null ) { if ( likelihoodIndexesToUse == null ) {
newLikelihoods = originalLikelihoods; newLikelihoods = originalLikelihoods;
} else if ( originalLikelihoods.length != expectedNumLikelihoods ) {
logger.warn("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + vc + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods);
newLikelihoods = null;
} else { } else {
newLikelihoods = new double[likelihoodIndexesToUse.size()]; newLikelihoods = new double[likelihoodIndexesToUse.size()];
int newIndex = 0; int newIndex = 0;
@ -455,13 +459,13 @@ public class GATKVariantContextUtils {
} }
// if there is no mass on the (new) likelihoods, then just no-call the sample // if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { if ( newLikelihoods != null && MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
} }
else { else {
final GenotypeBuilder gb = new GenotypeBuilder(g); final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 ) if ( newLikelihoods == null || numNewAltAlleles == 0 )
gb.noPL(); gb.noPL();
else else
gb.PL(newLikelihoods); gb.PL(newLikelihoods);