From 4095a9ef32eda00be7a2af9a9d9f0e856c3746fe Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Feb 2013 15:38:17 -0500 Subject: [PATCH 1/2] Bugfixes for AssessNA12878 -- Refactor initialization routine into BadSitesWriter. This now adds the GQ and DP genotype header lines which are necessarily if the input VCF doesn't have proper headers -- GATKVariantContextUtils subset to biallelics now tolerates samples with bad GL values for multi-allelics, where it just removes the PLs and issues a warning. --- .../sting/utils/variant/GATKVariantContextUtils.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 3a5ddb7a0..37bd798cf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -405,6 +405,7 @@ public class GATKVariantContextUtils { // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final int expectedNumLikelihoods = GenotypeLikelihoods.numLikelihoods(vc.getNAlleles(), 2); final int numNewAltAlleles = allelesToUse.size() - 1; // which PLs should be carried forward? @@ -444,6 +445,9 @@ public class GATKVariantContextUtils { double[] newLikelihoods; if ( likelihoodIndexesToUse == null ) { newLikelihoods = originalLikelihoods; + } else if ( originalLikelihoods.length != expectedNumLikelihoods ) { + logger.warn("Wrong number of likelihoods in sample " + g.getSampleName() + " at " + vc + " got " + g.getLikelihoodsString() + " but expected " + expectedNumLikelihoods); + newLikelihoods = null; } else { newLikelihoods = new double[likelihoodIndexesToUse.size()]; int newIndex = 0; @@ -455,13 +459,13 @@ public class GATKVariantContextUtils { } // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + if ( newLikelihoods != null && MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); } else { final GenotypeBuilder gb = new GenotypeBuilder(g); - if ( numNewAltAlleles == 0 ) + if ( newLikelihoods == null || numNewAltAlleles == 0 ) gb.noPL(); else gb.PL(newLikelihoods);