diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index aff38419c..7ebbd49dd 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -54,6 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; @@ -86,7 +87,8 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati if ( genotypes == null || genotypes.size() == 0 ) return null; - int depth = 0; + int standardDepth = 0; + int ADrestrictedDepth = 0; for ( final Genotype genotype : genotypes ) { @@ -101,39 +103,43 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati // TODO -- annotations must come afterwards (so that QD can use the AD). if ( genotype.hasAD() ) { final int[] AD = genotype.getAD(); - int variantDepth = 0; - for ( int i = 1; i < AD.length; i++ ) - variantDepth += AD[i]; - if ( variantDepth <= 1 ) - continue; + final int totalADdepth = (int)MathUtils.sum(AD); + if ( totalADdepth - AD[0] > 1 ) + ADrestrictedDepth += totalADdepth; + standardDepth += totalADdepth; + continue; } if (stratifiedContexts!= null && !stratifiedContexts.isEmpty()) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) continue; - depth += context.getBasePileup().depthOfCoverage(); + standardDepth += context.getBasePileup().depthOfCoverage(); } else if (perReadAlleleLikelihoodMap != null) { final PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName()); if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty()) continue; - depth += perReadAlleleLikelihoods.getNumberOfStoredElements(); + standardDepth += perReadAlleleLikelihoods.getNumberOfStoredElements(); } else if ( genotype.hasDP() ) { - depth += genotype.getDP(); + standardDepth += genotype.getDP(); } } - if ( depth == 0 ) + // if the AD-restricted depth is a usable value (i.e. not zero), then we should use that one going forward + if ( ADrestrictedDepth > 0 ) + standardDepth = ADrestrictedDepth; + + if ( standardDepth == 0 ) return null; final double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc); - // Hack: when refContext == null then we know we are coming from the HaplotypeCaller and do not want to do a - // full length-based normalization (because the indel length problem is present only in the UnifiedGenotyper) - double QD = -10.0 * vc.getLog10PError() / ((double)depth * indelNormalizationFactor(altAlleleLength, ref != null)); + // Hack: when refContext == null then we know we are coming from the HaplotypeCaller and do not want to do a + // full length-based normalization (because the indel length problem is present only in the UnifiedGenotyper) + double QD = -10.0 * vc.getLog10PError() / ((double)standardDepth * indelNormalizationFactor(altAlleleLength, ref != null)); - // Hack: see note in the fixTooHighQD method below + // Hack: see note in the fixTooHighQD method below QD = fixTooHighQD(QD); final Map map = new HashMap<>(); @@ -149,7 +155,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati * @return a possitive double */ private double indelNormalizationFactor(final double altAlleleLength, final boolean increaseNormalizationAsLengthIncreases) { - return ( increaseNormalizationAsLengthIncreases ? Math.max(altAlleleLength / 3.0, 1.0) : 1.0); + return ( increaseNormalizationAsLengthIncreases ? Math.max(altAlleleLength / 3.0, 1.0) : 1.0); } /** diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java index 314f6ae42..0f577cb23 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java @@ -205,12 +205,25 @@ public class CombineGVCFs extends RodWalker allele). + * + * @param vc the variant context + * @param pos the position to query against + * @return true if this variant context "ends" at this position, false otherwise + */ + private boolean isEndingContext(final VariantContext vc, final int pos) { + return vc.getNAlleles() > 2 || vc.getEnd() == pos; + } + /** * Disrupt the VariantContexts so that they all stop at the given pos, write them out, and put the remainder back in the list. * @@ -228,10 +241,8 @@ public class CombineGVCFs extends RodWalker allele) - if ( vc.getNAlleles() > 2 || vc.getEnd() == pos ) + // if it was ending anyways, then remove it from the future state + if ( isEndingContext(vc, pos) ) state.VCs.remove(i); } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java index afd657ea3..134ed0cd1 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFs.java @@ -56,10 +56,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.GenomeLoc; @@ -154,10 +152,14 @@ public class GenotypeGVCFs extends RodWalkeremptyList(), this, getToolkit()); + // take care of the VCF headers final Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); final Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); - headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); + headerLines.addAll(annotationEngine.getVCFAnnotationDescriptions()); + VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.MLE_ALLELE_COUNT_KEY, VCFConstants.MLE_ALLELE_FREQUENCY_KEY); if ( dbsnp != null && dbsnp.dbsnp.isBound() ) VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY); @@ -168,9 +170,6 @@ public class GenotypeGVCFs extends RodWalkeremptyList(), this, getToolkit()); - // collect the actual rod bindings into a list for use later for ( final RodBindingCollection variantCollection : variantCollections ) variants.addAll(variantCollection.getRodBindings()); @@ -206,7 +205,13 @@ public class GenotypeGVCFs extends RodWalker attrs = new HashMap<>(originalVC.getAttributes()); + attrs.put(VCFConstants.MLE_ALLELE_COUNT_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY)); + attrs.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, regenotypedVC.getAttribute(VCFConstants.MLE_ALLELE_FREQUENCY_KEY)); + + result = new VariantContextBuilder(regenotypedVC).attributes(attrs).make(); } // if it turned monomorphic and we don't want such sites, quit diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepthUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepthUnitTest.java index 4494e16bf..a118a462d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepthUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepthUnitTest.java @@ -69,9 +69,6 @@ public class QualByDepthUnitTest extends WalkerTest { final List AA = Arrays.asList(A,A); final List AC = Arrays.asList(A,C); - final List CC = Arrays.asList(C,C); - final List AG = Arrays.asList(A,G); - final List CG = Arrays.asList(C,G); final List GG = Arrays.asList(G,G); final List ACG = Arrays.asList(A,C,G); @@ -81,6 +78,7 @@ public class QualByDepthUnitTest extends WalkerTest { final Genotype gGG = new GenotypeBuilder("4", GG).DP(10).AD(new int[]{1,9}).make(); tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAC)).make(), 5.0}); + tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gACerror)).make(), 5.0}); tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAA, gAC)).make(), 5.0}); tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(-5).genotypes(Arrays.asList(gAC, gACerror)).make(), 5.0}); tests.add(new Object[]{new VariantContextBuilder("test", "20", 10, 10, ACG).log10PError(-5).genotypes(Arrays.asList(gAA, gAC, gACerror, gGG)).make(), 2.5}); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java index 17b9124aa..03d136290 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -139,7 +139,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { final File gVCF = executeTest("testOneHasDeletionAndTwoHasRefBlock", spec).first.get(0); final List allVCs = GATKVCFUtils.readVCF(gVCF).getSecond(); - Assert.assertEquals(allVCs.size(), 2); + Assert.assertEquals(allVCs.size(), 3); final VariantContext first = allVCs.get(0); Assert.assertEquals(first.getStart(), 69772); @@ -149,14 +149,19 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { final VariantContext second = allVCs.get(1); Assert.assertEquals(second.getStart(), 69773); - Assert.assertEquals(second.getEnd(), 69783); + Assert.assertEquals(second.getEnd(), 69774); Assert.assertEquals(second.getGenotypes().size(), 2); + + final VariantContext third = allVCs.get(2); + Assert.assertEquals(third.getStart(), 69775); + Assert.assertEquals(third.getEnd(), 69783); + Assert.assertEquals(third.getGenotypes().size(), 2); } @Test public void testMD5s() throws Exception { final String cmd = baseTestString(" -L 1:69485-69791"); - final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("d2a5ca67a8ef6e27854e7a439883f05d")); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("aecdfa9eb32b802cd629e9f811ef15fd")); spec.disableShadowBCF(); executeTest("testMD5s", spec); } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java index da7ec3dd4..fff353c60 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeGVCFsIntegrationTest.java @@ -65,7 +65,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-20,000,000", b37KGReference), 1, - Arrays.asList("54487ea151c49d36a15eac8097a7e460")); + Arrays.asList("ebda39d3343b34d21490a78284ed88e8")); executeTest("combineSingleSamplePipelineGVCF", spec); } @@ -89,7 +89,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" + " -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference), 1, - Arrays.asList("9e6ef126d5e872e5b2a68c3d73471566")); + Arrays.asList("8eeda24a07f66d67b7639a31fda5c903")); executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec); } @@ -99,7 +99,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { "-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference + " -V " + privateTestDir + "gvcfExample1.vcf", 1, - Arrays.asList("dd0e2846b3be9692ecb94f152b45c316")); + Arrays.asList("2541e164056d5632ad7de784a9af3880")); executeTest("testJustOneSample", spec); } @@ -110,7 +110,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest { " -V " + privateTestDir + "gvcfExample1.vcf" + " -V " + privateTestDir + "gvcfExample2.vcf", 1, - Arrays.asList("a4f76a094af4708fc7f96a9b7a1b8726")); + Arrays.asList("9daf9602338db9d06c075c6e9a15ee2c")); executeTest("testSamplesWithDifferentLs", spec); } } \ No newline at end of file