diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java index 78507fa16..e594dce5b 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltrationIntegrationTest.java @@ -234,10 +234,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf --setFilteredGtToNocall", 1, - Arrays.asList("454d265ee8b425284ed7fca8ca4774be")); + Arrays.asList("2ff3753215d418712309e50da323f6e8")); executeTest("testSetFilteredGtoNocall", spec); } + + @Test + public void testSetFilteredGtoNocallUpdateInfo() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " -G_filter 'GQ < 20' -G_filterName lowDP -G_filter 'DP<10' -G_filterName lowGQ -V " + privateTestDir + "variantFiltrationInfoField.vcf --setFilteredGtToNocall", + 1, + Arrays.asList("3b074975bb6f70c84b2dd81695bb89ff")); + executeTest("testSetFilteredGtoNocallUpdateInfo", spec); + } + @Test public void testSetVcfFilteredGtoNocall() { String testfile = privateTestDir + "filteredSamples.vcf"; @@ -245,7 +256,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("7771f07a9997296852ab367fac2c7a6c") + Arrays.asList("410c6b7bb62fc43bb41eee627670f757") ); spec.disableShadowBCF(); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java index 79cac9790..2776a1a20 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -732,12 +732,25 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header", 1, - Arrays.asList("7771f07a9997296852ab367fac2c7a6c") + Arrays.asList("410c6b7bb62fc43bb41eee627670f757") ); spec.disableShadowBCF(); executeTest("testSetFilteredGtoNocall--" + testfile, spec); } + + @Test + public void testSetFilteredGtoNocallUpdateInfo() { + String testfile = privateTestDir + "selectVariantsInfoField.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T SelectVariants --setFilteredGtToNocall --removeUnusedAlternates --excludeNonVariants -R " + b37KGReference + " --variant " + + testfile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("349136d92f915f8c7ba8a2f92e51d6b7")); + executeTest("testSetFilteredGtoNocallUpdateInfo", spec); + } + @Test public void testSACSimpleDiploid() { String testfile = privateTestDir + "261_S01_raw_variants_gvcf.vcf"; diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java index ee2da20bd..1e950ece8 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/filters/VariantFiltration.java @@ -45,6 +45,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import java.util.*; @@ -241,6 +242,13 @@ public class VariantFiltration extends RodWalker { Set hInfo = new HashSet(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames)); + // need AC, AN and AF since output if set filtered genotypes to no-call + if ( setFilteredGenotypesToNocall ) { + hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY)); + hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY)); + hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY)); + } + if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); @@ -402,6 +410,18 @@ public class VariantFiltration extends RodWalker { if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) { GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size()); + // + // recompute AC, AN and AF if filtered genotypes are set to no-call + // + // occurrences of alternate alleles over all genotypes + final Map calledAltAlleles = new LinkedHashMap(vc.getNAlleles()-1); + for ( final Allele altAllele : vc.getAlternateAlleles() ) { + calledAltAlleles.put(altAllele, 0); + } + + int calledAlleles = 0; + boolean haveFilteredNoCallAlleles = false; + // for each genotype, check filters then create a new object for ( final Genotype g : vc.getGenotypes() ) { if ( g.isCalled() ) { @@ -415,16 +435,23 @@ public class VariantFiltration extends RodWalker { } // if sample is filtered and --setFilteredGtToNocall, set genotype to non-call - if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) + if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) { + haveFilteredNoCallAlleles = true; genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make()); - else + } + else { genotypes.add(new GenotypeBuilder(g).filters(filters).make()); + calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g); + } } else { genotypes.add(g); } } builder.genotypes(genotypes); + // if filtered genotypes are set to no-call, output recomputed AC, AN, AF + if ( haveFilteredNoCallAlleles ) + GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder); } // make a new variant context based on filters diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java index 2ebdcb97b..f177b7393 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/SelectVariants.java @@ -708,6 +708,13 @@ public class SelectVariants extends RodWalker implements TreeR Set headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true); headerLines.add(new VCFHeaderLine("source", "SelectVariants")); + // need AC, AN and AF since output if set filtered genotypes to no-call + if (setFilteredGenotypesToNocall) { + headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY)); + headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY)); + headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY)); + } + if (keepOriginalChrCounts) { headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY)); headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY)); @@ -1117,14 +1124,35 @@ public class SelectVariants extends RodWalker implements TreeR final VariantContextBuilder builder = new VariantContextBuilder(vc); final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size()); + // + // recompute AC, AN and AF if filtered genotypes are set to no-call + // + // occurrences of alternate alleles over all genotypes + final Map calledAltAlleles = new LinkedHashMap(vc.getNAlleles()-1); + for ( final Allele altAllele : vc.getAlternateAlleles() ) { + calledAltAlleles.put(altAllele, 0); + } + int calledAlleles = 0; + boolean haveFilteredNoCallAlleles = false; for ( final Genotype g : vc.getGenotypes() ) { - if ( g.isCalled() && g.isFiltered() ) + if ( g.isCalled() && g.isFiltered() ) { + haveFilteredNoCallAlleles = true; genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make()); - else + } + else { + // increment the number called alleles and called alternate alleles + calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g); genotypes.add(g); + } } - return builder.genotypes(genotypes).make(); + builder.genotypes(genotypes); + + // if filtered genotypes are set to no-call, output recomputed AC, AN, AF + if ( haveFilteredNoCallAlleles ) + GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder); + + return builder.make(); } /* * Add annotations to the new VC @@ -1243,4 +1271,4 @@ public class SelectVariants extends RodWalker implements TreeR private boolean considerNoCallGenotypes(){ return maxNOCALLnumber != MAX_NOCALL_NUMBER_DEFAULT_VALUE || maxNOCALLfraction != MAX_NOCALL_FRACTION_DEFAULT_VALUE; } -} \ No newline at end of file +} diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java index 447409d25..ecffdf7d4 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtils.java @@ -31,8 +31,6 @@ import htsjdk.tribble.TribbleException; import htsjdk.tribble.util.popgen.HardyWeinbergCalculation; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; -import htsjdk.variant.vcf.VCFHeaderLineCount; -import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.broadinstitute.gatk.utils.*; @@ -2281,5 +2279,56 @@ public class GATKVariantContextUtils { return tokens; } + + /** + * Increment the number of called alternate and reference plus alternate alleles for a genotype + * + * @param calledAltAlleles number of called alternate alleles for all genotypes + * @param calledAlleles number of called alleles for all genotypes + * @param genotype genotype + * @return incremented called alleles + * @throws IllegalArgumentException if calledAltAlleles or genotype are null + */ + public static int incrementChromosomeCountsInfo(final Map calledAltAlleles, final int calledAlleles, final Genotype genotype) { + if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null"); + if ( genotype == null ) throw new IllegalArgumentException("Genotype can not be null"); + + int incrementedCalledAlleles = calledAlleles; + if (genotype.isCalled()) { + for (final Allele allele : genotype.getAlleles()) { + incrementedCalledAlleles++; + if (allele.isNonReference()) { + calledAltAlleles.put(allele, calledAltAlleles.get(allele) + 1); + } + } + } + + return incrementedCalledAlleles; + } + + /** + * Update the variant context chromosome counts info fields (AC, AN, AF) + * + * @param calledAltAlleles number of called alternate alleles for all genotypes + * @param calledAlleles number of called alleles for all genotypes + * @param builder builder for variant context + * @throws IllegalArgumentException if calledAltAlleles or builder are null + */ + public static void updateChromosomeCountsInfo(final Map calledAltAlleles, final int calledAlleles, + final VariantContextBuilder builder) { + if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null"); + if ( builder == null ) throw new IllegalArgumentException("Variant context builder can not be null"); + + builder.attribute(VCFConstants.ALLELE_COUNT_KEY, calledAltAlleles.values().toArray()). + attribute(VCFConstants.ALLELE_NUMBER_KEY, calledAlleles); + // Add AF is there are called alleles + if ( calledAlleles != 0 ) { + final Set alleleFrequency = new LinkedHashSet(calledAltAlleles.size()); + for ( final Integer value : calledAltAlleles.values() ) { + alleleFrequency.add(value.doubleValue()/calledAlleles); + } + builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFrequency.toArray()); + } + } } diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java index 6223a95df..85d8c56ff 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -26,6 +26,7 @@ package org.broadinstitute.gatk.utils.variant; import htsjdk.variant.variantcontext.*; +import htsjdk.variant.vcf.VCFConstants; import org.broadinstitute.gatk.utils.*; import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; @@ -1661,5 +1662,75 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest { attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).make(); Assert.assertEquals(GATKVariantContextUtils.makeNewSACs(g, Arrays.asList(1, 3)), expected); } + + @Test + public void testIncrementChromosomeCountsInfo() { + final Map calledAltAlleles = new LinkedHashMap<>(); + final Map expectedCalledAltAlleles = new LinkedHashMap<>(); + final List alleles = new ArrayList<>(Arrays.asList(Aref, C)); + for ( final Allele allele : alleles ) { + if ( allele.isNonReference() ) { + calledAltAlleles.put(allele, 0); + expectedCalledAltAlleles.put(allele, 1); + } + } + int calledAlleles = 0; + final Genotype genotype = new GenotypeBuilder().alleles(alleles).make(); + calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, genotype); + Assert.assertEquals(calledAlleles, alleles.size()); + Assert.assertEquals(calledAltAlleles, expectedCalledAltAlleles); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testIncrementChromosomeCountsInfoCalledAltAllelesException() { + int calledAlleles = 0; + final Genotype genotype = new GenotypeBuilder().make(); + calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(null, calledAlleles, genotype); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testIncrementChromosomeCountsInfoGenotypeException() { + final Map calledAltAlleles = new LinkedHashMap<>(); + int calledAlleles = 0; + calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, null); + } + + @Test + public void testUpdateChromosomeCountsInfo() { + final Map calledAltAlleles = new LinkedHashMap<>(); + final Set alleleFrequency = new LinkedHashSet(calledAltAlleles.size()); + final List alleles = new ArrayList<>(Arrays.asList(Aref, C)); + final int calledAlleles = alleles.size(); + final List numAltAlleles = new ArrayList<>(); + + final int alleleOccurence = 1; + for ( final Allele allele : alleles ) { + if ( allele.isNonReference() ) { + calledAltAlleles.put(allele, alleleOccurence); + alleleFrequency.add( ((double) alleleOccurence)/calledAlleles ); + numAltAlleles.add(1); + } + } + final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), alleles); + GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder); + final VariantContext vc = builder.make(); + Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY), numAltAlleles.toArray()); + Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_NUMBER_KEY), alleles.size()); + Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), alleleFrequency.toArray()); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testUpdateChromosomeCountsInfoCalledAltAllelesException() { + final int calledAlleles = 0; + final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), Arrays.asList(Aref, C)); + GATKVariantContextUtils.updateChromosomeCountsInfo(null, calledAlleles, builder); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testUpdateChromosomeCountsInfoBuilderException() { + final int calledAlleles = 0; + final Map calledAltAlleles = new LinkedHashMap<>(); + GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, null); + } }