SelectVariants and VariantFiltration not updating AC, AN and AF for --setFilteredGtToNocall

This commit is contained in:
Ron Levine 2016-04-13 13:42:03 -04:00
parent 7e0a9e44fe
commit e2828104b1
6 changed files with 210 additions and 11 deletions

View File

@ -234,10 +234,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf --setFilteredGtToNocall", 1,
Arrays.asList("454d265ee8b425284ed7fca8ca4774be"));
Arrays.asList("2ff3753215d418712309e50da323f6e8"));
executeTest("testSetFilteredGtoNocall", spec);
}
@Test
public void testSetFilteredGtoNocallUpdateInfo() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " -G_filter 'GQ < 20' -G_filterName lowDP -G_filter 'DP<10' -G_filterName lowGQ -V " + privateTestDir + "variantFiltrationInfoField.vcf --setFilteredGtToNocall",
1,
Arrays.asList("3b074975bb6f70c84b2dd81695bb89ff"));
executeTest("testSetFilteredGtoNocallUpdateInfo", spec);
}
@Test
public void testSetVcfFilteredGtoNocall() {
String testfile = privateTestDir + "filteredSamples.vcf";
@ -245,7 +256,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("7771f07a9997296852ab367fac2c7a6c")
Arrays.asList("410c6b7bb62fc43bb41eee627670f757")
);
spec.disableShadowBCF();

View File

@ -732,12 +732,25 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("7771f07a9997296852ab367fac2c7a6c")
Arrays.asList("410c6b7bb62fc43bb41eee627670f757")
);
spec.disableShadowBCF();
executeTest("testSetFilteredGtoNocall--" + testfile, spec);
}
@Test
public void testSetFilteredGtoNocallUpdateInfo() {
String testfile = privateTestDir + "selectVariantsInfoField.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants --setFilteredGtToNocall --removeUnusedAlternates --excludeNonVariants -R " + b37KGReference + " --variant " +
testfile + " -o %s --no_cmdline_in_header",
1,
Arrays.asList("349136d92f915f8c7ba8a2f92e51d6b7"));
executeTest("testSetFilteredGtoNocallUpdateInfo", spec);
}
@Test
public void testSACSimpleDiploid() {
String testfile = privateTestDir + "261_S01_raw_variants_gvcf.vcf";

View File

@ -45,6 +45,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.*;
@ -241,6 +242,13 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
// need AC, AN and AF since output if set filtered genotypes to no-call
if ( setFilteredGenotypesToNocall ) {
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
}
if ( clusterWindow > 0 )
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
@ -402,6 +410,18 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
//
// recompute AC, AN and AF if filtered genotypes are set to no-call
//
// occurrences of alternate alleles over all genotypes
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<Allele, Integer>(vc.getNAlleles()-1);
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
calledAltAlleles.put(altAllele, 0);
}
int calledAlleles = 0;
boolean haveFilteredNoCallAlleles = false;
// for each genotype, check filters then create a new object
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() ) {
@ -415,16 +435,23 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
}
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
if ( !filters.isEmpty() && setFilteredGenotypesToNocall )
if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) {
haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
else
}
else {
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
}
} else {
genotypes.add(g);
}
}
builder.genotypes(genotypes);
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
if ( haveFilteredNoCallAlleles )
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
}
// make a new variant context based on filters

View File

@ -708,6 +708,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
// need AC, AN and AF since output if set filtered genotypes to no-call
if (setFilteredGenotypesToNocall) {
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
}
if (keepOriginalChrCounts) {
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY));
@ -1117,14 +1124,35 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
final VariantContextBuilder builder = new VariantContextBuilder(vc);
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
//
// recompute AC, AN and AF if filtered genotypes are set to no-call
//
// occurrences of alternate alleles over all genotypes
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<Allele, Integer>(vc.getNAlleles()-1);
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
calledAltAlleles.put(altAllele, 0);
}
int calledAlleles = 0;
boolean haveFilteredNoCallAlleles = false;
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() && g.isFiltered() )
if ( g.isCalled() && g.isFiltered() ) {
haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make());
else
}
else {
// increment the number called alleles and called alternate alleles
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
genotypes.add(g);
}
}
return builder.genotypes(genotypes).make();
builder.genotypes(genotypes);
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
if ( haveFilteredNoCallAlleles )
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
return builder.make();
}
/*
* Add annotations to the new VC
@ -1243,4 +1271,4 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
private boolean considerNoCallGenotypes(){
return maxNOCALLnumber != MAX_NOCALL_NUMBER_DEFAULT_VALUE || maxNOCALLfraction != MAX_NOCALL_FRACTION_DEFAULT_VALUE;
}
}
}

View File

@ -31,8 +31,6 @@ import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.*;
@ -2281,5 +2279,56 @@ public class GATKVariantContextUtils {
return tokens;
}
/**
* Increment the number of called alternate and reference plus alternate alleles for a genotype
*
* @param calledAltAlleles number of called alternate alleles for all genotypes
* @param calledAlleles number of called alleles for all genotypes
* @param genotype genotype
* @return incremented called alleles
* @throws IllegalArgumentException if calledAltAlleles or genotype are null
*/
public static int incrementChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles, final int calledAlleles, final Genotype genotype) {
if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null");
if ( genotype == null ) throw new IllegalArgumentException("Genotype can not be null");
int incrementedCalledAlleles = calledAlleles;
if (genotype.isCalled()) {
for (final Allele allele : genotype.getAlleles()) {
incrementedCalledAlleles++;
if (allele.isNonReference()) {
calledAltAlleles.put(allele, calledAltAlleles.get(allele) + 1);
}
}
}
return incrementedCalledAlleles;
}
/**
* Update the variant context chromosome counts info fields (AC, AN, AF)
*
* @param calledAltAlleles number of called alternate alleles for all genotypes
* @param calledAlleles number of called alleles for all genotypes
* @param builder builder for variant context
* @throws IllegalArgumentException if calledAltAlleles or builder are null
*/
public static void updateChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles, final int calledAlleles,
final VariantContextBuilder builder) {
if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null");
if ( builder == null ) throw new IllegalArgumentException("Variant context builder can not be null");
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, calledAltAlleles.values().toArray()).
attribute(VCFConstants.ALLELE_NUMBER_KEY, calledAlleles);
// Add AF is there are called alleles
if ( calledAlleles != 0 ) {
final Set<Double> alleleFrequency = new LinkedHashSet<Double>(calledAltAlleles.size());
for ( final Integer value : calledAltAlleles.values() ) {
alleleFrequency.add(value.doubleValue()/calledAlleles);
}
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFrequency.toArray());
}
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.gatk.utils.variant;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
@ -1661,5 +1662,75 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).make();
Assert.assertEquals(GATKVariantContextUtils.makeNewSACs(g, Arrays.asList(1, 3)), expected);
}
@Test
public void testIncrementChromosomeCountsInfo() {
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
final Map<Allele, Integer> expectedCalledAltAlleles = new LinkedHashMap<>();
final List<Allele> alleles = new ArrayList<>(Arrays.asList(Aref, C));
for ( final Allele allele : alleles ) {
if ( allele.isNonReference() ) {
calledAltAlleles.put(allele, 0);
expectedCalledAltAlleles.put(allele, 1);
}
}
int calledAlleles = 0;
final Genotype genotype = new GenotypeBuilder().alleles(alleles).make();
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, genotype);
Assert.assertEquals(calledAlleles, alleles.size());
Assert.assertEquals(calledAltAlleles, expectedCalledAltAlleles);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testIncrementChromosomeCountsInfoCalledAltAllelesException() {
int calledAlleles = 0;
final Genotype genotype = new GenotypeBuilder().make();
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(null, calledAlleles, genotype);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testIncrementChromosomeCountsInfoGenotypeException() {
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
int calledAlleles = 0;
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, null);
}
@Test
public void testUpdateChromosomeCountsInfo() {
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
final Set<Double> alleleFrequency = new LinkedHashSet<Double>(calledAltAlleles.size());
final List<Allele> alleles = new ArrayList<>(Arrays.asList(Aref, C));
final int calledAlleles = alleles.size();
final List<Integer> numAltAlleles = new ArrayList<>();
final int alleleOccurence = 1;
for ( final Allele allele : alleles ) {
if ( allele.isNonReference() ) {
calledAltAlleles.put(allele, alleleOccurence);
alleleFrequency.add( ((double) alleleOccurence)/calledAlleles );
numAltAlleles.add(1);
}
}
final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), alleles);
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
final VariantContext vc = builder.make();
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY), numAltAlleles.toArray());
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_NUMBER_KEY), alleles.size());
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), alleleFrequency.toArray());
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testUpdateChromosomeCountsInfoCalledAltAllelesException() {
final int calledAlleles = 0;
final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), Arrays.asList(Aref, C));
GATKVariantContextUtils.updateChromosomeCountsInfo(null, calledAlleles, builder);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testUpdateChromosomeCountsInfoBuilderException() {
final int calledAlleles = 0;
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, null);
}
}