SelectVariants and VariantFiltration not updating AC, AN and AF for --setFilteredGtToNocall
This commit is contained in:
parent
7e0a9e44fe
commit
e2828104b1
|
|
@ -234,10 +234,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf --setFilteredGtToNocall", 1,
|
||||
Arrays.asList("454d265ee8b425284ed7fca8ca4774be"));
|
||||
Arrays.asList("2ff3753215d418712309e50da323f6e8"));
|
||||
executeTest("testSetFilteredGtoNocall", spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSetFilteredGtoNocallUpdateInfo() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||
+ " -G_filter 'GQ < 20' -G_filterName lowDP -G_filter 'DP<10' -G_filterName lowGQ -V " + privateTestDir + "variantFiltrationInfoField.vcf --setFilteredGtToNocall",
|
||||
1,
|
||||
Arrays.asList("3b074975bb6f70c84b2dd81695bb89ff"));
|
||||
executeTest("testSetFilteredGtoNocallUpdateInfo", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetVcfFilteredGtoNocall() {
|
||||
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||
|
|
@ -245,7 +256,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("7771f07a9997296852ab367fac2c7a6c")
|
||||
Arrays.asList("410c6b7bb62fc43bb41eee627670f757")
|
||||
);
|
||||
|
||||
spec.disableShadowBCF();
|
||||
|
|
|
|||
|
|
@ -732,12 +732,25 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("7771f07a9997296852ab367fac2c7a6c")
|
||||
Arrays.asList("410c6b7bb62fc43bb41eee627670f757")
|
||||
);
|
||||
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testSetFilteredGtoNocall--" + testfile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetFilteredGtoNocallUpdateInfo() {
|
||||
String testfile = privateTestDir + "selectVariantsInfoField.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants --setFilteredGtToNocall --removeUnusedAlternates --excludeNonVariants -R " + b37KGReference + " --variant " +
|
||||
testfile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("349136d92f915f8c7ba8a2f92e51d6b7"));
|
||||
executeTest("testSetFilteredGtoNocallUpdateInfo", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSACSimpleDiploid() {
|
||||
String testfile = privateTestDir + "261_S01_raw_variants_gvcf.vcf";
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -241,6 +242,13 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
|||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
|
||||
// need AC, AN and AF since output if set filtered genotypes to no-call
|
||||
if ( setFilteredGenotypesToNocall ) {
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
|
||||
}
|
||||
|
||||
if ( clusterWindow > 0 )
|
||||
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
||||
|
||||
|
|
@ -402,6 +410,18 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
|||
if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
|
||||
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
|
||||
|
||||
//
|
||||
// recompute AC, AN and AF if filtered genotypes are set to no-call
|
||||
//
|
||||
// occurrences of alternate alleles over all genotypes
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<Allele, Integer>(vc.getNAlleles()-1);
|
||||
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
|
||||
calledAltAlleles.put(altAllele, 0);
|
||||
}
|
||||
|
||||
int calledAlleles = 0;
|
||||
boolean haveFilteredNoCallAlleles = false;
|
||||
|
||||
// for each genotype, check filters then create a new object
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() ) {
|
||||
|
|
@ -415,16 +435,23 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
|
||||
if ( !filters.isEmpty() && setFilteredGenotypesToNocall )
|
||||
if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) {
|
||||
haveFilteredNoCallAlleles = true;
|
||||
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
|
||||
else
|
||||
}
|
||||
else {
|
||||
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
||||
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
|
||||
}
|
||||
} else {
|
||||
genotypes.add(g);
|
||||
}
|
||||
}
|
||||
|
||||
builder.genotypes(genotypes);
|
||||
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
|
||||
if ( haveFilteredNoCallAlleles )
|
||||
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
|
||||
}
|
||||
|
||||
// make a new variant context based on filters
|
||||
|
|
|
|||
|
|
@ -708,6 +708,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
||||
// need AC, AN and AF since output if set filtered genotypes to no-call
|
||||
if (setFilteredGenotypesToNocall) {
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
|
||||
}
|
||||
|
||||
if (keepOriginalChrCounts) {
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AC_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.ORIGINAL_AF_KEY));
|
||||
|
|
@ -1117,14 +1124,35 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
|
||||
|
||||
//
|
||||
// recompute AC, AN and AF if filtered genotypes are set to no-call
|
||||
//
|
||||
// occurrences of alternate alleles over all genotypes
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<Allele, Integer>(vc.getNAlleles()-1);
|
||||
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
|
||||
calledAltAlleles.put(altAllele, 0);
|
||||
}
|
||||
int calledAlleles = 0;
|
||||
boolean haveFilteredNoCallAlleles = false;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isCalled() && g.isFiltered() )
|
||||
if ( g.isCalled() && g.isFiltered() ) {
|
||||
haveFilteredNoCallAlleles = true;
|
||||
genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make());
|
||||
else
|
||||
}
|
||||
else {
|
||||
// increment the number called alleles and called alternate alleles
|
||||
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
|
||||
genotypes.add(g);
|
||||
}
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).make();
|
||||
builder.genotypes(genotypes);
|
||||
|
||||
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
|
||||
if ( haveFilteredNoCallAlleles )
|
||||
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
/*
|
||||
* Add annotations to the new VC
|
||||
|
|
@ -1243,4 +1271,4 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
private boolean considerNoCallGenotypes(){
|
||||
return maxNOCALLnumber != MAX_NOCALL_NUMBER_DEFAULT_VALUE || maxNOCALLfraction != MAX_NOCALL_FRACTION_DEFAULT_VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,8 +31,6 @@ import htsjdk.tribble.TribbleException;
|
|||
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
|
|
@ -2281,5 +2279,56 @@ public class GATKVariantContextUtils {
|
|||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the number of called alternate and reference plus alternate alleles for a genotype
|
||||
*
|
||||
* @param calledAltAlleles number of called alternate alleles for all genotypes
|
||||
* @param calledAlleles number of called alleles for all genotypes
|
||||
* @param genotype genotype
|
||||
* @return incremented called alleles
|
||||
* @throws IllegalArgumentException if calledAltAlleles or genotype are null
|
||||
*/
|
||||
public static int incrementChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles, final int calledAlleles, final Genotype genotype) {
|
||||
if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null");
|
||||
if ( genotype == null ) throw new IllegalArgumentException("Genotype can not be null");
|
||||
|
||||
int incrementedCalledAlleles = calledAlleles;
|
||||
if (genotype.isCalled()) {
|
||||
for (final Allele allele : genotype.getAlleles()) {
|
||||
incrementedCalledAlleles++;
|
||||
if (allele.isNonReference()) {
|
||||
calledAltAlleles.put(allele, calledAltAlleles.get(allele) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return incrementedCalledAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the variant context chromosome counts info fields (AC, AN, AF)
|
||||
*
|
||||
* @param calledAltAlleles number of called alternate alleles for all genotypes
|
||||
* @param calledAlleles number of called alleles for all genotypes
|
||||
* @param builder builder for variant context
|
||||
* @throws IllegalArgumentException if calledAltAlleles or builder are null
|
||||
*/
|
||||
public static void updateChromosomeCountsInfo(final Map<Allele, Integer> calledAltAlleles, final int calledAlleles,
|
||||
final VariantContextBuilder builder) {
|
||||
if ( calledAltAlleles == null ) throw new IllegalArgumentException("Called alternate alleles can not be null");
|
||||
if ( builder == null ) throw new IllegalArgumentException("Variant context builder can not be null");
|
||||
|
||||
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, calledAltAlleles.values().toArray()).
|
||||
attribute(VCFConstants.ALLELE_NUMBER_KEY, calledAlleles);
|
||||
// Add AF is there are called alleles
|
||||
if ( calledAlleles != 0 ) {
|
||||
final Set<Double> alleleFrequency = new LinkedHashSet<Double>(calledAltAlleles.size());
|
||||
for ( final Integer value : calledAltAlleles.values() ) {
|
||||
alleleFrequency.add(value.doubleValue()/calledAlleles);
|
||||
}
|
||||
builder.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFrequency.toArray());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.gatk.utils.variant;
|
||||
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -1661,5 +1662,75 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
attribute(GATKVCFConstants.STRAND_COUNT_BY_SAMPLE_KEY, new int[]{5, 10, 15, 20}).make();
|
||||
Assert.assertEquals(GATKVariantContextUtils.makeNewSACs(g, Arrays.asList(1, 3)), expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncrementChromosomeCountsInfo() {
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
|
||||
final Map<Allele, Integer> expectedCalledAltAlleles = new LinkedHashMap<>();
|
||||
final List<Allele> alleles = new ArrayList<>(Arrays.asList(Aref, C));
|
||||
for ( final Allele allele : alleles ) {
|
||||
if ( allele.isNonReference() ) {
|
||||
calledAltAlleles.put(allele, 0);
|
||||
expectedCalledAltAlleles.put(allele, 1);
|
||||
}
|
||||
}
|
||||
int calledAlleles = 0;
|
||||
final Genotype genotype = new GenotypeBuilder().alleles(alleles).make();
|
||||
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, genotype);
|
||||
Assert.assertEquals(calledAlleles, alleles.size());
|
||||
Assert.assertEquals(calledAltAlleles, expectedCalledAltAlleles);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testIncrementChromosomeCountsInfoCalledAltAllelesException() {
|
||||
int calledAlleles = 0;
|
||||
final Genotype genotype = new GenotypeBuilder().make();
|
||||
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(null, calledAlleles, genotype);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testIncrementChromosomeCountsInfoGenotypeException() {
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
|
||||
int calledAlleles = 0;
|
||||
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpdateChromosomeCountsInfo() {
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
|
||||
final Set<Double> alleleFrequency = new LinkedHashSet<Double>(calledAltAlleles.size());
|
||||
final List<Allele> alleles = new ArrayList<>(Arrays.asList(Aref, C));
|
||||
final int calledAlleles = alleles.size();
|
||||
final List<Integer> numAltAlleles = new ArrayList<>();
|
||||
|
||||
final int alleleOccurence = 1;
|
||||
for ( final Allele allele : alleles ) {
|
||||
if ( allele.isNonReference() ) {
|
||||
calledAltAlleles.put(allele, alleleOccurence);
|
||||
alleleFrequency.add( ((double) alleleOccurence)/calledAlleles );
|
||||
numAltAlleles.add(1);
|
||||
}
|
||||
}
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), alleles);
|
||||
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
|
||||
final VariantContext vc = builder.make();
|
||||
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY), numAltAlleles.toArray());
|
||||
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_NUMBER_KEY), alleles.size());
|
||||
Assert.assertEquals(vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY), alleleFrequency.toArray());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testUpdateChromosomeCountsInfoCalledAltAllelesException() {
|
||||
final int calledAlleles = 0;
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, Aref.length(), Arrays.asList(Aref, C));
|
||||
GATKVariantContextUtils.updateChromosomeCountsInfo(null, calledAlleles, builder);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testUpdateChromosomeCountsInfoBuilderException() {
|
||||
final int calledAlleles = 0;
|
||||
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>();
|
||||
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, null);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue