Merge pull request #1264 from broadinstitute/rhl_max_nocall_1262
Backport maxNoCall functionality from GATK4
This commit is contained in:
commit
f002acebdf
|
|
@ -69,6 +69,8 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
private static final String INVERT_SELECTION_MD5 = "26d192b868746ab14133f145ae812e7c";
|
||||
private static final String MAX_FILTERED_GT_SELECTION_MD5 = "f83ac0deb7a8b022d6d40a85627a71ec";
|
||||
private static final String MIN_FILTERED_GT_SELECTION_MD5 = "346620b7a5d66dabf89d3f42d6e27db7";
|
||||
private static final String NO_CALL_FILTERING_KEEP_ONE = "6e2401190c5ada6a3bed2640c068f43b";
|
||||
private static final String NO_CALL_FILTERING_KEEP_TWO = "6bced1ab6a3d58f1fd905b7f601987a3";
|
||||
|
||||
@Test
|
||||
public void testDiscordanceNoSampleSpecified() {
|
||||
|
|
@ -771,4 +773,56 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
executeTest("testSACNonDiploid", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxNoCall1() {
|
||||
final String testfile = privateTestDir + "vcfexample.forNoCallFiltering.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " --variant " + testfile + " -o %s --no_cmdline_in_header --maxNOCALLnumber 1",
|
||||
1,
|
||||
Arrays.asList(NO_CALL_FILTERING_KEEP_ONE));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testMaxNoCall1", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxNoCall0_25() {
|
||||
final String testfile = privateTestDir + "vcfexample.forNoCallFiltering.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " --variant " + testfile + " -o %s --no_cmdline_in_header --maxNOCALLfraction 0.25",
|
||||
1,
|
||||
Arrays.asList(NO_CALL_FILTERING_KEEP_ONE));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testMaxNoCall0_25", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxNoCall2() {
|
||||
final String testfile = privateTestDir + "vcfexample.forNoCallFiltering.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " --variant " + testfile + " -o %s --no_cmdline_in_header --maxNOCALLnumber 2",
|
||||
1,
|
||||
Arrays.asList(NO_CALL_FILTERING_KEEP_TWO));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testMaxNoCall2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxNoCall0_5() {
|
||||
final String testfile = privateTestDir + "vcfexample.forNoCallFiltering.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + hg19Reference + " --variant " + testfile + " -o %s --no_cmdline_in_header --maxNOCALLfraction 0.5",
|
||||
1,
|
||||
Arrays.asList(NO_CALL_FILTERING_KEEP_TWO));
|
||||
spec.disableShadowBCF();
|
||||
|
||||
executeTest("testMaxNoCall0_5", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -309,9 +309,11 @@ import java.util.*;
|
|||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
static final int MAX_FILTERED_GENOTYPES_DEFAULT_VALUE = Integer.MAX_VALUE;
|
||||
static final int MIN_FILTERED_GENOTYPES_DEFAULT_VALUE = 0;
|
||||
static final double MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 1.0;
|
||||
static final int MIN_FILTERED_GENOTYPES_DEFAULT_VALUE = 0;
|
||||
static final double MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 0.0;
|
||||
private static final int MAX_NOCALL_NUMBER_DEFAULT_VALUE = Integer.MAX_VALUE;
|
||||
private static final double MAX_NOCALL_FRACTION_DEFAULT_VALUE = 1.0;
|
||||
|
||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
|
|
@ -564,6 +566,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="minFractionFilteredGenotypes", required=false, doc="Maximum fraction of samples filtered at the genotype level")
|
||||
private double minFractionFilteredGenotypes = MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||
|
||||
/**
|
||||
* If this argument is provided, select sites where at most the given number of samples have no-call genotypes.
|
||||
*/
|
||||
@Argument(fullName="maxNOCALLnumber", required=false, doc="Maximum number of samples with no-call genotypes")
|
||||
private int maxNOCALLnumber = MAX_NOCALL_NUMBER_DEFAULT_VALUE;
|
||||
|
||||
/**
|
||||
* If this argument is provided, select sites where at most the given fraction of samples have no-call genotypes.
|
||||
*/
|
||||
@Argument(fullName="maxNOCALLfraction", required=false, doc="Maximum fraction of samples with no-call genotypes")
|
||||
private double maxNOCALLfraction = MAX_NOCALL_FRACTION_DEFAULT_VALUE;
|
||||
|
||||
/**
|
||||
* If this argument is provided, set filtered genotypes to no-call (./.).
|
||||
*/
|
||||
|
|
@ -815,7 +829,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (containsIndelLargerOrSmallerThan(vc, maxIndelSize, minIndelSize))
|
||||
continue;
|
||||
|
||||
if ( needNumFilteredGenotypes()) {
|
||||
if ( considerFilteredGenotypes()) {
|
||||
int numFilteredSamples = numFilteredGenotypes(vc);
|
||||
double fractionFilteredGenotypes = samples.isEmpty() ? 0.0 : numFilteredSamples / samples.size();
|
||||
if (numFilteredSamples > maxFilteredGenotypes || numFilteredSamples < minFilteredGenotypes ||
|
||||
|
|
@ -823,6 +837,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
continue;
|
||||
}
|
||||
|
||||
if (considerNoCallGenotypes()) {
|
||||
final int numNoCallSamples = numNoCallGenotypes(vc);
|
||||
final double fractionNoCallGenotypes = samples.isEmpty() ? 0.0 : ((double) numNoCallSamples) / samples.size();
|
||||
if (numNoCallSamples > maxNOCALLnumber || fractionNoCallGenotypes > maxNOCALLfraction)
|
||||
continue;
|
||||
}
|
||||
|
||||
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
|
||||
|
||||
VariantContext filteredGenotypeToNocall = setFilteredGenotypeToNocall(sub, setFilteredGenotypesToNocall);
|
||||
|
|
@ -877,6 +898,25 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the number of no-call genotypes
|
||||
*
|
||||
* @param vc the variant context
|
||||
* @return number of no-call genotypes
|
||||
*/
|
||||
private int numNoCallGenotypes(final VariantContext vc) {
|
||||
if (vc == null)
|
||||
return 0;
|
||||
|
||||
int numFiltered = 0;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.isNoCall() )
|
||||
numFiltered++;
|
||||
}
|
||||
|
||||
return numFiltered;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the number of filtered samples
|
||||
*
|
||||
|
|
@ -1183,14 +1223,24 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
/**
|
||||
* Need the number of filtered genotypes samples?
|
||||
* Should the number of filtered genotypes be considered for filtering?
|
||||
*
|
||||
* @return true if any of the filtered genotype samples arguments is used (not the default value), false otherwise
|
||||
*/
|
||||
private boolean needNumFilteredGenotypes(){
|
||||
private boolean considerFilteredGenotypes(){
|
||||
return maxFilteredGenotypes != MAX_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||
minFilteredGenotypes != MIN_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||
maxFractionFilteredGenotypes != MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||
minFractionFilteredGenotypes != MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should the number of no-call genotypes be considered for filtering?
|
||||
*
|
||||
* @return true if any of the filtered genotype samples arguments is used (not the default value), false otherwise
|
||||
*
|
||||
*/
|
||||
private boolean considerNoCallGenotypes(){
|
||||
return maxNOCALLnumber != MAX_NOCALL_NUMBER_DEFAULT_VALUE || maxNOCALLfraction != MAX_NOCALL_FRACTION_DEFAULT_VALUE;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue