Site-level selection based on genotype filter status
This commit is contained in:
parent
28a7ea43ec
commit
a6ca97ef14
|
|
@ -147,11 +147,19 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
@Test
|
@Test
|
||||||
public void testInvertFilter() {
|
public void testInvertFilter() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000 --invert_filter_expression", 1,
|
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000 --invertFilterExpression", 1,
|
||||||
Arrays.asList("d478fd6bcf0884133fe2a47adf4cd765"));
|
Arrays.asList("d478fd6bcf0884133fe2a47adf4cd765"));
|
||||||
executeTest("test inversion of selection of filter with separate names #2", spec);
|
executeTest("test inversion of selection of filter with separate names #2", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvertJexlFilter() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString() + " --filterName ABF -filter 'AlleleBalance >= 0.7' --filterName FSF -filter 'FisherStrand != 1.4' --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
|
Arrays.asList("6fa6cd89bfc8b6b4dfc3da25eb36d08b")); // Differs from testInvertFilter() because their VCF header FILTER description uses the -filter argument. Their filter statuses are identical.
|
||||||
|
executeTest("test inversion of selection of filter via JEXL with separate names #2", spec);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypeFilters1() {
|
public void testGenotypeFilters1() {
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
|
|
@ -207,8 +215,40 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
public void testInvertGenotypeFilterExpression() {
|
public void testInvertGenotypeFilterExpression() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||||
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf --invert_genotype_filter_expression", 1,
|
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf --invertGenotypeFilterExpression", 1,
|
||||||
Arrays.asList("d2664870e7145eb73a2295766482c823"));
|
Arrays.asList("d2664870e7145eb73a2295766482c823"));
|
||||||
executeTest("testInvertGenotypeFilterExpression", spec);
|
executeTest("testInvertGenotypeFilterExpression", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvertJexlGenotypeFilterExpression() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||||
|
+ " --genotypeFilterExpression 'DP >= 8' --genotypeFilterName highDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
|
||||||
|
Arrays.asList("8ddd8f3b5ee351c4ab79cb186b1d45ba")); // Differs from testInvertFilter because FILTER description uses the -genotypeFilterExpression argument
|
||||||
|
executeTest("testInvertJexlGenotypeFilterExpression", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSetFilteredGtoNocall() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||||
|
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf --setFilteredGtToNocall", 1,
|
||||||
|
Arrays.asList("9ff801dd726eb4fc562b278ccc6854b1"));
|
||||||
|
executeTest("testSetFilteredGtoNocall", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSetVcfFilteredGtoNocall() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList("81b99386a64a8f2b857a7ef2bca5856e")
|
||||||
|
);
|
||||||
|
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testSetVcfFilteredGtoNocall--" + testfile, spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -64,8 +64,10 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
|
return "-T SelectVariants -R " + b36KGReference + " -L 1 -o %s --no_cmdline_in_header" + args;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final String sampleExclusionMD5 = "eea22fbf1e490e59389a663c3d6a6537";
|
private static final String SAMPLE_EXCLUSION_MD5 = "eea22fbf1e490e59389a663c3d6a6537";
|
||||||
private static final String invertSelectionMD5 = "831bc0a5a723b0681a910d668ff3757b";
|
private static final String INVERT_SELECTION_MD5 = "831bc0a5a723b0681a910d668ff3757b";
|
||||||
|
private static final String MAX_FILTERED_GT_SELECTION_MD5 = "0365de1bbf7c037be00badace0a74d02";
|
||||||
|
private static final String MIN_FILTERED_GT_SELECTION_MD5 = "fcee8c8caa0696a6675961bb12664878";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDiscordanceNoSampleSpecified() {
|
public void testDiscordanceNoSampleSpecified() {
|
||||||
|
|
@ -199,7 +201,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_se '[CDH]' --variant " + testfile,
|
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_se '[CDH]' --variant " + testfile,
|
||||||
1,
|
1,
|
||||||
Arrays.asList(sampleExclusionMD5)
|
Arrays.asList(SAMPLE_EXCLUSION_MD5)
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
|
@ -216,7 +218,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -se '[^CDH]' --variant " + testfile,
|
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -se '[^CDH]' --variant " + testfile,
|
||||||
1,
|
1,
|
||||||
Arrays.asList(sampleExclusionMD5)
|
Arrays.asList(SAMPLE_EXCLUSION_MD5)
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
|
|
||||||
|
|
@ -557,7 +559,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test inverting the variant selection criteria
|
* Test inverting the variant selection criteria by the -invertSelect argument
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testInvertSelection() {
|
public void testInvertSelection() {
|
||||||
|
|
@ -565,16 +567,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 20000' --invert_selection --variant " + testfile),
|
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 20000' -invertSelect --variant " + testfile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList(invertSelectionMD5)
|
Arrays.asList(INVERT_SELECTION_MD5)
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testInvertSelection--" + testfile, spec);
|
executeTest("testInvertSelection--" + testfile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test inverting the variant JEXL selection criteria
|
* Test inverting the variant selection criteria by inverting the JEXL expression logic following -select
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testInvertJexlSelection() {
|
public void testInvertJexlSelection() {
|
||||||
|
|
@ -584,7 +586,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP >= 20000'--variant " + testfile),
|
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP >= 20000'--variant " + testfile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList(invertSelectionMD5)
|
Arrays.asList(INVERT_SELECTION_MD5)
|
||||||
);
|
);
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testInvertJexlSelection--" + testfile, spec);
|
executeTest("testInvertJexlSelection--" + testfile, spec);
|
||||||
|
|
@ -659,10 +661,76 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String pedFile = privateTestDir + "CEUtrio.ped";
|
String pedFile = privateTestDir + "CEUtrio.ped";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R "+b37KGReference + " -mv -mvq 0 -inv_mv --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header",
|
"-T SelectVariants -R "+b37KGReference + " -mv -mvq 0 -invMv --variant " + testFile + " -ped " + pedFile + " -o %s --no_cmdline_in_header",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("35921fb2dedca0ead83027a66b725794"));
|
Arrays.asList("35921fb2dedca0ead83027a66b725794"));
|
||||||
|
|
||||||
executeTest("testMendelianViolationSelection--" + testFile, spec);
|
executeTest("testMendelianViolationSelection--" + testFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxFilteredGenotypesSelection() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --maxFilteredGenotypes 1 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList(MAX_FILTERED_GT_SELECTION_MD5)
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMaxFilteredGenotypesSelection--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMinFilteredGenotypesSelection() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --minFilteredGenotypes 2 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList(MIN_FILTERED_GT_SELECTION_MD5)
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMinFilteredGenotypesSelection--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMaxFractionFilteredGenotypesSelection() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --maxFractionFilteredGenotypes 0.4 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList(MAX_FILTERED_GT_SELECTION_MD5)
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMaxFractionFilteredGenotypesSelection--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMinFractionFilteredGenotypesSelection() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --minFractionFilteredGenotypes 0.6 -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList(MIN_FILTERED_GT_SELECTION_MD5)
|
||||||
|
);
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testMinFractionFilteredGenotypesSelection--" + testfile, spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSetFilteredGtoNocall() {
|
||||||
|
String testfile = privateTestDir + "filteredSamples.vcf";
|
||||||
|
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T SelectVariants --setFilteredGtToNocall -R " + b37KGReference + " --variant " + testfile + " -o %s --no_cmdline_in_header",
|
||||||
|
1,
|
||||||
|
Arrays.asList("81b99386a64a8f2b857a7ef2bca5856e")
|
||||||
|
);
|
||||||
|
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testSetFilteredGtoNocall--" + testfile, spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -179,15 +179,21 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Invert the selection criteria for --filterExpression
|
* Invert the selection criteria for --filterExpression
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="invert_filter_expression", shortName="inv_fil_sel", doc="Invert the selection criteria for --filterExpression", required=false)
|
@Argument(fullName="invertFilterExpression", shortName="invfilter", doc="Invert the selection criteria for --filterExpression", required=false)
|
||||||
protected boolean invertFilterExpression = false;
|
protected boolean invertFilterExpression = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Invert the selection criteria for --genotypeFilterExpression
|
* Invert the selection criteria for --genotypeFilterExpression
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="invert_genotype_filter_expression", shortName="inv_gen_fil_sel", doc="Invert the selection criteria for --genotypeFilterExpression", required=false)
|
@Argument(fullName="invertGenotypeFilterExpression", shortName="invG_filter", doc="Invert the selection criteria for --genotypeFilterExpression", required=false)
|
||||||
protected boolean invertGenotypeFilterExpression = false;
|
protected boolean invertGenotypeFilterExpression = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, set filtered genotypes to no-call (./.).
|
||||||
|
*/
|
||||||
|
@Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call")
|
||||||
|
private boolean setFilteredGenotypesToNocall = false;
|
||||||
|
|
||||||
// JEXL expressions for the filters
|
// JEXL expressions for the filters
|
||||||
List<VariantContextUtils.JexlVCMatchExp> filterExps;
|
List<VariantContextUtils.JexlVCMatchExp> filterExps;
|
||||||
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
|
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
|
||||||
|
|
@ -201,8 +207,10 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
||||||
private static final int WINDOW_SIZE = 10; // 10 variants on either end of the current one
|
private static final int WINDOW_SIZE = 10; // 10 variants on either end of the current one
|
||||||
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
|
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
|
||||||
|
|
||||||
|
private final List<Allele> diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepend inverse phrase to description if --invert_filter_expression
|
* Prepend inverse phrase to description if --invertFilterExpression
|
||||||
*
|
*
|
||||||
* @param description the description
|
* @param description the description
|
||||||
* @return the description with inverse prepended if --invert_filter_expression
|
* @return the description with inverse prepended if --invert_filter_expression
|
||||||
|
|
@ -379,7 +387,7 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
|
|
||||||
// make new Genotypes based on filters
|
// make new Genotypes based on filters
|
||||||
if ( !genotypeFilterExps.isEmpty() ) {
|
if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
|
||||||
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
|
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
|
||||||
|
|
||||||
// for each genotype, check filters then create a new object
|
// for each genotype, check filters then create a new object
|
||||||
|
|
@ -388,12 +396,17 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
||||||
final List<String> filters = new ArrayList<String>();
|
final List<String> filters = new ArrayList<String>();
|
||||||
if ( g.isFiltered() ) filters.add(g.getFilters());
|
if ( g.isFiltered() ) filters.add(g.getFilters());
|
||||||
|
|
||||||
|
// Add if expression filters the variant context
|
||||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||||
if ( Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression) )
|
if ( Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression) )
|
||||||
filters.add(exp.name);
|
filters.add(exp.name);
|
||||||
}
|
}
|
||||||
|
|
||||||
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
|
||||||
|
if ( !filters.isEmpty() && setFilteredGenotypesToNocall )
|
||||||
|
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
|
||||||
|
else
|
||||||
|
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
|
||||||
} else {
|
} else {
|
||||||
genotypes.add(g);
|
genotypes.add(g);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -146,7 +146,7 @@ import java.util.*;
|
||||||
* -o output.vcf \
|
* -o output.vcf \
|
||||||
* -se 'SAMPLE.+PARC' \
|
* -se 'SAMPLE.+PARC' \
|
||||||
* -select "QD > 10.0"
|
* -select "QD > 10.0"
|
||||||
* --invert_selection
|
* -invertSelect
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <h4>Select a sample and exclude non-variant loci and filtered loci (trim remaining alleles by default):</h4>
|
* <h4>Select a sample and exclude non-variant loci and filtered loci (trim remaining alleles by default):</h4>
|
||||||
|
|
@ -207,7 +207,7 @@ import java.util.*;
|
||||||
* -sn mySample
|
* -sn mySample
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <h4>Generating a VCF of all the variants that are mendelian violations. The optional argument `-mvq` restricts the selection to sites that have a QUAL score of 50 or more</h4>
|
* <h4>Generating a VCF of all the variants that are mendelian violations. The optional argument '-mvq' restricts the selection to sites that have a QUAL score of 50 or more</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
* -T SelectVariants \
|
* -T SelectVariants \
|
||||||
|
|
@ -218,14 +218,14 @@ import java.util.*;
|
||||||
* -o violations.vcf
|
* -o violations.vcf
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* <h4>Generating a VCF of all the variants that are not mendelian violations. The optional argument `-mvq` together with '-inv_mv' restricts the selection to sites that have a QUAL score of 50 or less</h4>
|
* <h4>Generating a VCF of all the variants that are not mendelian violations. The optional argument '-mvq' together with '-invMv' restricts the selection to sites that have a QUAL score of 50 or less</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
* -T SelectVariants \
|
* -T SelectVariants \
|
||||||
* -R reference.fasta \
|
* -R reference.fasta \
|
||||||
* -V input.vcf \
|
* -V input.vcf \
|
||||||
* -ped family.ped \
|
* -ped family.ped \
|
||||||
* -mv -mvq 50 -inv_mv \
|
* -mv -mvq 50 -invMv \
|
||||||
* -o violations.vcf
|
* -o violations.vcf
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
|
@ -275,7 +275,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* <h4>Select IDs in fileKeep and exclude IDs in fileExclude:</h4>
|
* <h4>Select IDs in fileKeep and exclude IDs in fileExclude:</h4>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T SelectVariants \
|
* -T SelectVariants \
|
||||||
* --variant input.vcf \
|
* --variant input.vcf \
|
||||||
|
|
@ -284,9 +284,35 @@ import java.util.*;
|
||||||
* -excludeIDs fileExclude
|
* -excludeIDs fileExclude
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
|
* <h4>Select sites where there are between 2 and 5 samples and between 10 and 50 percent of the sample genotypes are filtered:</h4>
|
||||||
|
* <pre>
|
||||||
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T SelectVariants \
|
||||||
|
* --variant input.vcf \
|
||||||
|
* --maxFilteredGenotypes 5
|
||||||
|
* --minFilteredGenotypes 2
|
||||||
|
* --maxFractionFilteredGenotypes 0.60
|
||||||
|
* --minFractionFilteredGenotypes 0.10
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* <h4>Set filtered genotypes to no-call (./.):</h4>
|
||||||
|
* <pre>
|
||||||
|
* java -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T SelectVariants \
|
||||||
|
* --variant input.vcf \
|
||||||
|
* --setFilteredGenotypesToNocall
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||||
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||||
|
static final int MAX_FILTERED_GENOTYPES_DEFAULT_VALUE = Integer.MAX_VALUE;
|
||||||
|
static final int MIN_FILTERED_GENOTYPES_DEFAULT_VALUE = 0;
|
||||||
|
static final double MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 1.0;
|
||||||
|
static final double MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE = 0.0;
|
||||||
|
|
||||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -359,10 +385,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
public ArrayList<String> selectExpressions = new ArrayList<>();
|
public ArrayList<String> selectExpressions = new ArrayList<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Invert the selection criteria for --select.
|
* Invert the selection criteria for -select.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="invert_selection", shortName="inv_sel", doc="Invert the selection criteria for --select", required=false)
|
@Argument(shortName="invertSelect", doc="Invert the selection criteria for -select", required=false)
|
||||||
protected boolean invertSelection = false;
|
protected boolean invertSelect = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this flag is enabled, sites that are found to be non-variant after the subsetting procedure (i.e. where none
|
* If this flag is enabled, sites that are found to be non-variant after the subsetting procedure (i.e. where none
|
||||||
|
|
@ -436,8 +462,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
* determined on the basis of family structure. Requires passing a pedigree file using the engine-level
|
* determined on the basis of family structure. Requires passing a pedigree file using the engine-level
|
||||||
* `-ped` argument.
|
* `-ped` argument.
|
||||||
*/
|
*/
|
||||||
@Argument(fullName="invert_mendelianViolation", shortName="inv_mv", doc="Output non-mendelian violation sites only", required=false)
|
@Argument(fullName="invertMendelianViolation", shortName="invMv", doc="Output non-mendelian violation sites only", required=false)
|
||||||
private Boolean invert_mendelianViolations = false;
|
private Boolean invertMendelianViolations = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This argument specifies the genotype quality (GQ) threshold that all members of a trio must have in order
|
* This argument specifies the genotype quality (GQ) threshold that all members of a trio must have in order
|
||||||
|
|
@ -514,6 +540,36 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="minIndelSize", required=false, doc="Minimum size of indels to include")
|
@Argument(fullName="minIndelSize", required=false, doc="Minimum size of indels to include")
|
||||||
private int minIndelSize = 0;
|
private int minIndelSize = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, select sites where at most a maximum number of samples are filtered at the genotype level.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="maxFilteredGenotypes", required=false, doc="Maximum number of samples filtered at the genotype level")
|
||||||
|
private int maxFilteredGenotypes = MAX_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, select sites where at least a minimum number of samples are filtered at the genotype level.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="minFilteredGenotypes", required=false, doc="Minimum number of samples filtered at the genotype level")
|
||||||
|
private int minFilteredGenotypes = MIN_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, select sites where a fraction or less of the samples are filtered at the genotype level.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="maxFractionFilteredGenotypes", required=false, doc="Maximum fraction of samples filtered at the genotype level")
|
||||||
|
private double maxFractionFilteredGenotypes = MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, select sites where a fraction or more of the samples are filtered at the genotype level.
|
||||||
|
*/
|
||||||
|
@Argument(fullName="minFractionFilteredGenotypes", required=false, doc="Maximum fraction of samples filtered at the genotype level")
|
||||||
|
private double minFractionFilteredGenotypes = MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If this argument is provided, set filtered genotypes to no-call (./.).
|
||||||
|
*/
|
||||||
|
@Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call")
|
||||||
|
private boolean setFilteredGenotypesToNocall = false;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", required=false, doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.")
|
@Argument(fullName="ALLOW_NONOVERLAPPING_COMMAND_LINE_SAMPLES", required=false, doc="Allow samples other than those in the VCF to be specified on the command line. These samples will be ignored.")
|
||||||
private boolean allowNonOverlappingCommandLineSamples = false;
|
private boolean allowNonOverlappingCommandLineSamples = false;
|
||||||
|
|
@ -547,6 +603,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
private Set<String> IDsToRemove = null;
|
private Set<String> IDsToRemove = null;
|
||||||
private Map<String, VCFHeader> vcfRods;
|
private Map<String, VCFHeader> vcfRods;
|
||||||
|
|
||||||
|
private final List<Allele> diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||||
*/
|
*/
|
||||||
|
|
@ -619,7 +677,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
selectedTypes.add(t);
|
selectedTypes.add(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove specifed types
|
// remove specified types
|
||||||
for (VariantContext.Type t : typesToExclude)
|
for (VariantContext.Type t : typesToExclude)
|
||||||
selectedTypes.remove(t);
|
selectedTypes.remove(t);
|
||||||
|
|
||||||
|
|
@ -713,16 +771,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
|
|
||||||
for (VariantContext vc : vcs) {
|
for (VariantContext vc : vcs) {
|
||||||
// an option for performance testing only
|
// an option for performance testing only
|
||||||
if ( fullyDecode )
|
if (fullyDecode)
|
||||||
vc = vc.fullyDecode(vcfRods.get(vc.getSource()), getToolkit().lenientVCFProcessing() );
|
vc = vc.fullyDecode(vcfRods.get(vc.getSource()), getToolkit().lenientVCFProcessing());
|
||||||
|
|
||||||
if ( IDsToKeep != null && !IDsToKeep.contains(vc.getID()) )
|
if (IDsToKeep != null && !IDsToKeep.contains(vc.getID()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( IDsToRemove != null && IDsToRemove.contains(vc.getID()) )
|
if (IDsToRemove != null && IDsToRemove.contains(vc.getID()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( mendelianViolations && Utils.invertLogic(mv.countViolations(this.getSampleDB().getFamilies(samples), vc) == 0, invert_mendelianViolations) )
|
if (mendelianViolations && Utils.invertLogic(mv.countViolations(this.getSampleDB().getFamilies(samples), vc) == 0, invertMendelianViolations))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (discordanceOnly) {
|
if (discordanceOnly) {
|
||||||
|
|
@ -745,20 +803,30 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if (!selectedTypes.contains(vc.getType()))
|
if (!selectedTypes.contains(vc.getType()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( containsIndelLargerOrSmallerThan(vc, maxIndelSize, minIndelSize) )
|
if (containsIndelLargerOrSmallerThan(vc, maxIndelSize, minIndelSize))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ( needNumFilteredGenotypes()) {
|
||||||
|
int numFilteredSamples = numFilteredGenotypes(vc);
|
||||||
|
double fractionFilteredGenotypes = samples.isEmpty() ? 0.0 : numFilteredSamples / samples.size();
|
||||||
|
if (numFilteredSamples > maxFilteredGenotypes || numFilteredSamples < minFilteredGenotypes ||
|
||||||
|
fractionFilteredGenotypes > maxFractionFilteredGenotypes || fractionFilteredGenotypes < minFractionFilteredGenotypes)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
|
VariantContext sub = subsetRecord(vc, preserveAlleles, removeUnusedAlternates);
|
||||||
|
|
||||||
|
VariantContext filteredGenotypeToNocall = setFilteredGenotypeToNocall(sub, setFilteredGenotypesToNocall);
|
||||||
|
|
||||||
// Not excluding non-variants or subsetted polymorphic variants AND including filtered loci or subsetted variant is not filtered
|
// Not excluding non-variants or subsetted polymorphic variants AND including filtered loci or subsetted variant is not filtered
|
||||||
if ( (!XLnonVariants || sub.isPolymorphicInSamples()) && (!XLfiltered || !sub.isFiltered()) ) {
|
if ( (!XLnonVariants || filteredGenotypeToNocall.isPolymorphicInSamples()) && (!XLfiltered || !filteredGenotypeToNocall.isFiltered()) ) {
|
||||||
|
|
||||||
// Write the subsetted variant if it matches all of the expressions
|
// Write the subsetted variant if it matches all of the expressions
|
||||||
boolean failedJexlMatch = false;
|
boolean failedJexlMatch = false;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
|
for (VariantContextUtils.JexlVCMatchExp jexl : jexls) {
|
||||||
if ( Utils.invertLogic(!VariantContextUtils.match(sub, jexl), invertSelection) ){
|
if ( Utils.invertLogic(!VariantContextUtils.match(filteredGenotypeToNocall, jexl), invertSelect) ){
|
||||||
failedJexlMatch = true;
|
failedJexlMatch = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -771,7 +839,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
if ( !failedJexlMatch &&
|
if ( !failedJexlMatch &&
|
||||||
!justRead &&
|
!justRead &&
|
||||||
( !selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom ) ) {
|
( !selectRandomFraction || Utils.getRandomGenerator().nextDouble() < fractionRandom ) ) {
|
||||||
vcfWriter.add(sub);
|
vcfWriter.add(filteredGenotypeToNocall);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -800,6 +868,26 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the number of filtered samples
|
||||||
|
*
|
||||||
|
* @param vc the variant rod VariantContext
|
||||||
|
* @return number of filtered samples
|
||||||
|
*/
|
||||||
|
private int numFilteredGenotypes(final VariantContext vc){
|
||||||
|
if (vc == null)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
int numFiltered = 0;
|
||||||
|
// check if we find it in the variant rod
|
||||||
|
GenotypesContext genotypes = vc.getGenotypes(samples);
|
||||||
|
for (final Genotype g : genotypes)
|
||||||
|
if ( g.isFiltered() && !g.getFilters().isEmpty())
|
||||||
|
numFiltered++;
|
||||||
|
|
||||||
|
return numFiltered;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if vc has a variant call for (at least one of) the samples.
|
* Checks if vc has a variant call for (at least one of) the samples.
|
||||||
*
|
*
|
||||||
|
|
@ -807,7 +895,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
* @param compVCs the comparison VariantContext (discordance)
|
* @param compVCs the comparison VariantContext (discordance)
|
||||||
* @return true VariantContexts are discordant, false otherwise
|
* @return true VariantContexts are discordant, false otherwise
|
||||||
*/
|
*/
|
||||||
private boolean isDiscordant (VariantContext vc, Collection<VariantContext> compVCs) {
|
private boolean isDiscordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
|
||||||
if (vc == null)
|
if (vc == null)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
@ -845,7 +933,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
* @param compVCs the comparison VariantContext
|
* @param compVCs the comparison VariantContext
|
||||||
* @return true if VariantContexts are concordant, false otherwise
|
* @return true if VariantContexts are concordant, false otherwise
|
||||||
*/
|
*/
|
||||||
private boolean isConcordant (VariantContext vc, Collection<VariantContext> compVCs) {
|
private boolean isConcordant (final VariantContext vc, final Collection<VariantContext> compVCs) {
|
||||||
if (vc == null || compVCs == null || compVCs.isEmpty())
|
if (vc == null || compVCs == null || compVCs.isEmpty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
@ -876,7 +964,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean sampleHasVariant(Genotype g) {
|
private boolean sampleHasVariant(final Genotype g) {
|
||||||
return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !XLfiltered)));
|
return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !XLfiltered)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -945,8 +1033,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
for ( Genotype genotype : newGC ) {
|
for ( Genotype genotype : newGC ) {
|
||||||
//Set genotype to no call if it falls in the fraction.
|
//Set genotype to no call if it falls in the fraction.
|
||||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||||
final List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
genotypes.add(new GenotypeBuilder(genotype).alleles(diploidNoCallAlleles).noGQ().make());
|
||||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
|
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
genotypes.add(genotype);
|
genotypes.add(genotype);
|
||||||
|
|
@ -966,6 +1053,30 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
return trimmed;
|
return trimmed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If --setFilteredGtToNocall, set filtered genotypes to no-call
|
||||||
|
*
|
||||||
|
* @param vc the VariantContext record to set filtered genotypes to no-call
|
||||||
|
* @param filteredGenotypesToNocall set filtered genotypes to non-call?
|
||||||
|
* @return the VariantContext with no-call genotypes if the sample was filtered
|
||||||
|
*/
|
||||||
|
private VariantContext setFilteredGenotypeToNocall(final VariantContext vc, final boolean filteredGenotypesToNocall) {
|
||||||
|
|
||||||
|
if ( !filteredGenotypesToNocall )
|
||||||
|
return vc;
|
||||||
|
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
|
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
|
||||||
|
|
||||||
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
|
if ( g.isCalled() && g.isFiltered() )
|
||||||
|
genotypes.add(new GenotypeBuilder(g).alleles(diploidNoCallAlleles).make());
|
||||||
|
else
|
||||||
|
genotypes.add(g);
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.genotypes(genotypes).make();
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Add annotations to the new VC
|
* Add annotations to the new VC
|
||||||
*
|
*
|
||||||
|
|
@ -1061,4 +1172,16 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
/**
|
||||||
|
* Need the number of filtered genotypes samples?
|
||||||
|
*
|
||||||
|
* @return true if any of the filtered genotype samples arguments is used (not the default value), false otherwise
|
||||||
|
*/
|
||||||
|
private boolean needNumFilteredGenotypes(){
|
||||||
|
return maxFilteredGenotypes != MAX_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||||
|
minFilteredGenotypes != MIN_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||||
|
maxFractionFilteredGenotypes != MAX_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE ||
|
||||||
|
minFractionFilteredGenotypes != MIN_FRACTION_FILTERED_GENOTYPES_DEFAULT_VALUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue