fix 1430 for genotype filters; refactored filter() method; added unit and integration test; more comprehensive fix must be done first on htsjdk side in JEXLMap (#1456)

This commit is contained in:
Steve Huang 2016-09-06 17:30:18 -04:00 committed by GitHub
parent d582ee6d3c
commit 3c88e6859f
3 changed files with 461 additions and 240 deletions

View File

@ -211,6 +211,24 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
executeTest("testFilteringDPfromFORMAT", spec); executeTest("testFilteringDPfromFORMAT", spec);
} }
@Test
public void testFilteringDPfromFORMATWithMissing() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("4bf46103a71bac92a11eae04b97f9877"));
executeTest("testFilteringDPfromFORMATWithMissing", spec);
}
@Test
public void testFilteringDPfromFORMATAndFailMissing() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --missingValuesInExpressionsShouldEvaluateAsFailing --genotypeFilterExpression 'DP < 10' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormatWithMissing.vcf", 1,
Arrays.asList("baeda696c92adc8745ac4ebbdead6c91"));
executeTest("testFilteringDPfromFORMATAndFailMissing", spec);
}
@Test @Test
public void testInvertGenotypeFilterExpression() { public void testInvertGenotypeFilterExpression() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(

View File

@ -25,6 +25,7 @@
package org.broadinstitute.gatk.tools.walkers.filters; package org.broadinstitute.gatk.tools.walkers.filters;
import com.google.common.annotations.VisibleForTesting;
import htsjdk.tribble.Feature; import htsjdk.tribble.Feature;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.commandline.*; import org.broadinstitute.gatk.utils.commandline.*;
@ -55,7 +56,7 @@ import java.util.*;
* *
* <p> * <p>
* This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered * This tool is designed for hard-filtering variant calls based on certain criteria. Records are hard-filtered
* by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved * by changing the value in the FILTER field to something other than PASS. Filtered records will be preserved
* in the output unless their removal is requested in the command line. </p> * in the output unless their removal is requested in the command line. </p>
* *
* <p>The most common way of specifying filtering criteria is by using JEXL queries. See the * <p>The most common way of specifying filtering criteria is by using JEXL queries. See the
@ -84,7 +85,7 @@ import java.util.*;
* </pre> * </pre>
* *
* <h3>Caveat</h3> * <h3>Caveat</h3>
* <p>when you run VariantFiltration with a command that includes multiple logical parts, each part of the command is applied * <p>when you run {@link VariantFiltration} with a command that includes multiple logical parts, each part of the command is applied
* individually to the original form of the VCF record. Say you ran a VF command that includes three parts: one applies * individually to the original form of the VCF record. Say you ran a VF command that includes three parts: one applies
* some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a * some genotype filters, another applies setFilterGtToNoCall (which changes sample genotypes to ./. whenever a sample has a
* genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might * genotype-level FT annotation), and yet another one filters sites based on whether any samples have a no-call there. You might
@ -98,14 +99,17 @@ import java.util.*;
@Reference(window=@Window(start=-50,stop=50)) @Reference(window=@Window(start=-50,stop=50))
public class VariantFiltration extends RodWalker<Integer, Integer> { public class VariantFiltration extends RodWalker<Integer, Integer> {
// -----------------------------------------------------------------------------------------------
// Arguments
// -----------------------------------------------------------------------------------------------
@ArgumentCollection @ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/** /**
* Any variant which overlaps entries from the provided mask rod will be filtered. If the user wants logic to be reversed, * Any variant which overlaps entries from the provided mask rod will be filtered. If the user wants logic to be reversed,
* i.e. filter variants that do not overlap with provided mask, then argument -filterNotInMask can be used. * i.e. filter variants that do not overlap with provided mask, then argument {@code -filterNotInMask} can be used.
* Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used
* (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). * (e.g. if masking against Hapmap, use {@code -maskName=hapmap} for the normal masking and {@code -maskName=not_hapmap} for the reverse masking).
*/ */
@Input(fullName="mask", shortName="mask", doc="Input ROD mask", required=false) @Input(fullName="mask", shortName="mask", doc="Input ROD mask", required=false)
public RodBinding<Feature> mask; public RodBinding<Feature> mask;
@ -115,41 +119,41 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
/** /**
* VariantFiltration accepts any number of JEXL expressions (so you can have two named filters by using * VariantFiltration accepts any number of JEXL expressions (so you can have two named filters by using
* --filterName One --filterExpression "X < 1" --filterName Two --filterExpression "X > 2"). * {@code --filterName One --filterExpression "X < 1" --filterName Two --filterExpression "X > 2"}).
*/ */
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter", required=false) @Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter", required=false)
protected ArrayList<String> filterExpressions = new ArrayList<String>(); protected ArrayList<String> filterExpressions = new ArrayList<>();
/** /**
* This name is put in the FILTER field for variants that get filtered. Note that there must be a 1-to-1 mapping between filter expressions and filter names. * This name is put in the <pre>FILTER</pre> field for variants that get filtered. Note that there must be a 1-to-1 mapping between filter expressions and filter names.
*/ */
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters", required=false) @Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters", required=false)
protected ArrayList<String> filterNames = new ArrayList<String>(); protected ArrayList<String> filterNames = new ArrayList<>();
/** /**
* Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead. * Similar to the <pre>INFO</pre> field based expressions, but used on the <pre>FORMAT</pre> (genotype) fields instead.
* VariantFiltration will add the sample-level FT tag to the FORMAT field of filtered samples (this does not affect the record's FILTER tag). * {@link VariantFiltration} will add the sample-level <pre>FT</pre> tag to the <pre>FORMAT</pre> field of filtered samples (this does not affect the record's <pre>FILTER</pre> tag).
* One can filter normally based on most fields (e.g. "GQ < 5.0"), but the GT (genotype) field is an exception. We have put in convenience * One can filter normally based on most fields (e.g. {@code "GQ < 5.0"}), but the <pre>GT</pre> (genotype) field is an exception.
* methods so that one can now filter out hets ("isHet == 1"), refs ("isHomRef == 1"), or homs ("isHomVar == 1"). Also available are * We have put in convenience methods so that one can now filter out hets ({@code "isHet == 1"}), refs ({@code "isHomRef == 1"}), or homs ({@code "isHomVar == 1"}).
* expressions isCalled, isNoCall, isMixed, and isAvailable, in accordance with the methods of the Genotype object. * Also available are expressions {@code isCalled}, {@code isNoCall}, {@code isMixed}, and {@code isAvailable}, in accordance with the methods of the {@link Genotype} object.
*/ */
@Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info)", required=false) @Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see documentation guide for more info)", required=false)
protected ArrayList<String> genotypeFilterExpressions = new ArrayList<String>(); protected ArrayList<String> genotypeFilterExpressions = new ArrayList<>();
/** /**
* Similar to the INFO field based expressions, but used on the FORMAT (genotype) fields instead. * Similar to the <pre>INFO</pre> field based expressions, but used on the <pre>FORMAT</pre> (genotype) fields instead.
*/ */
@Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false) @Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
protected ArrayList<String> genotypeFilterNames = new ArrayList<String>(); protected ArrayList<String> genotypeFilterNames = new ArrayList<>();
/** /**
* Works together with the --clusterWindowSize argument. * Works together with the {@code --clusterWindowSize} argument.
*/ */
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster", required=false) @Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster", required=false)
protected Integer clusterSize = 3; protected Integer clusterSize = 3;
/** /**
* Works together with the --clusterSize argument. To disable the clustered SNP filter, set this value to less than 1. * Works together with the {@code --clusterWindowSize} argument. To disable the clustered SNP filter, set this value to less than 1.
*/ */
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs", required=false) @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs", required=false)
protected Integer clusterWindow = 0; protected Integer clusterWindow = 0;
@ -158,45 +162,45 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
protected Integer maskExtension = 0; protected Integer maskExtension = 0;
/** /**
* When using the -mask argument, the maskName will be annotated in the variant record. * When using the {@code -mask} argument, the {@code maskName} will be annotated in the variant record.
* Note that when using the -filterNotInMask argument to reverse the masking logic, * Note that when using the {@code -filterNotInMask} argument to reverse the masking logic,
* it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used * it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used
* (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). * (e.g. if masking against Hapmap, use {@code -maskName=hapmap} for the normal masking and {@code -maskName=not_hapmap} for the reverse masking).
*/ */
@Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false) @Argument(fullName="maskName", shortName="maskName", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call", required=false)
protected String maskName = "Mask"; protected String maskName = "Mask";
/** /**
* By default, if the -mask argument is used, any variant falling in a mask will be filtered. * By default, if the {@code -mask} argument is used, any variant falling in a mask will be filtered.
* If this argument is used, logic is reversed, and variants falling outside a given mask will be filtered. * If this argument is used, logic is reversed, and variants falling outside a given mask will be filtered.
* Use case is, for example, if we have an interval list or BED file with "good" sites. * Use case is, for example, if we have an interval list or BED file with "good" sites.
* Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used * Note that it is up to the user to adapt the name of the mask to make it clear that the reverse logic was used
* (e.g. if masking against Hapmap, use -maskName=hapmap for the normal masking and -maskName=not_hapmap for the reverse masking). * (e.g. if masking against Hapmap, use {@code -maskName=hapmap} for the normal masking and {@code -maskName=not_hapmap} for the reverse masking).
*/ */
@Argument(fullName="filterNotInMask", shortName="filterNotInMask", doc="Filter records NOT in given input mask.", required=false) @Argument(fullName="filterNotInMask", shortName="filterNotInMask", doc="Filter records NOT in given input mask.", required=false)
protected boolean filterRecordsNotInMask = false; protected boolean filterRecordsNotInMask = false;
/** /**
* By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as PASSing. * By default, if JEXL cannot evaluate your expression for a particular record because one of the annotations is not present, the whole expression evaluates as <pre>PASS</pre>ing.
* Use this argument to have it evaluate as failing filters instead for these cases. * Use this argument to have it evaluate as failing filters instead for these cases.
*/ */
@Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false) @Argument(fullName="missingValuesInExpressionsShouldEvaluateAsFailing", doc="When evaluating the JEXL expressions, missing values should be considered failing the expression", required=false)
protected Boolean failMissingValues = false; protected Boolean failMissingValues = false;
/** /**
* Invalidate previous filters applied to the VariantContext, applying only the filters here * Invalidate previous filters applied to the {@link VariantContext}, applying only the filters here.
*/ */
@Argument(fullName="invalidatePreviousFilters",doc="Remove previous filters applied to the VCF",required=false) @Argument(fullName="invalidatePreviousFilters",doc="Remove previous filters applied to the VCF",required=false)
boolean invalidatePrevious = false; boolean invalidatePrevious = false;
/** /**
* Invert the selection criteria for --filterExpression * Invert the selection criteria for {@code --filterExpression}.
*/ */
@Argument(fullName="invertFilterExpression", shortName="invfilter", doc="Invert the selection criteria for --filterExpression", required=false) @Argument(fullName="invertFilterExpression", shortName="invfilter", doc="Invert the selection criteria for --filterExpression", required=false)
protected boolean invertFilterExpression = false; protected boolean invertFilterExpression = false;
/** /**
* Invert the selection criteria for --genotypeFilterExpression * Invert the selection criteria for {@code --genotypeFilterExpression}.
*/ */
@Argument(fullName="invertGenotypeFilterExpression", shortName="invG_filter", doc="Invert the selection criteria for --genotypeFilterExpression", required=false) @Argument(fullName="invertGenotypeFilterExpression", shortName="invG_filter", doc="Invert the selection criteria for --genotypeFilterExpression", required=false)
protected boolean invertGenotypeFilterExpression = false; protected boolean invertGenotypeFilterExpression = false;
@ -207,81 +211,42 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
@Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call") @Argument(fullName="setFilteredGtToNocall", required=false, doc="Set filtered genotypes to no-call")
private boolean setFilteredGenotypesToNocall = false; private boolean setFilteredGenotypesToNocall = false;
// -----------------------------------------------------------------------------------------------
// Fields
// -----------------------------------------------------------------------------------------------
// JEXL expressions for the filters // JEXL expressions for the filters
List<VariantContextUtils.JexlVCMatchExp> filterExps; List<VariantContextUtils.JexlVCMatchExp> filterExps;
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps; List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
private ClusteredSnps clusteredSNPs = null; private ClusteredSnps clusteredSNPs = null;
private GenomeLoc previousMaskPosition = null; private GenomeLoc previousMaskPosition = null;
private FiltrationContextWindow variantContextWindow; // the structures necessary to initialize and maintain a windowed context
// the structures necessary to initialize and maintain a windowed context
private FiltrationContextWindow variantContextWindow;
private static final int WINDOW_SIZE = 10; // 10 variants on either end of the current one private static final int WINDOW_SIZE = 10; // 10 variants on either end of the current one
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>(); private ArrayList<FiltrationContext> windowInitializer = new ArrayList<>();
private final List<Allele> diploidNoCallAlleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private static final List<Allele> DIPLOID_NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/** // -----------------------------------------------------------------------------------------------
* Prepend inverse phrase to description if --invertFilterExpression // public methods from base classes
* // -----------------------------------------------------------------------------------------------
* @param description the description
* @return the description with inverse prepended if --invert_filter_expression
*/
private String possiblyInvertFilterExpression( String description ){
if ( invertFilterExpression )
description = "Inverse of: " + description;
return description;
}
private void initializeVcfWriter() {
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
// need AC, AN and AF since output if set filtered genotypes to no-call
if ( setFilteredGenotypesToNocall ) {
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
}
if ( clusterWindow > 0 )
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
if ( !genotypeFilterExps.isEmpty() )
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
try {
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps )
hInfo.add(new VCFFilterHeaderLine(exp.name, possiblyInvertFilterExpression(exp.exp.toString())));
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps )
hInfo.add(new VCFFilterHeaderLine(exp.name, possiblyInvertFilterExpression(exp.exp.toString())));
if ( mask.isBound() ) {
if (filterRecordsNotInMask)
hInfo.add(new VCFFilterHeaderLine(maskName, "Doesn't overlap a user-input mask"));
else hInfo.add(new VCFFilterHeaderLine(maskName, "Overlaps a user-input mask"));
}
} catch (IllegalArgumentException e) {
throw new UserException.BadInput(e.getMessage());
}
writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
}
public void initialize() { public void initialize() {
if ( clusterWindow > 0 )
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
if ( maskExtension < 0 ) if ( maskExtension < 0 ) {
throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed"); throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed");
}
if (filterRecordsNotInMask && !mask.isBound()) {
throw new UserException.BadArgumentValue("filterNotInMask", "argument not allowed if mask argument is not provided");
}
if ( clusterWindow > 0 ) {
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(), clusterSize, clusterWindow);
}
if (filterRecordsNotInMask && !mask.isBound())
throw new UserException.BadArgumentValue("filterNotInMask","argument not allowed if mask argument is not provided");
filterExps = VariantContextUtils.initializeMatchExps(filterNames, filterExpressions); filterExps = VariantContextUtils.initializeMatchExps(filterNames, filterExpressions);
genotypeFilterExps = VariantContextUtils.initializeMatchExps(genotypeFilterNames, genotypeFilterExpressions); genotypeFilterExps = VariantContextUtils.initializeMatchExps(genotypeFilterNames, genotypeFilterExpressions);
@ -290,44 +255,45 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
initializeVcfWriter(); initializeVcfWriter();
} }
public Integer reduceInit() { return 0; }
/** /**
* *
* @param tracker the meta-data tracker * @param tracker the meta-data tracker
* @param ref the reference base * @param ref the reference base
* @param context the context for the given locus * @param context the context for the given locus
* @return 1 if the locus was successfully processed, 0 if otherwise * @return 1 if the locus was successfully processed, 0 otherwise
*/ */
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) if ( tracker == null ) {
return 0; return 0;
}
Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation()); final Collection<VariantContext> VCs = tracker.getValues(variantCollection.variants, context.getLocation());
// is there a SNP mask present? // is there a SNP mask present?
boolean hasMask = (tracker.hasValues(mask) && !filterRecordsNotInMask) || (filterRecordsNotInMask && !tracker.hasValues(mask)); final boolean hasMask = (tracker.hasValues(mask) && !filterRecordsNotInMask) || (filterRecordsNotInMask && !tracker.hasValues(mask));
if ( hasMask ) if ( hasMask ) {
previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask
}
for ( VariantContext vc : VCs ) { for ( VariantContext vc : VCs ) {
if ( invalidatePrevious ) { if ( invalidatePrevious ) {
vc = (new VariantContextBuilder(vc)).filters(new HashSet<String>()).make(); vc = (new VariantContextBuilder(vc)).filters(new HashSet<>()).make();
} }
// filter based on previous mask position // filter based on previous mask position
vc = addMaskIfCoversVariant(vc, previousMaskPosition, maskName, maskExtension, false); vc = addMaskIfCoversVariant(vc, previousMaskPosition, maskName, maskExtension, false);
FiltrationContext varContext = new FiltrationContext(ref, vc); final FiltrationContext varContext = new FiltrationContext(ref, vc);
// if we're still initializing the context, do so // if we're still initializing the context, do so
if ( windowInitializer != null ) { if ( windowInitializer != null ) {
// if this is a mask position, filter previous records // if this is a mask position, filter previous records
if ( hasMask ) { if ( hasMask ) {
for ( FiltrationContext prevVC : windowInitializer ) for ( final FiltrationContext prevVC : windowInitializer ) {
prevVC.setVariantContext(addMaskIfCoversVariant(prevVC.getVariantContext(), ref.getLocus(), maskName, maskExtension, true)); prevVC.setVariantContext(addMaskIfCoversVariant(prevVC.getVariantContext(), ref.getLocus(), maskName, maskExtension, true));
}
} }
windowInitializer.add(varContext); windowInitializer.add(varContext);
@ -339,9 +305,10 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
// if this is a mask position, filter previous records // if this is a mask position, filter previous records
if ( hasMask ) { if ( hasMask ) {
for ( FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) { for ( final FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) {
if ( prevVC != null ) if ( prevVC != null ) {
prevVC.setVariantContext(addMaskIfCoversVariant(prevVC.getVariantContext(), ref.getLocus(), maskName, maskExtension, true)); prevVC.setVariantContext(addMaskIfCoversVariant(prevVC.getVariantContext(), ref.getLocus(), maskName, maskExtension, true));
}
} }
} }
@ -353,6 +320,244 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
return 1; return 1;
} }
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
public Integer reduceInit() { return 0; }
/**
* Tell the user the number of loci processed and close out the new variants file.
*
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
// move the window over so that we can filter the last few variants
if ( windowInitializer != null ) {
while ( windowInitializer.size() < WINDOW_SIZE ) {
windowInitializer.add(null);
}
variantContextWindow = new FiltrationContextWindow(windowInitializer);
}
for (int i=0; i < WINDOW_SIZE; i++) {
variantContextWindow.moveWindow(null);
filter();
}
}
// -----------------------------------------------------------------------------------------------
// main filtering steps
// -----------------------------------------------------------------------------------------------
/**
* Organizing filters: genotype filters and normal filters.
*/
private void filter() {
// get the current context
final FiltrationContext context = variantContextWindow.getContext();
if ( context == null ) {
return;
}
final VariantContext vc = context.getVariantContext();
// make new Genotypes based on genotype filters
final VariantContextBuilder builder = ( genotypeFilterExps.isEmpty() && !setFilteredGenotypesToNocall ) ? new VariantContextBuilder(vc)
: applyGenotypeFilters(vc, genotypeFilterExps, invertGenotypeFilterExpression, failMissingValues, setFilteredGenotypesToNocall);
// extract filters already in VC and append new filters
final Set<String> filters = buildVCfilters(vc, filterExps, invertFilterExpression, failMissingValues);
// test for clustered SNPs if requested
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) {
filters.add(CLUSTERED_SNP_FILTER_NAME);
}
// make a new variant context based on all filters, and write
writer.add(filters.isEmpty() ? builder.passFilters().make() : builder.filters(filters).make());
}
/**
* Given a VC builder and a vc (which was used to construct the builder), update the properties that the builder
* will use to construct a new VC, based on some of the attributes/annotations of the old VC.
* @param vc variant context holding genotypes to be filtered
* @param genotypeFilterExpressions genotype filter expressions
* @param invertGenotypeFilterExpression should invert the genotype filter expression or not
* @param failIfMissingValues if sample misses the corresponding annotation(s) the filter(s) work by, should we fail them or not
* @param setFilteredGenotypesToNocall if sample is filtered should we set genotype to non-call or not
*/
@VisibleForTesting
static VariantContextBuilder applyGenotypeFilters(final VariantContext vc,
final List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExpressions,
final boolean invertGenotypeFilterExpression,
final boolean failIfMissingValues,
final boolean setFilteredGenotypesToNocall) {
final VariantContextBuilder builder = new VariantContextBuilder(vc);
final GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
// recompute AC, AN and AF if filtered genotypes are set to no-call
// occurrences of alternate alleles over all genotypes
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<>(vc.getNAlleles()-1);
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
calledAltAlleles.put(altAllele, 0);
}
int calledAlleles = 0;
boolean haveFilteredNoCallAlleles = false;
// for each genotype, check filters then create a new object
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() ) {
final List<String> filters = new ArrayList<>();
if ( g.isFiltered() ) filters.add(g.getFilters());
// Add if expression filters the variant context
for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExpressions ) {
try {
if (Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression)) {
filters.add(exp.name);
}
} catch (final IllegalArgumentException e) {
// logic: right now (2016/08/18) if a filter is applied based on specific annotation and some sample contains missing value for such annotation,
// lower level code will throw IllegalArgumentException, therefore we specifically catch this type of exception
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
}
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) {
haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(DIPLOID_NO_CALL_ALLELES).make());
}
else {
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
}
} else {
genotypes.add(g);
}
}
builder.genotypes(genotypes);
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
if ( haveFilteredNoCallAlleles ) {
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
}
return builder;
}
/**
* Extract filters already present in the {@code vc}, and append user provided expressions.
* For user provided genotype filter expressions, see {@link #applyGenotypeFilters(VariantContext, List, boolean, boolean, boolean)}
* @param vc VC from which filters to be extracted
* @param vcFilterExpressions more filter expressions provided by user
* @param invertVCfilterExpression should we invert the logic in expressions provided in {@code vcFilterExpressions}
* @param failIfMissingValues should we mark the VC as failing if it misses the value the filters work on
*
* @return filters already in the provided vc and user-provided filters
*/
@VisibleForTesting
static Set<String> buildVCfilters(final VariantContext vc,
final List<VariantContextUtils.JexlVCMatchExp> vcFilterExpressions,
final boolean invertVCfilterExpression,
final boolean failIfMissingValues) {
final Set<String> filters = new LinkedHashSet<>(vc.getFilters());
for ( final VariantContextUtils.JexlVCMatchExp exp : vcFilterExpressions ) {
try {
if ( Utils.invertLogic(VariantContextUtils.match(vc, exp), invertVCfilterExpression) ) {
filters.add(exp.name);
}
} catch (final Exception e) {
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failIfMissingValues ) {
filters.add(exp.name);
}
}
}
return filters;
}
// -----------------------------------------------------------------------------------------------
// some other complications besides main stuff
// -----------------------------------------------------------------------------------------------
private void initializeVcfWriter() {
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
// need AC, AN and AF since output if set filtered genotypes to no-call
if ( setFilteredGenotypesToNocall ) {
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
hInfo.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
}
if ( clusterWindow > 0 ) {
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
}
if ( !genotypeFilterExps.isEmpty() ) {
hInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_FILTER_KEY));
}
try {
for ( final VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
hInfo.add(new VCFFilterHeaderLine(exp.name, possiblyInvertFilterExpression(exp.exp.toString())));
}
for ( final VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
hInfo.add(new VCFFilterHeaderLine(exp.name, possiblyInvertFilterExpression(exp.exp.toString())));
}
if ( mask.isBound() ) {
hInfo.add(new VCFFilterHeaderLine(maskName, filterRecordsNotInMask ? "Doesn't overlap a user-input mask" : "Overlaps a user-input mask"));
}
} catch (final IllegalArgumentException e) {
throw new UserException.BadInput(e.getMessage());
}
writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
}
/**
* Prepend inverse phrase to description if {@code --invertFilterExpression}.
*
* @param description the description
* @return the description with inverse prepended if {@code --invert_filter_expression}.
*/
private String possiblyInvertFilterExpression( final String description ){
return invertFilterExpression ? "Inverse of: " + description : description;
}
/**
* Add mask to variant context filters if it covers its location.
* @param vc VariantContext
* @param genomeLoc genome location
* @param maskName name of the mask
* @param maskExtension bases beyond the mask
* @param locStart if true, start at genome location and end at VariantContext. If false, do the opposite.
* @return VariantContext with the mask added if the VariantContext is within the extended mask area
*/
private VariantContext addMaskIfCoversVariant(VariantContext vc, final GenomeLoc genomeLoc, final String maskName, final int maskExtension, final boolean locStart) {
if (doesMaskCoverVariant(vc, genomeLoc, maskName, maskExtension, locStart) ) {
final Set<String> filters = new LinkedHashSet<>(vc.getFilters());
filters.add(maskName);
vc = new VariantContextBuilder(vc).filters(filters).make();
}
return vc;
}
/** /**
* Helper function to check if a mask covers the variant location. * Helper function to check if a mask covers the variant location.
* *
@ -364,141 +569,14 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
* @return true if the genome location is within the extended mask area, false otherwise * @return true if the genome location is within the extended mask area, false otherwise
*/ */
protected static boolean doesMaskCoverVariant(VariantContext vc, GenomeLoc genomeLoc, String maskName, int maskExtension, boolean vcBeforeLoc) { protected static boolean doesMaskCoverVariant(VariantContext vc, GenomeLoc genomeLoc, String maskName, int maskExtension, boolean vcBeforeLoc) {
boolean logic = genomeLoc != null && // have a location final boolean needToCheckOveralpping = genomeLoc != null && // have a location
genomeLoc.getContig().equals(vc.getChr()) && // it's on the same contig genomeLoc.getContig().equals(vc.getChr()) && // it's on the same contig
(vc.getFilters() == null || !vc.getFilters().contains(maskName)); // the filter hasn't already been applied (vc.getFilters() == null || !vc.getFilters().contains(maskName)); // the filter hasn't already been applied
if ( logic ) { if ( needToCheckOveralpping ) {
if (vcBeforeLoc) return vcBeforeLoc ? (genomeLoc.getStart() - vc.getEnd() <= maskExtension) // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance)
return genomeLoc.getStart() - vc.getEnd() <= maskExtension; // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance) : (vc.getStart() - genomeLoc.getStop() <= maskExtension);
else
return vc.getStart() - genomeLoc.getStop() <= maskExtension;
} else { } else {
return false; return false;
} }
} }
/**
* Add mask to variant context filters if it covers the it's location
*
* @param vc VariantContext
* @param genomeLoc genome location
* @param maskName name of the mask
* @param maskExtension bases beyond the mask
* @param locStart if true, start at genome location and end at VariantContext. If false, do the opposite.
* @return VariantContext with the mask added if the VariantContext is within the extended mask area
*/
private VariantContext addMaskIfCoversVariant(VariantContext vc, GenomeLoc genomeLoc, String maskName, int maskExtension, boolean locStart) {
if (doesMaskCoverVariant(vc, genomeLoc, maskName, maskExtension, locStart) ) {
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
filters.add(maskName);
vc = new VariantContextBuilder(vc).filters(filters).make();
}
return vc;
}
private void filter() {
// get the current context
FiltrationContext context = variantContextWindow.getContext();
if ( context == null )
return;
final VariantContext vc = context.getVariantContext();
final VariantContextBuilder builder = new VariantContextBuilder(vc);
// make new Genotypes based on filters
if ( !genotypeFilterExps.isEmpty() || setFilteredGenotypesToNocall ) {
GenotypesContext genotypes = GenotypesContext.create(vc.getGenotypes().size());
//
// recompute AC, AN and AF if filtered genotypes are set to no-call
//
// occurrences of alternate alleles over all genotypes
final Map<Allele, Integer> calledAltAlleles = new LinkedHashMap<Allele, Integer>(vc.getNAlleles()-1);
for ( final Allele altAllele : vc.getAlternateAlleles() ) {
calledAltAlleles.put(altAllele, 0);
}
int calledAlleles = 0;
boolean haveFilteredNoCallAlleles = false;
// for each genotype, check filters then create a new object
for ( final Genotype g : vc.getGenotypes() ) {
if ( g.isCalled() ) {
final List<String> filters = new ArrayList<String>();
if ( g.isFiltered() ) filters.add(g.getFilters());
// Add if expression filters the variant context
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
if ( Utils.invertLogic(VariantContextUtils.match(vc, g, exp), invertGenotypeFilterExpression) )
filters.add(exp.name);
}
// if sample is filtered and --setFilteredGtToNocall, set genotype to non-call
if ( !filters.isEmpty() && setFilteredGenotypesToNocall ) {
haveFilteredNoCallAlleles = true;
genotypes.add(new GenotypeBuilder(g).filters(filters).alleles(diploidNoCallAlleles).make());
}
else {
genotypes.add(new GenotypeBuilder(g).filters(filters).make());
calledAlleles = GATKVariantContextUtils.incrementChromosomeCountsInfo(calledAltAlleles, calledAlleles, g);
}
} else {
genotypes.add(g);
}
}
builder.genotypes(genotypes);
// if filtered genotypes are set to no-call, output recomputed AC, AN, AF
if ( haveFilteredNoCallAlleles )
GATKVariantContextUtils.updateChromosomeCountsInfo(calledAltAlleles, calledAlleles, builder);
}
// make a new variant context based on filters
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
// test for clustered SNPs if requested
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
filters.add(CLUSTERED_SNP_FILTER_NAME);
for ( VariantContextUtils.JexlVCMatchExp exp : filterExps ) {
try {
if ( Utils.invertLogic(VariantContextUtils.match(vc, exp), invertFilterExpression) )
filters.add(exp.name);
} catch (Exception e) {
// do nothing unless specifically asked to; it just means that the expression isn't defined for this context
if ( failMissingValues )
filters.add(exp.name);
}
}
if ( filters.isEmpty() )
builder.passFilters();
else
builder.filters(filters);
writer.add(builder.make());
}
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
/**
* Tell the user the number of loci processed and close out the new variants file.
*
* @param result the number of loci seen.
*/
public void onTraversalDone(Integer result) {
// move the window over so that we can filter the last few variants
if ( windowInitializer != null ) {
while ( windowInitializer.size() < WINDOW_SIZE )
windowInitializer.add(null);
variantContextWindow = new FiltrationContextWindow(windowInitializer);
}
for (int i=0; i < WINDOW_SIZE; i++) {
variantContextWindow.moveWindow(null);
filter();
}
}
} }

View File

@ -26,21 +26,24 @@
package org.broadinstitute.gatk.tools.walkers.filters; package org.broadinstitute.gatk.tools.walkers.filters;
import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.utils.BaseTest; import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser; import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils; import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.testng.Assert; import java.util.Set;
import org.testng.annotations.*; import java.util.stream.Collectors;
public class VariantFiltrationUnitTest extends BaseTest { public class VariantFiltrationUnitTest extends BaseTest {
@ -58,7 +61,7 @@ public class VariantFiltrationUnitTest extends BaseTest {
} }
@DataProvider(name = "VariantMaskData") @DataProvider(name = "VariantMaskData")
public Object[][] DoesMaskCoverVariantTestData() { public Object[][] doesMaskCoverVariantTestData() {
final String maskName = "testMask"; final String maskName = "testMask";
@ -85,8 +88,8 @@ public class VariantFiltrationUnitTest extends BaseTest {
* @param vcBeforeLoc if true, variant context is before the genome location; if false, the converse is true. * @param vcBeforeLoc if true, variant context is before the genome location; if false, the converse is true.
* @param expectedValue return the expected return value from doesMaskCoverVariant() * @param expectedValue return the expected return value from doesMaskCoverVariant()
*/ */
@Test(dataProvider = "VariantMaskData") @Test(dataProvider = "doesMaskCoverVariantTestData")
public void TestDoesMaskCoverVariant(final String contig, final int start, final int stop, final String maskName, final int maskExtension, public void testDoesMaskCoverVariant(final String contig, final int start, final int stop, final String maskName, final int maskExtension,
final boolean vcBeforeLoc, final boolean expectedValue) { final boolean vcBeforeLoc, final boolean expectedValue) {
// Build VariantContext // Build VariantContext
@ -104,4 +107,126 @@ public class VariantFiltrationUnitTest extends BaseTest {
boolean coversVariant = VariantFiltration.doesMaskCoverVariant(vc, genomeLoc, maskName, maskExtension, vcBeforeLoc); boolean coversVariant = VariantFiltration.doesMaskCoverVariant(vc, genomeLoc, maskName, maskExtension, vcBeforeLoc);
Assert.assertEquals(coversVariant, expectedValue); Assert.assertEquals(coversVariant, expectedValue);
} }
@Test
public void testApplyGenotypeFilters(){
final VariantContext vc = buildDataForFilters().make();
final String filterName = "LowDP";
final String filterExpr = "DP < 10";
final List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr));
final VariantContextBuilder anotherVCBuilder = VariantFiltration.applyGenotypeFilters(vc, genotypeFilterExps, false, false, false);
final VariantContext anotherVC = anotherVCBuilder.filters().make();
Assert.assertEquals(anotherVC.getGenotype("one").isFiltered(), true);
Assert.assertTrue(anotherVC.getGenotype("one").getFilters().equals(filterName));
Assert.assertEquals(anotherVC.getGenotype("two").isFiltered(), false);
Assert.assertEquals(anotherVC.getGenotype("three").isFiltered(), false);
Assert.assertEquals(anotherVC.getGenotype("four").isFiltered(), false);
Assert.assertEquals(anotherVC.getGenotype("five").isFiltered(), false);
Assert.assertEquals(anotherVC.getGenotype("six").isFiltered(), false);
final VariantContextBuilder yetAnotherVCBuilder = VariantFiltration.applyGenotypeFilters(anotherVC, genotypeFilterExps, false, true, false);
final VariantContext yetAnotherVC = yetAnotherVCBuilder.filters().make();
Assert.assertEquals(yetAnotherVC.getGenotype("six").isFiltered(), true);
Assert.assertTrue(yetAnotherVC.getGenotype("six").getFilters().equals(filterName));
}
@Test
public void testApplyVCFilters(){
final VariantContext vcNoFilters = buildDataForFilters().make(); // assumes this vc doesn't hold any filters yet
String filterName = "LowDP";
String filterExpr = "DP < 23";
List<VariantContextUtils.JexlVCMatchExp> vcFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr));
final Set<String> filters = VariantFiltration.buildVCfilters(vcNoFilters, vcFilterExps, false, false);
Assert.assertFalse(vcNoFilters.isFiltered());
Assert.assertEquals(filters.size(), 1);
Assert.assertTrue(filters.contains(filterName));
filterName = "ID";
filterExpr = "ID = rs123";
vcFilterExps = VariantContextUtils.initializeMatchExps(Arrays.asList(filterName), Arrays.asList(filterExpr));
Set<String> filterWhenFailMissing = VariantFiltration.buildVCfilters(vcNoFilters, vcFilterExps, false, true);
// Assert.assertEquals(filterWhenFailMissing.size(), 1);
// Assert.assertTrue(filterWhenFailMissing.contains(filterName));
filterWhenFailMissing = VariantFiltration.buildVCfilters(vcNoFilters, vcFilterExps, false, false);
Assert.assertTrue(filterWhenFailMissing.isEmpty());
}
private static VariantContextBuilder buildDataForFilters() {
/**
* Uses (part of) the following (semi fake) data for testing (data was modified from real data so expect some minor inconsistencies in annotations)
* 1 1234567 . T C 152.03 .
* AC=6;AF=1.00;AN=6;DP=22;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;SOR=0.693;set=variant3-variant4-variant6
* GT:AD:DP:GQ:PGT:PID:PL:RGQ
* 1/1:0,2:9:6:1|1:15870493_CT_C:90,6,0
* 1/1:0,4:10:12:1|1:15870493_CT_C:180,12,0
* ./.:0:3:.:.:.:.:0
* ./.:0:0:.:.:.:.:0
* ./.:0:0:.:.:.:.:0
* 1/1:0,0:.:6:1|1:15870493_CT_C:90,6,0
*/
final Allele refT = Allele.create("T", true);
final Allele altC = Allele.create("C", false);
final Allele nocall = Allele.NO_CALL;
final VariantContextBuilder vcBuilder = new VariantContextBuilder("", "1", 1234567, 1234567, Arrays.asList(refT, altC));
vcBuilder.noID();
vcBuilder.attribute("AC", 6);
vcBuilder.attribute("AF", 1.00);
vcBuilder.attribute("AN", 6);
vcBuilder.attribute("DP", 22);
vcBuilder.attribute("ExcessHet", 3.0103);
vcBuilder.attribute("FS", 0.000);
vcBuilder.attribute("MLEAC", 2);
vcBuilder.attribute("MLEAF", 1.00);
vcBuilder.attribute("SOR", 0.693);
GenotypeBuilder gtBuilder = new GenotypeBuilder("one", Arrays.asList(altC,altC));
final Genotype firstSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(9) // edge case not passing "DP < 10"
.make();
gtBuilder = new GenotypeBuilder("two", Arrays.asList(altC,altC));
final Genotype secondSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(10) // edge case passing "DP < 10"
.make();
gtBuilder = new GenotypeBuilder("three", Arrays.asList(nocall,nocall));
final Genotype thirdSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(3)
.make();
gtBuilder = new GenotypeBuilder("four", Arrays.asList(nocall,nocall));
final Genotype fourthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0)
.make();
gtBuilder = new GenotypeBuilder("five", Arrays.asList(nocall,nocall));
final Genotype fifthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.NO_CALL)
.DP(0)
.make();
gtBuilder = new GenotypeBuilder("six", Arrays.asList(altC,altC));
final Genotype sixthSample = gtBuilder.attribute(VCFConstants.GENOTYPE_KEY, GenotypeType.HOM_VAR)
.DP(-1)
.make();
vcBuilder.genotypes(firstSample, secondSample, thirdSample, fourthSample, fifthSample, sixthSample);
return vcBuilder;
}
} }