As promised, VariantFiltration can now mask out sites within a user-specified window around the provided mask rod. By default the window is 0, but you can now use the --maskExtension argument to increase that value. Added integration tests to cover this new functionality.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6060 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ea47ccf032
commit
d323ef0461
|
|
@ -26,25 +26,22 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
|
||||
public class FiltrationContext {
|
||||
|
||||
private RefMetaDataTracker tracker;
|
||||
private ReferenceContext ref;
|
||||
private VariantContext vc;
|
||||
|
||||
public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) {
|
||||
this.tracker = tracker;
|
||||
public FiltrationContext(ReferenceContext ref, VariantContext vc) {
|
||||
this.ref = ref;
|
||||
this.vc = vc;
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() { return tracker; }
|
||||
|
||||
public ReferenceContext getReferenceContext() { return ref; }
|
||||
|
||||
public VariantContext getVariantContext() { return vc; }
|
||||
|
||||
public void setVariantContext(VariantContext newVC) { vc = newVC; }
|
||||
}
|
||||
|
|
@ -36,7 +36,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -67,6 +69,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
|
||||
protected Integer clusterWindow = 0;
|
||||
|
||||
@Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
|
||||
protected Integer MASK_EXTEND = 0;
|
||||
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
|
||||
protected String MASK_NAME = "Mask";
|
||||
|
||||
|
|
@ -80,6 +84,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
|
||||
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
||||
private ClusteredSnps clusteredSNPs = null;
|
||||
private GenomeLoc previousMaskPosition = null;
|
||||
|
||||
// the structures necessary to initialize and maintain a windowed context
|
||||
private FiltrationContextWindow variantContextWindow;
|
||||
|
|
@ -121,6 +126,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( clusterWindow > 0 )
|
||||
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
|
||||
|
||||
if ( MASK_EXTEND < 0 )
|
||||
throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed");
|
||||
|
||||
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
||||
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
||||
|
||||
|
|
@ -143,20 +151,50 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
|
||||
if ( VCs.size() == 0 )
|
||||
return 0;
|
||||
|
||||
// is there a SNP mask present?
|
||||
boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0;
|
||||
if ( hasMask )
|
||||
previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask
|
||||
|
||||
for ( VariantContext vc : VCs ) {
|
||||
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
|
||||
|
||||
// filter based on previous mask position
|
||||
if ( previousMaskPosition != null && // we saw a previous mask site
|
||||
previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig
|
||||
vc.getStart() - previousMaskPosition.getStop() <= MASK_EXTEND && // it's within the mask area (multi-base masks that overlap this site will always give a negative distance)
|
||||
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
|
||||
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||
filters.add(MASK_NAME);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
|
||||
FiltrationContext varContext = new FiltrationContext(ref, vc);
|
||||
|
||||
// if we're still initializing the context, do so
|
||||
if ( windowInitializer != null ) {
|
||||
|
||||
// if this is a mask position, filter previous records
|
||||
if ( hasMask ) {
|
||||
for ( FiltrationContext prevVC : windowInitializer )
|
||||
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
|
||||
}
|
||||
|
||||
windowInitializer.add(varContext);
|
||||
if ( windowInitializer.size() == windowSize ) {
|
||||
variantContextWindow = new FiltrationContextWindow(windowInitializer);
|
||||
windowInitializer = null;
|
||||
}
|
||||
} else {
|
||||
|
||||
// if this is a mask position, filter previous records
|
||||
if ( hasMask ) {
|
||||
for ( FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) {
|
||||
if ( prevVC != null )
|
||||
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
|
||||
}
|
||||
}
|
||||
|
||||
variantContextWindow.moveWindow(varContext);
|
||||
filter();
|
||||
}
|
||||
|
|
@ -165,6 +203,18 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
private VariantContext checkMaskForPreviousLocation(VariantContext vc, GenomeLoc maskLoc) {
|
||||
if ( maskLoc.getContig().equals(vc.getChr()) && // it's on the same contig
|
||||
maskLoc.getStart() - vc.getEnd() <= MASK_EXTEND && // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance)
|
||||
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
|
||||
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||
filters.add(MASK_NAME);
|
||||
vc = VariantContext.modifyFilters(vc, filters);
|
||||
}
|
||||
|
||||
return vc;
|
||||
}
|
||||
|
||||
private void filter() {
|
||||
// get the current context
|
||||
FiltrationContext context = variantContextWindow.getContext();
|
||||
|
|
@ -202,11 +252,6 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
// make a new variant context based on filters
|
||||
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||
|
||||
// test for SNP mask, if present
|
||||
List<Object> mask = context.getTracker().getReferenceMetaData("mask");
|
||||
if ( mask.size() > 0 )
|
||||
filters.add(MASK_NAME);
|
||||
|
||||
// test for clustered SNPs if requested
|
||||
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
||||
filters.add(CLUSTERED_SNP_FILTER_NAME);
|
||||
|
|
|
|||
|
|
@ -29,11 +29,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testMask() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
public void testMasks() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
|
||||
executeTest("test mask", spec);
|
||||
executeTest("test mask all", spec1);
|
||||
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("b64baabe905a5d197cc1ab594147d3d5"));
|
||||
executeTest("test mask some", spec2);
|
||||
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("0eff92fe72024d535c44b98e1e9e1993"));
|
||||
executeTest("test mask extend", spec3);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -61,19 +71,16 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
public void testGenotypeFilters() {
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
|
||||
executeTest("test genotype filter #1", spec);
|
||||
}
|
||||
executeTest("test genotype filter #1", spec1);
|
||||
|
||||
@Test
|
||||
public void testGenotypeFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
|
||||
executeTest("test genotype filter #2", spec);
|
||||
executeTest("test genotype filter #2", spec2);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
Loading…
Reference in New Issue