diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index 68be54097..4ef007e05 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -26,25 +26,22 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; public class FiltrationContext { - private RefMetaDataTracker tracker; private ReferenceContext ref; private VariantContext vc; - public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) { - this.tracker = tracker; + public FiltrationContext(ReferenceContext ref, VariantContext vc) { this.ref = ref; this.vc = vc; } - public RefMetaDataTracker getTracker() { return tracker; } - public ReferenceContext getReferenceContext() { return ref; } public VariantContext getVariantContext() { return vc; } + + public void setVariantContext(VariantContext newVC) { vc = newVC; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 3c4e07ef9..61536e758 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -36,7 +36,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.util.*; @@ -67,6 +69,8 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false) protected Integer clusterWindow = 0; + @Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false) + protected Integer MASK_EXTEND = 0; @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; @@ -80,6 +84,7 @@ public class VariantFiltrationWalker extends RodWalker { public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; private ClusteredSnps clusteredSNPs = null; + private GenomeLoc previousMaskPosition = null; // the structures necessary to initialize and maintain a windowed context private FiltrationContextWindow variantContextWindow; @@ -121,6 +126,9 @@ public class VariantFiltrationWalker extends RodWalker { if ( clusterWindow > 0 ) clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow); + if ( MASK_EXTEND < 0 ) + throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed"); + filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS); genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); @@ -143,20 +151,50 @@ public class VariantFiltrationWalker extends RodWalker { return 0; Collection VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false); - if ( VCs.size() == 0 ) - return 0; + + // is there a SNP mask present? + boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0; + if ( hasMask ) + previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask for ( VariantContext vc : VCs ) { - FiltrationContext varContext = new FiltrationContext(tracker, ref, vc); + + // filter based on previous mask position + if ( previousMaskPosition != null && // we saw a previous mask site + previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig + vc.getStart() - previousMaskPosition.getStop() <= MASK_EXTEND && // it's within the mask area (multi-base masks that overlap this site will always give a negative distance) + (vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied + Set filters = new LinkedHashSet(vc.getFilters()); + filters.add(MASK_NAME); + vc = VariantContext.modifyFilters(vc, filters); + } + + FiltrationContext varContext = new FiltrationContext(ref, vc); // if we're still initializing the context, do so if ( windowInitializer != null ) { + + // if this is a mask position, filter previous records + if ( hasMask ) { + for ( FiltrationContext prevVC : windowInitializer ) + prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus())); + } + windowInitializer.add(varContext); if ( windowInitializer.size() == windowSize ) { variantContextWindow = new FiltrationContextWindow(windowInitializer); windowInitializer = null; } } else { + + // if this is a mask position, filter previous records + if ( hasMask ) { + for ( FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) { + if ( prevVC != null ) + prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus())); + } + } + variantContextWindow.moveWindow(varContext); filter(); } @@ -165,6 +203,18 @@ public class VariantFiltrationWalker extends RodWalker { return 1; } + private VariantContext checkMaskForPreviousLocation(VariantContext vc, GenomeLoc maskLoc) { + if ( maskLoc.getContig().equals(vc.getChr()) && // it's on the same contig + maskLoc.getStart() - vc.getEnd() <= MASK_EXTEND && // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance) + (vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied + Set filters = new LinkedHashSet(vc.getFilters()); + filters.add(MASK_NAME); + vc = VariantContext.modifyFilters(vc, filters); + } + + return vc; + } + private void filter() { // get the current context FiltrationContext context = variantContextWindow.getContext(); @@ -202,11 +252,6 @@ public class VariantFiltrationWalker extends RodWalker { // make a new variant context based on filters Set filters = new LinkedHashSet(vc.getFilters()); - // test for SNP mask, if present - List mask = context.getTracker().getReferenceMetaData("mask"); - if ( mask.size() > 0 ) - filters.add(MASK_NAME); - // test for clustered SNPs if requested if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) ) filters.add(CLUSTERED_SNP_FILTER_NAME); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 840b29b37..3d75fdc44 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -29,11 +29,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { } @Test - public void testMask() { - WalkerTestSpec spec = new WalkerTestSpec( + public void testMasks() { + WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae")); - executeTest("test mask", spec); + executeTest("test mask all", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("b64baabe905a5d197cc1ab594147d3d5")); + executeTest("test mask some", spec2); + + WalkerTestSpec spec3 = new WalkerTestSpec( + baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, + Arrays.asList("0eff92fe72024d535c44b98e1e9e1993")); + executeTest("test mask extend", spec3); } @Test @@ -61,19 +71,16 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { } @Test - public void testGenotypeFilter1() { - WalkerTestSpec spec = new WalkerTestSpec( + public void testGenotypeFilters() { + WalkerTestSpec spec1 = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("6696e3f65a62ce912230d47cdb0c129b")); - executeTest("test genotype filter #1", spec); - } + executeTest("test genotype filter #1", spec1); - @Test - public void testGenotypeFilter2() { - WalkerTestSpec spec = new WalkerTestSpec( + WalkerTestSpec spec2 = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9")); - executeTest("test genotype filter #2", spec); + executeTest("test genotype filter #2", spec2); } @Test