As promised, VariantFiltration can now mask out sites within a user-specified window around the provided mask rod. By default the window is 0, but you can now use the --maskExtension argument to increase that value. Added integration tests to cover this new functionality.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6060 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
droazen 2011-06-22 22:55:29 +00:00
parent ea47ccf032
commit d323ef0461
3 changed files with 74 additions and 25 deletions

View File

@ -26,25 +26,22 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
public class FiltrationContext {
private RefMetaDataTracker tracker;
private ReferenceContext ref;
private VariantContext vc;
public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) {
this.tracker = tracker;
public FiltrationContext(ReferenceContext ref, VariantContext vc) {
this.ref = ref;
this.vc = vc;
}
public RefMetaDataTracker getTracker() { return tracker; }
public ReferenceContext getReferenceContext() { return ref; }
public VariantContext getVariantContext() { return vc; }
public void setVariantContext(VariantContext newVC) { vc = newVC; }
}

View File

@ -36,7 +36,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import java.util.*;
@ -67,6 +69,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
protected Integer clusterWindow = 0;
@Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
protected Integer MASK_EXTEND = 0;
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
protected String MASK_NAME = "Mask";
@ -80,6 +84,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
private ClusteredSnps clusteredSNPs = null;
private GenomeLoc previousMaskPosition = null;
// the structures necessary to initialize and maintain a windowed context
private FiltrationContextWindow variantContextWindow;
@ -121,6 +126,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( clusterWindow > 0 )
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
if ( MASK_EXTEND < 0 )
throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed");
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
@ -143,20 +151,50 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
return 0;
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
if ( VCs.size() == 0 )
return 0;
// is there a SNP mask present?
boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0;
if ( hasMask )
previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask
for ( VariantContext vc : VCs ) {
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
// filter based on previous mask position
if ( previousMaskPosition != null && // we saw a previous mask site
previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig
vc.getStart() - previousMaskPosition.getStop() <= MASK_EXTEND && // it's within the mask area (multi-base masks that overlap this site will always give a negative distance)
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
filters.add(MASK_NAME);
vc = VariantContext.modifyFilters(vc, filters);
}
FiltrationContext varContext = new FiltrationContext(ref, vc);
// if we're still initializing the context, do so
if ( windowInitializer != null ) {
// if this is a mask position, filter previous records
if ( hasMask ) {
for ( FiltrationContext prevVC : windowInitializer )
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
}
windowInitializer.add(varContext);
if ( windowInitializer.size() == windowSize ) {
variantContextWindow = new FiltrationContextWindow(windowInitializer);
windowInitializer = null;
}
} else {
// if this is a mask position, filter previous records
if ( hasMask ) {
for ( FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) {
if ( prevVC != null )
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
}
}
variantContextWindow.moveWindow(varContext);
filter();
}
@ -165,6 +203,18 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
return 1;
}
private VariantContext checkMaskForPreviousLocation(VariantContext vc, GenomeLoc maskLoc) {
if ( maskLoc.getContig().equals(vc.getChr()) && // it's on the same contig
maskLoc.getStart() - vc.getEnd() <= MASK_EXTEND && // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance)
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
filters.add(MASK_NAME);
vc = VariantContext.modifyFilters(vc, filters);
}
return vc;
}
private void filter() {
// get the current context
FiltrationContext context = variantContextWindow.getContext();
@ -202,11 +252,6 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
// make a new variant context based on filters
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
// test for SNP mask, if present
List<Object> mask = context.getTracker().getReferenceMetaData("mask");
if ( mask.size() > 0 )
filters.add(MASK_NAME);
// test for clustered SNPs if requested
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
filters.add(CLUSTERED_SNP_FILTER_NAME);

View File

@ -29,11 +29,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
}
@Test
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
public void testMasks() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
executeTest("test mask", spec);
executeTest("test mask all", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b64baabe905a5d197cc1ab594147d3d5"));
executeTest("test mask some", spec2);
WalkerTestSpec spec3 = new WalkerTestSpec(
baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("0eff92fe72024d535c44b98e1e9e1993"));
executeTest("test mask extend", spec3);
}
@Test
@ -61,19 +71,16 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
}
@Test
public void testGenotypeFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
public void testGenotypeFilters() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
executeTest("test genotype filter #1", spec);
}
executeTest("test genotype filter #1", spec1);
@Test
public void testGenotypeFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
WalkerTestSpec spec2 = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
executeTest("test genotype filter #2", spec);
executeTest("test genotype filter #2", spec2);
}
@Test