As promised, VariantFiltration can now mask out sites within a user-specified window around the provided mask rod. By default the window is 0, but you can now use the --maskExtension argument to increase that value. Added integration tests to cover this new functionality.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6060 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ea47ccf032
commit
d323ef0461
|
|
@ -26,25 +26,22 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||||
|
|
||||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
||||||
|
|
||||||
public class FiltrationContext {
|
public class FiltrationContext {
|
||||||
|
|
||||||
private RefMetaDataTracker tracker;
|
|
||||||
private ReferenceContext ref;
|
private ReferenceContext ref;
|
||||||
private VariantContext vc;
|
private VariantContext vc;
|
||||||
|
|
||||||
public FiltrationContext(RefMetaDataTracker tracker, ReferenceContext ref, VariantContext vc) {
|
public FiltrationContext(ReferenceContext ref, VariantContext vc) {
|
||||||
this.tracker = tracker;
|
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
this.vc = vc;
|
this.vc = vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public RefMetaDataTracker getTracker() { return tracker; }
|
|
||||||
|
|
||||||
public ReferenceContext getReferenceContext() { return ref; }
|
public ReferenceContext getReferenceContext() { return ref; }
|
||||||
|
|
||||||
public VariantContext getVariantContext() { return vc; }
|
public VariantContext getVariantContext() { return vc; }
|
||||||
|
|
||||||
|
public void setVariantContext(VariantContext newVC) { vc = newVC; }
|
||||||
}
|
}
|
||||||
|
|
@ -36,7 +36,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -67,6 +69,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
|
@Argument(fullName="clusterWindowSize", shortName="window", doc="The window size (in bases) in which to evaluate clustered SNPs (to disable the clustered SNP filter, set this value to less than 1); [default:0]", required=false)
|
||||||
protected Integer clusterWindow = 0;
|
protected Integer clusterWindow = 0;
|
||||||
|
|
||||||
|
@Argument(fullName="maskExtension", shortName="maskExtend", doc="How many bases beyond records from a provided 'mask' rod should variants be filtered; [default:0]", required=false)
|
||||||
|
protected Integer MASK_EXTEND = 0;
|
||||||
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
|
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
|
||||||
protected String MASK_NAME = "Mask";
|
protected String MASK_NAME = "Mask";
|
||||||
|
|
||||||
|
|
@ -80,6 +84,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
|
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
|
||||||
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
||||||
private ClusteredSnps clusteredSNPs = null;
|
private ClusteredSnps clusteredSNPs = null;
|
||||||
|
private GenomeLoc previousMaskPosition = null;
|
||||||
|
|
||||||
// the structures necessary to initialize and maintain a windowed context
|
// the structures necessary to initialize and maintain a windowed context
|
||||||
private FiltrationContextWindow variantContextWindow;
|
private FiltrationContextWindow variantContextWindow;
|
||||||
|
|
@ -121,6 +126,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( clusterWindow > 0 )
|
if ( clusterWindow > 0 )
|
||||||
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
|
clusteredSNPs = new ClusteredSnps(getToolkit().getGenomeLocParser(),clusterSize, clusterWindow);
|
||||||
|
|
||||||
|
if ( MASK_EXTEND < 0 )
|
||||||
|
throw new UserException.BadArgumentValue("maskExtension", "negative values are not allowed");
|
||||||
|
|
||||||
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
filterExps = VariantContextUtils.initializeMatchExps(FILTER_NAMES, FILTER_EXPS);
|
||||||
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
||||||
|
|
||||||
|
|
@ -143,20 +151,50 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
|
Collection<VariantContext> VCs = tracker.getVariantContexts(ref, INPUT_VARIANT_ROD_BINDING_NAME, null, context.getLocation(), true, false);
|
||||||
if ( VCs.size() == 0 )
|
|
||||||
return 0;
|
// is there a SNP mask present?
|
||||||
|
boolean hasMask = tracker.getReferenceMetaData("mask").size() > 0;
|
||||||
|
if ( hasMask )
|
||||||
|
previousMaskPosition = ref.getLocus(); // multi-base masks will get triggered over all bases of the mask
|
||||||
|
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( VariantContext vc : VCs ) {
|
||||||
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
|
|
||||||
|
// filter based on previous mask position
|
||||||
|
if ( previousMaskPosition != null && // we saw a previous mask site
|
||||||
|
previousMaskPosition.getContig().equals(vc.getChr()) && // it's on the same contig
|
||||||
|
vc.getStart() - previousMaskPosition.getStop() <= MASK_EXTEND && // it's within the mask area (multi-base masks that overlap this site will always give a negative distance)
|
||||||
|
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
|
||||||
|
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||||
|
filters.add(MASK_NAME);
|
||||||
|
vc = VariantContext.modifyFilters(vc, filters);
|
||||||
|
}
|
||||||
|
|
||||||
|
FiltrationContext varContext = new FiltrationContext(ref, vc);
|
||||||
|
|
||||||
// if we're still initializing the context, do so
|
// if we're still initializing the context, do so
|
||||||
if ( windowInitializer != null ) {
|
if ( windowInitializer != null ) {
|
||||||
|
|
||||||
|
// if this is a mask position, filter previous records
|
||||||
|
if ( hasMask ) {
|
||||||
|
for ( FiltrationContext prevVC : windowInitializer )
|
||||||
|
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
|
||||||
|
}
|
||||||
|
|
||||||
windowInitializer.add(varContext);
|
windowInitializer.add(varContext);
|
||||||
if ( windowInitializer.size() == windowSize ) {
|
if ( windowInitializer.size() == windowSize ) {
|
||||||
variantContextWindow = new FiltrationContextWindow(windowInitializer);
|
variantContextWindow = new FiltrationContextWindow(windowInitializer);
|
||||||
windowInitializer = null;
|
windowInitializer = null;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
// if this is a mask position, filter previous records
|
||||||
|
if ( hasMask ) {
|
||||||
|
for ( FiltrationContext prevVC : variantContextWindow.getWindow(10, 10) ) {
|
||||||
|
if ( prevVC != null )
|
||||||
|
prevVC.setVariantContext(checkMaskForPreviousLocation(prevVC.getVariantContext(), ref.getLocus()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
variantContextWindow.moveWindow(varContext);
|
variantContextWindow.moveWindow(varContext);
|
||||||
filter();
|
filter();
|
||||||
}
|
}
|
||||||
|
|
@ -165,6 +203,18 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private VariantContext checkMaskForPreviousLocation(VariantContext vc, GenomeLoc maskLoc) {
|
||||||
|
if ( maskLoc.getContig().equals(vc.getChr()) && // it's on the same contig
|
||||||
|
maskLoc.getStart() - vc.getEnd() <= MASK_EXTEND && // it's within the mask area (multi-base VCs that overlap this site will always give a negative distance)
|
||||||
|
(vc.getFilters() == null || !vc.getFilters().contains(MASK_NAME)) ) { // the filter hasn't already been applied
|
||||||
|
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||||
|
filters.add(MASK_NAME);
|
||||||
|
vc = VariantContext.modifyFilters(vc, filters);
|
||||||
|
}
|
||||||
|
|
||||||
|
return vc;
|
||||||
|
}
|
||||||
|
|
||||||
private void filter() {
|
private void filter() {
|
||||||
// get the current context
|
// get the current context
|
||||||
FiltrationContext context = variantContextWindow.getContext();
|
FiltrationContext context = variantContextWindow.getContext();
|
||||||
|
|
@ -202,11 +252,6 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
// make a new variant context based on filters
|
// make a new variant context based on filters
|
||||||
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
Set<String> filters = new LinkedHashSet<String>(vc.getFilters());
|
||||||
|
|
||||||
// test for SNP mask, if present
|
|
||||||
List<Object> mask = context.getTracker().getReferenceMetaData("mask");
|
|
||||||
if ( mask.size() > 0 )
|
|
||||||
filters.add(MASK_NAME);
|
|
||||||
|
|
||||||
// test for clustered SNPs if requested
|
// test for clustered SNPs if requested
|
||||||
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
if ( clusteredSNPs != null && clusteredSNPs.filter(variantContextWindow) )
|
||||||
filters.add(CLUSTERED_SNP_FILTER_NAME);
|
filters.add(CLUSTERED_SNP_FILTER_NAME);
|
||||||
|
|
|
||||||
|
|
@ -29,11 +29,21 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMask() {
|
public void testMasks() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -mask foo -B:mask,VCF3 " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
|
Arrays.asList("b0fcac4af3526e3b2a37602ab4c0e6ae"));
|
||||||
executeTest("test mask", spec);
|
executeTest("test mask all", spec1);
|
||||||
|
|
||||||
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
|
Arrays.asList("b64baabe905a5d197cc1ab594147d3d5"));
|
||||||
|
executeTest("test mask some", spec2);
|
||||||
|
|
||||||
|
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||||
|
baseTestString() + " -mask foo -maskExtend 10 -B:mask,VCF " + validationDataLocation + "vcfMask.vcf -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
|
Arrays.asList("0eff92fe72024d535c44b98e1e9e1993"));
|
||||||
|
executeTest("test mask extend", spec3);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -61,19 +71,16 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGenotypeFilter1() {
|
public void testGenotypeFilters() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
|
Arrays.asList("6696e3f65a62ce912230d47cdb0c129b"));
|
||||||
executeTest("test genotype filter #1", spec);
|
executeTest("test genotype filter #1", spec1);
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
public void testGenotypeFilter2() {
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
|
||||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF3 " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
|
Arrays.asList("26e5b4ee954c9e0b5eb044afd4b88ee9"));
|
||||||
executeTest("test genotype filter #2", spec);
|
executeTest("test genotype filter #2", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue