Three fixes for VariantFiltrationWalker: Trying to filter an empty VCF file will produce a well-formed VCF file with zero records instead of a blank file, needed for pipelines. The first record's genotype info fields are now in the same order as all the others. The VCF header lines are pulled from just the input variant rod instead of from all rods.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4341 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-09-24 13:52:56 +00:00
parent c355afc320
commit 1931b2e1bd
2 changed files with 22 additions and 23 deletions

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils;
import java.util.*; import java.util.*;
@ -76,6 +77,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
List<VariantContextUtils.JexlVCMatchExp> filterExps; List<VariantContextUtils.JexlVCMatchExp> filterExps;
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps; List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
private ClusteredSnps clusteredSNPs = null; private ClusteredSnps clusteredSNPs = null;
@ -83,12 +85,15 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
private FiltrationContextWindow variantContextWindow; private FiltrationContextWindow variantContextWindow;
private static final int windowSize = 10; // 10 variants on either end of the current one private static final int windowSize = 10; // 10 variants on either end of the current one
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>(); private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
private boolean wroteHeader = false;
private void initializeVcfWriter(VariantContext vc) { private void initializeVcfWriter() {
final ArrayList<String> inputNames = new ArrayList<String>();
inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
// setup the header fields // setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
if ( clusterWindow > 0 ) if ( clusterWindow > 0 )
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
@ -109,7 +114,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
} }
} }
writer.writeHeader(new VCFHeader(hInfo, vc.getSampleNames())); writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
} }
public void initialize() { public void initialize() {
@ -120,6 +125,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
VariantContextUtils.engine.setSilent(true); VariantContextUtils.engine.setSilent(true);
initializeVcfWriter();
} }
public Integer reduceInit() { return 0; } public Integer reduceInit() { return 0; }
@ -136,12 +143,12 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
if ( tracker == null ) if ( tracker == null )
return 0; return 0;
List<Object> rods = tracker.getReferenceMetaData("variant"); List<Object> rods = tracker.getReferenceMetaData( INPUT_VARIANT_ROD_BINDING_NAME );
// ignore places where we don't have a variant // ignore places where we don't have a variant
if ( rods.size() == 0 ) if ( rods.size() == 0 )
return 0; return 0;
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0), ref); VariantContext vc = VariantContextAdaptors.toVariantContext( INPUT_VARIANT_ROD_BINDING_NAME, rods.get(0), ref );
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc); FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
// if we're still initializing the context, do so // if we're still initializing the context, do so
@ -216,15 +223,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
else else
filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
writeVCF(filteredVC, context.getReferenceContext().getBase()); writer.add( filteredVC, context.getReferenceContext().getBase() );
}
private void writeVCF(VariantContext vc, byte ref) {
if ( !wroteHeader ) {
initializeVcfWriter(vc);
wroteHeader = true;
}
writer.add(vc, ref);
} }
public Integer reduce(Integer value, Integer sum) { public Integer reduce(Integer value, Integer sum) {

View File

@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() { public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a08a88866aac0ec4a844386bea5c585f")); Arrays.asList("1faa4662400a1128b94d5f93d46eee1d"));
executeTest("test no action", spec); executeTest("test no action", spec);
} }
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() { public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -window 10 -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("59f0f365550cc01e0fdef65e98963826")); Arrays.asList("f09454e99562d839e5117f992f4eebef"));
executeTest("test clustered SNPs", spec); executeTest("test clustered SNPs", spec);
} }
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask() { public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("cb67d20027e4e0cb45544a69ff49476e")); Arrays.asList("4b1d370e60c2bc0c763886c3adff473a"));
executeTest("test mask", spec); executeTest("test mask", spec);
} }
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() { public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("1fdccdb8ca837d5fc7a619d285e2308a")); Arrays.asList("3ea5495c6eace03971cb4c6cd1f84af4"));
executeTest("test filter #1", spec); executeTest("test filter #1", spec);
} }
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() { public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("40fdd0321091402a669d7e2eaadf072a")); Arrays.asList("b45f3ac20397ab4b64c963ce67cf9cca"));
executeTest("test filter #2", spec); executeTest("test filter #2", spec);
} }
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() { public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a017fbbb6d610481b174c53d29b1ae5a")); Arrays.asList("d2484ea8f1d1b12b5586e9bab518d7fa"));
executeTest("test filter with separate names #2", spec); executeTest("test filter with separate names #2", spec);
} }
@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilter1() { public void testGenotypeFilter1() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("1a200b0e47cac16d1dfd8ce44484c667")); Arrays.asList("76a37ff57ae5c93f5abf341203f7af2c"));
executeTest("test genotype filter #1", spec); executeTest("test genotype filter #1", spec);
} }
@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilter2() { public void testGenotypeFilter2() {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("78315a09eb3ac8cc47010bb92fad342f")); Arrays.asList("e8b80ce01f631c840336792cfbf35254"));
executeTest("test genotype filter #2", spec); executeTest("test genotype filter #2", spec);
} }
} }