From 1931b2e1bdb804dfdb5daa0c497c7b4bc3f976ba Mon Sep 17 00:00:00 2001 From: rpoplin Date: Fri, 24 Sep 2010 13:52:56 +0000 Subject: [PATCH] Three fixes for VariantFiltrationWalker: Trying to filter an empty VCF file will produce a well-formed VCF file with zero records instead of a blank file, needed for pipelines. The first record's genotype info fields are now in the same order as all the others. The VCF header lines are pulled from just the input variant rod instead of from all rods. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4341 348d0f76-0448-11de-a6fe-93d51630548a --- .../filters/VariantFiltrationWalker.java | 29 +++++++++---------- .../VariantFiltrationIntegrationTest.java | 16 +++++----- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 671e9e4e3..0227ac1ad 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.CommandLineUtils; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.vcf.VCFUtils; import java.util.*; @@ -76,6 +77,7 @@ public class VariantFiltrationWalker extends RodWalker { List filterExps; List genotypeFilterExps; + public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant"; public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster"; private ClusteredSnps clusteredSNPs = null; @@ -83,12 +85,15 @@ public class VariantFiltrationWalker extends RodWalker { private FiltrationContextWindow variantContextWindow; private static final int windowSize = 10; // 10 variants on either end of the current one private ArrayList windowInitializer = new ArrayList(); - private boolean wroteHeader = false; - private void initializeVcfWriter(VariantContext vc) { + private void initializeVcfWriter() { + + final ArrayList inputNames = new ArrayList(); + inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME ); + // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); if ( clusterWindow > 0 ) hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters")); @@ -109,7 +114,7 @@ public class VariantFiltrationWalker extends RodWalker { } } - writer.writeHeader(new VCFHeader(hInfo, vc.getSampleNames())); + writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames))); } public void initialize() { @@ -120,6 +125,8 @@ public class VariantFiltrationWalker extends RodWalker { genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS); VariantContextUtils.engine.setSilent(true); + + initializeVcfWriter(); } public Integer reduceInit() { return 0; } @@ -136,12 +143,12 @@ public class VariantFiltrationWalker extends RodWalker { if ( tracker == null ) return 0; - List rods = tracker.getReferenceMetaData("variant"); + List rods = tracker.getReferenceMetaData( INPUT_VARIANT_ROD_BINDING_NAME ); // ignore places where we don't have a variant if ( rods.size() == 0 ) return 0; - VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0), ref); + VariantContext vc = VariantContextAdaptors.toVariantContext( INPUT_VARIANT_ROD_BINDING_NAME, rods.get(0), ref ); FiltrationContext varContext = new FiltrationContext(tracker, ref, vc); // if we're still initializing the context, do so @@ -216,15 +223,7 @@ public class VariantFiltrationWalker extends RodWalker { else filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); - writeVCF(filteredVC, context.getReferenceContext().getBase()); - } - - private void writeVCF(VariantContext vc, byte ref) { - if ( !wroteHeader ) { - initializeVcfWriter(vc); - wroteHeader = true; - } - writer.add(vc, ref); + writer.add( filteredVC, context.getReferenceContext().getBase() ); } public Integer reduce(Integer value, Integer sum) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 8478fdf11..4d722f5ea 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a08a88866aac0ec4a844386bea5c585f")); + Arrays.asList("1faa4662400a1128b94d5f93d46eee1d")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("59f0f365550cc01e0fdef65e98963826")); + Arrays.asList("f09454e99562d839e5117f992f4eebef")); executeTest("test clustered SNPs", spec); } @@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMask() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("cb67d20027e4e0cb45544a69ff49476e")); + Arrays.asList("4b1d370e60c2bc0c763886c3adff473a")); executeTest("test mask", spec); } @@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("1fdccdb8ca837d5fc7a619d285e2308a")); + Arrays.asList("3ea5495c6eace03971cb4c6cd1f84af4")); executeTest("test filter #1", spec); } @@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("40fdd0321091402a669d7e2eaadf072a")); + Arrays.asList("b45f3ac20397ab4b64c963ce67cf9cca")); executeTest("test filter #2", spec); } @@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("a017fbbb6d610481b174c53d29b1ae5a")); + Arrays.asList("d2484ea8f1d1b12b5586e9bab518d7fa")); executeTest("test filter with separate names #2", spec); } @@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testGenotypeFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("1a200b0e47cac16d1dfd8ce44484c667")); + Arrays.asList("76a37ff57ae5c93f5abf341203f7af2c")); executeTest("test genotype filter #1", spec); } @@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testGenotypeFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("78315a09eb3ac8cc47010bb92fad342f")); + Arrays.asList("e8b80ce01f631c840336792cfbf35254")); executeTest("test genotype filter #2", spec); } } \ No newline at end of file