Three fixes for VariantFiltrationWalker: Trying to filter an empty VCF file will produce a well-formed VCF file with zero records instead of a blank file, needed for pipelines. The first record's genotype info fields are now in the same order as all the others. The VCF header lines are pulled from just the input variant rod instead of from all rods.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4341 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c355afc320
commit
1931b2e1bd
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -76,6 +77,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
List<VariantContextUtils.JexlVCMatchExp> filterExps;
|
||||
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
|
||||
|
||||
public static final String INPUT_VARIANT_ROD_BINDING_NAME = "variant";
|
||||
public static final String CLUSTERED_SNP_FILTER_NAME = "SnpCluster";
|
||||
private ClusteredSnps clusteredSNPs = null;
|
||||
|
||||
|
|
@ -83,12 +85,15 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
private FiltrationContextWindow variantContextWindow;
|
||||
private static final int windowSize = 10; // 10 variants on either end of the current one
|
||||
private ArrayList<FiltrationContext> windowInitializer = new ArrayList<FiltrationContext>();
|
||||
private boolean wroteHeader = false;
|
||||
|
||||
private void initializeVcfWriter(VariantContext vc) {
|
||||
private void initializeVcfWriter() {
|
||||
|
||||
final ArrayList<String> inputNames = new ArrayList<String>();
|
||||
inputNames.add( INPUT_VARIANT_ROD_BINDING_NAME );
|
||||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
|
||||
if ( clusterWindow > 0 )
|
||||
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
|
||||
|
|
@ -109,7 +114,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
writer.writeHeader(new VCFHeader(hInfo, vc.getSampleNames()));
|
||||
writer.writeHeader(new VCFHeader(hInfo, SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)));
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -120,6 +125,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
genotypeFilterExps = VariantContextUtils.initializeMatchExps(GENOTYPE_FILTER_NAMES, GENOTYPE_FILTER_EXPS);
|
||||
|
||||
VariantContextUtils.engine.setSilent(true);
|
||||
|
||||
initializeVcfWriter();
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
@ -136,12 +143,12 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
List<Object> rods = tracker.getReferenceMetaData("variant");
|
||||
List<Object> rods = tracker.getReferenceMetaData( INPUT_VARIANT_ROD_BINDING_NAME );
|
||||
// ignore places where we don't have a variant
|
||||
if ( rods.size() == 0 )
|
||||
return 0;
|
||||
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext("variant", rods.get(0), ref);
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext( INPUT_VARIANT_ROD_BINDING_NAME, rods.get(0), ref );
|
||||
FiltrationContext varContext = new FiltrationContext(tracker, ref, vc);
|
||||
|
||||
// if we're still initializing the context, do so
|
||||
|
|
@ -216,15 +223,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
else
|
||||
filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
|
||||
writeVCF(filteredVC, context.getReferenceContext().getBase());
|
||||
}
|
||||
|
||||
private void writeVCF(VariantContext vc, byte ref) {
|
||||
if ( !wroteHeader ) {
|
||||
initializeVcfWriter(vc);
|
||||
wroteHeader = true;
|
||||
}
|
||||
writer.add(vc, ref);
|
||||
writer.add( filteredVC, context.getReferenceContext().getBase() );
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testNoAction() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a08a88866aac0ec4a844386bea5c585f"));
|
||||
Arrays.asList("1faa4662400a1128b94d5f93d46eee1d"));
|
||||
executeTest("test no action", spec);
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testClusteredSnps() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -window 10 -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("59f0f365550cc01e0fdef65e98963826"));
|
||||
Arrays.asList("f09454e99562d839e5117f992f4eebef"));
|
||||
executeTest("test clustered SNPs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testMask() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -mask foo -B:mask,VCF " + validationDataLocation + "vcfexample2.vcf -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("cb67d20027e4e0cb45544a69ff49476e"));
|
||||
Arrays.asList("4b1d370e60c2bc0c763886c3adff473a"));
|
||||
executeTest("test mask", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1fdccdb8ca837d5fc7a619d285e2308a"));
|
||||
Arrays.asList("3ea5495c6eace03971cb4c6cd1f84af4"));
|
||||
executeTest("test filter #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("40fdd0321091402a669d7e2eaadf072a"));
|
||||
Arrays.asList("b45f3ac20397ab4b64c963ce67cf9cca"));
|
||||
executeTest("test filter #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testFilterWithSeparateNames() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 0.7' --filterName FSF -filter 'FisherStrand == 1.4' -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a017fbbb6d610481b174c53d29b1ae5a"));
|
||||
Arrays.asList("d2484ea8f1d1b12b5586e9bab518d7fa"));
|
||||
executeTest("test filter with separate names #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFilter1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1a200b0e47cac16d1dfd8ce44484c667"));
|
||||
Arrays.asList("76a37ff57ae5c93f5abf341203f7af2c"));
|
||||
executeTest("test genotype filter #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeFilter2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B:variant,VCF " + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("78315a09eb3ac8cc47010bb92fad342f"));
|
||||
Arrays.asList("e8b80ce01f631c840336792cfbf35254"));
|
||||
executeTest("test genotype filter #2", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue