From 0a06fbdb943276055f201ce078a0f29a0c411683 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Wed, 15 Sep 2010 16:45:03 +0000 Subject: [PATCH] Adding header lines to output of VR walkers to settle validator warnings. Command lines are added to the VCF header. GATK version numbers will be added to the header lines by Matt. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4288 348d0f76-0448-11de-a6fe-93d51630548a --- .../filters/VariantFiltrationWalker.java | 2 ++ .../TableRecalibrationWalker.java | 3 +- .../ApplyVariantCuts.java | 28 +++++++++++++------ .../GenerateVariantClustersWalker.java | 21 ++++++++++++-- .../VariantGaussianMixtureModel.java | 8 ++---- .../VariantRecalibrator.java | 19 +++++++++---- ...ntRecalibrationWalkersIntegrationTest.java | 15 +++++----- 7 files changed, 65 insertions(+), 31 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index c17b94d3b..e3b59e87e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; @@ -71,6 +72,7 @@ public class VariantFiltrationWalker extends RodWalker { @Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false) protected String MASK_NAME = "Mask"; + @Hidden @Argument(fullName = "NO_HEADER", shortName = "NO_HEADER", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false) protected Boolean NO_VCF_HEADER_LINE = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index c1435f52e..96ed496f6 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -102,11 +102,12 @@ public class TableRecalibrationWalker extends ReadWalker { @Argument(fullName="fdr_filter_level", shortName="fdr_filter_level", doc="The FDR level at which to start filtering.", required=false) private double FDR_FILTER_LEVEL = 0.0; + ///////////////////////////// + // Debug Arguments + ///////////////////////////// + @Hidden + @Argument(fullName = "NO_HEADER", shortName = "NO_HEADER", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false) + protected Boolean NO_VCF_HEADER_LINE = false; + ///////////////////////////// // Private Member Variables ///////////////////////////// @@ -162,15 +167,20 @@ public class ApplyVariantCuts extends RodWalker { // setup the header fields final Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score")); - hInfo.add(new VCFHeaderLine("source", "ApplyVariantCuts")); + if( !NO_VCF_HEADER_LINE ) { + hInfo.add(new VCFHeaderLine("ApplyVariantCuts", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), this) + "\"")); + } final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit())); - - for( int iii = 1; iii < filterName.size(); iii++ ) { - hInfo.add(new VCFFilterHeaderLine(filterName.get(iii), String.format("FDR tranche level at qual " + qCuts.get(iii)))); + + if( filterName.size() >= 2 ) { + for( int iii = 0; iii < filterName.size() - 1; iii++ ) { + hInfo.add(new VCFFilterHeaderLine(filterName.get(iii), String.format("FDR tranche level at qual: " + qCuts.get(iii) + " <= x < " + qCuts.get(iii+1)))); + } + } + if( filterName.size() >= 1 ) { + hInfo.add(new VCFFilterHeaderLine(filterName.get(0)+"+", String.format("FDR tranche level at qual < " + qCuts.get(0)))); } - hInfo.add(new VCFFilterHeaderLine(filterName.get(0)+"+", String.format("FDR tranche level at qual > " + qCuts.get(filterName.size()-1)))); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index 172bc0309..c347af681 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.CommandLineUtils; +import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -76,7 +78,7 @@ public class GenerateVariantClustersWalker extends RodWalker(Arrays.asList(USE_ANNOTATIONS)); @@ -123,6 +133,11 @@ public class GenerateVariantClustersWalker extends RodWalker(Arrays.asList(IGNORE_INPUT_FILTERS)); } + if( !NO_HEADER_LINE ) { + CLUSTER_FILE.print("##GenerateVariantClusters = "); + CLUSTER_FILE.println("\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), this) + "\""); + } + boolean foundDBSNP = false; for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { if( d.getName().startsWith("input") ) { @@ -235,7 +250,7 @@ public class GenerateVariantClustersWalker extends RodWalker samples = new TreeSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFHeaderLineType.Float, "The original variant quality score")); - hInfo.add(new VCFHeaderLine("source", "VariantRecalibrator")); + hInfo.add(new VCFInfoHeaderLine("LOD", 1, VCFHeaderLineType.Float, "The log odds ratio calculated by the VR algorithm which was turned into the phred scaled recalibrated quality score")); + if( !NO_VCF_HEADER_LINE ) { + hInfo.add(new VCFHeaderLine("VariantRecalibrator", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), this) + "\"")); + } samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); final VCFHeader vcfHeader = new VCFHeader(hInfo, samples); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index 75278b2a9..5cca8918f 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -23,6 +23,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + + " -NO_HEADER" + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + " -weightDBSNP 0.2 -weightHapMap 1.0" + @@ -44,9 +45,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { public void testVariantRecalibrator() { HashMap> e = new HashMap>(); e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", - Arrays.asList("9e4509be067284eefc31e91378faba6a", "038c31c5bb46a4df89b8ee69ec740812","7d42bbdfb69fdfb18cbda13a63d92602")); // Each test checks the md5 of three output files + Arrays.asList("0c6b5085a678b6312ab4bc8ce7b4eee4", "038c31c5bb46a4df89b8ee69ec740812","7d42bbdfb69fdfb18cbda13a63d92602")); // Each test checks the md5 of three output files e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", - Arrays.asList("80243bd6731f6b0341fa1762c095a72e", "661360e85392af9c97e386399871854a","371e5a70a4006420737c5ab259e0e23e")); // Each test checks the md5 of three output files + Arrays.asList("bbdffb7fa611f4ae80e919cdf86b9bc6", "661360e85392af9c97e386399871854a","371e5a70a4006420737c5ab259e0e23e")); // Each test checks the md5 of three output files for ( Map.Entry> entry : e.entrySet() ) { String vcf = entry.getKey(); @@ -56,6 +57,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { if ( clusterFile != null ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + + " -NO_HEADER" + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " -B:hapmap,VCF " + comparisonDataLocation + "Validated/HapMap/3.2/genotypes_r27_nr.b36_fwd.vcf" + " -T VariantRecalibrator" + @@ -69,22 +71,20 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { " -o %s" + " -tranchesFile %s" + " -reportDatFile %s", - 3, // two output file + 3, // three output files md5s); List result = executeTest("testVariantRecalibrator", spec).getFirst(); inputVCFFiles.put(vcf, result.get(0).getAbsolutePath()); tranchesFiles.put(vcf, result.get(1).getAbsolutePath()); } } - - } @Test public void testApplyVariantCuts() { HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "5fc641e291ad7330985ad94d4a347511" ); - e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "d9e00e6fe8269b3218880d2c084804c6" ); + e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "7429ed494131eb1dab5a9169cf65d1f0" ); + e.put( validationDataLocation + "lowpass.N3.chr1.raw.vcf", "ad8661cba3b04a7977c97a541fd8a668" ); for ( Map.Entry entry : e.entrySet() ) { String vcf = entry.getKey(); @@ -96,6 +96,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { if ( inputVCFFile != null && tranchesFile != null ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + + " -NO_HEADER" + " --DBSNP " + GATKDataLocation + "dbsnp_129_b36.rod" + " -T ApplyVariantCuts" + " -L 1:20,000,000-100,000,000" +