Adding command-line header to VA and VF

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3974 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-08 05:21:15 +00:00
parent 64446f0ddf
commit bd6d5a8d51
6 changed files with 50 additions and 34 deletions

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
@ -62,10 +63,10 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
protected String sampleName = null;
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected String[] annotationsToUse = {};
protected List<String> annotationsToUse = new ArrayList<String>();
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationGroupsToUse = {};
protected List<String> annotationGroupsToUse = new ArrayList<String>();
@Argument(fullName="useAllAnnotations", shortName="all", doc="Use all possible annotations (not for the faint of heart)", required=false)
protected Boolean USE_ALL_ANNOTATIONS = false;
@ -79,6 +80,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@Argument(fullName = "NO_HEADER", shortName = "NO_HEADER", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false)
protected Boolean NO_VCF_HEADER_LINE = false;
private VCFWriter vcfWriter;
private HashMap<String, String> nonVCFsampleName = new HashMap<String, String>();
@ -139,11 +143,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(engine.getVCFAnnotationDescriptions());
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
for ( VCFHeaderLine line : VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")) ) {
if ( isUniqueHeaderLine(line, hInfo) )
hInfo.add(line);
}
if ( !NO_VCF_HEADER_LINE ) {
Set<Object> args = new HashSet<Object>();
args.add(this);
hInfo.add(new VCFHeaderLine("VariantAnnotator", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
}
vcfWriter = new VCFWriter(out);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);

View File

@ -88,7 +88,7 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(GenomeAnalysisEngine engine, String[] annotationGroupsToUse, String[] annotationsToUse) {
public VariantAnnotatorEngine(GenomeAnalysisEngine engine, List<String> annotationGroupsToUse, List<String> annotationsToUse) {
// create a map for all annotation classes which implement our top-level interfaces
HashMap<String, Class> classMap = new HashMap<String, Class>();

View File

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
@ -51,14 +52,14 @@ import java.util.*;
public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="filterExpression", shortName="filter", doc="One or more expression used with INFO fields to filter (see wiki docs for more info)", required=false)
protected String[] FILTER_EXPS = new String[]{};
protected ArrayList<String> FILTER_EXPS = new ArrayList<String>();
@Argument(fullName="filterName", shortName="filterName", doc="Names to use for the list of filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
protected String[] FILTER_NAMES = new String[]{};
protected ArrayList<String> FILTER_NAMES = new ArrayList<String>();
@Argument(fullName="genotypeFilterExpression", shortName="G_filter", doc="One or more expression used with FORMAT (sample/genotype-level) fields to filter (see wiki docs for more info)", required=false)
protected String[] GENOTYPE_FILTER_EXPS = new String[]{};
protected ArrayList<String> GENOTYPE_FILTER_EXPS = new ArrayList<String>();
@Argument(fullName="genotypeFilterName", shortName="G_filterName", doc="Names to use for the list of sample/genotype filters (must be a 1-to-1 mapping); this name is put in the FILTER field for variants that get filtered", required=false)
protected String[] GENOTYPE_FILTER_NAMES = new String[]{};
protected ArrayList<String> GENOTYPE_FILTER_NAMES = new ArrayList<String>();
@Argument(fullName="clusterSize", shortName="cluster", doc="The number of SNPs which make up a cluster (see also --clusterWindowSize); [default:3]", required=false)
protected Integer clusterSize = 3;
@ -68,6 +69,9 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="maskName", shortName="mask", doc="The text to put in the FILTER field if a 'mask' rod is provided and overlaps with a variant call; [default:'Mask']", required=false)
protected String MASK_NAME = "Mask";
@Argument(fullName = "NO_HEADER", shortName = "NO_HEADER", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false)
protected Boolean NO_VCF_HEADER_LINE = false;
// JEXL expressions for the filters
List<VariantContextUtils.JexlVCMatchExp> filterExps;
List<VariantContextUtils.JexlVCMatchExp> genotypeFilterExps;
@ -88,8 +92,6 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFHeaderLine("source", "VariantFiltration"));
hInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
if ( clusterWindow > 0 )
hInfo.add(new VCFFilterHeaderLine(CLUSTERED_SNP_FILTER_NAME, "SNPs found in clusters"));
@ -110,6 +112,12 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
}
}
if ( !NO_VCF_HEADER_LINE ) {
Set<Object> args = new HashSet<Object>();
args.add(this);
hInfo.add(new VCFHeaderLine("VariantFiltration", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
}
writer = new VCFWriter(out);
writer.writeHeader(new VCFHeader(hInfo, new TreeSet<String>(vc.getSampleNames())));
}

View File

@ -61,7 +61,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
protected PrintStream metricsWriter = null;
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected String[] annotationsToUse = {};
protected List<String> annotationsToUse = new ArrayList<String>();
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" };
@ -112,7 +112,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
**/
public void initialize() {
annotationEngine = new VariantAnnotatorEngine(getToolkit(), annotationClassesToUse, annotationsToUse);
annotationEngine = new VariantAnnotatorEngine(getToolkit(), Arrays.asList(annotationClassesToUse), annotationsToUse);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, annotationEngine);
// initialize the writers

View File

@ -8,14 +8,14 @@ import java.util.Arrays;
public class VariantAnnotatorIntegrationTest extends WalkerTest {
public static String baseTestString() {
return "-T VariantAnnotator -R " + b36KGReference + " -o %s";
return "-T VariantAnnotator -R " + b36KGReference + " -NO_HEADER -o %s";
}
@Test
public void testHasAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("989ff3afb1384b3c6c8a284b11ebb228"));
Arrays.asList("89b846266a0c565b9d2a7dbe793546aa"));
executeTest("test file has annotations, not asking for annotations, #1", spec);
}
@ -23,7 +23,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("5c58506847ddf85bfe75c3cf3babb669"));
Arrays.asList("14e546fa7e819faeda3227c842fa92e7"));
executeTest("test file has annotations, not asking for annotations, #2", spec);
}
@ -31,7 +31,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2f2afe80e52542ac8393526061913040"));
Arrays.asList("c2a52baffc7254f222697f0f274bddfa"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -39,7 +39,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("cd05490993b1b3aaddbe75a997942ea0"));
Arrays.asList("d9ec651bf6fa225f684719038f1c48c9"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -47,7 +47,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("3712b2901bece15094c1eb468dfdc5a8"));
Arrays.asList("34d2831e7d843093a2cf47c1f4e5f0f0"));
executeTest("test file doesn't have annotations, not asking for annotations, #1", spec);
}
@ -55,7 +55,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsNotAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("b7b0e9f3f4f25fd41f388a736dd7b3b8"));
Arrays.asList("20b946924fffdf6700cd029424fb72b5"));
executeTest("test file doesn't have annotations, not asking for annotations, #2", spec);
}
@ -63,7 +63,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("8ce6560869906e7b179bc20349d69892"));
Arrays.asList("05658e763022cbfdcef4d2e7c0b02b4e"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -71,7 +71,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("423b382952fdd8c85b6ed9c0b8cdd172"));
Arrays.asList("d11ff0ac631f214b6023ab0232be4b14"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -79,7 +79,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("d3b1d7a271a46228b9a67841969d7055"));
Arrays.asList("20cb9e8b7d883ed945e13845e84153ad"));
executeTest("test overwriting header", spec);
}
@ -87,7 +87,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("e40a69279c30ddf1273f33eefa8da0cd"));
Arrays.asList("edde543d79202c3cc5a76a654f88c7de"));
executeTest("not passing it any reads", spec);
}
@ -95,7 +95,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("fa9ad0cf345d381eafeea750467fc18e"));
Arrays.asList("dee4c3065c832004201104a314637d18"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -103,7 +103,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B compH3,VCF," + validationDataLocation + "fakeHM3.vcf -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
Arrays.asList("5aa6b5c3ce7d9e107269af7e20a20167"));
Arrays.asList("8eefa78f0137d6a1f247e336655e6d4c"));
executeTest("getting DB tag with HM3", spec);
}
}

View File

@ -8,7 +8,7 @@ import java.util.Arrays;
public class VariantFiltrationIntegrationTest extends WalkerTest {
public static String baseTestString() {
return "-T VariantFiltration -o %s -R " + b36KGReference;
return "-T VariantFiltration -o %s -NO_HEADER -R " + b36KGReference;
}
@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testNoAction() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("2cac82e304185cfceea5816f89f64773"));
Arrays.asList("89b846266a0c565b9d2a7dbe793546aa"));
executeTest("test no action", spec);
}
@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testClusteredSnps() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("5ae28a70de7a778c50749a60b69724ee"));
Arrays.asList("46877eb3ec258e6b70d8fcacca1acb25"));
executeTest("test clustered SNPs", spec);
}
@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testMask() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("0404f10b3c928ee1ea240ccea6ee3cd1"));
Arrays.asList("650504a58af863d9cef699087a7961aa"));
executeTest("test mask", spec);
}
@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fa110840f477b238c0b30ed4fae5ab72"));
Arrays.asList("b0b9110aeff967dd87cd0ec273d20791"));
executeTest("test filter #1", spec);
}
@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("9a30dd4c67ddbfadc9e153e0345b46d4"));
Arrays.asList("8405a7ef69792c239d903d13832a1ea7"));
executeTest("test filter #2", spec);
}
@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testFilterWithSeparateNames() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("7052fca252754e7a92ddb1f27123c7c8"));
Arrays.asList("8266b9eb2ba35d77ab1cecb395322f31"));
executeTest("test filter with separate names #2", spec);
}
@ -64,7 +64,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilter1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("e22ecae2f992fb9d5a91c286f9ac3e40"));
Arrays.asList("99d8a47623cba215ee7d803c514ef116"));
executeTest("test genotype filter #1", spec);
}
@ -72,7 +72,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
public void testGenotypeFilter2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G_filter 'AF == 0.04 && isHomVar == 1' -G_filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b7e7e7abbf5b03d773f82cced33497a4"));
Arrays.asList("c58ea74c32290c9b4e8ae3dd1d0250e1"));
executeTest("test genotype filter #2", spec);
}
}