diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 2b5a4b629..253c6e6d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -47,7 +47,8 @@ public class VariantEvalWalker extends RodWalker implements Tr public List> evals; @Input(fullName="comp", shortName = "comp", doc="Input comparison file(s)", required=false) - public List> comps = Collections.emptyList(); + public List> compsProvided = Collections.emptyList(); + private List> comps = new ArrayList>(); @ArgumentCollection protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection(); @@ -67,7 +68,7 @@ public class VariantEvalWalker extends RodWalker implements Tr protected Set SAMPLE_EXPRESSIONS; @Argument(shortName="knownName", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) - protected String[] KNOWN_NAMES = {dbsnp.dbsnp.getName()}; + protected String[] KNOWN_NAMES = {}; // Stratification arguments @Argument(fullName="stratificationModule", shortName="ST", doc="One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless -noS is specified)", required=false) @@ -144,11 +145,17 @@ public class VariantEvalWalker extends RodWalker implements Tr // Just list the modules, and exit quickly. if (LIST) { variantEvalUtils.listModulesAndExit(); } - // Add a dummy comp track if none exists - if ( comps.size() == 0 ) { - comps.add(new RodBinding(VariantContext.class, "none", "UNBOUND", "", new Tags())); + // maintain the full list of comps + comps.addAll(compsProvided); + if ( dbsnp.dbsnp.isBound() ) { + comps.add(dbsnp.dbsnp); + knownNames.add(dbsnp.dbsnp.getName()); } + // Add a dummy comp track if none exists + if ( comps.size() == 0 ) + comps.add(new RodBinding(VariantContext.class, "none", "UNBOUND", "", new Tags())); + // Cache the rod names for ( RodBinding compRod : comps ) compNames.add(compRod.getName()); @@ -156,10 +163,7 @@ public class VariantEvalWalker extends RodWalker implements Tr for ( RodBinding evalRod : evals ) evalNames.add(evalRod.getName()); - if ( dbsnp.dbsnp.isBound() ) - compNames.add(dbsnp.dbsnp.getName()); - - // Set up set of known names + // Set up set of additional known names knownNames.addAll(Arrays.asList(KNOWN_NAMES)); // Now that we have all the rods categorized, determine the sample list from the eval rods. @@ -293,23 +297,6 @@ public class VariantEvalWalker extends RodWalker implements Tr } } } - - // todo: Eric, this is really the problem. We select single eval and comp VCs independently - // todo: discarding multiple eval tracks at the sites and not providing matched comps - // todo: where appropriate. Really this loop should look like: - // todo: for each eval track: - // todo: for each eval in track: - // todo: for each compTrack: - // todo: comp = findMatchingComp(eval, compTrack) // find the matching comp in compTrack - // todo: call evalModule(eval, comp) - // todo: // may return null if no such comp exists, but proceed as eval modules may need to see eval / null pair - // todo: for each comp not matched by an eval in compTrack: - // todo: call evalModule(null, comp) - // todo: // need to call with null comp, as module - // todo: note that the reason Kiran pre-computed the possible VCs is to apply the modifiers - // todo: like subset to sample, etc. So you probably will want a master map that maps - // todo: from special eval bindings to the digested VC for efficiency. - } } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 8fa5f0c29..72af7034b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -20,8 +20,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -40,8 +40,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -61,8 +61,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -83,8 +83,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -104,8 +104,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -125,8 +125,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -146,8 +146,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -167,8 +167,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -190,8 +190,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -215,7 +215,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:eval,VCF " + fundamentalTestVCF, + "--eval " + fundamentalTestVCF, "-noEV", "-EV CountVariants", "-noST", @@ -232,9 +232,9 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testSelect1() { String extraArgs = "-L 1:1-10,000,000"; String tests = cmdRoot + - " -B:dbsnp,VCF " + b36dbSNP129 + - " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + - " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + " --dbsnp " + b36dbSNP129 + + " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", 1, Arrays.asList("14054badcd89b24c2375e1d09918f681")); executeTestParallel("testSelect1", spec); @@ -244,7 +244,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testVEGenotypeConcordance() { String vcfFile = "GenotypeConcordanceEval.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, Arrays.asList("96f27163f16bb945f19c6623cd6db34e")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); @@ -252,7 +252,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d1932be3748fcf6da77dc51aec323710")); executeTestParallel("testCompVsEvalAC",spec); } @@ -263,14 +263,14 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testTranches() { - String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; + String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); executeTestParallel("testTranches",spec); } @Test public void testCompOverlap() { - String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5")); executeTestParallel("testCompOverlap",spec); } @@ -280,10 +280,10 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,VCF " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " --dbsnp " + b37dbSNP132 + + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0897dfba2f4a245faddce38000555cce")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("cbea5f9f8c046d4c014d261db352c43b")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -291,11 +291,11 @@ public class VariantEvalIntegrationTest extends WalkerTest { public void testMultipleEvalTracksWithoutGenotypes() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L 20" + - " -B:dbsnp,VCF " + b37dbSNP132 + - " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + - " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " --dbsnp " + b37dbSNP132 + + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ead3602e14ec2944b5d9e4dacc08c819")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d07a246963ae609643620c839b20cd1e")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -305,27 +305,27 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-T VariantEval" + " -R " + b37KGReference + - " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + - " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + - " -B:dbsnp,VCF " + dbsnp + + " --comp " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " --eval " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " --dbsnp " + dbsnp + " -L 20:10000000-10100000" + " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("167a347ce0729d1bc3d4fd5069ebd674")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("a3c2177849cb00fdff99574cff7f0e4f")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults() { - String md5 = "40471a84b501eb440ee2d42e3081f228"; + String md5 = "dab415cc76846e18fcf8c78f2b2ee033"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CompOverlap", "-sn HG00625", @@ -342,8 +342,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsOneSampleVCF, "-noEV", "-EV CompOverlap", "-noST", @@ -363,8 +363,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { buildCommandLine( "-T VariantEval", "-R " + b37KGReference, - "-B:dbsnp,VCF " + b37dbSNP132, - "-B:eval,VCF " + fundamentalTestSNPsVCF, + "--dbsnp " + b37dbSNP132, + "--eval " + fundamentalTestSNPsVCF, "-noEV", "-EV CountVariants", "-noST",