diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 3c1e1ce4d..53a336464 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -42,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.gatk.walkers.variantrecalibration.ApplyVariantCuts; +import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.report.ReportMarshaller; import org.broadinstitute.sting.utils.report.VE2ReportFactory; @@ -71,18 +71,10 @@ import java.util.*; // todo -- clustered SNP counter // todo -- HWEs // todo -- indel metrics [count of sizes in/del should be in CountVariants] -// todo -- synonymous / non-synonmous ratio, or really just comparison of observed vs. expected biological annotation values - -// todo -- Performance: -// todo -- deal with performance issues with variant contexts // todo -- port over SNP density walker: // todo -- see walker for WG calc but will need to make it work with intervals correctly -// todo -- counts of snps per target [target name, gene, etc] - -// todo -- add subgroup of known variants as to those at hapmap sites [it's in the dbSNP record] - // Todo -- should really include argument parsing @annotations from subclass in this walker. Very // todo -- useful general capability. Right now you need to add arguments to VariantEval2 to handle new // todo -- evaluation arguments (which is better than passing a string!) @@ -101,8 +93,6 @@ import java.util.*; // todo -- discovered and released by 1KG. Might need to make this data set ourselves and keep it in GATK/data like // todo -- dbsnp rod // -// todo -- aux. plotting routines for VE2 -// // todo -- implement as select statment, but it's hard for multi-sample calls. // todo -- Provide separate dbsnp rates for het only calls and any call where there is at least one hom-var genotype, // todo -- since hets are much more likely to be errors @@ -126,15 +116,9 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(shortName="select", doc="One or more stratifications to use when evaluating the data", required=false) protected ArrayList SELECT_EXPS = new ArrayList(); - //protected String[] SELECT_EXPS = {"set == \"Intersection\"", - // "set == \"HiSeq.WGS.cleaned.ug.vcf\"", - // "set == \"HiSeq.WGS.cleaned.ug.vcf\" || set == \"Intersection\"", - // "set == \"HiSeq.WGS.raw.OQ.ug.vcf\"", - // "set == \"HiSeq.WGS.raw.OQ.ug.vcf\" || set == \"Intersection\""}; @Argument(shortName="selectName", doc="Names to use for the list of stratifications (must be a 1-to-1 mapping)", required=false) protected ArrayList SELECT_NAMES = new ArrayList(); - //protected String[] SELECT_NAMES = {"Intersection", "x1", "x2", "x3", "x4"}; @Argument(shortName="known", doc="Name of ROD bindings containing variant sites that should be treated as known when splitting eval rods into known and novel subsets", required=false) protected String[] KNOWN_NAMES = {DbSNPHelper.STANDARD_DBSNP_TRACK_NAME}; @@ -321,15 +305,17 @@ public class VariantEvalWalker extends RodWalker implements Tr if ( TRANCHE_FILENAME != null ) { // we are going to build a few select names automatically from the tranches file - for ( ApplyVariantCuts.Tranche t : ApplyVariantCuts.readTraches(new File(TRANCHE_FILENAME)) ) { + for ( Tranche t : Tranche.readTraches(new File(TRANCHE_FILENAME)) ) { logger.info("Adding select for all variant above the pCut of : " + t); SELECT_EXPS.add(String.format("QUAL >= %.2f", t.pCut)); SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr)); } } - logger.info("Selects: " + SELECT_NAMES); - logger.info("Selects: " + SELECT_EXPS); + if ( SELECT_NAMES.size() > 0 ) { + logger.info("Selects: " + SELECT_NAMES); + logger.info("Selects: " + SELECT_EXPS); + } List selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS); for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index a1aafae45..3b39767b1 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -141,4 +141,13 @@ public class private static String withSelect(String cmd, String select, String name) { return String.format("%s -select '%s' -selectName %s", cmd, select, name); } + + @Test + public void testTranches() { + String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -E TiTvVariantEvaluator -L chr1 -noStandard -reportType CSV -tf " + testDir + "tranches.4.txt"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("85b6621c64cc8f9a3b68cea644edf216")); + executeTestParallel("testTranches",spec); + //executeTest("testACDiscordanceAtAC1EvalAC2Comp",spec); + } + } diff --git a/java/test/data/tranches.4.txt b/testdata/tranches.4.txt similarity index 100% rename from java/test/data/tranches.4.txt rename to testdata/tranches.4.txt diff --git a/java/test/data/tranches.6.txt b/testdata/tranches.6.txt similarity index 100% rename from java/test/data/tranches.6.txt rename to testdata/tranches.6.txt diff --git a/java/test/data/tranches.raw.dat b/testdata/tranches.raw.dat similarity index 100% rename from java/test/data/tranches.raw.dat rename to testdata/tranches.raw.dat