From 0ef85647f79be3bfcfa8af3bf9a5d16124a13713 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 3 Aug 2011 18:21:18 -0400 Subject: [PATCH] A working version of a GATKReportDiffableReader for the diffEngine! --- .../sting/gatk/report/GATKReport.java | 8 +- .../sting/gatk/report/GATKReportColumn.java | 4 + .../sting/gatk/report/GATKReportTable.java | 12 + .../diffengine/GATKReportDiffableReader.java | 98 +++ .../VariantEvalIntegrationTest.java | 788 +++++++++--------- 5 files changed, 515 insertions(+), 395 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index dc3a617e7..608b5d1d0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.text.TextFormattingUtils; import java.io.*; +import java.util.Collection; import java.util.List; import java.util.TreeMap; @@ -12,6 +13,7 @@ import java.util.TreeMap; * Container class for GATK report tables */ public class GATKReport { + public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v"; private TreeMap tables = new TreeMap(); /** @@ -53,7 +55,7 @@ public class GATKReport { String line; while ( (line = reader.readLine()) != null ) { - if (line.startsWith("##:GATKReport.v")) { + if (line.startsWith(GATKREPORT_HEADER_PREFIX)) { version = GATKReportVersion.fromHeader(line); @@ -169,4 +171,8 @@ public class GATKReport { } } } + + public Collection getTables() { + return tables.values(); + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 1c46b3bac..347e870c8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -106,4 +106,8 @@ public class GATKReportColumn extends TreeMap { } return value; } + + public String getColumnName() { + return columnName; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index 5d38295f5..152e1a57b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -654,4 +654,16 @@ public class GATKReportTable { public int getNumRows() { return primaryKeyColumn.size(); } + + public String getTableName() { + return tableName; + } + + public String getTableDescription() { + return tableDescription; + } + + public GATKReportColumns getColumns() { + return columns; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java new file mode 100644 index 000000000..ef47ee33c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.diffengine; + +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportColumn; +import org.broadinstitute.sting.gatk.report.GATKReportTable; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Map; + + +/** + * Class implementing diffnode reader for GATKReports + */ +public class GATKReportDiffableReader implements DiffableReader { + @Override + public String getName() { return "GATKReport"; } + + @Override + public DiffElement readFromFile(File file, int maxElementsToRead) { + DiffNode root = DiffNode.rooted(file.getName()); + try { + // one line reads the whole thing into memory + GATKReport report = new GATKReport(file); + + for (GATKReportTable table : report.getTables() ) { + root.add(tableToNode(table, root)); + } + + return root.getBinding(); + } catch ( Exception e ) { + return null; + } + } + + private DiffNode tableToNode(GATKReportTable table, DiffNode root) { + DiffNode tableRoot = DiffNode.empty(table.getTableName(), root); + + tableRoot.add("Description", table.getTableDescription()); + tableRoot.add("NumberOfRows", table.getNumRows()); + tableRoot.add("Version", table.getVersion()); + + for ( GATKReportColumn column : table.getColumns().values() ) { + DiffNode columnRoot = DiffNode.empty(column.getColumnName(), tableRoot); + + columnRoot.add("Width", column.getColumnWidth()); + columnRoot.add("Displayable", column.isDisplayable()); + + int n = 1; + for ( Object elt : column.values() ) { + String name = column.getColumnName() + n++; + columnRoot.add(name, elt.toString()); + } + + tableRoot.add(columnRoot); + } + + return tableRoot; + } + + @Override + public boolean canRead(File file) { + try { + final String HEADER = GATKReport.GATKREPORT_HEADER_PREFIX; + char[] buff = new char[HEADER.length()]; + new FileReader(file).read(buff, 0, HEADER.length()); + String firstLine = new String(buff); + return firstLine.startsWith(HEADER); + } catch ( IOException e ) { + return false; + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index be6631d29..9a536146c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -1,394 +1,394 @@ -//package org.broadinstitute.sting.gatk.walkers.varianteval; -// -//import org.broadinstitute.sting.WalkerTest; -//import org.testng.annotations.Test; -// -//import java.util.Arrays; -//import java.util.HashMap; -//import java.util.Map; -// -//public class VariantEvalIntegrationTest extends WalkerTest { -// private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; -// private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; -// private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; -// private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; -// -// private static String cmdRoot = "-T VariantEval" + -// " -R " + b36KGReference; -// -// private static String root = cmdRoot + -// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + -// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + -// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; -// -// private static String rootGZ = cmdRoot + -// " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + -// " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + -// " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; -// -// // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] -// // private static String[] testsEnumerations = {root, rootGZ}; -// private static String[] testsEnumerations = {root}; -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndels() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Novelty", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Novelty", -// "-ST Filter", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST CpG", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("677fe398643e62a10d6739d36a720a12") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST FunctionalClass", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Degeneracy", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST Sample", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST JexlExpression", -// "-select 'DP < 20'", -// "-selectName DepthSelect", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST JexlExpression", -// "-select 'DP < 20'", -// "-selectName DepthLt20", -// "-select 'DP > 20'", -// "-selectName DepthGt20", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") -// ); -// executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); -// } -// -// @Test -// public void testFundamentalsCountVariantsNoCompRod() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:eval,VCF " + fundamentalTestVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("d44c8f44384189a09eea85a8e89d7299") -// ); -// executeTest("testFundamentalsCountVariantsNoCompRod", spec); -// } -// -// @Test -// public void testSelect1() { -// String extraArgs = "-L 1:1-10,000,000"; -// for (String tests : testsEnumerations) { -// WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", -// 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); -// executeTestParallel("testSelect1", spec); -// } -// } -// -// @Test -// public void testVEGenotypeConcordance() { -// String vcfFile = "GenotypeConcordanceEval.vcf"; -// -// WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", -// 1, -// Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); -// executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); -// } -// -// @Test -// public void testCompVsEvalAC() { -// String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); -// executeTestParallel("testCompVsEvalAC",spec); -// } -// -// private static String withSelect(String cmd, String select, String name) { -// return String.format("%s -select '%s' -selectName %s", cmd, select, name); -// } -// -// @Test -// public void testTranches() { -// String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); -// executeTestParallel("testTranches",spec); -// } -// -// @Test -// public void testCompOverlap() { -// String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); -// executeTestParallel("testCompOverlap",spec); -// } -// -// @Test -// public void testEvalTrackWithoutGenotypes() { -// String extraArgs = "-T VariantEval -R " + -// b37KGReference + -// " -L 20" + -// " -B:dbsnp,vcf " + b37dbSNP132 + -// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + -// " -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); -// executeTestParallel("testEvalTrackWithoutGenotypes",spec); -// } -// -// @Test -// public void testMultipleEvalTracksWithoutGenotypes() { -// String extraArgs = "-T VariantEval -R " + b37KGReference + -// " -L 20" + -// " -B:dbsnp,vcf " + b37dbSNP132 + -// " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + -// " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + -// " -noST -ST Novelty -o %s"; -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); -// executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); -// } -// -// @Test -// public void testMultipleCompTracks() { -// String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; -// -// String extraArgs = "-T VariantEval" + -// " -R " + b37KGReference + -// " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + -// " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + -// " -B:dbsnp,VCF " + dbsnp + -// " -L 20:10000000-10100000" + -// " -noST -noEV -ST Novelty -EV CompOverlap" + -// " -o %s"; -// -// WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); -// executeTestParallel("testMultipleCompTracks",spec); -// } -// -// @Test -// public void testPerSampleAndSubsettedSampleHaveSameResults() { -// String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; -// -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsVCF, -// "-noEV", -// "-EV CompOverlap", -// "-sn HG00625", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList(md5) -// ); -// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); -// -// WalkerTestSpec spec2 = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, -// "-noEV", -// "-EV CompOverlap", -// "-noST", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList(md5) -// ); -// executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); -// } -// -// -// @Test -// public void testAlleleCountStrat() { -// WalkerTestSpec spec = new WalkerTestSpec( -// buildCommandLine( -// "-T VariantEval", -// "-R " + b37KGReference, -// "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", -// "-B:eval,VCF " + fundamentalTestSNPsVCF, -// "-noEV", -// "-EV CountVariants", -// "-noST", -// "-ST AlleleCount", -// "-BTI eval", -// "-o %s" -// ), -// 1, -// Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") -// ); -// executeTest("testAlleleCountStrat", spec); -// } -//} +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import org.broadinstitute.sting.WalkerTest; +import org.testng.annotations.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class VariantEvalIntegrationTest extends WalkerTest { + private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; + private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; + private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; + private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; + + private static String cmdRoot = "-T VariantEval" + + " -R " + b36KGReference; + + private static String root = cmdRoot + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf"; + + private static String rootGZ = cmdRoot + + " -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:eval,VCF3 " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf.gz" + + " -B:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf.gz"; + + // TODO -- I can't seem to reindex this VCF using Tabix without it causing failures. Looking into it. [EB] + // private static String[] testsEnumerations = {root, rootGZ}; + private static String[] testsEnumerations = {root}; + + @Test + public void testFundamentalsCountVariantsSNPsAndIndels() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("48b8417c1f8bd74ff7b9808580abd2a2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("86d45ecefdf5849c55b3ca8f82a3d525") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Novelty", + "-ST Filter", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("3d18901ec1766aa2e748eac913f5ddcd") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithCpG() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST CpG", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("677fe398643e62a10d6739d36a720a12") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithFunctionalClasses() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST FunctionalClass", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("5fb44fd7cb00941c986a9941e43e44cd") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithDegeneracy() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Degeneracy", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("daaca7ef3b7313e5af217cbc6f37c9e2") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithSample() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST Sample", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("97c466f8ffd0fcf2c30ef08669d213d9") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithJexlExpression() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthSelect", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("df8cdfcf3d0c2fc795812c6eae6a76f8") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); + } + + @Test + public void testFundamentalsCountVariantsSNPsAndIndelsWithMultipleJexlExpressions() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST JexlExpression", + "-select 'DP < 20'", + "-selectName DepthLt20", + "-select 'DP > 20'", + "-selectName DepthGt20", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("c7aed12265e2b2311d17a0cc8a29f6aa") + ); + executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); + } + + @Test + public void testFundamentalsCountVariantsNoCompRod() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:eval,VCF " + fundamentalTestVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("d44c8f44384189a09eea85a8e89d7299") + ); + executeTest("testFundamentalsCountVariantsNoCompRod", spec); + } + + @Test + public void testSelect1() { + String extraArgs = "-L 1:1-10,000,000"; + for (String tests : testsEnumerations) { + WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", + 1, Arrays.asList("96860dedea0fa6b46c07f46b847fea42")); + executeTestParallel("testSelect1", spec); + } + } + + @Test + public void testVEGenotypeConcordance() { + String vcfFile = "GenotypeConcordanceEval.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF3 " + validationDataLocation + vcfFile + " -B:comp,VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", + 1, + Arrays.asList("e4c981f7f5d78680c71310fc9be9a1c1")); + executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); + } + + @Test + public void testCompVsEvalAC() { + String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("162daa5039e1965eb2423a8589339a69")); + executeTestParallel("testCompVsEvalAC",spec); + } + + private static String withSelect(String cmd, String select, String name) { + return String.format("%s -select '%s' -selectName %s", cmd, select, name); + } + + @Test + public void testTranches() { + String extraArgs = "-T VariantEval -R "+ hg18Reference +" -B:eval,vcf " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("90cd98044e754b80034a9f4e6d2c55b9")); + executeTestParallel("testTranches",spec); + } + + @Test + public void testCompOverlap() { + String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals -B:comphapmap,vcf " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf -B:eval,vcf " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70aa420929de7f888a6f48c2d01bbcda")); + executeTestParallel("testCompOverlap",spec); + } + + @Test + public void testEvalTrackWithoutGenotypes() { + String extraArgs = "-T VariantEval -R " + + b37KGReference + + " -L 20" + + " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("58fdc6c42fade3007537bb99fb3ce738")); + executeTestParallel("testEvalTrackWithoutGenotypes",spec); + } + + @Test + public void testMultipleEvalTracksWithoutGenotypes() { + String extraArgs = "-T VariantEval -R " + b37KGReference + + " -L 20" + + " -B:dbsnp,vcf " + b37dbSNP132 + + " -B:evalBI,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + + " -B:evalBC,VCF " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + + " -noST -ST Novelty -o %s"; + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("34df2815d27e5e62f1694731a7e7953c")); + executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); + } + + @Test + public void testMultipleCompTracks() { + String dbsnp = GATKDataLocation + "dbsnp_132_b37.vcf"; + + String extraArgs = "-T VariantEval" + + " -R " + b37KGReference + + " -B:comp,VCF " + validationDataLocation + "/VariantEval/ALL.phase1.chr20.broad.snps.genotypes.subset.vcf" + + " -B:eval,VCF " + validationDataLocation + "/VariantEval/NA12878.hg19.HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.optimized.cut.subset.vcf" + + " -B:dbsnp,VCF " + dbsnp + + " -L 20:10000000-10100000" + + " -noST -noEV -ST Novelty -EV CompOverlap" + + " -o %s"; + + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("20332902ae36a84b2fd80405410815f1")); + executeTestParallel("testMultipleCompTracks",spec); + } + + @Test + public void testPerSampleAndSubsettedSampleHaveSameResults() { + String md5 = "9d61f6e2c8592dcf616712a2c587b2af"; + + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CompOverlap", + "-sn HG00625", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList(md5) + ); + executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-subset", spec); + + WalkerTestSpec spec2 = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsOneSampleVCF, + "-noEV", + "-EV CompOverlap", + "-noST", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList(md5) + ); + executeTestParallel("testPerSampleAndSubsettedSampleHaveSameResults-onesample", spec2); + } + + + @Test + public void testAlleleCountStrat() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "-B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132_b37.vcf", + "-B:eval,VCF " + fundamentalTestSNPsVCF, + "-noEV", + "-EV CountVariants", + "-noST", + "-ST AlleleCount", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("bf324e4c87fe0d21170fcd2a67a20371") + ); + executeTest("testAlleleCountStrat", spec); + } +}