From 6c7a0adc7641e9352f22e1297c4048bac178f3f4 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Wed, 4 May 2011 13:56:01 +0000 Subject: [PATCH] Updating VariantGaussianMixtureModelUnitTest to use truth sensitivity cutting git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5750 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/variantrecalibration/Tranche.java | 4 +- .../VariantGaussianMixtureModelUnitTest.java | 52 ++++++------------- testdata/tranches.6.txt | 12 ++--- 3 files changed, 25 insertions(+), 43 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java index 1ca905518..fbee64fe2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/Tranche.java @@ -83,7 +83,7 @@ public class Tranche implements Comparable { } public String toString() { - return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.2f) novel=(%d @ %.2f) truthSites(%d accessible, %d called), name=%s]", + return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]", ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name); } @@ -161,7 +161,7 @@ public class Tranche implements Comparable { throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line); } else { if ( header.length != vals.length ) - throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line); + throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line); Map bindings = new HashMap(); for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java index 037ab6c98..eece0ff39 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModelUnitTest.java @@ -24,33 +24,20 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; -import org.apache.log4j.Logger; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.BaseTest; import org.testng.annotations.Test; -import org.testng.annotations.BeforeTest; import org.testng.Assert; -import Jama.*; - import java.io.File; import java.io.FileNotFoundException; -import java.io.PrintStream; import java.util.*; -import java.util.regex.Pattern; /** * Created by IntelliJ IDEA. - * User: rpoplin + * User: depristo * Date: Feb 26, 2010 */ @@ -59,13 +46,12 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS]; private final File QUAL_DATA = new File(testDir + "tranches.raw.dat"); - private final double[] FDRS = new double[]{0.1, 1, 2, 10}; - private final double TARGET_TITV = 2.8; + private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0}; private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt"); private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt"); - private List readData() { - List vd = new ArrayList(); + private ArrayList readData() { + ArrayList vd = new ArrayList(); try { for ( String line : new XReadLines(QUAL_DATA, true) ) { String[] parts = line.split("\t"); @@ -75,6 +61,8 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { datum.lod = Double.valueOf(parts[3]); datum.isTransition = parts[1].equals("1"); datum.isKnown = ! parts[2].equals("."); + datum.isSNP = true; + datum.atTruthSite = datum.isKnown; vd.add(datum); } } @@ -87,7 +75,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { @Test(expectedExceptions = {UserException.MalformedFile.class}) public final void readBadFormat() { - Tranche.readTraches(QUAL_DATA); + Tranche.readTranches(QUAL_DATA); } @Test @@ -101,7 +89,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { } public final List read(File f) { - return Tranche.readTraches(f); + return Tranche.readTranches(f); } private static void assertTranchesAreTheSame(List newFormat, List oldFormat, boolean completeP, boolean includeName) { @@ -109,7 +97,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { for ( int i = 0; i < newFormat.size(); i++ ) { Tranche n = newFormat.get(i); Tranche o = oldFormat.get(i); - Assert.assertEquals(n.fdr, o.fdr, 1e-3); + Assert.assertEquals(n.ts, o.ts, 1e-3); Assert.assertEquals(n.numNovel, o.numNovel); Assert.assertEquals(n.novelTiTv, o.novelTiTv, 1e-3); if ( includeName ) @@ -117,33 +105,27 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest { if ( completeP ) { Assert.assertEquals(n.numKnown, o.numKnown); Assert.assertEquals(n.knownTiTv, o.knownTiTv, 1e-3); - Assert.assertEquals(n.targetTiTv, o.targetTiTv, 1e-3); } } } - private static final List findMyTranches(List vd, double[] fdrs, double targetTiTv) { - VariantGaussianMixtureModel.SelectionMetric metric = new VariantGaussianMixtureModel.NovelTiTvMetric(targetTiTv); - return VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), fdrs, metric); + private static List findMyTranches(ArrayList vd, double[] tranches) { + final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY ); + final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth ); + return TrancheManager.findTranches(vd, tranches, metric); } @Test public final void testFindTranches1() { - List vd = readData(); - List tranches = findMyTranches(vd, FDRS, TARGET_TITV); + ArrayList vd = readData(); + List tranches = findMyTranches(vd, TRUTH_SENSITIVITY_CUTS); System.out.printf(Tranche.tranchesString(tranches)); assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false); } @Test(expectedExceptions = {UserException.class}) public final void testBadFDR() { - List vd = readData(); - List tranches = findMyTranches(vd, new double[]{-1}, TARGET_TITV); - } - - @Test(expectedExceptions = {UserException.class}) - public final void testBadTargetTiTv() { - List vd = readData(); - List tranches = findMyTranches(vd, FDRS, 0.1); + ArrayList vd = readData(); + List tranches = findMyTranches(vd, new double[]{-1}); } } diff --git a/testdata/tranches.6.txt b/testdata/tranches.6.txt index 983c444b4..981ed072b 100644 --- a/testdata/tranches.6.txt +++ b/testdata/tranches.6.txt @@ -1,7 +1,7 @@ # Variant quality score tranches file -# Version number 2 -FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName -0.10,2.80,15839,897,3.3052,2.8008,-3.8485,FDRtranche0.00to0.10 -1.00,2.80,15844,900,3.3043,2.7815,-3.8652,FDRtranche0.10to1.00 -2.00,2.80,15847,902,3.3051,2.7583,-3.8826,FDRtranche1.00to2.00 -10.00,2.80,16153,1039,3.2744,2.5704,-5.1058,FDRtranche2.00to10.00 +# Version number 4 +targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity +95.00,15749,872,3.3207,2.9279,-3.6382,TruthSensitivityTranche0.00to95.00,16578,15749,0.9500 +97.00,16080,1011,3.2766,2.5979,-4.7439,TruthSensitivityTranche95.00to97.00,16578,16080,0.9700 +99.00,16412,1281,3.2507,1.9516,-8.4204,TruthSensitivityTranche97.00to99.00,16578,16412,0.9900 +99.90,16561,1573,3.2334,1.6043,-142.4519,TruthSensitivityTranche99.0to99.90,16578,16561,0.9990