Updating VariantGaussianMixtureModelUnitTest to use truth sensitivity cutting

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-05-04 13:56:01 +00:00
parent 08f0509a5c
commit 6c7a0adc76
3 changed files with 25 additions and 43 deletions

View File

@ -83,7 +83,7 @@ public class Tranche implements Comparable<Tranche> {
}
public String toString() {
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.2f) novel=(%d @ %.2f) truthSites(%d accessible, %d called), name=%s]",
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]",
ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name);
}
@ -161,7 +161,7 @@ public class Tranche implements Comparable<Tranche> {
throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
} else {
if ( header.length != vals.length )
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line);
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
Map<String,String> bindings = new HashMap<String, String>();
for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]);

View File

@ -24,33 +24,20 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import org.testng.annotations.BeforeTest;
import org.testng.Assert;
import Jama.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.*;
import java.util.regex.Pattern;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* User: depristo
* Date: Feb 26, 2010
*/
@ -59,13 +46,12 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
private final double[] FDRS = new double[]{0.1, 1, 2, 10};
private final double TARGET_TITV = 2.8;
private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0};
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
private List<VariantDatum> readData() {
List<VariantDatum> vd = new ArrayList<VariantDatum>();
private ArrayList<VariantDatum> readData() {
ArrayList<VariantDatum> vd = new ArrayList<VariantDatum>();
try {
for ( String line : new XReadLines(QUAL_DATA, true) ) {
String[] parts = line.split("\t");
@ -75,6 +61,8 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
datum.lod = Double.valueOf(parts[3]);
datum.isTransition = parts[1].equals("1");
datum.isKnown = ! parts[2].equals(".");
datum.isSNP = true;
datum.atTruthSite = datum.isKnown;
vd.add(datum);
}
}
@ -87,7 +75,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
@Test(expectedExceptions = {UserException.MalformedFile.class})
public final void readBadFormat() {
Tranche.readTraches(QUAL_DATA);
Tranche.readTranches(QUAL_DATA);
}
@Test
@ -101,7 +89,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
}
public final List<Tranche> read(File f) {
return Tranche.readTraches(f);
return Tranche.readTranches(f);
}
private static void assertTranchesAreTheSame(List<Tranche> newFormat, List<Tranche> oldFormat, boolean completeP, boolean includeName) {
@ -109,7 +97,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
for ( int i = 0; i < newFormat.size(); i++ ) {
Tranche n = newFormat.get(i);
Tranche o = oldFormat.get(i);
Assert.assertEquals(n.fdr, o.fdr, 1e-3);
Assert.assertEquals(n.ts, o.ts, 1e-3);
Assert.assertEquals(n.numNovel, o.numNovel);
Assert.assertEquals(n.novelTiTv, o.novelTiTv, 1e-3);
if ( includeName )
@ -117,33 +105,27 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
if ( completeP ) {
Assert.assertEquals(n.numKnown, o.numKnown);
Assert.assertEquals(n.knownTiTv, o.knownTiTv, 1e-3);
Assert.assertEquals(n.targetTiTv, o.targetTiTv, 1e-3);
}
}
}
private static final List<Tranche> findMyTranches(List<VariantDatum> vd, double[] fdrs, double targetTiTv) {
VariantGaussianMixtureModel.SelectionMetric metric = new VariantGaussianMixtureModel.NovelTiTvMetric(targetTiTv);
return VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), fdrs, metric);
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
return TrancheManager.findTranches(vd, tranches, metric);
}
@Test
public final void testFindTranches1() {
List<VariantDatum> vd = readData();
List<Tranche> tranches = findMyTranches(vd, FDRS, TARGET_TITV);
ArrayList<VariantDatum> vd = readData();
List<Tranche> tranches = findMyTranches(vd, TRUTH_SENSITIVITY_CUTS);
System.out.printf(Tranche.tranchesString(tranches));
assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false);
}
@Test(expectedExceptions = {UserException.class})
public final void testBadFDR() {
List<VariantDatum> vd = readData();
List<Tranche> tranches = findMyTranches(vd, new double[]{-1}, TARGET_TITV);
}
@Test(expectedExceptions = {UserException.class})
public final void testBadTargetTiTv() {
List<VariantDatum> vd = readData();
List<Tranche> tranches = findMyTranches(vd, FDRS, 0.1);
ArrayList<VariantDatum> vd = readData();
List<Tranche> tranches = findMyTranches(vd, new double[]{-1});
}
}

View File

@ -1,7 +1,7 @@
# Variant quality score tranches file
# Version number 2
FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName
0.10,2.80,15839,897,3.3052,2.8008,-3.8485,FDRtranche0.00to0.10
1.00,2.80,15844,900,3.3043,2.7815,-3.8652,FDRtranche0.10to1.00
2.00,2.80,15847,902,3.3051,2.7583,-3.8826,FDRtranche1.00to2.00
10.00,2.80,16153,1039,3.2744,2.5704,-5.1058,FDRtranche2.00to10.00
# Version number 4
targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity
95.00,15749,872,3.3207,2.9279,-3.6382,TruthSensitivityTranche0.00to95.00,16578,15749,0.9500
97.00,16080,1011,3.2766,2.5979,-4.7439,TruthSensitivityTranche95.00to97.00,16578,16080,0.9700
99.00,16412,1281,3.2507,1.9516,-8.4204,TruthSensitivityTranche97.00to99.00,16578,16412,0.9900
99.90,16561,1573,3.2334,1.6043,-142.4519,TruthSensitivityTranche99.0to99.90,16578,16561,0.9990