Updating VariantGaussianMixtureModelUnitTest to use truth sensitivity cutting
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5750 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
08f0509a5c
commit
6c7a0adc76
|
|
@ -83,7 +83,7 @@ public class Tranche implements Comparable<Tranche> {
|
|||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.2f) novel=(%d @ %.2f) truthSites(%d accessible, %d called), name=%s]",
|
||||
return String.format("Tranche ts=%.2f minVQSLod=%.4f known=(%d @ %.4f) novel=(%d @ %.4f) truthSites(%d accessible, %d called), name=%s]",
|
||||
ts, minVQSLod, numKnown, knownTiTv, numNovel, novelTiTv, accessibleTruthSites, callsAtTruthSites, name);
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +161,7 @@ public class Tranche implements Comparable<Tranche> {
|
|||
throw new UserException.MalformedFile(f, "Expected 10 elements in header line " + line);
|
||||
} else {
|
||||
if ( header.length != vals.length )
|
||||
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + " line " + line);
|
||||
throw new UserException.MalformedFile(f, "Line had too few/many fields. Header = " + header.length + " vals " + vals.length + ". The line was: " + line);
|
||||
|
||||
Map<String,String> bindings = new HashMap<String, String>();
|
||||
for ( int i = 0; i < vals.length; i++ ) bindings.put(header[i], vals[i]);
|
||||
|
|
|
|||
|
|
@ -24,33 +24,20 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.Assert;
|
||||
|
||||
import Jama.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* User: depristo
|
||||
* Date: Feb 26, 2010
|
||||
*/
|
||||
|
||||
|
|
@ -59,13 +46,12 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
VariantDatum[] variantData1 = new VariantDatum[N_VARIANTS];
|
||||
|
||||
private final File QUAL_DATA = new File(testDir + "tranches.raw.dat");
|
||||
private final double[] FDRS = new double[]{0.1, 1, 2, 10};
|
||||
private final double TARGET_TITV = 2.8;
|
||||
private final double[] TRUTH_SENSITIVITY_CUTS = new double[]{99.9, 99.0, 97.0, 95.0};
|
||||
private final File EXPECTED_TRANCHES_NEW = new File(testDir + "tranches.6.txt");
|
||||
private final File EXPECTED_TRANCHES_OLD = new File(testDir + "tranches.4.txt");
|
||||
|
||||
private List<VariantDatum> readData() {
|
||||
List<VariantDatum> vd = new ArrayList<VariantDatum>();
|
||||
private ArrayList<VariantDatum> readData() {
|
||||
ArrayList<VariantDatum> vd = new ArrayList<VariantDatum>();
|
||||
try {
|
||||
for ( String line : new XReadLines(QUAL_DATA, true) ) {
|
||||
String[] parts = line.split("\t");
|
||||
|
|
@ -75,6 +61,8 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
datum.lod = Double.valueOf(parts[3]);
|
||||
datum.isTransition = parts[1].equals("1");
|
||||
datum.isKnown = ! parts[2].equals(".");
|
||||
datum.isSNP = true;
|
||||
datum.atTruthSite = datum.isKnown;
|
||||
vd.add(datum);
|
||||
}
|
||||
}
|
||||
|
|
@ -87,7 +75,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
|
||||
@Test(expectedExceptions = {UserException.MalformedFile.class})
|
||||
public final void readBadFormat() {
|
||||
Tranche.readTraches(QUAL_DATA);
|
||||
Tranche.readTranches(QUAL_DATA);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -101,7 +89,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public final List<Tranche> read(File f) {
|
||||
return Tranche.readTraches(f);
|
||||
return Tranche.readTranches(f);
|
||||
}
|
||||
|
||||
private static void assertTranchesAreTheSame(List<Tranche> newFormat, List<Tranche> oldFormat, boolean completeP, boolean includeName) {
|
||||
|
|
@ -109,7 +97,7 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < newFormat.size(); i++ ) {
|
||||
Tranche n = newFormat.get(i);
|
||||
Tranche o = oldFormat.get(i);
|
||||
Assert.assertEquals(n.fdr, o.fdr, 1e-3);
|
||||
Assert.assertEquals(n.ts, o.ts, 1e-3);
|
||||
Assert.assertEquals(n.numNovel, o.numNovel);
|
||||
Assert.assertEquals(n.novelTiTv, o.novelTiTv, 1e-3);
|
||||
if ( includeName )
|
||||
|
|
@ -117,33 +105,27 @@ public final class VariantGaussianMixtureModelUnitTest extends BaseTest {
|
|||
if ( completeP ) {
|
||||
Assert.assertEquals(n.numKnown, o.numKnown);
|
||||
Assert.assertEquals(n.knownTiTv, o.knownTiTv, 1e-3);
|
||||
Assert.assertEquals(n.targetTiTv, o.targetTiTv, 1e-3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final List<Tranche> findMyTranches(List<VariantDatum> vd, double[] fdrs, double targetTiTv) {
|
||||
VariantGaussianMixtureModel.SelectionMetric metric = new VariantGaussianMixtureModel.NovelTiTvMetric(targetTiTv);
|
||||
return VariantGaussianMixtureModel.findTranches(vd.toArray(new VariantDatum[0]), fdrs, metric);
|
||||
private static List<Tranche> findMyTranches(ArrayList<VariantDatum> vd, double[] tranches) {
|
||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( vd, Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
return TrancheManager.findTranches(vd, tranches, metric);
|
||||
}
|
||||
|
||||
@Test
|
||||
public final void testFindTranches1() {
|
||||
List<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, FDRS, TARGET_TITV);
|
||||
ArrayList<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, TRUTH_SENSITIVITY_CUTS);
|
||||
System.out.printf(Tranche.tranchesString(tranches));
|
||||
assertTranchesAreTheSame(read(EXPECTED_TRANCHES_NEW), tranches, true, false);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = {UserException.class})
|
||||
public final void testBadFDR() {
|
||||
List<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, new double[]{-1}, TARGET_TITV);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = {UserException.class})
|
||||
public final void testBadTargetTiTv() {
|
||||
List<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, FDRS, 0.1);
|
||||
ArrayList<VariantDatum> vd = readData();
|
||||
List<Tranche> tranches = findMyTranches(vd, new double[]{-1});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Variant quality score tranches file
|
||||
# Version number 2
|
||||
FDRtranche,targetTiTv,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName
|
||||
0.10,2.80,15839,897,3.3052,2.8008,-3.8485,FDRtranche0.00to0.10
|
||||
1.00,2.80,15844,900,3.3043,2.7815,-3.8652,FDRtranche0.10to1.00
|
||||
2.00,2.80,15847,902,3.3051,2.7583,-3.8826,FDRtranche1.00to2.00
|
||||
10.00,2.80,16153,1039,3.2744,2.5704,-5.1058,FDRtranche2.00to10.00
|
||||
# Version number 4
|
||||
targetTruthSensitivity,numKnown,numNovel,knownTiTv,novelTiTv,minVQSLod,filterName,accessibleTruthSites,callsAtTruthSites,truthSensitivity
|
||||
95.00,15749,872,3.3207,2.9279,-3.6382,TruthSensitivityTranche0.00to95.00,16578,15749,0.9500
|
||||
97.00,16080,1011,3.2766,2.5979,-4.7439,TruthSensitivityTranche95.00to97.00,16578,16080,0.9700
|
||||
99.00,16412,1281,3.2507,1.9516,-8.4204,TruthSensitivityTranche97.00to99.00,16578,16412,0.9900
|
||||
99.90,16561,1573,3.2334,1.6043,-142.4519,TruthSensitivityTranche99.0to99.90,16578,16561,0.9990
|
||||
|
|
|
|||
Loading…
Reference in New Issue