Merge remote-tracking branch 'Laurent/master'

This commit is contained in:
Eric Banks 2011-11-16 09:34:47 -05:00
commit c2ebe58712
5 changed files with 968 additions and 215 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
return infoDB.getSample(paternalID);
}
public ArrayList<Sample> getParents(){
ArrayList<Sample> parents = new ArrayList<Sample>(2);
Sample parent = getMother();
if(parent != null)
parents.add(parent);
parent = getFather();
if(parent != null)
parents.add(parent);
return parents;
}
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender

View File

@ -25,7 +25,13 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.jgrapht.util.MathUtil;
import java.util.EnumMap;
import java.util.Map;
public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false;
@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
//Returns null in case of missing likelihoods
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
if(likelihoods == null)
return null;
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
return likelihoodsMap;
}
//Return the neg log10 Genotype Quality (GQ) for the given genotype
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
public double getNegLog10GQ(Genotype.Type genotype){
double qual = Double.NEGATIVE_INFINITY;
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
if(likelihoods == null)
return qual;
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
if(likelihood.getKey() == genotype)
continue;
if(likelihood.getValue() > qual)
qual = likelihood.getValue();
}
//Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
qual = likelihoods.get(genotype) - qual;
//Quality of other genotypes 1-P(G)
if (qual < 0) {
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
double chosenGenotype = normalized[genotype.ordinal()-1];
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
}
return qual;
}
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");

View File

@ -6,23 +6,131 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
//Tests using PbT on all genotypes with default parameters
//And all reporting options
@Test
public void testBasicFunctionality() {
public void testTrueNegativeMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + fundamentalTestVCF,
"-f NA12892+NA12891=NA12878",
"--variant " + TNTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
);
executeTest("testTrueNegativeMV", spec);
}
@Test
public void testTruePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + TPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
);
executeTest("testTruePositiveMV", spec);
}
@Test
public void testFalsePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testFalsePositiveMV", spec);
}
@Test
public void testSpecialCases() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + SpecialTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
);
executeTest("testSpecialCases", spec);
}
//Test using a different prior
//Here the FP file is used but as the prior is lowered, 3 turn to TP
@Test
public void testPriorOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-prior 1e-4",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
);
executeTest("testPriorOption", spec);
}
//Test when running without MV reporting option
//This is the exact same test file as FP but should not generate a .mvf file
@Test
public void testMVFileOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-o %s"
),
1,
Arrays.asList("")
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testBasicFunctionality", spec);
executeTest("testMVFileOption", spec);
}
}

View File

@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.EnumMap;
/**
* Basic unit test for Genotype likelihoods objects
@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
gl.getAsVector();
}
@Test
public void testGetAsMap(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
//Log scale
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Linear scale
glMap = gl.getAsMap(true);
double [] vl = MathUtils.normalizeFromLog10(v);
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
glMap = gl.getAsMap(false);
Assert.assertNull(glMap);
}
@Test
public void testGetNegLog10GQ(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
//GQ for the best guess genotype
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),3.9);
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
//GQ for the other genotypes
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
}
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {