Merge branch 'PhaseByTransmission'
This commit is contained in:
commit
1347beef40
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
|
||||||
return infoDB.getSample(paternalID);
|
return infoDB.getSample(paternalID);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ArrayList<Sample> getParents(){
|
||||||
|
ArrayList<Sample> parents = new ArrayList<Sample>(2);
|
||||||
|
Sample parent = getMother();
|
||||||
|
if(parent != null)
|
||||||
|
parents.add(parent);
|
||||||
|
parent = getFather();
|
||||||
|
if(parent != null)
|
||||||
|
parents.add(parent);
|
||||||
|
return parents;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get gender of the sample
|
* Get gender of the sample
|
||||||
* @return property of key "gender" - must be of type Gender
|
* @return property of key "gender" - must be of type Gender
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -25,7 +25,13 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.jgrapht.util.MathUtil;
|
||||||
|
|
||||||
|
import java.util.EnumMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
public class GenotypeLikelihoods {
|
public class GenotypeLikelihoods {
|
||||||
public static final boolean CAP_PLS = false;
|
public static final boolean CAP_PLS = false;
|
||||||
|
|
@ -94,6 +100,48 @@ public class GenotypeLikelihoods {
|
||||||
return likelihoodsAsString_PLs;
|
return likelihoodsAsString_PLs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Return genotype likelihoods as an EnumMap with Genotypes as keys and likelihoods as values
|
||||||
|
//Returns null in case of missing likelihoods
|
||||||
|
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
|
||||||
|
//Make sure that the log10likelihoods are set
|
||||||
|
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||||
|
if(likelihoods == null)
|
||||||
|
return null;
|
||||||
|
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
|
||||||
|
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
|
||||||
|
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
|
||||||
|
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
|
||||||
|
return likelihoodsMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Return the neg log10 Genotype Quality (GQ) for the given genotype
|
||||||
|
//Returns Double.NEGATIVE_INFINITY in case of missing genotype
|
||||||
|
public double getNegLog10GQ(Genotype.Type genotype){
|
||||||
|
|
||||||
|
double qual = Double.NEGATIVE_INFINITY;
|
||||||
|
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
|
||||||
|
if(likelihoods == null)
|
||||||
|
return qual;
|
||||||
|
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
|
||||||
|
if(likelihood.getKey() == genotype)
|
||||||
|
continue;
|
||||||
|
if(likelihood.getValue() > qual)
|
||||||
|
qual = likelihood.getValue();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//Quality of the most likely genotype = likelihood(most likely) - likelihood (2nd best)
|
||||||
|
qual = likelihoods.get(genotype) - qual;
|
||||||
|
|
||||||
|
//Quality of other genotypes 1-P(G)
|
||||||
|
if (qual < 0) {
|
||||||
|
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
|
||||||
|
double chosenGenotype = normalized[genotype.ordinal()-1];
|
||||||
|
qual = -1.0 * Math.log10(1.0 - chosenGenotype);
|
||||||
|
}
|
||||||
|
return qual;
|
||||||
|
}
|
||||||
|
|
||||||
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
|
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
|
||||||
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||||
String[] strings = likelihoodsAsString_PLs.split(",");
|
String[] strings = likelihoodsAsString_PLs.split(",");
|
||||||
|
|
|
||||||
|
|
@ -6,23 +6,131 @@ import org.testng.annotations.Test;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
||||||
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
|
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
|
||||||
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
|
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
|
||||||
|
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
|
||||||
|
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
|
||||||
|
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
|
||||||
|
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
|
||||||
|
|
||||||
|
//Tests using PbT on all genotypes with default parameters
|
||||||
|
//And all reporting options
|
||||||
@Test
|
@Test
|
||||||
public void testBasicFunctionality() {
|
public void testTrueNegativeMV() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
buildCommandLine(
|
buildCommandLine(
|
||||||
"-T PhaseByTransmission",
|
"-T PhaseByTransmission",
|
||||||
"-NO_HEADER",
|
"-NO_HEADER",
|
||||||
"-R " + b37KGReference,
|
"-R " + b37KGReference,
|
||||||
"--variant " + fundamentalTestVCF,
|
"--variant " + TNTest,
|
||||||
"-f NA12892+NA12891=NA12878",
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
|
"-mvf %s",
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
|
||||||
|
);
|
||||||
|
executeTest("testTrueNegativeMV", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTruePositiveMV() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T PhaseByTransmission",
|
||||||
|
"-NO_HEADER",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--variant " + TPTest,
|
||||||
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
|
"-mvf %s",
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
|
||||||
|
);
|
||||||
|
executeTest("testTruePositiveMV", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFalsePositiveMV() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T PhaseByTransmission",
|
||||||
|
"-NO_HEADER",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--variant " + FPTest,
|
||||||
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
|
"-mvf %s",
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
|
||||||
|
);
|
||||||
|
executeTest("testFalsePositiveMV", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSpecialCases() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T PhaseByTransmission",
|
||||||
|
"-NO_HEADER",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--variant " + SpecialTest,
|
||||||
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
|
"-mvf %s",
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
|
||||||
|
);
|
||||||
|
executeTest("testSpecialCases", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Test using a different prior
|
||||||
|
//Here the FP file is used but as the prior is lowered, 3 turn to TP
|
||||||
|
@Test
|
||||||
|
public void testPriorOption() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T PhaseByTransmission",
|
||||||
|
"-NO_HEADER",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--variant " + FPTest,
|
||||||
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
|
"-prior 1e-4",
|
||||||
|
"-mvf %s",
|
||||||
|
"-o %s"
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
|
||||||
|
);
|
||||||
|
executeTest("testPriorOption", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Test when running without MV reporting option
|
||||||
|
//This is the exact same test file as FP but should not generate a .mvf file
|
||||||
|
@Test
|
||||||
|
public void testMVFileOption() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
buildCommandLine(
|
||||||
|
"-T PhaseByTransmission",
|
||||||
|
"-NO_HEADER",
|
||||||
|
"-R " + b37KGReference,
|
||||||
|
"--variant " + FPTest,
|
||||||
|
"-ped "+ goodFamilyFile,
|
||||||
|
"-L 1:10109-10315",
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("")
|
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
|
||||||
);
|
);
|
||||||
executeTest("testBasicFunctionality", spec);
|
executeTest("testMVFileOption", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
||||||
|
import java.util.EnumMap;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic unit test for Genotype likelihoods objects
|
* Basic unit test for Genotype likelihoods objects
|
||||||
|
|
@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
|
||||||
gl.getAsVector();
|
gl.getAsVector();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetAsMap(){
|
||||||
|
GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
|
||||||
|
//Log scale
|
||||||
|
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
|
||||||
|
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||||
|
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||||
|
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||||
|
|
||||||
|
//Linear scale
|
||||||
|
glMap = gl.getAsMap(true);
|
||||||
|
double [] vl = MathUtils.normalizeFromLog10(v);
|
||||||
|
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
|
||||||
|
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
|
||||||
|
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
|
||||||
|
|
||||||
|
//Test missing likelihoods
|
||||||
|
gl = new GenotypeLikelihoods(".");
|
||||||
|
glMap = gl.getAsMap(false);
|
||||||
|
Assert.assertNull(glMap);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNegLog10GQ(){
|
||||||
|
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
|
||||||
|
|
||||||
|
//GQ for the best guess genotype
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),3.9);
|
||||||
|
|
||||||
|
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
|
||||||
|
|
||||||
|
//GQ for the other genotypes
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR), -1.0 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
|
||||||
|
|
||||||
|
//Test missing likelihoods
|
||||||
|
gl = new GenotypeLikelihoods(".");
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
|
||||||
|
Assert.assertEquals(gl.getNegLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
|
||||||
Assert.assertEquals(v1.length, v2.length);
|
Assert.assertEquals(v1.length, v2.length);
|
||||||
for ( int i = 0; i < v1.length; i++ ) {
|
for ( int i = 0; i < v1.length; i++ ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue