Continuing cleanup of SSG. GenotypeLikelihoods now have extensive testing routines. DiploidGenotype supports het, homref, etc calculations. SSG has been cleaned up to remove old garbage functionality. Also now supports output to standard output by simply omitting varout
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1387 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d60d5aa516
commit
bbd7bec5db
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
|
|
@ -17,5 +19,31 @@ public enum DiploidGenotype {
|
|||
CT,
|
||||
GG,
|
||||
GT,
|
||||
TT
|
||||
}
|
||||
TT;
|
||||
|
||||
public boolean isHomRef(char r) {
|
||||
return isHom() && r == this.toString().charAt(0);
|
||||
}
|
||||
|
||||
public boolean isHomVar(char r) {
|
||||
return isHom() && r != this.toString().charAt(0);
|
||||
}
|
||||
|
||||
public boolean isHetRef(char r) {
|
||||
return Utils.countOccurrences(r, this.toString()) == 1;
|
||||
}
|
||||
|
||||
public boolean isHom() {
|
||||
return ! isHet();
|
||||
}
|
||||
|
||||
public boolean isHet() {
|
||||
switch (this) {
|
||||
case AA:
|
||||
case CC:
|
||||
case GG:
|
||||
case TT: return false;
|
||||
default: return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -18,7 +18,7 @@ public abstract class GenotypeLikelihoods {
|
|||
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
||||
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
||||
protected static final double log10Of1_3 = log10(1.0 / 3);
|
||||
protected static final double log10Of1_3 = log10(1.0 / 3.0);
|
||||
|
||||
static {
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
|
|
@ -43,15 +43,46 @@ public abstract class GenotypeLikelihoods {
|
|||
* @param h the heterozygosity [probability of a base being heterozygous]
|
||||
*/
|
||||
public static double[] heterozygosity2DiploidProbabilities(double h) {
|
||||
if (MathUtils.isNegativeOrZero(h)) {
|
||||
double[] pdbls = new double[3];
|
||||
|
||||
pdbls[0] = heterozygosity2HomRefProbability(h);
|
||||
pdbls[1] = heterozygosity2HetProbability(h);
|
||||
pdbls[2] = heterozygosity2HomVarProbability(h);
|
||||
return pdbls;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param h
|
||||
* @return
|
||||
*/
|
||||
public static double heterozygosity2HomRefProbability(double h) {
|
||||
if (MathUtils.isNegative(h)) {
|
||||
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
|
||||
}
|
||||
|
||||
double[] pdbls = new double[3];
|
||||
pdbls[0] = 1.0 - (3.0 * h / 2.0);
|
||||
pdbls[1] = h;
|
||||
pdbls[2] = h / 2.0;
|
||||
return pdbls;
|
||||
double v = 1.0 - (3.0 * h / 2.0);
|
||||
if (MathUtils.isNegative(v)) {
|
||||
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
public static double heterozygosity2HetProbability(double h) {
|
||||
if (MathUtils.isNegative(h)) {
|
||||
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
public static double heterozygosity2HomVarProbability(double h) {
|
||||
if (MathUtils.isNegative(h)) {
|
||||
throw new RuntimeException(String.format("Heterozygous value is bad %f", h));
|
||||
}
|
||||
|
||||
return h / 2.0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
|
||||
coverage++;
|
||||
for (DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double likelihood = calculateBaseLikelihood(observedBase, g.toString(), qualityScore);
|
||||
double likelihood = calculateBaseLikelihood(observedBase, g, qualityScore);
|
||||
//System.out.printf("Likelihood is %f for %c %d %s%n", likelihood, read, qual, g.toString());
|
||||
likelihoods[g.ordinal()] += likelihood;
|
||||
posteriors[g.ordinal()] += likelihood;
|
||||
|
|
@ -262,21 +262,17 @@ public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
|||
return add(pileup, false);
|
||||
}
|
||||
|
||||
|
||||
private double calculateBaseLikelihood(char read, String genotype, byte qual) {
|
||||
private double calculateBaseLikelihood(char observedBase, DiploidGenotype g, byte qual) {
|
||||
if (qual == 0) { // zero quals are wrong
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", read, qual));
|
||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %s %d", observedBase, g, qual));
|
||||
}
|
||||
|
||||
char h1 = genotype.charAt(0);
|
||||
char h2 = genotype.charAt(1);
|
||||
double p_base = 0.0;
|
||||
|
||||
double p_base;
|
||||
|
||||
if ((h1 == h2) && (h1 == read)) {
|
||||
if ( g.isHomRef(observedBase) ) {
|
||||
// hom
|
||||
p_base = getOneMinusQual(qual);
|
||||
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
|
||||
} else if ( g.isHetRef(observedBase) ) {
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -20,13 +20,11 @@ import java.io.File;
|
|||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, GenotypeWriter> {
|
||||
// Control output settings
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
|
||||
public File VARIANTS_FILE;
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
|
||||
public File VARIANTS_FILE = null;
|
||||
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
|
|
@ -38,37 +36,17 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
|
||||
public boolean GENOTYPE = false;
|
||||
|
||||
//@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
|
||||
// todo -- remove me
|
||||
public boolean THREE_BASE_ERRORS = true;
|
||||
|
||||
public enum Caller {
|
||||
OLD_AND_BUSTED,
|
||||
NEW_HOTNESS
|
||||
}
|
||||
|
||||
@Argument(fullName = "caller", doc = "", required = false)
|
||||
public Caller caller = Caller.NEW_HOTNESS;
|
||||
|
||||
// Control how the genotype hypotheses are weighed
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||
public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
|
||||
|
||||
// todo -- remove dbSNP awareness until we understand how it should be used
|
||||
//@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
|
||||
public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||
//public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||
|
||||
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
|
||||
public boolean keepQ0Bases = false;
|
||||
|
||||
public double[] plocus;
|
||||
//public double[] phapmap;
|
||||
public double[] pdbsnp;
|
||||
|
||||
public long nFilteredQ0Bases = 0;
|
||||
|
||||
// todo -- remove this functionality
|
||||
private final static boolean TESTING_NEW = false;
|
||||
|
||||
/**
|
||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
||||
|
|
@ -83,58 +61,20 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
return (BaseUtils.simpleBaseToBaseIndex(ref) != -1 && context.getReads().size() != 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an array string (value1,value2,...,valueN) to a double array
|
||||
*
|
||||
* @param priorsString the array string of priors
|
||||
*
|
||||
* @return the same array, but each value is now a double
|
||||
*/
|
||||
private double[] priorsArray(String priorsString) {
|
||||
String[] pstrs = priorsString.split(",");
|
||||
double[] pdbls = new double[pstrs.length];
|
||||
|
||||
for (int i = 0; i < pstrs.length; i++) {
|
||||
pdbls[i] = Double.valueOf(pstrs[i]);
|
||||
}
|
||||
|
||||
return pdbls;
|
||||
}
|
||||
|
||||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
plocus = OldAndBustedGenotypeLikelihoods.computePriors(heterozygosity);
|
||||
//phapmap = priorsArray(PRIORS_HAPMAP);
|
||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param ref the reference base
|
||||
* @param refContext the reference base
|
||||
* @param context contextual information around the locus
|
||||
*/
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( TESTING_NEW ) {
|
||||
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref.getBase(), context);
|
||||
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref.getBase(), context);
|
||||
|
||||
if ( ! oldAndBusted.equals(newHotness) ) {
|
||||
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
|
||||
}
|
||||
|
||||
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
|
||||
} else {
|
||||
switch ( caller ) {
|
||||
case OLD_AND_BUSTED: return mapOldAndBusted(tracker, ref.getBase(), context);
|
||||
case NEW_HOTNESS: return mapNewHotness(tracker, ref.getBase(), context);
|
||||
default: return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
char ref = refContext.getBase();
|
||||
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(ref, GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
G.filterQ0Bases(! keepQ0Bases);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
|
|
@ -146,35 +86,10 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
lst.add(new BasicGenotype(pileup.getLocation(), g.toString(),new BayesianConfidenceScore(G.getLikelihood(g))));
|
||||
}
|
||||
|
||||
return new SSGGenotypeCall(! GENOTYPE, ref, 2, lst, G.getPosteriors(), pileup);
|
||||
}
|
||||
|
||||
public SSGGenotypeCall callGenotypes(OldAndBustedGenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||
// for calculating the rms of the mapping qualities
|
||||
double squared = 0.0;
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
squared += read.getMappingQuality() * read.getMappingQuality();
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
G.add(ref, base, qual);
|
||||
}
|
||||
|
||||
// save off the likelihoods
|
||||
if (G.likelihoods == null || G.likelihoods.length == 0) return null;
|
||||
|
||||
// Apply the two calculations
|
||||
G.applyPrior(ref);
|
||||
|
||||
// lets setup the locus
|
||||
List<Genotype> lst = new ArrayList<Genotype>();
|
||||
for (int x = 0; x < G.likelihoods.length; x++) {
|
||||
lst.add(new BasicGenotype(pileup.getLocation(),OldAndBustedGenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
|
||||
}
|
||||
return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether we're at a Hapmap site
|
||||
*
|
||||
|
|
@ -203,8 +118,10 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
* @return an empty string
|
||||
*/
|
||||
public GenotypeWriter reduceInit() {
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE);
|
||||
|
||||
if ( VARIANTS_FILE != null )
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE);
|
||||
else
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -228,40 +145,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
|
||||
/** Close the variant file. */
|
||||
public void onTraversalDone(GenotypeWriter sum) {
|
||||
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
|
||||
sum.close();
|
||||
}
|
||||
|
||||
//
|
||||
// TODO -- delete the old and busted routines
|
||||
//
|
||||
private OldAndBustedGenotypeLikelihoods makeOldAndBustedGenotypeLikelihoods(RefMetaDataTracker tracker) {
|
||||
OldAndBustedGenotypeLikelihoods G = null;
|
||||
|
||||
if (isHapmapSite(tracker)) {
|
||||
G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
} else if (isDbSNPSite(tracker)) {
|
||||
G = new OldAndBustedGenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
|
||||
} else {
|
||||
G = new OldAndBustedGenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
|
||||
}
|
||||
|
||||
G.filterQ0Bases(! keepQ0Bases);
|
||||
|
||||
return G;
|
||||
}
|
||||
|
||||
//
|
||||
// TODO -- delete the old and busted routines
|
||||
//
|
||||
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
OldAndBustedGenotypeLikelihoods G = makeOldAndBustedGenotypeLikelihoods(tracker);
|
||||
G.setThreeStateErrors(THREE_BASE_ERRORS);
|
||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
|
||||
return geno;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
|||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -41,4 +42,13 @@ public class GenotypeWriterFactory {
|
|||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, PrintStream destination) {
|
||||
switch (format) {
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
default:
|
||||
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
|
|||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.PrintStream;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -35,7 +36,12 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
mWriter.println(headerLine);
|
||||
}
|
||||
|
||||
public static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
||||
public GeliTextWriter(PrintStream out) {
|
||||
mWriter = new PrintWriter(out);
|
||||
mWriter.println(headerLine);
|
||||
}
|
||||
|
||||
public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
|
|
|
|||
|
|
@ -0,0 +1,138 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalData;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
||||
public class GenotypeLikelihoodsTest extends BaseTest {
|
||||
private final static double DELTA = 1e-8;
|
||||
|
||||
@Test
|
||||
public void testBasic() {
|
||||
logger.warn("Executing testIsBetween");
|
||||
Assert.assertTrue(GenotypeLikelihoods.oneMinusData.length >= Byte.MAX_VALUE);
|
||||
Assert.assertTrue(GenotypeLikelihoods.oneHalfMinusDataArachne.length >= Byte.MAX_VALUE);
|
||||
Assert.assertTrue(GenotypeLikelihoods.oneHalfMinusData3Base.length >= Byte.MAX_VALUE);
|
||||
Assert.assertEquals(GenotypeLikelihoods.log10Of1_3,-0.4771212547196624, DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY,1e-3, DELTA);
|
||||
|
||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||
double e = pow(10, (qual / -10.0));
|
||||
Assert.assertEquals(GenotypeLikelihoods.oneMinusData[qual], log10(1.0 - e), DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.oneHalfMinusDataArachne[qual], log10(0.5 - e / 2.0), DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.oneHalfMinusData3Base[qual], log10(0.5 - e / 2.0 + e / 6.0), DELTA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// f <- function(h) { print(paste(1-3.0 * h / 2, h, h/2, sep=', '));}
|
||||
@Test
|
||||
public void testPriorsFromHet() {
|
||||
logger.warn("Executing testPriorsFromHet");
|
||||
testPriorsFromHet(0.0, 1, 0, 0);
|
||||
testPriorsFromHet(1e-1, 0.85, 0.1, 0.05);
|
||||
testPriorsFromHet(1e-2, 0.985, 0.01, 0.005);
|
||||
testPriorsFromHet(1e-3, 0.9985, 0.001, 5e-04);
|
||||
testPriorsFromHet(1e-4, 0.99985, 1e-04, 5e-05);
|
||||
testPriorsFromHet(1e-5, 0.999985, 1e-05, 5e-06);
|
||||
testPriorsFromHet(0.5, 0.25, 0.5, 0.25);
|
||||
}
|
||||
|
||||
@Test (expected = RuntimeException.class)
|
||||
public void testPriorsFromHetFail1() {
|
||||
logger.warn("Executing testPriorsFromHetFail1");
|
||||
testPriorsFromHet(1.0, 0, 0, 0);
|
||||
}
|
||||
|
||||
@Test (expected = RuntimeException.class)
|
||||
public void testPriorsFromHetFail2() {
|
||||
logger.warn("Executing testPriorsFromHetFail2");
|
||||
testPriorsFromHet(-1.0, 0, 0, 0);
|
||||
}
|
||||
|
||||
private void testPriorsFromHet(double h, double homRef, double het, double homVar) {
|
||||
double[] vals = GenotypeLikelihoods.heterozygosity2DiploidProbabilities(h);
|
||||
Assert.assertEquals(vals[0], homRef, DELTA);
|
||||
Assert.assertEquals(vals[1], het, DELTA);
|
||||
Assert.assertEquals(vals[2], homVar, DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.heterozygosity2HomRefProbability(h), homRef, DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.heterozygosity2HetProbability(h), het, DELTA);
|
||||
Assert.assertEquals(GenotypeLikelihoods.heterozygosity2HomVarProbability(h), homVar, DELTA);
|
||||
}
|
||||
|
||||
//
|
||||
@Test
|
||||
public void testGenotypePriors1() {
|
||||
logger.warn("Executing testGenotypePriors1");
|
||||
// AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
|
||||
double[] array1 = {-0.0705810742857073, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -1.301029995663981};
|
||||
testGenotypePriors('A', 1e-1, array1);
|
||||
double[] array2 = {-1.301029995663981, -1, -1, -1, -0.0705810742857073, -1, -1, -1.301029995663981, -1, -1.301029995663981};
|
||||
testGenotypePriors('C', 1e-1, array2);
|
||||
double[] array3 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -0.0705810742857073, -1, -1.301029995663981};
|
||||
testGenotypePriors('G', 1e-1, array3);
|
||||
double[] array4 = {-1.301029995663981, -1, -1, -1, -1.301029995663981, -1, -1, -1.301029995663981, -1, -0.0705810742857073};
|
||||
testGenotypePriors('T', 1e-1, array4);
|
||||
}
|
||||
|
||||
private void testGenotypePriors(char ref, double h, double[] array) {
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
double val = 0.0;
|
||||
if ( g.isHomRef(ref) ) val = GenotypeLikelihoods.heterozygosity2HomRefProbability(h);
|
||||
if ( g.isHet() ) val = GenotypeLikelihoods.heterozygosity2HetProbability(h);
|
||||
if ( g.isHomVar(ref) ) val = GenotypeLikelihoods.heterozygosity2HomVarProbability(h);
|
||||
|
||||
val = log10(val);
|
||||
double e = array[g.ordinal()];
|
||||
Assert.assertEquals(String.format("%s should have p=%f but has p=%f", g, val, e), val, e, DELTA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes reference base, and three priors for hom-ref, het, hom-var, and fills in the priors vector
|
||||
* appropriately.
|
||||
*
|
||||
* @param ref
|
||||
* @param priorHomRef
|
||||
* @param priorHet
|
||||
* @param priorHomVar
|
||||
*/
|
||||
public static double[] getGenotypePriors(char ref, double priorHomRef, double priorHet, double priorHomVar) {
|
||||
if ((priorHomRef + priorHet + priorHomVar) != 1) {
|
||||
throw new RuntimeException(String.format("Prior probabilities don't sum to one => %f, %f, %f", priorHomRef, priorHet, priorHomVar));
|
||||
}
|
||||
|
||||
double[] priors = new double[DiploidGenotype.values().length];
|
||||
|
||||
for ( DiploidGenotype g : DiploidGenotype.values() ) {
|
||||
int nRefBases = Utils.countOccurrences(ref, g.toString());
|
||||
double log10POfG = 0.0;
|
||||
|
||||
switch ( nRefBases ) {
|
||||
case 2: // hom-ref
|
||||
log10POfG = Math.log10(priorHomRef);
|
||||
break;
|
||||
case 0: // hom-nonref
|
||||
//log10POfG = Math.log10(priorHomVar / 3);
|
||||
log10POfG = Math.log10(priorHomVar);
|
||||
break;
|
||||
default:
|
||||
//log10POfG = Math.log10(priorHet / 6);
|
||||
log10POfG = Math.log10(priorHet);
|
||||
break;
|
||||
}
|
||||
|
||||
priors[g.ordinal()] = log10POfG;
|
||||
}
|
||||
|
||||
return priors;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import edu.mit.broad.picard.util.MathUtil;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* Stable, error checking version of the Bayesian genotyper. Useful for calculating the likelihoods, priors,
|
||||
* and posteriors given a pile of bases and quality scores
|
||||
*
|
||||
* Suppose we have bases b1, b2, ..., bN with qualities scores q1, q2, ..., qN. This object
|
||||
* calculates:
|
||||
*
|
||||
* P(G | D) = P(G) * P(D | G)
|
||||
*
|
||||
* where
|
||||
*
|
||||
* P(D | G) = sum_i log10 P(bi | G)
|
||||
*
|
||||
* and
|
||||
*
|
||||
* P(bi | G) = 1 - P(error | q1) if bi is in G
|
||||
* = P(error | q1) / 3 if bi is not in G
|
||||
*
|
||||
* for homozygous genotypes and
|
||||
*
|
||||
* P(bi | G) = 1 - P(error | q1) / 2 + P(error | q1) / 6 if bi is in G
|
||||
* = P(error | q1) / 3 if bi is not in G
|
||||
*
|
||||
* for the 10 unique diploid genotypes AA, AC, AG, .., TT
|
||||
*
|
||||
* Everything is stored as arrays indexed by DiploidGenotype.ordinal() values in log10 space.
|
||||
*
|
||||
* The priors contain the relative probabilities of each genotype, and must be provided at object creation.
|
||||
* From then on, you can call any of the add() routines to update the likelihoods and posteriors in the above
|
||||
* model.
|
||||
*/
|
||||
public class NewHotnessGenotypeLikelihoodsTest extends BaseTest {
|
||||
int x;
|
||||
/* private int coverage = 0;
|
||||
private double[] likelihoods = null;
|
||||
private double[] priors = null;
|
||||
private double[] posteriors = null;
|
||||
|
||||
NewHotnessGenotypeLikelihoods();
|
||||
NewHotnessGenotypeLikelihoods(char ref, double heterozygosity)
|
||||
NewHotnessGenotypeLikelihoods(char ref, double priorHomRef, double priorHet, double priorHomVar);
|
||||
NewHotnessGenotypeLikelihoods(double[] log10Priors)
|
||||
double[] getLikelihoods()
|
||||
double getLikelihood(DiploidGenotype g);
|
||||
double[] getPosteriors();
|
||||
double getPosterior(DiploidGenotype g);
|
||||
|
||||
double[] getPriors()
|
||||
double getPrior(DiploidGenotype g)
|
||||
int getCoverage()
|
||||
boolean isFilteringQ0Bases()
|
||||
void filterQ0Bases(boolean filterQ0Bases)
|
||||
int add(char ref, char read, byte qual)
|
||||
int add(char observedBase, byte qualityScore)
|
||||
private boolean badBase(char observedBase)
|
||||
int add(ReadBackedPileup pileup, boolean ignoreBadBases)
|
||||
private double calculateBaseLikelihood(char read, String genotype, byte qual)
|
||||
String toString()
|
||||
boolean validate(boolean throwException)*/
|
||||
}
|
||||
Loading…
Reference in New Issue