Commit on the behalf of Mark: cleaning up some old and busted code in GenotypeLikelihood and associated objects.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1361 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8d06bb21ed
commit
8bc925a216
|
|
@ -35,12 +35,6 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
int[] HLAstoppos;
|
||||
int numHLAlleles = 0;
|
||||
double[][] Prob; String[][] Alleles;
|
||||
boolean THREE_BASE_ERRORS = false;
|
||||
String PRIORS_2ND_ON = "0.000,0.302,0.366,0.142,0.000,0.548,0.370,0.000,0.319,0.000";
|
||||
String PRIORS_2ND_OFF = "0.480,0.769,0.744,0.538,0.575,0.727,0.768,0.589,0.762,0.505";
|
||||
double[] p2ndon = priorsArray(PRIORS_2ND_ON);
|
||||
double[] p2ndoff = priorsArray(PRIORS_2ND_OFF);
|
||||
boolean keepQ0Bases = false;
|
||||
|
||||
//HLAreads.add("Italian Riviera");
|
||||
|
||||
|
|
@ -108,7 +102,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
|||
}
|
||||
out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs);
|
||||
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(THREE_BASE_ERRORS,0.999,0.000333,0.000667, p2ndon, p2ndoff, keepQ0Bases);
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -150,7 +150,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
|||
|
||||
if (bases.length() == 0)
|
||||
{
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
return G;
|
||||
}
|
||||
|
||||
|
|
@ -159,7 +159,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
|||
ref = Character.toUpperCase(ref);
|
||||
|
||||
// Handle single-base polymorphisms.
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
for ( int i = 0; i < reads.size(); i++ )
|
||||
{
|
||||
SAMRecord read = reads.get(i);
|
||||
|
|
@ -388,7 +388,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
|||
|
||||
GenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
|
||||
{
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
int k = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -42,11 +42,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
@Argument(fullName = "call_indels", shortName = "indels", doc = "Call indels as well as point mutations", required = false) public Boolean CALL_INDELS = false;
|
||||
|
||||
// Control how the genotype hypotheses are weighed
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = 0.001;
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
|
||||
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
|
||||
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||
@Argument(fullName = "priors_2nd_on", shortName = "p2ndon", doc = "Comma-separated prior likelihoods for the secondary bases of primary on-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required = false) public String PRIORS_2ND_ON = "0.000,0.302,0.366,0.142,0.000,0.548,0.370,0.000,0.319,0.000";
|
||||
@Argument(fullName = "priors_2nd_off", shortName = "p2ndoff", doc = "Comma-separated prior likelihoods for the secondary bases of primary off-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required = false) public String PRIORS_2ND_OFF = "0.480,0.769,0.744,0.538,0.575,0.727,0.768,0.589,0.762,0.505";
|
||||
|
||||
// Control various sample-level settings
|
||||
@Argument(fullName = "sample_name_regex", shortName = "sample_name_regex", doc = "Replaces the sample name specified in the BAM read group with the value supplied here", required = false) public String SAMPLE_NAME_REGEX = null;
|
||||
|
|
@ -60,8 +58,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
public double[] plocus;
|
||||
public double[] phapmap;
|
||||
public double[] pdbsnp;
|
||||
public double[] p2ndon;
|
||||
public double[] p2ndoff;
|
||||
|
||||
public long nFilteredQ0Bases = 0;
|
||||
|
||||
|
|
@ -105,23 +101,13 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
return pdbls;
|
||||
}
|
||||
|
||||
private double[] computePriors(double h) {
|
||||
double[] pdbls = new double[3];
|
||||
pdbls[0] = 1.0 - (3.0 * h / 2.0);
|
||||
pdbls[1] = h;
|
||||
pdbls[2] = h / 2.0;
|
||||
return pdbls;
|
||||
}
|
||||
|
||||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||
|
||||
plocus = computePriors(heterozygosity);
|
||||
plocus = GenotypeLikelihoods.computePriors(heterozygosity);
|
||||
phapmap = priorsArray(PRIORS_HAPMAP);
|
||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||
p2ndon = priorsArray(PRIORS_2ND_ON);
|
||||
p2ndoff = priorsArray(PRIORS_2ND_OFF);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,9 +120,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
* @return an AlleleFrequencyEstimate object
|
||||
*/
|
||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
rationalizeSampleName(context.getReads().get(0));
|
||||
//rationalizeSampleName(context.getReads().get(0));
|
||||
|
||||
// todo -- totally convoluted code!
|
||||
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
GenotypeLikelihoods G = callGenotype(tracker);
|
||||
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
|
||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
||||
if (geno != null) {
|
||||
|
|
@ -148,6 +137,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
return geno;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
|
||||
*
|
||||
|
|
@ -183,16 +173,19 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
|||
*
|
||||
* @return the likelihoods per genotype
|
||||
*/
|
||||
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker) {
|
||||
private GenotypeLikelihoods makeGenotypeLikelihood(RefMetaDataTracker tracker) {
|
||||
GenotypeLikelihoods G = null;
|
||||
|
||||
if (isHapmapSite(tracker)) {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||
} else if (isDbSNPSite(tracker)) {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
G = new GenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
|
||||
} else {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
|
||||
}
|
||||
|
||||
G.filterQ0Bases(! keepQ0Bases);
|
||||
|
||||
return G;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
||||
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
||||
private final boolean keepQ0Bases;
|
||||
private final boolean keepQ0Bases = false;
|
||||
private static final double log10Of1_3 = log10(1.0 / 3);
|
||||
private static final double log10Of2_3 = log10(2.0 / 3);
|
||||
|
||||
|
|
@ -79,9 +79,28 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
private double priorHet;
|
||||
private double priorHomVar;
|
||||
private double[] oneHalfMinusData;
|
||||
private boolean threeBaseErrors = false;
|
||||
|
||||
public boolean isThreeStateErrors() {
|
||||
return threeStateErrors;
|
||||
}
|
||||
|
||||
public void setThreeStateErrors(boolean threeStateErrors) {
|
||||
this.threeStateErrors = threeStateErrors;
|
||||
}
|
||||
|
||||
private boolean threeStateErrors = false;
|
||||
private boolean discoveryMode = false;
|
||||
|
||||
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
|
||||
|
||||
public static double[] computePriors(double h) {
|
||||
double[] pdbls = new double[3];
|
||||
pdbls[0] = 1.0 - (3.0 * h / 2.0);
|
||||
pdbls[1] = h;
|
||||
pdbls[2] = h / 2.0;
|
||||
return pdbls;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the mode to discovery
|
||||
* @param isInDiscoveryMode
|
||||
|
|
@ -95,23 +114,13 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
// Store the 2nd-best base priors for off-genotype primary bases
|
||||
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||
|
||||
public GenotypeLikelihoods() {
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
keepQ0Bases = true;
|
||||
initialize(false, 1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
|
||||
public GenotypeLikelihoods(double heterozygosity) {
|
||||
double[] vals = computePriors(heterozygosity);
|
||||
initialize(vals[0], vals[1], vals[2]);
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar) {
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
keepQ0Bases = true;
|
||||
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff, boolean keepQ0Bases) {
|
||||
this.keepQ0Bases = keepQ0Bases;
|
||||
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
||||
initialize(priorHomRef, priorHet, priorHomVar);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -131,9 +140,8 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
this.filterQ0Bases = filterQ0Bases;
|
||||
}
|
||||
|
||||
private void initialize(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
this.threeBaseErrors = threeBaseErrors ;
|
||||
this.oneHalfMinusData = threeBaseErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
|
||||
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
|
||||
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
|
||||
|
||||
this.priorHomRef = priorHomRef;
|
||||
this.priorHet = priorHet;
|
||||
|
|
@ -144,11 +152,6 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
coverage = 0;
|
||||
|
||||
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public double getHomRefPrior() {
|
||||
|
|
@ -175,38 +178,6 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
this.priorHomVar = priorHomVar;
|
||||
}
|
||||
|
||||
public double[] getOnGenotypeSecondaryPriors() {
|
||||
double[] p2ndon = new double[10];
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
p2ndon[genotypeIndex] = onNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
}
|
||||
|
||||
return p2ndon;
|
||||
}
|
||||
|
||||
public void setOnGenotypeSecondaryPriors(double[] p2ndon) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public double[] getOffGenotypeSecondaryPriors() {
|
||||
double[] p2ndoff = new double[10];
|
||||
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
p2ndoff[genotypeIndex] = offNextBestBasePriors.get(genotypes[genotypeIndex]);
|
||||
}
|
||||
|
||||
return p2ndoff;
|
||||
}
|
||||
|
||||
public void setOffGenotypeSecondaryPriors(double[] p2ndoff) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||
offNextBestBasePriors.put(genotypes[genotypeIndex], p2ndoff[genotypeIndex]);
|
||||
}
|
||||
}
|
||||
|
||||
public int add(char ref, char read, byte qual)
|
||||
{
|
||||
if (qual <= 0) {
|
||||
|
|
@ -253,7 +224,7 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
|
||||
// het
|
||||
p_base = getOneHalfMinusQual(qual);
|
||||
} else if ( this.threeBaseErrors ) {
|
||||
} else if ( threeStateErrors ) {
|
||||
// error
|
||||
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
||||
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
|
||||
|
|
@ -329,48 +300,6 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
this.sort();
|
||||
}
|
||||
|
||||
public void applySecondBaseDistributionPrior(String primaryBases, String secondaryBases) {
|
||||
for (int genotypeIndex = 0; genotypeIndex < genotypes.length; genotypeIndex++) {
|
||||
char firstAllele = genotypes[genotypeIndex].charAt(0);
|
||||
char secondAllele = genotypes[genotypeIndex].charAt(1);
|
||||
|
||||
int offIsGenotypic = 0;
|
||||
int offTotal = 0;
|
||||
|
||||
int onIsGenotypic = 0;
|
||||
int onTotal = 0;
|
||||
|
||||
for (int pileupIndex = 0; pileupIndex < primaryBases.length(); pileupIndex++) {
|
||||
char primaryBase = primaryBases.charAt(pileupIndex);
|
||||
|
||||
if (secondaryBases != null) {
|
||||
char secondaryBase = secondaryBases.charAt(pileupIndex);
|
||||
|
||||
if (primaryBase != firstAllele && primaryBase != secondAllele) {
|
||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
offIsGenotypic++;
|
||||
}
|
||||
offTotal++;
|
||||
} else {
|
||||
if (secondaryBase == firstAllele || secondaryBase == secondAllele) {
|
||||
onIsGenotypic++;
|
||||
}
|
||||
onTotal++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double offPrior = MathUtils.binomialProbability(offIsGenotypic, offTotal, offNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
double onPrior = MathUtils.binomialProbability(onIsGenotypic, onTotal, onNextBestBasePriors.get(genotypes[genotypeIndex]));
|
||||
|
||||
double logOffPrior = MathUtils.compareDoubles(offPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(offPrior);
|
||||
double logOnPrior = MathUtils.compareDoubles(onPrior, 0.0, 1e-10) == 0 ? Math.log10(Double.MIN_VALUE) : Math.log10(onPrior);
|
||||
|
||||
likelihoods[genotypeIndex] += logOffPrior + logOnPrior;
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
|
||||
public double LodVsNextBest() {
|
||||
this.sort();
|
||||
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
||||
|
|
@ -438,14 +367,13 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
int offset = pileup.getOffsets().get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
add(ref, base, qual);
|
||||
add(ref, base, qual);
|
||||
}
|
||||
// save off the likelihoods
|
||||
if (likelihoods == null || likelihoods.length == 0) return null;
|
||||
|
||||
// Apply the two calculations
|
||||
applyPrior(ref);
|
||||
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
||||
// lets setup the locus
|
||||
List<Genotype> lst = new ArrayList<Genotype>();
|
||||
|
|
@ -454,5 +382,4 @@ public class GenotypeLikelihoods implements GenotypeGenerator {
|
|||
}
|
||||
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue