New Hotness and Old and Busted genotype likelihood objects are now in the code base as I work towards a bug-free SSG, along with a cleaner interface to the genotype likelihood object

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1372 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-03 23:07:53 +00:00
parent 4986b2abd6
commit 20ff603339
9 changed files with 751 additions and 524 deletions

View File

@@ -1,34 +1,22 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class GenotypeLikelihoods {
public abstract class GenotypeLikelihoods {
// precalculate these for performance (pow/log10 is expensive!)
/**
* SOLID data uses Q0 bases to represent reference-fixed bases -- they shouldn't be counted
* during GL calculations. If this field is true, Q0 bases will be removed in add().
*/
private boolean filterQ0Bases = true;
protected boolean filterQ0Bases = true;
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
private final boolean keepQ0Bases = false;
private static final double log10Of1_3 = log10(1.0 / 3);
private static final double log10Of2_3 = log10(2.0 / 3);
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
protected static final double log10Of1_3 = log10(1.0 / 3);
protected static final double log10Of2_3 = log10(2.0 / 3);
static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
@@ -36,11 +24,6 @@ public class GenotypeLikelihoods {
}
}
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
double e = pow(10, (qual / -10.0));
@@ -50,12 +33,7 @@ public class GenotypeLikelihoods {
}
}
private double getOneHalfMinusQual(final byte qual) {
return oneHalfMinusData[qual];
}
public double[] likelihoods;
public static String[] genotypes = new String[10];
public final static String[] genotypes = new String[10];
static {
genotypes[0] = "AA";
genotypes[1] = "AC";
@@ -68,24 +46,6 @@ public class GenotypeLikelihoods {
genotypes[8] = "GT";
genotypes[9] = "TT";
}
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
private double[] oneHalfMinusData;
public boolean isThreeStateErrors() {
return threeStateErrors;
}
public void setThreeStateErrors(boolean threeStateErrors) {
this.threeStateErrors = threeStateErrors;
}
private boolean threeStateErrors = false;
private boolean discoveryMode = false;
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
@@ -97,274 +57,8 @@ public class GenotypeLikelihoods {
return pdbls;
}
/**
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
public double[] likelihoods;
// Store the 2nd-best base priors for off-genotype primary bases
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public GenotypeLikelihoods(double heterozygosity) {
double[] vals = computePriors(heterozygosity);
initialize(vals[0], vals[1], vals[2]);
}
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
/**
 * Folds one observed read base into the 10 diploid genotype likelihoods.
 *
 * @param ref  the reference base (passed through to the likelihood calculation;
 *             only used there for error reporting)
 * @param read the observed base from the read
 * @param qual the Phred base quality; Q0 bases are dropped when filtering is
 *             enabled, otherwise promoted to Q1
 * @return 1 if the base was counted, 0 if it was filtered out
 */
public int add(char ref, char read, byte qual)
{
    if (qual <= 0) {
        if ( isFilteringQ0Bases() ) {
            return 0;   // drop uninformative Q0 bases (e.g. SOLiD reference-fixed bases)
        } else {
            qual = 1;   // keep the base, but treat it as Q1 so the math stays finite
        }
    }
    // First real observation: wipe the log10(0.1) placeholder values that
    // initialize() wrote into the likelihoods array.
    if (coverage == 0)
    {
        for (int i = 0; i < likelihoods.length; i++)
        {
            likelihoods[i] = 0;
        }
    }
    for (int i = 0; i < genotypes.length; i++)
    {
        double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
        likelihoods[i] += likelihood;
    }
    // BUGFIX: this increment previously lived inside the per-genotype loop,
    // so coverage grew by genotypes.length (10) for every base added; each
    // base should contribute exactly 1 to the depth.
    coverage += 1;
    return 1;
}
/**
 * Computes log10 P(observed base | genotype) for a single read base.
 *
 * @param ref      reference base (only used in the error message)
 * @param read     observed base
 * @param genotype two-character diploid genotype string, e.g. "AC"
 * @param qual     Phred base quality; must be &gt; 0 (add() guarantees this)
 * @return the log10 likelihood contribution of this base
 */
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
    if (qual == 0) {
        // add() filters or promotes Q0 bases, so reaching here is a programming error
        throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
    }
    char h1 = genotype.charAt(0);
    char h2 = genotype.charAt(1);
    double p_base;
    if ((h1 == h2) && (h1 == read)) {
        // read matches a homozygous genotype: precomputed log10(1 - e) term
        p_base = getOneMinusQual(qual);
    } else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
        // read matches one allele of a het: precomputed table term
        p_base = getOneHalfMinusQual(qual);
    } else if ( threeStateErrors ) {
        // Mismatch under the 3-state error model: log10(e) + log10(1/3).
        // CLEANUP: was `h1 != h2 ? log10Of1_3 : log10Of1_3` -- both branches
        // were identical, so the dead ternary has been simplified.
        // NOTE(review): log10Of2_3 is defined but never used anywhere; if the
        // het mismatch was intended to be 2e/3, this should read
        // `h1 != h2 ? log10Of2_3 : log10Of1_3` -- confirm the intended model.
        p_base = qual / -10.0 + log10Of1_3;
    } else {
        // Mismatch under the 1-state (Arachne-style) model: log10(e) = -q/10
        p_base = qual / -10.0;
    }
    return p_base;
}
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
/**
 * Populates sorted_genotypes / sorted_likelihoods in best-first order, using a
 * single shared index permutation so the two arrays stay aligned.
 */
public void sort() {
// assumes Utils.SortPermutation sorts ascending; reversed below so that
// index 0 holds the most likely genotype -- TODO confirm against Utils
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
/**
 * Renders a debug summary: best genotype, LOD vs next-best, LOD vs ref,
 * then every genotype:likelihood pair (best first) and the total non-log
 * probability mass.
 */
public String toString(char ref) {
    sort();
    StringBuilder out = new StringBuilder(
            String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref)));
    double total = 0;
    for (int idx = 0; idx < sorted_genotypes.length; idx++) {
        if (idx != 0) {
            out.append(" ");
        }
        out.append(sorted_genotypes[idx]).append(":").append(String.format("%.2f", sorted_likelihoods[idx]));
        total += Math.pow(10, sorted_likelihoods[idx]);
    }
    out.append(String.format(" %f", total));
    return out.toString();
}
/**
 * Folds per-allele (A,C,G,T) prior probabilities into the 10 genotype
 * likelihoods, then re-sorts.
 *
 * NOTE(review): the ref parameter is unused here -- the prior comes entirely
 * from allele_likelihoods; confirm this is intentional.
 */
public void applyPrior(char ref, double[] allele_likelihoods) {
// k walks the 10 genotypes in the same AA,AC,AG,AT,CC,... order as `genotypes`
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
// homozygote i/i: prior is P(i)*P(j)
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
// heterozygote i/j: prior is 2*P(i)*P(j), two allele orderings
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
/**
 * Folds the hom-ref / het / hom-var genotype priors (relative to the reference
 * base) into the likelihoods, clamps -Infinity entries to -1000 so downstream
 * sorting stays well-defined, then re-sorts.
 */
public void applyPrior(char ref) {
    for (int i = 0; i < genotypes.length; i++) {
        char a = genotypes[i].charAt(0);
        char b = genotypes[i].charAt(1);
        double prior;
        if (a == ref && b == ref) {
            prior = priorHomRef;        // hom-ref
        } else if (a != ref && b != ref) {
            prior = priorHomVar;        // hom-nonref
        } else {
            prior = priorHet;           // het
        }
        likelihoods[i] += Math.log10(prior);
        if (Double.isInfinite(likelihoods[i])) {
            likelihoods[i] = -1000;
        }
    }
    this.sort();
}
/**
 * LOD of the most likely genotype over the runner-up. Safe to index [1]
 * because the genotype array always holds 10 entries.
 */
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
// Cached log10 likelihood of the hom-ref genotype; written as a side effect
// of LodVsRef() and read back by RefPosterior(). NaN until LodVsRef() runs.
double ref_likelihood = Double.NaN;
/**
 * LOD of the best genotype versus the homozygous-reference genotype.
 * Side effect: caches the hom-ref likelihood in ref_likelihood.
 */
public double LodVsRef(char ref) {
// BestGenotype() calls sort(), so sorted_likelihoods is valid below
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
// best call IS hom-ref: report the negated distance to the runner-up
return (-1.0 * this.LodVsNextBest());
} else {
// linear scan for the hom-ref genotype's (unsorted) likelihood
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
/** Sorts, then returns the most likely genotype string (e.g. "AC"). */
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
/** Sorts, then returns the log10 likelihood of the most likely genotype. */
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
/**
 * Returns the hom-ref genotype's log10 likelihood, computed via the caching
 * side effect of LodVsRef().
 */
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
/**
 * Given all the data associated with a locus, builds a genotype call object
 * containing the likelihoods and posterior probabilities for all 10 genotypes.
 *
 * @param tracker reference meta data for this location, which may contain relevant
 *                information like dbSNP or hapmap data; NOTE(review): currently
 *                unused in this method -- confirm whether it should feed priors
 * @param ref the reference base
 * @param pileup a pileup of the reads, containing the reads and their offsets
 *
 * @return an SSGGenotypeCall with each genotype and its likelihood, or null if
 *         no likelihoods are available
 */
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
// NOTE(review): `squared` is accumulated but never used -- the RMS mapping
// quality computation appears unfinished; confirm before removing.
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// bail out if there is nothing to report
if (likelihoods == null || likelihoods.length == 0) return null;
// fold in the genotype priors (relative to the reference base)
applyPrior(ref);
// wrap each genotype and its likelihood into a Genotype for the call object
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
public abstract int add(char ref, char read, byte qual);
public abstract void applyPrior(char ref);
}

View File

@@ -0,0 +1,326 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
private double getOneHalfMinusQual(final byte qual) {
return oneHalfMinusData[qual];
}
//public double[] likelihoods;
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
private double[] oneHalfMinusData;
public boolean isThreeStateErrors() {
return threeStateErrors;
}
public void setThreeStateErrors(boolean threeStateErrors) {
this.threeStateErrors = threeStateErrors;
}
private boolean threeStateErrors = false;
private boolean discoveryMode = false;
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
public static double[] computePriors(double h) {
double[] pdbls = new double[3];
pdbls[0] = 1.0 - (3.0 * h / 2.0);
pdbls[1] = h;
pdbls[2] = h / 2.0;
return pdbls;
}
/**
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public NewHotnessGenotypeLikelihoods(double heterozygosity) {
double[] vals = computePriors(heterozygosity);
initialize(vals[0], vals[1], vals[2]);
}
public NewHotnessGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
/**
 * Folds one observed read base into the 10 diploid genotype likelihoods.
 *
 * @param ref  the reference base (passed through to the likelihood calculation;
 *             only used there for error reporting)
 * @param read the observed base from the read
 * @param qual the Phred base quality; Q0 bases are dropped when filtering is
 *             enabled, otherwise promoted to Q1
 * @return 1 if the base was counted, 0 if it was filtered out
 */
public int add(char ref, char read, byte qual)
{
    if (qual <= 0) {
        if ( isFilteringQ0Bases() ) {
            return 0;   // drop uninformative Q0 bases (e.g. SOLiD reference-fixed bases)
        } else {
            qual = 1;   // keep the base, but treat it as Q1 so the math stays finite
        }
    }
    // First real observation: wipe the log10(0.1) placeholder values that
    // initialize() wrote into the likelihoods array.
    if (coverage == 0)
    {
        for (int i = 0; i < likelihoods.length; i++)
        {
            likelihoods[i] = 0;
        }
    }
    for (int i = 0; i < genotypes.length; i++)
    {
        double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
        likelihoods[i] += likelihood;
    }
    // BUGFIX: this increment previously lived inside the per-genotype loop,
    // so coverage grew by genotypes.length (10) for every base added; each
    // base should contribute exactly 1 to the depth.
    coverage += 1;
    return 1;
}
/**
 * Computes log10 P(observed base | genotype) for a single read base.
 *
 * @param ref      reference base (only used in the error message)
 * @param read     observed base
 * @param genotype two-character diploid genotype string, e.g. "AC"
 * @param qual     Phred base quality; must be &gt; 0 (add() guarantees this)
 * @return the log10 likelihood contribution of this base
 */
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
    if (qual == 0) {
        // add() filters or promotes Q0 bases, so reaching here is a programming error
        throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
    }
    char h1 = genotype.charAt(0);
    char h2 = genotype.charAt(1);
    double p_base;
    if ((h1 == h2) && (h1 == read)) {
        // read matches a homozygous genotype: precomputed log10(1 - e) term
        p_base = getOneMinusQual(qual);
    } else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
        // read matches one allele of a het: precomputed table term
        p_base = getOneHalfMinusQual(qual);
    } else if ( threeStateErrors ) {
        // Mismatch under the 3-state error model: log10(e) + log10(1/3).
        // CLEANUP: was `h1 != h2 ? log10Of1_3 : log10Of1_3` -- both branches
        // were identical, so the dead ternary has been simplified.
        // NOTE(review): log10Of2_3 is defined but never used anywhere; if the
        // het mismatch was intended to be 2e/3, this should read
        // `h1 != h2 ? log10Of2_3 : log10Of1_3` -- confirm the intended model.
        p_base = qual / -10.0 + log10Of1_3;
    } else {
        // Mismatch under the 1-state (Arachne-style) model: log10(e) = -q/10
        p_base = qual / -10.0;
    }
    return p_base;
}
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
public void sort() {
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref) {
this.sort();
double sum = 0;
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++) {
if (i != 0) {
s = s + " ";
}
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
sum += Math.pow(10,sorted_likelihoods[i]);
}
s = s + String.format(" %f", sum);
return s;
}
public void applyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void applyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
this.sort();
}
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
double ref_likelihood = Double.NaN;
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
/**
 * Given all the data associated with a locus, builds a genotype call object
 * containing the likelihoods and posterior probabilities for all 10 genotypes.
 *
 * @param tracker reference meta data for this location, which may contain relevant
 *                information like dbSNP or hapmap data; NOTE(review): currently
 *                unused in this method -- confirm whether it should feed priors
 * @param ref the reference base
 * @param pileup a pileup of the reads, containing the reads and their offsets
 *
 * @return an SSGGenotypeCall with each genotype and its likelihood, or null if
 *         no likelihoods are available
 */
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// NOTE(review): unlike the legacy class, the Q0-filtering flag is no longer
// set here -- confirm the caller configures filterQ0Bases() before use.
// for calculating the rms of the mapping qualities
// NOTE(review): `squared` is accumulated but never used -- the RMS mapping
// quality computation appears unfinished; confirm before removing.
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// bail out if there is nothing to report
if (likelihoods == null || likelihoods.length == 0) return null;
// fold in the genotype priors (relative to the reference base)
applyPrior(ref);
// wrap each genotype and its likelihood into a Genotype for the call object
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
}

View File

@@ -0,0 +1,324 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
private double getOneHalfMinusQual(final byte qual) {
return oneHalfMinusData[qual];
}
//public double[] likelihoods;
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
private double[] oneHalfMinusData;
public boolean isThreeStateErrors() {
return threeStateErrors;
}
public void setThreeStateErrors(boolean threeStateErrors) {
this.threeStateErrors = threeStateErrors;
}
private boolean threeStateErrors = false;
private boolean discoveryMode = false;
public static double[] computePriors(double h) {
double[] pdbls = new double[3];
pdbls[0] = 1.0 - (3.0 * h / 2.0);
pdbls[1] = h;
pdbls[2] = h / 2.0;
return pdbls;
}
/**
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
public OldAndBustedGenotypeLikelihoods(double heterozygosity) {
double[] vals = computePriors(heterozygosity);
initialize(vals[0], vals[1], vals[2]);
}
public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
/**
 * Folds one observed read base into the 10 diploid genotype likelihoods
 * (legacy implementation, kept as the frozen reference for A/B comparison
 * against NewHotnessGenotypeLikelihoods).
 *
 * @param ref  the reference base (passed through; only used in error reporting)
 * @param read the observed base from the read
 * @param qual the Phred base quality; Q0 bases are dropped or promoted to Q1
 * @return 1 if the base was counted, 0 if it was filtered out
 */
public int add(char ref, char read, byte qual)
{
if (qual <= 0) {
if ( isFilteringQ0Bases() ) {
return 0;
} else {
qual = 1;
}
}
// first real observation: wipe the log10(0.1) placeholder initialization
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
{
likelihoods[i] = 0;
}
}
for (int i = 0; i < genotypes.length; i++)
{
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
//if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
likelihoods[i] += likelihood;
// NOTE(review): coverage is bumped once per genotype, i.e. 10x per base.
// This looks like a bug, but it is deliberately preserved here: this class
// is the frozen "old and busted" reference implementation.
coverage += 1;
}
return 1;
}
/**
 * Computes log10 P(observed base | genotype) for a single read base
 * (legacy implementation, preserved byte-for-byte as the comparison baseline).
 *
 * @param ref      reference base (only used in the error message)
 * @param genotype two-character diploid genotype string, e.g. "AC"
 * @param qual     Phred base quality; must be &gt; 0 (add() guarantees this)
 */
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
if (qual == 0) {
// add() filters or promotes Q0 bases, so reaching here is a programming error
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
}
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double p_base;
if ((h1 == h2) && (h1 == read)) {
// read matches a homozygous genotype
p_base = getOneMinusQual(qual);
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
// read matches one allele of a het
p_base = getOneHalfMinusQual(qual);
} else if ( threeStateErrors ) {
// 3-state error model mismatch.
// NOTE(review): both ternary branches are log10Of1_3, so the h1 != h2 test
// is dead; log10Of2_3 is defined but unused -- preserved as-is because this
// class is the frozen legacy reference.
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
} else {
// 1-state error model mismatch: log10(e) = -q/10
p_base = qual / -10.0;
}
return p_base;
}
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
public void sort() {
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref) {
this.sort();
double sum = 0;
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++) {
if (i != 0) {
s = s + " ";
}
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
sum += Math.pow(10,sorted_likelihoods[i]);
}
s = s + String.format(" %f", sum);
return s;
}
public void applyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void applyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
this.sort();
}
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
double ref_likelihood = Double.NaN;
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
*
* @param tracker contains the reference meta data for this location, which may contain relevant information like dbSNP or hapmap information
* @param ref the reference base
* @param pileup a pileup of the reads, containing the reads and their offsets
*
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// save off the likelihoods
if (likelihoods == null || likelihoods.length == 0) return null;
// Apply the two calculations
applyPrior(ref);
// lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
}

View File

@@ -114,8 +114,8 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
}
public String toString() {
return String.format("ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
}
/**

View File

@@ -26,6 +26,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
public File VARIANTS_FILE;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
@@ -37,7 +38,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
public boolean GENOTYPE = false;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
public boolean THREE_BASE_ERRORS = false;
public boolean THREE_BASE_ERRORS = true;
public enum Caller {
OLD_AND_BUSTED,
@@ -45,30 +46,24 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
}
@Argument(fullName = "caller", doc = "", required = false)
public Caller caller = Caller.OLD_AND_BUSTED;
public Caller caller = Caller.NEW_HOTNESS;
// Control how the genotype hypotheses are weighed
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
//@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
public boolean keepQ0Bases = false;
public double[] plocus;
public double[] phapmap;
//public double[] phapmap;
public double[] pdbsnp;
public long nFilteredQ0Bases = 0;
/**
* Specify that this walker requires reads.
*
* @return true
*/
public boolean requiresReads() {
return true;
}
private final static boolean TESTING_NEW = false;
/**
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
@ -103,8 +98,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
/** Initialize the walker with some sensible defaults */
public void initialize() {
plocus = GenotypeLikelihoods.computePriors(heterozygosity);
phapmap = priorsArray(PRIORS_HAPMAP);
plocus = OldAndBustedGenotypeLikelihoods.computePriors(heterozygosity);
//phapmap = priorsArray(PRIORS_HAPMAP);
pdbsnp = priorsArray(PRIORS_DBSNP);
}
@ -116,31 +111,32 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @param context contextual information around the locus
*/
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
if ( TESTING_NEW ) {
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
if ( ! oldAndBusted.equals(newHotness) || true ) {
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
if ( ! oldAndBusted.equals(newHotness) ) {
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
}
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
} else {
switch ( caller ) {
case OLD_AND_BUSTED: return mapOldAndBusted(tracker, ref, context);
case NEW_HOTNESS: return mapNewHotness(tracker, ref, context);
default: return null;
}
}
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
}
// Genotype one locus using the new-hotness likelihood model.
// NOTE(review): diff residue — both the old `GenotypeLikelihoods G = ...` line and
// its replacement `NewHotnessGenotypeLikelihoods G = ...` are present below, which
// declares G twice and cannot compile; exactly one of the two lines should survive
// (presumably the NewHotness one, given the method name — confirm against VCS).
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
NewHotnessGenotypeLikelihoods G = makeNewHotnessGenotypeLikelihoods(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = callGenotypes(G, tracker, ref, pileup);
return geno;
}
// Genotype one locus using the legacy (old-and-busted) likelihood model.
// NOTE(review): this appears to be the pre-refactor (removed) version of
// mapOldAndBusted left behind as diff residue — a second definition of the same
// method appears later in this chunk; both cannot coexist in one class.
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
return geno;
}
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
@ -152,8 +148,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
public SSGGenotypeCall callGenotypes(GenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
G.filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
@ -174,31 +168,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
// lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < G.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),GenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
lst.add(new BasicGenotype(pileup.getLocation(),OldAndBustedGenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
}
return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup);
}
/**
* Calls the underlying, single locus genotype of the sample
*
* @param tracker the meta data tracker
*
* @return the likelihoods per genotype
*/
private GenotypeLikelihoods makeGenotypeLikelihood(RefMetaDataTracker tracker) {
GenotypeLikelihoods G = null;
if (isHapmapSite(tracker)) {
G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
} else if (isDbSNPSite(tracker)) {
G = new GenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
} else {
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
}
/**
 * Build a fresh likelihood object for the new-hotness genotyping path.
 * Unlike the old-and-busted factory, the visible body ignores the tracker:
 * it always uses the human-heterozygosity priors rather than switching on
 * HapMap/dbSNP site membership.
 *
 * @param tracker reference metadata for the locus (currently unused here)
 * @return a NewHotnessGenotypeLikelihoods with the Q0-base filter configured
 *         from the keepQ0Bases walker argument
 */
private NewHotnessGenotypeLikelihoods makeNewHotnessGenotypeLikelihoods(RefMetaDataTracker tracker) {
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
G.filterQ0Bases(! keepQ0Bases);
return G;
}
@ -244,9 +221,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
*/
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
if (call != null && (GENOTYPE || call.isVariation())) {
System.out.printf("Score %s%n", call.getConfidenceScore());
//System.out.printf("Score %s%n", call.getConfidenceScore());
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) {
System.out.printf("Call %s%n", call);
//System.out.printf("Call %s%n", call);
sum.addGenotypeCall(call);
}
}
@ -258,6 +235,36 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
sum.close();
}
//
// TODO -- delete the old and busted routines
//
/**
 * Build a likelihood object for the legacy genotyping path, choosing priors
 * by locus class: HapMap sites get the human-heterozygosity prior, dbSNP
 * sites the pdbsnp priors, and all other loci the plocus priors.
 *
 * @param tracker reference metadata used to classify the locus
 * @return an OldAndBustedGenotypeLikelihoods with priors and the Q0-base
 *         filter configured
 */
private OldAndBustedGenotypeLikelihoods makeOldAndBustedGenotypeLikelihoods(RefMetaDataTracker tracker) {
OldAndBustedGenotypeLikelihoods G = null;
if (isHapmapSite(tracker)) {
G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
} else if (isDbSNPSite(tracker)) {
G = new OldAndBustedGenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
} else {
G = new OldAndBustedGenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
}
G.filterQ0Bases(! keepQ0Bases);
return G;
}
//
// TODO -- delete the old and busted routines
//
/**
 * Legacy genotyping path (see the TODO above: slated for deletion).
 * Builds the pileup and an OldAndBustedGenotypeLikelihoods for this locus,
 * sets discovery vs. genotyping mode from the GENOTYPE flag, and delegates
 * the call to the likelihood object itself.
 *
 * @param tracker reference metadata for the locus
 * @param ref     the reference base
 * @param context reads overlapping the locus
 * @return the genotype call produced by the legacy model
 */
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
OldAndBustedGenotypeLikelihoods G = makeOldAndBustedGenotypeLikelihoods(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
return geno;
}
}

View File

@ -1,124 +0,0 @@
package org.broadinstitute.sting.playground;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.cmdline.Option;
import java.io.*;
/**
 * Command-line tool that validates a SAM/BAM file record by record, reporting
 * parse failures and (optionally) coordinate-sort violations, with periodic
 * throughput statistics.
 */
public class ValidateSAM extends CommandLineProgram {
    // Usage and parameters
    @Usage(programVersion="0.1") public String USAGE = "SAM Validator\n";
    @Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE;
    @Option(shortName="M", doc="Maximum number of errors to detect before exiting", optional=true) public String MAX_ERRORS_ARG = "-1";
    @Option(shortName="S", doc="How strict should we be with validation", optional=true) public String STRICTNESS_ARG = "strict";
    @Option(shortName="SORTED", doc="Check that the SAM file is sorted", optional=true) public boolean CHECK_SORTED_ARG = false;

    // Wall-clock start of the run (ms); set in doWork(), read by printProgress().
    private long startTime = -1;

    /** Required main method implementation. */
    public static void main(String[] argv) {
        System.exit(new ValidateSAM().instanceMain(argv));
    }

    /**
     * Print a one-line progress report with elapsed time and throughput.
     *
     * @param nRecords number of records read so far (must be > 0 for a meaningful rate)
     * @param nErrors  number of validation errors seen so far
     */
    public void printProgress( int nRecords, int nErrors ) {
        final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
        final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
        System.out.printf("Read %d records containing %d errors in %.2f secs (%.2f secs per 1M reads)%n", nRecords, nErrors, elapsed, secsPer1MReads);
    }

    /**
     * Iterate over every record in INPUT_FILE, counting validation errors.
     * Aborts early when MAX_ERRORS is reached, and exits the JVM on the first
     * sort-order violation when CHECK_SORTED_ARG is set.
     *
     * @return 0 on a complete pass, 1 if the header could not be read
     */
    protected int doWork() {
        int MAX_ERRORS = -1; // Don't bail ever
        if ( MAX_ERRORS_ARG != null ) {
            MAX_ERRORS = Integer.parseInt(MAX_ERRORS_ARG);
        }

        // Start the timer
        startTime = System.currentTimeMillis();

        // Initialize the sam reader; a failure here means the header itself is bad
        CloseableIterator<SAMRecord> iter = null;
        try {
            final SAMFileReader samReader = getSamReader(INPUT_FILE);
            iter = samReader.iterator();
        } catch (Exception ioe) {
            System.out.println("[VALIDATION FAILURE IN HEADER]: " + ioe);
            ioe.printStackTrace();
            return 1;
        }

        int nRecords = 0;
        int nErrors = 0;
        SAMRecord last_record = null;
        try {
            while ( iter.hasNext() ) {
                nRecords++;
                try {
                    final SAMRecord ri = iter.next();
                    // BUGFIX: compare reference names with equals(), not ==.  The
                    // original identity comparison only worked when both Strings
                    // happened to be interned, silently missing sort violations.
                    if (CHECK_SORTED_ARG &&
                            (last_record != null) &&
                            ri.getReferenceName().equals(last_record.getReferenceName()) &&
                            (ri.getAlignmentStart() < last_record.getAlignmentStart()))
                    {
                        System.out.println("Not sorted!");
                        System.out.println(last_record.format());
                        System.out.println(ri.format() + "\n");
                        System.exit(-1);
                    }
                    last_record = ri;
                } catch (Exception ioe) {
                    // A bad record is counted and reported, but validation continues
                    nErrors++;
                    System.out.println("[VALIDATION FAILURE IN RECORD]: " + ioe);
                    ioe.printStackTrace();
                }

                if ( MAX_ERRORS > -1 && nErrors >= MAX_ERRORS ) {
                    System.out.println("Maximum number of errors encountered " + nErrors);
                    break;
                }

                if ( nRecords % 100000 == 0 ) {
                    printProgress( nRecords, nErrors );
                }
            }
        } finally {
            iter.close(); // release the underlying file handle even on early exit
        }

        printProgress( nRecords, nErrors );
        return 0;
    }

    private static void usage() {
        System.err.println("USAGE: org.broadinstitute.sting.ValidateSAM <SAMFile|BAMFile>");
    }

    /**
     * Open a reader on samFile with the stringency selected by STRICTNESS_ARG
     * ("lenient"/"silent", case-insensitive; anything else, or null, is STRICT).
     *
     * @param samFile the SAM/BAM file to open
     * @return a configured SAMFileReader
     */
    private SAMFileReader getSamReader(final File samFile) {
        ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
        // equalsIgnoreCase avoids the locale-sensitive toLowerCase() pitfall
        // (e.g. the Turkish dotless-i) of the original comparison.
        if ( "lenient".equalsIgnoreCase(STRICTNESS_ARG) ) {
            strictness = SAMFileReader.ValidationStringency.LENIENT;
        }
        else if ( "silent".equalsIgnoreCase(STRICTNESS_ARG) ) {
            strictness = SAMFileReader.ValidationStringency.SILENT;
        }
        System.err.println("Strictness is " + strictness);
        final SAMFileReader samReader = new SAMFileReader(samFile, true);
        samReader.setValidationStringency(strictness);
        return samReader;
    }
}

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods;
import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import java.io.*;
@ -103,7 +103,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
}
out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs);
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods;
import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
import org.broadinstitute.sting.playground.utils.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -144,14 +144,14 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
char ref;
GenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
OldAndBustedGenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
{
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
if (bases.length() == 0)
{
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
return G;
}
@ -160,7 +160,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
ref = Character.toUpperCase(ref);
// Handle single-base polymorphisms.
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
@ -195,7 +195,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return G;
}
double[] CountFreqs(GenotypeLikelihoods[] genotype_likelihoods)
double[] CountFreqs(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
double[] allele_likelihoods = new double[4];
for (int x = 0; x < genotype_likelihoods.length; x++)
@ -230,7 +230,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return allele_likelihoods;
}
/* double CountIndelFreq(GenotypeLikelihoods[] genotype_likelihoods)
/* double CountIndelFreq(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>();
@ -260,7 +260,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
}*/
// Potential precision error here.
double Compute_pD(GenotypeLikelihoods[] genotype_likelihoods)
double Compute_pD(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
double pD = 0;
for (int i = 0; i < sample_names.size(); i++)
@ -280,7 +280,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double[] allele_likelihoods = new double[4];
for (int i = 0; i < 4; i++) { allele_likelihoods[i] = 1e-6/3.0; }
allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(ref)] = 1.0-1e-6;
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
for (int j = 0; j < sample_names.size(); j++)
{
G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6);
@ -296,7 +296,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double LOD(LocusContext[] contexts)
{
em_result = EM(contexts);
GenotypeLikelihoods[] G = em_result.genotype_likelihoods;
OldAndBustedGenotypeLikelihoods[] G = em_result.genotype_likelihoods;
pD = Compute_pD(G);
pNull = Compute_pNull(contexts);
lod = pD - pNull;
@ -306,11 +306,11 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
class EM_Result
{
String[] sample_names;
GenotypeLikelihoods[] genotype_likelihoods;
OldAndBustedGenotypeLikelihoods[] genotype_likelihoods;
double[] allele_likelihoods;
int EM_N;
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
public EM_Result(List<String> sample_names, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
{
this.sample_names = new String[1];
this.sample_names = sample_names.toArray(this.sample_names);
@ -337,7 +337,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
}
double indel_alt_freq = 1e-4;
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
for (int i = 0; i < MAX_ITERATIONS; i++)
{
for (int j = 0; j < sample_names.size(); j++)
@ -387,9 +387,9 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return strand_score;
}
GenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
OldAndBustedGenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
{
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
int k = 0;
for (int i = 0; i < 4; i++)
{
@ -409,7 +409,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
else { return BaseUtils.baseIndexToSimpleBase(perm[2]); }
}
double Compute_discovery_lod(char ref, GenotypeLikelihoods[] genotype_likelihoods)
double Compute_discovery_lod(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
double pBest = 0;
double pRef = 0;
@ -427,7 +427,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(PickAlt(ref, allele_likelihoods))];
}
int Compute_n_ref(char ref, GenotypeLikelihoods[] genotype_likelihoods)
int Compute_n_ref(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++)
@ -439,7 +439,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return n;
}
int Compute_n_het(char ref, GenotypeLikelihoods[] genotype_likelihoods)
int Compute_n_het(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++)
@ -452,7 +452,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return n;
}
int Compute_n_hom(char ref, GenotypeLikelihoods[] genotype_likelihoods)
int Compute_n_hom(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{
int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++)
@ -474,7 +474,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double lod = LOD(contexts);
double strand_score = StrandScore(context);
//EM_Result em_result = EM(contexts);
GenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods);
OldAndBustedGenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods);
//double pD = Compute_pD(em_result.genotype_likelihoods);
//double pNull = Compute_pNull(contexts);

View File

@ -34,13 +34,13 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
return true; // We are keeping all the reads
}
protected class GenotypeLikelihoods
protected class OldAndBustedGenotypeLikelihoods
{
public double[] likelihoods;
public String[] genotypes;
GenotypeLikelihoods()
OldAndBustedGenotypeLikelihoods()
{
likelihoods = new double[10];
genotypes = new String[10];
@ -124,9 +124,9 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
int cCount = 0;
int gCount = 0;
int tCount = 0;
GenotypeLikelihoods glAll = new GenotypeLikelihoods();
GenotypeLikelihoods glForward = new GenotypeLikelihoods();
GenotypeLikelihoods glReverse = new GenotypeLikelihoods();
OldAndBustedGenotypeLikelihoods glAll = new OldAndBustedGenotypeLikelihoods();
OldAndBustedGenotypeLikelihoods glForward = new OldAndBustedGenotypeLikelihoods();
OldAndBustedGenotypeLikelihoods glReverse = new OldAndBustedGenotypeLikelihoods();
for ( int i = 0; i < reads.size(); i++ )
{
SAMRecord read = reads.get(i);
@ -409,7 +409,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1();
}
protected String dumpTheories(GenotypeLikelihoods gl) {
protected String dumpTheories(OldAndBustedGenotypeLikelihoods gl) {
StringBuilder sb = new StringBuilder();
for (int i = gl.genotypes.length-1; i >= 0; i--)
{