New hotness and old and Busted genotype likelihood objects are now in the code base as I work towards a bug-free SSG along with a cleaner interface to the genotype likelihood object
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1372 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4986b2abd6
commit
20ff603339
|
|
@ -1,34 +1,22 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
|
||||||
|
|
||||||
import static java.lang.Math.log10;
|
import static java.lang.Math.log10;
|
||||||
import static java.lang.Math.pow;
|
import static java.lang.Math.pow;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class GenotypeLikelihoods {
|
public abstract class GenotypeLikelihoods {
|
||||||
// precalculate these for performance (pow/log10 is expensive!)
|
// precalculate these for performance (pow/log10 is expensive!)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SOLID data uses Q0 bases to represent reference-fixed bases -- they shouldn't be counted
|
* SOLID data uses Q0 bases to represent reference-fixed bases -- they shouldn't be counted
|
||||||
* during GL calculations. If this field is true, Q0 bases will be removed in add().
|
* during GL calculations. If this field is true, Q0 bases will be removed in add().
|
||||||
*/
|
*/
|
||||||
private boolean filterQ0Bases = true;
|
protected boolean filterQ0Bases = true;
|
||||||
|
|
||||||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||||
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
||||||
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
||||||
private final boolean keepQ0Bases = false;
|
protected static final double log10Of1_3 = log10(1.0 / 3);
|
||||||
private static final double log10Of1_3 = log10(1.0 / 3);
|
protected static final double log10Of2_3 = log10(2.0 / 3);
|
||||||
private static final double log10Of2_3 = log10(2.0 / 3);
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||||
|
|
@ -36,11 +24,6 @@ public class GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static double getOneMinusQual(final byte qual) {
|
|
||||||
return oneMinusData[qual];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
|
||||||
double e = pow(10, (qual / -10.0));
|
double e = pow(10, (qual / -10.0));
|
||||||
|
|
@ -50,12 +33,7 @@ public class GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getOneHalfMinusQual(final byte qual) {
|
public final static String[] genotypes = new String[10];
|
||||||
return oneHalfMinusData[qual];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double[] likelihoods;
|
|
||||||
public static String[] genotypes = new String[10];
|
|
||||||
static {
|
static {
|
||||||
genotypes[0] = "AA";
|
genotypes[0] = "AA";
|
||||||
genotypes[1] = "AC";
|
genotypes[1] = "AC";
|
||||||
|
|
@ -68,24 +46,6 @@ public class GenotypeLikelihoods {
|
||||||
genotypes[8] = "GT";
|
genotypes[8] = "GT";
|
||||||
genotypes[9] = "TT";
|
genotypes[9] = "TT";
|
||||||
}
|
}
|
||||||
public int coverage;
|
|
||||||
|
|
||||||
// The genotype priors;
|
|
||||||
private double priorHomRef;
|
|
||||||
private double priorHet;
|
|
||||||
private double priorHomVar;
|
|
||||||
private double[] oneHalfMinusData;
|
|
||||||
|
|
||||||
public boolean isThreeStateErrors() {
|
|
||||||
return threeStateErrors;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setThreeStateErrors(boolean threeStateErrors) {
|
|
||||||
this.threeStateErrors = threeStateErrors;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean threeStateErrors = false;
|
|
||||||
private boolean discoveryMode = false;
|
|
||||||
|
|
||||||
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
|
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
|
||||||
|
|
||||||
|
|
@ -97,274 +57,8 @@ public class GenotypeLikelihoods {
|
||||||
return pdbls;
|
return pdbls;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public double[] likelihoods;
|
||||||
* set the mode to discovery
|
|
||||||
* @param isInDiscoveryMode
|
|
||||||
*/
|
|
||||||
public void setDiscovery(boolean isInDiscoveryMode) {
|
|
||||||
discoveryMode = isInDiscoveryMode;
|
|
||||||
}
|
|
||||||
// Store the 2nd-best base priors for on-genotype primary bases
|
|
||||||
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
|
||||||
|
|
||||||
// Store the 2nd-best base priors for off-genotype primary bases
|
public abstract int add(char ref, char read, byte qual);
|
||||||
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
public abstract void applyPrior(char ref);
|
||||||
|
|
||||||
public GenotypeLikelihoods(double heterozygosity) {
|
|
||||||
double[] vals = computePriors(heterozygosity);
|
|
||||||
initialize(vals[0], vals[1], vals[2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
|
||||||
initialize(priorHomRef, priorHet, priorHomVar);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Are we ignoring Q0 bases during calculations?
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public boolean isFilteringQ0Bases() {
|
|
||||||
return filterQ0Bases;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Enable / disable filtering of Q0 bases. Enabled by default
|
|
||||||
*
|
|
||||||
* @param filterQ0Bases
|
|
||||||
*/
|
|
||||||
public void filterQ0Bases(boolean filterQ0Bases) {
|
|
||||||
this.filterQ0Bases = filterQ0Bases;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
|
|
||||||
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
|
|
||||||
|
|
||||||
this.priorHomRef = priorHomRef;
|
|
||||||
this.priorHet = priorHet;
|
|
||||||
this.priorHomVar = priorHomVar;
|
|
||||||
|
|
||||||
likelihoods = new double[10];
|
|
||||||
|
|
||||||
coverage = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getHomRefPrior() {
|
|
||||||
return priorHomRef;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHomRefPrior(double priorHomRef) {
|
|
||||||
this.priorHomRef = priorHomRef;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getHetPrior() {
|
|
||||||
return priorHet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHetPrior(double priorHet) {
|
|
||||||
this.priorHet = priorHet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getHomVarPrior() {
|
|
||||||
return priorHomVar;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setHomVarPrior(double priorHomVar) {
|
|
||||||
this.priorHomVar = priorHomVar;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int add(char ref, char read, byte qual)
|
|
||||||
{
|
|
||||||
if (qual <= 0) {
|
|
||||||
if ( isFilteringQ0Bases() ) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
qual = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (coverage == 0)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < likelihoods.length; i++)
|
|
||||||
{
|
|
||||||
likelihoods[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < genotypes.length; i++)
|
|
||||||
{
|
|
||||||
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
|
||||||
//if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
|
|
||||||
likelihoods[i] += likelihood;
|
|
||||||
coverage += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
|
||||||
if (qual == 0) {
|
|
||||||
// zero quals are wrong
|
|
||||||
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
|
|
||||||
}
|
|
||||||
|
|
||||||
char h1 = genotype.charAt(0);
|
|
||||||
char h2 = genotype.charAt(1);
|
|
||||||
|
|
||||||
double p_base;
|
|
||||||
|
|
||||||
if ((h1 == h2) && (h1 == read)) {
|
|
||||||
// hom
|
|
||||||
p_base = getOneMinusQual(qual);
|
|
||||||
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
|
|
||||||
// het
|
|
||||||
p_base = getOneHalfMinusQual(qual);
|
|
||||||
} else if ( threeStateErrors ) {
|
|
||||||
// error
|
|
||||||
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
|
||||||
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
|
|
||||||
} else {
|
|
||||||
// error
|
|
||||||
p_base = qual / -10.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return p_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String[] sorted_genotypes;
|
|
||||||
public double[] sorted_likelihoods;
|
|
||||||
|
|
||||||
public void sort() {
|
|
||||||
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
|
||||||
|
|
||||||
Integer[] reverse_permutation = new Integer[permutation.length];
|
|
||||||
for (int i = 0; i < reverse_permutation.length; i++) {
|
|
||||||
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
|
|
||||||
}
|
|
||||||
|
|
||||||
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
|
|
||||||
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String toString(char ref) {
|
|
||||||
this.sort();
|
|
||||||
double sum = 0;
|
|
||||||
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
|
|
||||||
for (int i = 0; i < sorted_genotypes.length; i++) {
|
|
||||||
if (i != 0) {
|
|
||||||
s = s + " ";
|
|
||||||
}
|
|
||||||
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
|
|
||||||
sum += Math.pow(10,sorted_likelihoods[i]);
|
|
||||||
}
|
|
||||||
s = s + String.format(" %f", sum);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void applyPrior(char ref, double[] allele_likelihoods) {
|
|
||||||
int k = 0;
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
for (int j = i; j < 4; j++) {
|
|
||||||
if (i == j) {
|
|
||||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
|
||||||
} else {
|
|
||||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
|
||||||
}
|
|
||||||
k++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.sort();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void applyPrior(char ref) {
|
|
||||||
for (int i = 0; i < genotypes.length; i++) {
|
|
||||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
|
||||||
// hom-ref
|
|
||||||
likelihoods[i] += Math.log10(priorHomRef);
|
|
||||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
|
||||||
// hom-nonref
|
|
||||||
likelihoods[i] += Math.log10(priorHomVar);
|
|
||||||
} else {
|
|
||||||
// het
|
|
||||||
likelihoods[i] += Math.log10(priorHet);
|
|
||||||
}
|
|
||||||
if (Double.isInfinite(likelihoods[i])) {
|
|
||||||
likelihoods[i] = -1000;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.sort();
|
|
||||||
}
|
|
||||||
|
|
||||||
public double LodVsNextBest() {
|
|
||||||
this.sort();
|
|
||||||
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
double ref_likelihood = Double.NaN;
|
|
||||||
|
|
||||||
public double LodVsRef(char ref) {
|
|
||||||
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
|
|
||||||
ref_likelihood = sorted_likelihoods[0];
|
|
||||||
return (-1.0 * this.LodVsNextBest());
|
|
||||||
} else {
|
|
||||||
for (int i = 0; i < genotypes.length; i++) {
|
|
||||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
|
||||||
ref_likelihood = likelihoods[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return sorted_likelihoods[0] - ref_likelihood;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String BestGenotype() {
|
|
||||||
this.sort();
|
|
||||||
return this.sorted_genotypes[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double BestPosterior() {
|
|
||||||
this.sort();
|
|
||||||
return this.sorted_likelihoods[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public double RefPosterior(char ref) {
|
|
||||||
this.LodVsRef(ref);
|
|
||||||
return this.ref_likelihood;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
|
||||||
*
|
|
||||||
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
|
|
||||||
* @param ref the reference base
|
|
||||||
* @param pileup a pileup of the reads, containing the reads and their offsets
|
|
||||||
*
|
|
||||||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
|
||||||
*/
|
|
||||||
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
|
||||||
filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
|
||||||
|
|
||||||
// for calculating the rms of the mapping qualities
|
|
||||||
double squared = 0.0;
|
|
||||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
|
||||||
SAMRecord read = pileup.getReads().get(i);
|
|
||||||
squared += read.getMappingQuality() * read.getMappingQuality();
|
|
||||||
int offset = pileup.getOffsets().get(i);
|
|
||||||
char base = read.getReadString().charAt(offset);
|
|
||||||
byte qual = read.getBaseQualities()[offset];
|
|
||||||
add(ref, base, qual);
|
|
||||||
}
|
|
||||||
// save off the likelihoods
|
|
||||||
if (likelihoods == null || likelihoods.length == 0) return null;
|
|
||||||
|
|
||||||
// Apply the two calculations
|
|
||||||
applyPrior(ref);
|
|
||||||
|
|
||||||
// lets setup the locus
|
|
||||||
List<Genotype> lst = new ArrayList<Genotype>();
|
|
||||||
for (int x = 0; x < this.likelihoods.length; x++) {
|
|
||||||
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
|
|
||||||
}
|
|
||||||
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,326 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
||||||
|
|
||||||
|
import static java.lang.Math.log10;
|
||||||
|
import static java.lang.Math.pow;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
|
private static double getOneMinusQual(final byte qual) {
|
||||||
|
return oneMinusData[qual];
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getOneHalfMinusQual(final byte qual) {
|
||||||
|
return oneHalfMinusData[qual];
|
||||||
|
}
|
||||||
|
|
||||||
|
//public double[] likelihoods;
|
||||||
|
public int coverage;
|
||||||
|
|
||||||
|
// The genotype priors;
|
||||||
|
private double priorHomRef;
|
||||||
|
private double priorHet;
|
||||||
|
private double priorHomVar;
|
||||||
|
private double[] oneHalfMinusData;
|
||||||
|
|
||||||
|
public boolean isThreeStateErrors() {
|
||||||
|
return threeStateErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setThreeStateErrors(boolean threeStateErrors) {
|
||||||
|
this.threeStateErrors = threeStateErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean threeStateErrors = false;
|
||||||
|
private boolean discoveryMode = false;
|
||||||
|
|
||||||
|
public static final double HUMAN_HETEROZYGOSITY = 1e-3;
|
||||||
|
|
||||||
|
public static double[] computePriors(double h) {
|
||||||
|
double[] pdbls = new double[3];
|
||||||
|
pdbls[0] = 1.0 - (3.0 * h / 2.0);
|
||||||
|
pdbls[1] = h;
|
||||||
|
pdbls[2] = h / 2.0;
|
||||||
|
return pdbls;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the mode to discovery
|
||||||
|
* @param isInDiscoveryMode
|
||||||
|
*/
|
||||||
|
public void setDiscovery(boolean isInDiscoveryMode) {
|
||||||
|
discoveryMode = isInDiscoveryMode;
|
||||||
|
}
|
||||||
|
// Store the 2nd-best base priors for on-genotype primary bases
|
||||||
|
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||||
|
|
||||||
|
// Store the 2nd-best base priors for off-genotype primary bases
|
||||||
|
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||||
|
|
||||||
|
public NewHotnessGenotypeLikelihoods(double heterozygosity) {
|
||||||
|
double[] vals = computePriors(heterozygosity);
|
||||||
|
initialize(vals[0], vals[1], vals[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public NewHotnessGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
||||||
|
initialize(priorHomRef, priorHet, priorHomVar);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are we ignoring Q0 bases during calculations?
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean isFilteringQ0Bases() {
|
||||||
|
return filterQ0Bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable / disable filtering of Q0 bases. Enabled by default
|
||||||
|
*
|
||||||
|
* @param filterQ0Bases
|
||||||
|
*/
|
||||||
|
public void filterQ0Bases(boolean filterQ0Bases) {
|
||||||
|
this.filterQ0Bases = filterQ0Bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
|
||||||
|
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
|
||||||
|
|
||||||
|
this.priorHomRef = priorHomRef;
|
||||||
|
this.priorHet = priorHet;
|
||||||
|
this.priorHomVar = priorHomVar;
|
||||||
|
|
||||||
|
likelihoods = new double[10];
|
||||||
|
|
||||||
|
coverage = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHomRefPrior() {
|
||||||
|
return priorHomRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHomRefPrior(double priorHomRef) {
|
||||||
|
this.priorHomRef = priorHomRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHetPrior() {
|
||||||
|
return priorHet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHetPrior(double priorHet) {
|
||||||
|
this.priorHet = priorHet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHomVarPrior() {
|
||||||
|
return priorHomVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHomVarPrior(double priorHomVar) {
|
||||||
|
this.priorHomVar = priorHomVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(char ref, char read, byte qual)
|
||||||
|
{
|
||||||
|
if (qual <= 0) {
|
||||||
|
if ( isFilteringQ0Bases() ) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
qual = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (coverage == 0)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < likelihoods.length; i++)
|
||||||
|
{
|
||||||
|
likelihoods[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < genotypes.length; i++)
|
||||||
|
{
|
||||||
|
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||||
|
//if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
|
||||||
|
likelihoods[i] += likelihood;
|
||||||
|
coverage += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
||||||
|
if (qual == 0) {
|
||||||
|
// zero quals are wrong
|
||||||
|
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
|
||||||
|
}
|
||||||
|
|
||||||
|
char h1 = genotype.charAt(0);
|
||||||
|
char h2 = genotype.charAt(1);
|
||||||
|
|
||||||
|
double p_base;
|
||||||
|
|
||||||
|
if ((h1 == h2) && (h1 == read)) {
|
||||||
|
// hom
|
||||||
|
p_base = getOneMinusQual(qual);
|
||||||
|
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
|
||||||
|
// het
|
||||||
|
p_base = getOneHalfMinusQual(qual);
|
||||||
|
} else if ( threeStateErrors ) {
|
||||||
|
// error
|
||||||
|
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
||||||
|
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
|
||||||
|
} else {
|
||||||
|
// error
|
||||||
|
p_base = qual / -10.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return p_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] sorted_genotypes;
|
||||||
|
public double[] sorted_likelihoods;
|
||||||
|
|
||||||
|
public void sort() {
|
||||||
|
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
||||||
|
|
||||||
|
Integer[] reverse_permutation = new Integer[permutation.length];
|
||||||
|
for (int i = 0; i < reverse_permutation.length; i++) {
|
||||||
|
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
|
||||||
|
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(char ref) {
|
||||||
|
this.sort();
|
||||||
|
double sum = 0;
|
||||||
|
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
|
||||||
|
for (int i = 0; i < sorted_genotypes.length; i++) {
|
||||||
|
if (i != 0) {
|
||||||
|
s = s + " ";
|
||||||
|
}
|
||||||
|
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
|
||||||
|
sum += Math.pow(10,sorted_likelihoods[i]);
|
||||||
|
}
|
||||||
|
s = s + String.format(" %f", sum);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void applyPrior(char ref, double[] allele_likelihoods) {
|
||||||
|
int k = 0;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
for (int j = i; j < 4; j++) {
|
||||||
|
if (i == j) {
|
||||||
|
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
||||||
|
} else {
|
||||||
|
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
||||||
|
}
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.sort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void applyPrior(char ref) {
|
||||||
|
for (int i = 0; i < genotypes.length; i++) {
|
||||||
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
|
// hom-ref
|
||||||
|
likelihoods[i] += Math.log10(priorHomRef);
|
||||||
|
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||||
|
// hom-nonref
|
||||||
|
likelihoods[i] += Math.log10(priorHomVar);
|
||||||
|
} else {
|
||||||
|
// het
|
||||||
|
likelihoods[i] += Math.log10(priorHet);
|
||||||
|
}
|
||||||
|
if (Double.isInfinite(likelihoods[i])) {
|
||||||
|
likelihoods[i] = -1000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.sort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double LodVsNextBest() {
|
||||||
|
this.sort();
|
||||||
|
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
double ref_likelihood = Double.NaN;
|
||||||
|
|
||||||
|
public double LodVsRef(char ref) {
|
||||||
|
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
|
||||||
|
ref_likelihood = sorted_likelihoods[0];
|
||||||
|
return (-1.0 * this.LodVsNextBest());
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < genotypes.length; i++) {
|
||||||
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
|
ref_likelihood = likelihoods[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sorted_likelihoods[0] - ref_likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String BestGenotype() {
|
||||||
|
this.sort();
|
||||||
|
return this.sorted_genotypes[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double BestPosterior() {
|
||||||
|
this.sort();
|
||||||
|
return this.sorted_likelihoods[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double RefPosterior(char ref) {
|
||||||
|
this.LodVsRef(ref);
|
||||||
|
return this.ref_likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
||||||
|
*
|
||||||
|
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param pileup a pileup of the reads, containing the reads and their offsets
|
||||||
|
*
|
||||||
|
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||||
|
*/
|
||||||
|
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||||
|
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||||
|
|
||||||
|
// for calculating the rms of the mapping qualities
|
||||||
|
double squared = 0.0;
|
||||||
|
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||||
|
SAMRecord read = pileup.getReads().get(i);
|
||||||
|
squared += read.getMappingQuality() * read.getMappingQuality();
|
||||||
|
int offset = pileup.getOffsets().get(i);
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
add(ref, base, qual);
|
||||||
|
}
|
||||||
|
// save off the likelihoods
|
||||||
|
if (likelihoods == null || likelihoods.length == 0) return null;
|
||||||
|
|
||||||
|
// Apply the two calculations
|
||||||
|
applyPrior(ref);
|
||||||
|
|
||||||
|
// lets setup the locus
|
||||||
|
List<Genotype> lst = new ArrayList<Genotype>();
|
||||||
|
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||||
|
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
|
||||||
|
}
|
||||||
|
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,324 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
|
||||||
|
|
||||||
|
import static java.lang.Math.log10;
|
||||||
|
import static java.lang.Math.pow;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
|
||||||
|
private static double getOneMinusQual(final byte qual) {
|
||||||
|
return oneMinusData[qual];
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getOneHalfMinusQual(final byte qual) {
|
||||||
|
return oneHalfMinusData[qual];
|
||||||
|
}
|
||||||
|
|
||||||
|
//public double[] likelihoods;
|
||||||
|
public int coverage;
|
||||||
|
|
||||||
|
// The genotype priors;
|
||||||
|
private double priorHomRef;
|
||||||
|
private double priorHet;
|
||||||
|
private double priorHomVar;
|
||||||
|
private double[] oneHalfMinusData;
|
||||||
|
|
||||||
|
public boolean isThreeStateErrors() {
|
||||||
|
return threeStateErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setThreeStateErrors(boolean threeStateErrors) {
|
||||||
|
this.threeStateErrors = threeStateErrors;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean threeStateErrors = false;
|
||||||
|
private boolean discoveryMode = false;
|
||||||
|
|
||||||
|
public static double[] computePriors(double h) {
|
||||||
|
double[] pdbls = new double[3];
|
||||||
|
pdbls[0] = 1.0 - (3.0 * h / 2.0);
|
||||||
|
pdbls[1] = h;
|
||||||
|
pdbls[2] = h / 2.0;
|
||||||
|
return pdbls;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set the mode to discovery
|
||||||
|
* @param isInDiscoveryMode
|
||||||
|
*/
|
||||||
|
public void setDiscovery(boolean isInDiscoveryMode) {
|
||||||
|
discoveryMode = isInDiscoveryMode;
|
||||||
|
}
|
||||||
|
// Store the 2nd-best base priors for on-genotype primary bases
|
||||||
|
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
|
||||||
|
|
||||||
|
// Store the 2nd-best base priors for off-genotype primary bases
|
||||||
|
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
|
||||||
|
|
||||||
|
public OldAndBustedGenotypeLikelihoods(double heterozygosity) {
|
||||||
|
double[] vals = computePriors(heterozygosity);
|
||||||
|
initialize(vals[0], vals[1], vals[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
|
||||||
|
initialize(priorHomRef, priorHet, priorHomVar);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are we ignoring Q0 bases during calculations?
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean isFilteringQ0Bases() {
|
||||||
|
return filterQ0Bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable / disable filtering of Q0 bases. Enabled by default
|
||||||
|
*
|
||||||
|
* @param filterQ0Bases
|
||||||
|
*/
|
||||||
|
public void filterQ0Bases(boolean filterQ0Bases) {
|
||||||
|
this.filterQ0Bases = filterQ0Bases;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
|
||||||
|
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
|
||||||
|
|
||||||
|
this.priorHomRef = priorHomRef;
|
||||||
|
this.priorHet = priorHet;
|
||||||
|
this.priorHomVar = priorHomVar;
|
||||||
|
|
||||||
|
likelihoods = new double[10];
|
||||||
|
|
||||||
|
coverage = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHomRefPrior() {
|
||||||
|
return priorHomRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHomRefPrior(double priorHomRef) {
|
||||||
|
this.priorHomRef = priorHomRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHetPrior() {
|
||||||
|
return priorHet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHetPrior(double priorHet) {
|
||||||
|
this.priorHet = priorHet;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getHomVarPrior() {
|
||||||
|
return priorHomVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setHomVarPrior(double priorHomVar) {
|
||||||
|
this.priorHomVar = priorHomVar;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int add(char ref, char read, byte qual)
|
||||||
|
{
|
||||||
|
if (qual <= 0) {
|
||||||
|
if ( isFilteringQ0Bases() ) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
qual = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (coverage == 0)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < likelihoods.length; i++)
|
||||||
|
{
|
||||||
|
likelihoods[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < genotypes.length; i++)
|
||||||
|
{
|
||||||
|
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
|
||||||
|
//if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
|
||||||
|
likelihoods[i] += likelihood;
|
||||||
|
coverage += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
|
||||||
|
if (qual == 0) {
|
||||||
|
// zero quals are wrong
|
||||||
|
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
|
||||||
|
}
|
||||||
|
|
||||||
|
char h1 = genotype.charAt(0);
|
||||||
|
char h2 = genotype.charAt(1);
|
||||||
|
|
||||||
|
double p_base;
|
||||||
|
|
||||||
|
if ((h1 == h2) && (h1 == read)) {
|
||||||
|
// hom
|
||||||
|
p_base = getOneMinusQual(qual);
|
||||||
|
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
|
||||||
|
// het
|
||||||
|
p_base = getOneHalfMinusQual(qual);
|
||||||
|
} else if ( threeStateErrors ) {
|
||||||
|
// error
|
||||||
|
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
|
||||||
|
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
|
||||||
|
} else {
|
||||||
|
// error
|
||||||
|
p_base = qual / -10.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return p_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String[] sorted_genotypes;
|
||||||
|
public double[] sorted_likelihoods;
|
||||||
|
|
||||||
|
public void sort() {
|
||||||
|
Integer[] permutation = Utils.SortPermutation(likelihoods);
|
||||||
|
|
||||||
|
Integer[] reverse_permutation = new Integer[permutation.length];
|
||||||
|
for (int i = 0; i < reverse_permutation.length; i++) {
|
||||||
|
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
|
||||||
|
}
|
||||||
|
|
||||||
|
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
|
||||||
|
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(char ref) {
|
||||||
|
this.sort();
|
||||||
|
double sum = 0;
|
||||||
|
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
|
||||||
|
for (int i = 0; i < sorted_genotypes.length; i++) {
|
||||||
|
if (i != 0) {
|
||||||
|
s = s + " ";
|
||||||
|
}
|
||||||
|
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
|
||||||
|
sum += Math.pow(10,sorted_likelihoods[i]);
|
||||||
|
}
|
||||||
|
s = s + String.format(" %f", sum);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void applyPrior(char ref, double[] allele_likelihoods) {
|
||||||
|
int k = 0;
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
for (int j = i; j < 4; j++) {
|
||||||
|
if (i == j) {
|
||||||
|
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
||||||
|
} else {
|
||||||
|
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
||||||
|
}
|
||||||
|
k++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.sort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void applyPrior(char ref) {
|
||||||
|
for (int i = 0; i < genotypes.length; i++) {
|
||||||
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
|
// hom-ref
|
||||||
|
likelihoods[i] += Math.log10(priorHomRef);
|
||||||
|
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||||
|
// hom-nonref
|
||||||
|
likelihoods[i] += Math.log10(priorHomVar);
|
||||||
|
} else {
|
||||||
|
// het
|
||||||
|
likelihoods[i] += Math.log10(priorHet);
|
||||||
|
}
|
||||||
|
if (Double.isInfinite(likelihoods[i])) {
|
||||||
|
likelihoods[i] = -1000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.sort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double LodVsNextBest() {
|
||||||
|
this.sort();
|
||||||
|
return sorted_likelihoods[0] - sorted_likelihoods[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
double ref_likelihood = Double.NaN;
|
||||||
|
|
||||||
|
public double LodVsRef(char ref) {
|
||||||
|
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
|
||||||
|
ref_likelihood = sorted_likelihoods[0];
|
||||||
|
return (-1.0 * this.LodVsNextBest());
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < genotypes.length; i++) {
|
||||||
|
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||||
|
ref_likelihood = likelihoods[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sorted_likelihoods[0] - ref_likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String BestGenotype() {
|
||||||
|
this.sort();
|
||||||
|
return this.sorted_genotypes[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double BestPosterior() {
|
||||||
|
this.sort();
|
||||||
|
return this.sorted_likelihoods[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public double RefPosterior(char ref) {
|
||||||
|
this.LodVsRef(ref);
|
||||||
|
return this.ref_likelihood;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
||||||
|
*
|
||||||
|
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
|
||||||
|
* @param ref the reference base
|
||||||
|
* @param pileup a pileup of the reads, containing the reads and their offsets
|
||||||
|
*
|
||||||
|
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||||
|
*/
|
||||||
|
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||||
|
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||||
|
|
||||||
|
// for calculating the rms of the mapping qualities
|
||||||
|
double squared = 0.0;
|
||||||
|
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||||
|
SAMRecord read = pileup.getReads().get(i);
|
||||||
|
squared += read.getMappingQuality() * read.getMappingQuality();
|
||||||
|
int offset = pileup.getOffsets().get(i);
|
||||||
|
char base = read.getReadString().charAt(offset);
|
||||||
|
byte qual = read.getBaseQualities()[offset];
|
||||||
|
add(ref, base, qual);
|
||||||
|
}
|
||||||
|
// save off the likelihoods
|
||||||
|
if (likelihoods == null || likelihoods.length == 0) return null;
|
||||||
|
|
||||||
|
// Apply the two calculations
|
||||||
|
applyPrior(ref);
|
||||||
|
|
||||||
|
// lets setup the locus
|
||||||
|
List<Genotype> lst = new ArrayList<Genotype>();
|
||||||
|
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||||
|
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
|
||||||
|
}
|
||||||
|
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -114,8 +114,8 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
|
return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
|
||||||
mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
|
getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
// Control output settings
|
// Control output settings
|
||||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
|
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
|
||||||
public File VARIANTS_FILE;
|
public File VARIANTS_FILE;
|
||||||
|
|
||||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
|
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
|
||||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||||
|
|
||||||
|
|
@ -37,7 +38,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
public boolean GENOTYPE = false;
|
public boolean GENOTYPE = false;
|
||||||
|
|
||||||
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
|
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
|
||||||
public boolean THREE_BASE_ERRORS = false;
|
public boolean THREE_BASE_ERRORS = true;
|
||||||
|
|
||||||
public enum Caller {
|
public enum Caller {
|
||||||
OLD_AND_BUSTED,
|
OLD_AND_BUSTED,
|
||||||
|
|
@ -45,30 +46,24 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
}
|
}
|
||||||
|
|
||||||
@Argument(fullName = "caller", doc = "", required = false)
|
@Argument(fullName = "caller", doc = "", required = false)
|
||||||
public Caller caller = Caller.OLD_AND_BUSTED;
|
public Caller caller = Caller.NEW_HOTNESS;
|
||||||
|
|
||||||
// Control how the genotype hypotheses are weighed
|
// Control how the genotype hypotheses are weighed
|
||||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
|
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||||
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
|
public Double heterozygosity = OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
|
||||||
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
//@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
|
||||||
|
public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||||
|
|
||||||
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
|
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
|
||||||
public boolean keepQ0Bases = false;
|
public boolean keepQ0Bases = false;
|
||||||
|
|
||||||
public double[] plocus;
|
public double[] plocus;
|
||||||
public double[] phapmap;
|
//public double[] phapmap;
|
||||||
public double[] pdbsnp;
|
public double[] pdbsnp;
|
||||||
|
|
||||||
public long nFilteredQ0Bases = 0;
|
public long nFilteredQ0Bases = 0;
|
||||||
|
|
||||||
/**
|
private final static boolean TESTING_NEW = false;
|
||||||
* Specify that this walker requires reads.
|
|
||||||
*
|
|
||||||
* @return true
|
|
||||||
*/
|
|
||||||
public boolean requiresReads() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
||||||
|
|
@ -103,8 +98,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
|
|
||||||
/** Initialize the walker with some sensible defaults */
|
/** Initialize the walker with some sensible defaults */
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
plocus = GenotypeLikelihoods.computePriors(heterozygosity);
|
plocus = OldAndBustedGenotypeLikelihoods.computePriors(heterozygosity);
|
||||||
phapmap = priorsArray(PRIORS_HAPMAP);
|
//phapmap = priorsArray(PRIORS_HAPMAP);
|
||||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -116,31 +111,32 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
*/
|
*/
|
||||||
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
|
if ( TESTING_NEW ) {
|
||||||
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
|
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
|
||||||
|
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
|
||||||
|
|
||||||
if ( ! oldAndBusted.equals(newHotness) || true ) {
|
if ( ! oldAndBusted.equals(newHotness) ) {
|
||||||
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
|
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
|
||||||
|
}
|
||||||
|
|
||||||
|
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
|
||||||
|
} else {
|
||||||
|
switch ( caller ) {
|
||||||
|
case OLD_AND_BUSTED: return mapOldAndBusted(tracker, ref, context);
|
||||||
|
case NEW_HOTNESS: return mapNewHotness(tracker, ref, context);
|
||||||
|
default: return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
|
NewHotnessGenotypeLikelihoods G = makeNewHotnessGenotypeLikelihoods(tracker);
|
||||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||||
SSGGenotypeCall geno = callGenotypes(G, tracker, ref, pileup);
|
SSGGenotypeCall geno = callGenotypes(G, tracker, ref, pileup);
|
||||||
return geno;
|
return geno;
|
||||||
}
|
}
|
||||||
|
|
||||||
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
|
||||||
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
|
|
||||||
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
|
||||||
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
|
|
||||||
return geno;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
||||||
|
|
@ -152,8 +148,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||||
*/
|
*/
|
||||||
public SSGGenotypeCall callGenotypes(GenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
public SSGGenotypeCall callGenotypes(GenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||||
G.filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
|
||||||
|
|
||||||
// for calculating the rms of the mapping qualities
|
// for calculating the rms of the mapping qualities
|
||||||
double squared = 0.0;
|
double squared = 0.0;
|
||||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||||
|
|
@ -174,31 +168,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
// lets setup the locus
|
// lets setup the locus
|
||||||
List<Genotype> lst = new ArrayList<Genotype>();
|
List<Genotype> lst = new ArrayList<Genotype>();
|
||||||
for (int x = 0; x < G.likelihoods.length; x++) {
|
for (int x = 0; x < G.likelihoods.length; x++) {
|
||||||
lst.add(new BasicGenotype(pileup.getLocation(),GenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
|
lst.add(new BasicGenotype(pileup.getLocation(),OldAndBustedGenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
|
||||||
}
|
}
|
||||||
return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup);
|
return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private NewHotnessGenotypeLikelihoods makeNewHotnessGenotypeLikelihoods(RefMetaDataTracker tracker) {
|
||||||
* Calls the underlying, single locus genotype of the sample
|
NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
*
|
|
||||||
* @param tracker the meta data tracker
|
|
||||||
*
|
|
||||||
* @return the likelihoods per genotype
|
|
||||||
*/
|
|
||||||
private GenotypeLikelihoods makeGenotypeLikelihood(RefMetaDataTracker tracker) {
|
|
||||||
GenotypeLikelihoods G = null;
|
|
||||||
|
|
||||||
if (isHapmapSite(tracker)) {
|
|
||||||
G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
|
||||||
} else if (isDbSNPSite(tracker)) {
|
|
||||||
G = new GenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
|
|
||||||
} else {
|
|
||||||
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
|
|
||||||
}
|
|
||||||
|
|
||||||
G.filterQ0Bases(! keepQ0Bases);
|
G.filterQ0Bases(! keepQ0Bases);
|
||||||
|
|
||||||
return G;
|
return G;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -244,9 +221,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
*/
|
*/
|
||||||
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
|
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
|
||||||
if (call != null && (GENOTYPE || call.isVariation())) {
|
if (call != null && (GENOTYPE || call.isVariation())) {
|
||||||
System.out.printf("Score %s%n", call.getConfidenceScore());
|
//System.out.printf("Score %s%n", call.getConfidenceScore());
|
||||||
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) {
|
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) {
|
||||||
System.out.printf("Call %s%n", call);
|
//System.out.printf("Call %s%n", call);
|
||||||
sum.addGenotypeCall(call);
|
sum.addGenotypeCall(call);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -258,6 +235,36 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
|
||||||
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
|
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
|
||||||
sum.close();
|
sum.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// TODO -- delete the old and busted routines
|
||||||
|
//
|
||||||
|
private OldAndBustedGenotypeLikelihoods makeOldAndBustedGenotypeLikelihoods(RefMetaDataTracker tracker) {
|
||||||
|
OldAndBustedGenotypeLikelihoods G = null;
|
||||||
|
|
||||||
|
if (isHapmapSite(tracker)) {
|
||||||
|
G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
|
} else if (isDbSNPSite(tracker)) {
|
||||||
|
G = new OldAndBustedGenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
|
||||||
|
} else {
|
||||||
|
G = new OldAndBustedGenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
G.filterQ0Bases(! keepQ0Bases);
|
||||||
|
|
||||||
|
return G;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// TODO -- delete the old and busted routines
|
||||||
|
//
|
||||||
|
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
|
OldAndBustedGenotypeLikelihoods G = makeOldAndBustedGenotypeLikelihoods(tracker);
|
||||||
|
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
|
||||||
|
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
|
||||||
|
return geno;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,124 +0,0 @@
|
||||||
package org.broadinstitute.sting.playground;
|
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
import net.sf.picard.cmdline.CommandLineProgram;
|
|
||||||
import net.sf.picard.cmdline.Usage;
|
|
||||||
import net.sf.picard.cmdline.Option;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
|
|
||||||
public class ValidateSAM extends CommandLineProgram {
|
|
||||||
// Usage and parameters
|
|
||||||
@Usage(programVersion="0.1") public String USAGE = "SAM Validator\n";
|
|
||||||
@Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE;
|
|
||||||
@Option(shortName="M", doc="Maximum number of errors to detect before exiting", optional=true) public String MAX_ERRORS_ARG = "-1";
|
|
||||||
@Option(shortName="S", doc="How strict should we be with validation", optional=true) public String STRICTNESS_ARG = "strict";
|
|
||||||
@Option(shortName="SORTED", doc="Check that the SAM file is sorted", optional=true) public boolean CHECK_SORTED_ARG = false;
|
|
||||||
|
|
||||||
private long startTime = -1;
|
|
||||||
|
|
||||||
/** Required main method implementation. */
|
|
||||||
public static void main(String[] argv) {
|
|
||||||
System.exit(new ValidateSAM().instanceMain(argv));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void printProgress( int nRecords, int nErrors ) {
|
|
||||||
final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
|
|
||||||
final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
|
|
||||||
System.out.printf("Read %d records containing %d errors in %.2f secs (%.2f secs per 1M reads)%n", nRecords, nErrors, elapsed, secsPer1MReads);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected int doWork() {
|
|
||||||
int MAX_ERRORS = -1; // Don't bail ever
|
|
||||||
if ( MAX_ERRORS_ARG != null ) {
|
|
||||||
MAX_ERRORS = Integer.parseInt(MAX_ERRORS_ARG);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start the timer
|
|
||||||
startTime = System.currentTimeMillis();
|
|
||||||
|
|
||||||
// Initialize the sam reader
|
|
||||||
CloseableIterator<SAMRecord> iter = null;
|
|
||||||
try {
|
|
||||||
final SAMFileReader samReader = getSamReader(INPUT_FILE);
|
|
||||||
iter = samReader.iterator();
|
|
||||||
} catch (Exception ioe) {
|
|
||||||
System.out.println("[VALIDATION FAILURE IN HEADER]: " + ioe);
|
|
||||||
ioe.printStackTrace();
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int nRecords = 0;
|
|
||||||
int nErrors = 0;
|
|
||||||
|
|
||||||
SAMRecord last_record = null;
|
|
||||||
|
|
||||||
while ( iter.hasNext() ) {
|
|
||||||
nRecords++;
|
|
||||||
try {
|
|
||||||
final SAMRecord ri = iter.next();
|
|
||||||
|
|
||||||
if (CHECK_SORTED_ARG &&
|
|
||||||
(last_record != null) &&
|
|
||||||
(ri.getReferenceName() == last_record.getReferenceName()) &&
|
|
||||||
(ri.getAlignmentStart() < last_record.getAlignmentStart()))
|
|
||||||
{
|
|
||||||
System.out.println("Not sorted!");
|
|
||||||
System.out.println(last_record.format());
|
|
||||||
System.out.println(ri.format() + "\n");
|
|
||||||
System.exit(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
last_record = ri;
|
|
||||||
|
|
||||||
} catch (Exception ioe) {
|
|
||||||
nErrors++;
|
|
||||||
System.out.println("[VALIDATION FAILURE IN RECORD]: " + ioe);
|
|
||||||
ioe.printStackTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if ( MAX_ERRORS > -1 && nErrors >= MAX_ERRORS ) {
|
|
||||||
System.out.println("Maximum number of errors encountered " + nErrors);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( nRecords % 100000 == 0 ) {
|
|
||||||
printProgress( nRecords, nErrors );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printProgress( nRecords, nErrors );
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void usage() {
|
|
||||||
System.err.println("USAGE: org.broadinstitute.sting.ValidateSAM <SAMFile|BAMFile>");
|
|
||||||
}
|
|
||||||
|
|
||||||
private SAMFileReader getSamReader(final File samFile) {
|
|
||||||
|
|
||||||
ValidationStringency strictness = SAMFileReader.ValidationStringency.STRICT;
|
|
||||||
if ( STRICTNESS_ARG == null ) {
|
|
||||||
strictness = SAMFileReader.ValidationStringency.STRICT;
|
|
||||||
}
|
|
||||||
else if ( STRICTNESS_ARG.toLowerCase().equals("lenient") ) {
|
|
||||||
strictness = SAMFileReader.ValidationStringency.LENIENT;
|
|
||||||
}
|
|
||||||
else if ( STRICTNESS_ARG.toLowerCase().equals("silent") ) {
|
|
||||||
strictness = SAMFileReader.ValidationStringency.SILENT;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
strictness = SAMFileReader.ValidationStringency.STRICT;
|
|
||||||
}
|
|
||||||
|
|
||||||
System.err.println("Strictness is " + strictness);
|
|
||||||
final SAMFileReader samReader = new SAMFileReader(samFile, true);
|
|
||||||
samReader.setValidationStringency(strictness);
|
|
||||||
|
|
||||||
return samReader;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.Pair;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
|
@ -103,7 +103,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
|
||||||
}
|
}
|
||||||
out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs);
|
out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs);
|
||||||
|
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
|
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
|
||||||
import org.broadinstitute.sting.playground.utils.*;
|
import org.broadinstitute.sting.playground.utils.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
|
@ -144,14 +144,14 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
|
|
||||||
char ref;
|
char ref;
|
||||||
|
|
||||||
GenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
|
OldAndBustedGenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
|
||||||
{
|
{
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
String bases = pileup.getBases();
|
String bases = pileup.getBases();
|
||||||
|
|
||||||
if (bases.length() == 0)
|
if (bases.length() == 0)
|
||||||
{
|
{
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
return G;
|
return G;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -160,7 +160,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
ref = Character.toUpperCase(ref);
|
ref = Character.toUpperCase(ref);
|
||||||
|
|
||||||
// Handle single-base polymorphisms.
|
// Handle single-base polymorphisms.
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
for ( int i = 0; i < reads.size(); i++ )
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
{
|
{
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
|
|
@ -195,7 +195,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return G;
|
return G;
|
||||||
}
|
}
|
||||||
|
|
||||||
double[] CountFreqs(GenotypeLikelihoods[] genotype_likelihoods)
|
double[] CountFreqs(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
double[] allele_likelihoods = new double[4];
|
double[] allele_likelihoods = new double[4];
|
||||||
for (int x = 0; x < genotype_likelihoods.length; x++)
|
for (int x = 0; x < genotype_likelihoods.length; x++)
|
||||||
|
|
@ -230,7 +230,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return allele_likelihoods;
|
return allele_likelihoods;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* double CountIndelFreq(GenotypeLikelihoods[] genotype_likelihoods)
|
/* double CountIndelFreq(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>();
|
HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>();
|
||||||
|
|
||||||
|
|
@ -260,7 +260,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
// Potential precision error here.
|
// Potential precision error here.
|
||||||
double Compute_pD(GenotypeLikelihoods[] genotype_likelihoods)
|
double Compute_pD(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
double pD = 0;
|
double pD = 0;
|
||||||
for (int i = 0; i < sample_names.size(); i++)
|
for (int i = 0; i < sample_names.size(); i++)
|
||||||
|
|
@ -280,7 +280,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
double[] allele_likelihoods = new double[4];
|
double[] allele_likelihoods = new double[4];
|
||||||
for (int i = 0; i < 4; i++) { allele_likelihoods[i] = 1e-6/3.0; }
|
for (int i = 0; i < 4; i++) { allele_likelihoods[i] = 1e-6/3.0; }
|
||||||
allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(ref)] = 1.0-1e-6;
|
allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(ref)] = 1.0-1e-6;
|
||||||
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
|
OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
|
||||||
for (int j = 0; j < sample_names.size(); j++)
|
for (int j = 0; j < sample_names.size(); j++)
|
||||||
{
|
{
|
||||||
G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6);
|
G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6);
|
||||||
|
|
@ -296,7 +296,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
double LOD(LocusContext[] contexts)
|
double LOD(LocusContext[] contexts)
|
||||||
{
|
{
|
||||||
em_result = EM(contexts);
|
em_result = EM(contexts);
|
||||||
GenotypeLikelihoods[] G = em_result.genotype_likelihoods;
|
OldAndBustedGenotypeLikelihoods[] G = em_result.genotype_likelihoods;
|
||||||
pD = Compute_pD(G);
|
pD = Compute_pD(G);
|
||||||
pNull = Compute_pNull(contexts);
|
pNull = Compute_pNull(contexts);
|
||||||
lod = pD - pNull;
|
lod = pD - pNull;
|
||||||
|
|
@ -306,11 +306,11 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
class EM_Result
|
class EM_Result
|
||||||
{
|
{
|
||||||
String[] sample_names;
|
String[] sample_names;
|
||||||
GenotypeLikelihoods[] genotype_likelihoods;
|
OldAndBustedGenotypeLikelihoods[] genotype_likelihoods;
|
||||||
double[] allele_likelihoods;
|
double[] allele_likelihoods;
|
||||||
int EM_N;
|
int EM_N;
|
||||||
|
|
||||||
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
public EM_Result(List<String> sample_names, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
|
||||||
{
|
{
|
||||||
this.sample_names = new String[1];
|
this.sample_names = new String[1];
|
||||||
this.sample_names = sample_names.toArray(this.sample_names);
|
this.sample_names = sample_names.toArray(this.sample_names);
|
||||||
|
|
@ -337,7 +337,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
}
|
}
|
||||||
double indel_alt_freq = 1e-4;
|
double indel_alt_freq = 1e-4;
|
||||||
|
|
||||||
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()];
|
OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
|
||||||
for (int i = 0; i < MAX_ITERATIONS; i++)
|
for (int i = 0; i < MAX_ITERATIONS; i++)
|
||||||
{
|
{
|
||||||
for (int j = 0; j < sample_names.size(); j++)
|
for (int j = 0; j < sample_names.size(); j++)
|
||||||
|
|
@ -387,9 +387,9 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return strand_score;
|
return strand_score;
|
||||||
}
|
}
|
||||||
|
|
||||||
GenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
|
OldAndBustedGenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
|
||||||
{
|
{
|
||||||
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
|
||||||
int k = 0;
|
int k = 0;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
|
|
@ -409,7 +409,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
else { return BaseUtils.baseIndexToSimpleBase(perm[2]); }
|
else { return BaseUtils.baseIndexToSimpleBase(perm[2]); }
|
||||||
}
|
}
|
||||||
|
|
||||||
double Compute_discovery_lod(char ref, GenotypeLikelihoods[] genotype_likelihoods)
|
double Compute_discovery_lod(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
double pBest = 0;
|
double pBest = 0;
|
||||||
double pRef = 0;
|
double pRef = 0;
|
||||||
|
|
@ -427,7 +427,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(PickAlt(ref, allele_likelihoods))];
|
return allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(PickAlt(ref, allele_likelihoods))];
|
||||||
}
|
}
|
||||||
|
|
||||||
int Compute_n_ref(char ref, GenotypeLikelihoods[] genotype_likelihoods)
|
int Compute_n_ref(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int i = 0; i < genotype_likelihoods.length; i++)
|
for (int i = 0; i < genotype_likelihoods.length; i++)
|
||||||
|
|
@ -439,7 +439,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Compute_n_het(char ref, GenotypeLikelihoods[] genotype_likelihoods)
|
int Compute_n_het(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int i = 0; i < genotype_likelihoods.length; i++)
|
for (int i = 0; i < genotype_likelihoods.length; i++)
|
||||||
|
|
@ -452,7 +452,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Compute_n_hom(char ref, GenotypeLikelihoods[] genotype_likelihoods)
|
int Compute_n_hom(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
|
||||||
{
|
{
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (int i = 0; i < genotype_likelihoods.length; i++)
|
for (int i = 0; i < genotype_likelihoods.length; i++)
|
||||||
|
|
@ -474,7 +474,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
|
||||||
double lod = LOD(contexts);
|
double lod = LOD(contexts);
|
||||||
double strand_score = StrandScore(context);
|
double strand_score = StrandScore(context);
|
||||||
//EM_Result em_result = EM(contexts);
|
//EM_Result em_result = EM(contexts);
|
||||||
GenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods);
|
OldAndBustedGenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods);
|
||||||
|
|
||||||
//double pD = Compute_pD(em_result.genotype_likelihoods);
|
//double pD = Compute_pD(em_result.genotype_likelihoods);
|
||||||
//double pNull = Compute_pNull(contexts);
|
//double pNull = Compute_pNull(contexts);
|
||||||
|
|
|
||||||
|
|
@ -34,13 +34,13 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
protected class GenotypeLikelihoods
|
protected class OldAndBustedGenotypeLikelihoods
|
||||||
{
|
{
|
||||||
|
|
||||||
public double[] likelihoods;
|
public double[] likelihoods;
|
||||||
public String[] genotypes;
|
public String[] genotypes;
|
||||||
|
|
||||||
GenotypeLikelihoods()
|
OldAndBustedGenotypeLikelihoods()
|
||||||
{
|
{
|
||||||
likelihoods = new double[10];
|
likelihoods = new double[10];
|
||||||
genotypes = new String[10];
|
genotypes = new String[10];
|
||||||
|
|
@ -124,9 +124,9 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
||||||
int cCount = 0;
|
int cCount = 0;
|
||||||
int gCount = 0;
|
int gCount = 0;
|
||||||
int tCount = 0;
|
int tCount = 0;
|
||||||
GenotypeLikelihoods glAll = new GenotypeLikelihoods();
|
OldAndBustedGenotypeLikelihoods glAll = new OldAndBustedGenotypeLikelihoods();
|
||||||
GenotypeLikelihoods glForward = new GenotypeLikelihoods();
|
OldAndBustedGenotypeLikelihoods glForward = new OldAndBustedGenotypeLikelihoods();
|
||||||
GenotypeLikelihoods glReverse = new GenotypeLikelihoods();
|
OldAndBustedGenotypeLikelihoods glReverse = new OldAndBustedGenotypeLikelihoods();
|
||||||
for ( int i = 0; i < reads.size(); i++ )
|
for ( int i = 0; i < reads.size(); i++ )
|
||||||
{
|
{
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
|
|
@ -409,7 +409,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
||||||
return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1();
|
return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String dumpTheories(GenotypeLikelihoods gl) {
|
protected String dumpTheories(OldAndBustedGenotypeLikelihoods gl) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i = gl.genotypes.length-1; i >= 0; i--)
|
for (int i = gl.genotypes.length-1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue