New-hotness and old-and-busted genotype likelihood objects are now in the code base as I work towards a bug-free SSG, along with a cleaner interface to the genotype likelihood object.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1372 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-03 23:07:53 +00:00
parent 4986b2abd6
commit 20ff603339
9 changed files with 751 additions and 524 deletions

View File

@ -1,34 +1,22 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10; import static java.lang.Math.log10;
import static java.lang.Math.pow; import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
public class GenotypeLikelihoods { public abstract class GenotypeLikelihoods {
// precalculate these for performance (pow/log10 is expensive!) // precalculate these for performance (pow/log10 is expensive!)
/** /**
* SOLID data uses Q0 bases to represent reference-fixed bases -- they shouldn't be counted * SOLID data uses Q0 bases to represent reference-fixed bases -- they shouldn't be counted
* during GL calculations. If this field is true, Q0 bases will be removed in add(). * during GL calculations. If this field is true, Q0 bases will be removed in add().
*/ */
private boolean filterQ0Bases = true; protected boolean filterQ0Bases = true;
private static final double[] oneMinusData = new double[Byte.MAX_VALUE]; protected static final double[] oneMinusData = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE]; protected static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE]; protected static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
private final boolean keepQ0Bases = false; protected static final double log10Of1_3 = log10(1.0 / 3);
private static final double log10Of1_3 = log10(1.0 / 3); protected static final double log10Of2_3 = log10(2.0 / 3);
private static final double log10Of2_3 = log10(2.0 / 3);
static { static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
@ -36,11 +24,6 @@ public class GenotypeLikelihoods {
} }
} }
private static double getOneMinusQual(final byte qual) {
return oneMinusData[qual];
}
static { static {
for (int qual = 0; qual < Byte.MAX_VALUE; qual++) { for (int qual = 0; qual < Byte.MAX_VALUE; qual++) {
double e = pow(10, (qual / -10.0)); double e = pow(10, (qual / -10.0));
@ -50,12 +33,7 @@ public class GenotypeLikelihoods {
} }
} }
private double getOneHalfMinusQual(final byte qual) { public final static String[] genotypes = new String[10];
return oneHalfMinusData[qual];
}
public double[] likelihoods;
public static String[] genotypes = new String[10];
static { static {
genotypes[0] = "AA"; genotypes[0] = "AA";
genotypes[1] = "AC"; genotypes[1] = "AC";
@ -68,24 +46,6 @@ public class GenotypeLikelihoods {
genotypes[8] = "GT"; genotypes[8] = "GT";
genotypes[9] = "TT"; genotypes[9] = "TT";
} }
public int coverage;
// The genotype priors;
private double priorHomRef;
private double priorHet;
private double priorHomVar;
private double[] oneHalfMinusData;
public boolean isThreeStateErrors() {
return threeStateErrors;
}
public void setThreeStateErrors(boolean threeStateErrors) {
this.threeStateErrors = threeStateErrors;
}
private boolean threeStateErrors = false;
private boolean discoveryMode = false;
public static final double HUMAN_HETEROZYGOSITY = 1e-3; public static final double HUMAN_HETEROZYGOSITY = 1e-3;
@ -97,274 +57,8 @@ public class GenotypeLikelihoods {
return pdbls; return pdbls;
} }
/** public double[] likelihoods;
* set the mode to discovery
* @param isInDiscoveryMode
*/
public void setDiscovery(boolean isInDiscoveryMode) {
discoveryMode = isInDiscoveryMode;
}
// Store the 2nd-best base priors for on-genotype primary bases
private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
// Store the 2nd-best base priors for off-genotype primary bases public abstract int add(char ref, char read, byte qual);
private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>(); public abstract void applyPrior(char ref);
public GenotypeLikelihoods(double heterozygosity) {
double[] vals = computePriors(heterozygosity);
initialize(vals[0], vals[1], vals[2]);
}
public GenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
initialize(priorHomRef, priorHet, priorHomVar);
}
/**
* Are we ignoring Q0 bases during calculations?
* @return
*/
public boolean isFilteringQ0Bases() {
return filterQ0Bases;
}
/**
* Enable / disable filtering of Q0 bases. Enabled by default
*
* @param filterQ0Bases
*/
public void filterQ0Bases(boolean filterQ0Bases) {
this.filterQ0Bases = filterQ0Bases;
}
private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
this.priorHomRef = priorHomRef;
this.priorHet = priorHet;
this.priorHomVar = priorHomVar;
likelihoods = new double[10];
coverage = 0;
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
}
public double getHomRefPrior() {
return priorHomRef;
}
public void setHomRefPrior(double priorHomRef) {
this.priorHomRef = priorHomRef;
}
public double getHetPrior() {
return priorHet;
}
public void setHetPrior(double priorHet) {
this.priorHet = priorHet;
}
public double getHomVarPrior() {
return priorHomVar;
}
public void setHomVarPrior(double priorHomVar) {
this.priorHomVar = priorHomVar;
}
public int add(char ref, char read, byte qual)
{
if (qual <= 0) {
if ( isFilteringQ0Bases() ) {
return 0;
} else {
qual = 1;
}
}
if (coverage == 0)
{
for (int i = 0; i < likelihoods.length; i++)
{
likelihoods[i] = 0;
}
}
for (int i = 0; i < genotypes.length; i++)
{
double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
//if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
likelihoods[i] += likelihood;
coverage += 1;
}
return 1;
}
private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
if (qual == 0) {
// zero quals are wrong
throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
}
char h1 = genotype.charAt(0);
char h2 = genotype.charAt(1);
double p_base;
if ((h1 == h2) && (h1 == read)) {
// hom
p_base = getOneMinusQual(qual);
} else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
// het
p_base = getOneHalfMinusQual(qual);
} else if ( threeStateErrors ) {
// error
//System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
} else {
// error
p_base = qual / -10.0;
}
return p_base;
}
public String[] sorted_genotypes;
public double[] sorted_likelihoods;
public void sort() {
Integer[] permutation = Utils.SortPermutation(likelihoods);
Integer[] reverse_permutation = new Integer[permutation.length];
for (int i = 0; i < reverse_permutation.length; i++) {
reverse_permutation[i] = permutation[(permutation.length - 1) - i];
}
sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
}
public String toString(char ref) {
this.sort();
double sum = 0;
String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
for (int i = 0; i < sorted_genotypes.length; i++) {
if (i != 0) {
s = s + " ";
}
s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
sum += Math.pow(10,sorted_likelihoods[i]);
}
s = s + String.format(" %f", sum);
return s;
}
public void applyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void applyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
this.sort();
}
public double LodVsNextBest() {
this.sort();
return sorted_likelihoods[0] - sorted_likelihoods[1];
}
double ref_likelihood = Double.NaN;
public double LodVsRef(char ref) {
if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
ref_likelihood = sorted_likelihoods[0];
return (-1.0 * this.LodVsNextBest());
} else {
for (int i = 0; i < genotypes.length; i++) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
ref_likelihood = likelihoods[i];
}
}
}
return sorted_likelihoods[0] - ref_likelihood;
}
public String BestGenotype() {
this.sort();
return this.sorted_genotypes[0];
}
public double BestPosterior() {
this.sort();
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
*
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
* @param ref the reference base
* @param pileup a pileup of the reads, containing the reads and their offsets
*
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// save off the likelihoods
if (likelihoods == null || likelihoods.length == 0) return null;
// Apply the two calculations
applyPrior(ref);
// lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < this.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
}
return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
}
} }

View File

@ -0,0 +1,326 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
/**
 * "New hotness" genotype-likelihood implementation: accumulates per-genotype
 * log10 likelihoods over the 10 diploid genotypes from individual base
 * observations, applies genotype priors, and wraps the result in an
 * SSGGenotypeCall.
 *
 * Concrete counterpart to the abstract GenotypeLikelihoods base class; the
 * legacy behavior is preserved unchanged in OldAndBustedGenotypeLikelihoods
 * for comparison.
 */
public class NewHotnessGenotypeLikelihoods extends GenotypeLikelihoods {

    /** log10(1 - e) for a base matching a homozygous genotype (table built in the base class). */
    private static double getOneMinusQual(final byte qual) {
        return oneMinusData[qual];
    }

    /** Het-case log10 probability for the given quality, from the table bound in initialize(). */
    private double getOneHalfMinusQual(final byte qual) {
        return oneHalfMinusData[qual];
    }

    /** Number of bases folded into the likelihoods via add(). */
    public int coverage;

    // Genotype priors (linear-scale probabilities; applied as log10 in applyPrior).
    private double priorHomRef;
    private double priorHet;
    private double priorHomVar;

    // Bound to the base class's 3-base or Arachne het table in initialize().
    private double[] oneHalfMinusData;

    private boolean threeStateErrors = false;
    private boolean discoveryMode = false;

    public static final double HUMAN_HETEROZYGOSITY = 1e-3;

    public boolean isThreeStateErrors() {
        return threeStateErrors;
    }

    /**
     * Select the 3-base error model.
     * NOTE(review): this does NOT re-bind oneHalfMinusData, which is chosen
     * once in initialize(); setting it after construction only affects the
     * mismatch branch of calculateAlleleLikelihood. Confirm intended semantics.
     */
    public void setThreeStateErrors(boolean threeStateErrors) {
        this.threeStateErrors = threeStateErrors;
    }

    /**
     * Convert a heterozygosity value h into the three genotype priors:
     * { p(hom-ref) = 1 - 3h/2, p(het) = h, p(hom-var) = h/2 }.
     */
    public static double[] computePriors(double h) {
        double[] pdbls = new double[3];
        pdbls[0] = 1.0 - (3.0 * h / 2.0);
        pdbls[1] = h;
        pdbls[2] = h / 2.0;
        return pdbls;
    }

    /**
     * Set discovery mode, which is passed through to the emitted SSGGenotypeCall.
     * @param isInDiscoveryMode true to emit calls in discovery mode
     */
    public void setDiscovery(boolean isInDiscoveryMode) {
        discoveryMode = isInDiscoveryMode;
    }

    // 2nd-best base priors for on-/off-genotype primary bases.
    // NOTE(review): never read or written in this class; presumably for planned work.
    private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
    private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();

    /** Build with priors derived from a heterozygosity value. */
    public NewHotnessGenotypeLikelihoods(double heterozygosity) {
        double[] vals = computePriors(heterozygosity);
        initialize(vals[0], vals[1], vals[2]);
    }

    /** Build with explicit hom-ref / het / hom-var priors. */
    public NewHotnessGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
        initialize(priorHomRef, priorHet, priorHomVar);
    }

    /** Are we ignoring Q0 bases during calculations? */
    public boolean isFilteringQ0Bases() {
        return filterQ0Bases;
    }

    /**
     * Enable / disable filtering of Q0 bases. Enabled by default.
     * @param filterQ0Bases true to drop Q0 bases in add()
     */
    public void filterQ0Bases(boolean filterQ0Bases) {
        this.filterQ0Bases = filterQ0Bases;
    }

    // Shared constructor body: bind the het lookup table, record the priors, and
    // start every genotype at a flat log10(0.1) (cleared when the first base arrives).
    private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
        this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
        this.priorHomRef = priorHomRef;
        this.priorHet = priorHet;
        this.priorHomVar = priorHomVar;
        likelihoods = new double[10];
        coverage = 0;
        for (int i = 0; i < likelihoods.length; i++) {
            likelihoods[i] = Math.log10(0.1);
        }
    }

    public double getHomRefPrior() {
        return priorHomRef;
    }

    public void setHomRefPrior(double priorHomRef) {
        this.priorHomRef = priorHomRef;
    }

    public double getHetPrior() {
        return priorHet;
    }

    public void setHetPrior(double priorHet) {
        this.priorHet = priorHet;
    }

    public double getHomVarPrior() {
        return priorHomVar;
    }

    public void setHomVarPrior(double priorHomVar) {
        this.priorHomVar = priorHomVar;
    }

    /**
     * Fold one base observation into the running per-genotype log10 likelihoods.
     *
     * @param ref  the reference base (not used in the math; kept for the interface)
     * @param read the called base
     * @param qual the base's Phred quality
     * @return 1 if the base contributed, 0 if it was filtered as Q0
     */
    public int add(char ref, char read, byte qual) {
        if (qual <= 0) {
            if (isFilteringQ0Bases()) {
                return 0;
            }
            qual = 1;  // keep the base but treat it as Q1 so the log-math stays finite
        }
        // First contributing base: wipe the flat log10(0.1) initialization.
        if (coverage == 0) {
            for (int i = 0; i < likelihoods.length; i++) {
                likelihoods[i] = 0;
            }
        }
        for (int i = 0; i < genotypes.length; i++) {
            likelihoods[i] += calculateAlleleLikelihood(ref, read, genotypes[i], qual);
        }
        // BUGFIX: previously incremented inside the genotype loop, inflating
        // coverage 10x per base; each base must count exactly once.
        coverage += 1;
        return 1;
    }

    /** log10 P(observed read base | genotype) at the given quality. */
    private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
        if (qual == 0) {
            // Q0 bases must have been filtered or promoted to Q1 by add().
            throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
        }
        char h1 = genotype.charAt(0);
        char h2 = genotype.charAt(1);
        double p_base;
        if ((h1 == h2) && (h1 == read)) {
            // hom match: log10(1 - e)
            p_base = getOneMinusQual(qual);
        } else if ((h1 != h2) && ((h1 == read) || (h2 == read))) {
            // het match: table encodes the active error model
            p_base = getOneHalfMinusQual(qual);
        } else if (threeStateErrors) {
            // Mismatch under the 3-base model: each specific wrong base has probability e/3.
            // NOTE(review): the original ternary here had identical branches
            // (h1 != h2 ? log10Of1_3 : log10Of1_3); collapsed to log10Of1_3,
            // which is behavior-identical. Confirm whether the het case was
            // actually meant to use log10Of2_3.
            p_base = qual / -10.0 + log10Of1_3;
        } else {
            // Mismatch under the classic model: log10(e)
            p_base = qual / -10.0;
        }
        return p_base;
    }

    /** Genotypes and likelihoods ordered best-first; populated by sort(). */
    public String[] sorted_genotypes;
    public double[] sorted_likelihoods;

    /** Sort genotypes by descending likelihood into the sorted_* arrays. */
    public void sort() {
        Integer[] permutation = Utils.SortPermutation(likelihoods);
        Integer[] reverse_permutation = new Integer[permutation.length];
        for (int i = 0; i < reverse_permutation.length; i++) {
            reverse_permutation[i] = permutation[(permutation.length - 1) - i];
        }
        sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
        sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
    }

    /** Debug rendering: best genotype, LODs, every genotype:likelihood pair, and the linear-space sum. */
    public String toString(char ref) {
        this.sort();
        double sum = 0;
        StringBuilder s = new StringBuilder();
        s.append(String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref)));
        for (int i = 0; i < sorted_genotypes.length; i++) {
            if (i != 0) {
                s.append(" ");
            }
            s.append(sorted_genotypes[i]).append(":").append(String.format("%.2f", sorted_likelihoods[i]));
            sum += Math.pow(10, sorted_likelihoods[i]);
        }
        s.append(String.format(" %f", sum));
        return s.toString();
    }

    /**
     * Apply per-allele priors: genotype (i,j) gains log10(p_i) + log10(p_j),
     * plus log10(2) for hets (two orderings). Re-sorts afterwards.
     * The k index walks the upper triangle in the same order as the genotypes array.
     */
    public void applyPrior(char ref, double[] allele_likelihoods) {
        int k = 0;
        for (int i = 0; i < 4; i++) {
            for (int j = i; j < 4; j++) {
                if (i == j) {
                    this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
                } else {
                    this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
                }
                k++;
            }
        }
        this.sort();
    }

    /**
     * Apply the hom-ref / het / hom-var priors given the reference base,
     * clamping -Infinity (zero prior) to -1000, then re-sort.
     */
    public void applyPrior(char ref) {
        for (int i = 0; i < genotypes.length; i++) {
            if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
                // hom-ref
                likelihoods[i] += Math.log10(priorHomRef);
            } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
                // hom-nonref
                likelihoods[i] += Math.log10(priorHomVar);
            } else {
                // het
                likelihoods[i] += Math.log10(priorHet);
            }
            if (Double.isInfinite(likelihoods[i])) {
                likelihoods[i] = -1000;  // a zero prior would otherwise poison downstream math
            }
        }
        this.sort();
    }

    /** LOD of the best genotype vs. the second best. */
    public double LodVsNextBest() {
        this.sort();
        return sorted_likelihoods[0] - sorted_likelihoods[1];
    }

    // Cached hom-ref likelihood; set as a side effect of LodVsRef().
    double ref_likelihood = Double.NaN;

    /**
     * LOD of the best genotype vs. hom-ref. When the best genotype IS hom-ref,
     * returns the negated LodVsNextBest. Side effect: caches ref_likelihood
     * for RefPosterior().
     */
    public double LodVsRef(char ref) {
        if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
            ref_likelihood = sorted_likelihoods[0];
            return (-1.0 * this.LodVsNextBest());
        }
        for (int i = 0; i < genotypes.length; i++) {
            if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
                ref_likelihood = likelihoods[i];
            }
        }
        return sorted_likelihoods[0] - ref_likelihood;
    }

    /** Best (highest-likelihood) genotype string, e.g. "AC". */
    public String BestGenotype() {
        this.sort();
        return this.sorted_genotypes[0];
    }

    /** log10 likelihood of the best genotype. */
    public double BestPosterior() {
        this.sort();
        return this.sorted_likelihoods[0];
    }

    /** log10 likelihood of the hom-ref genotype (computed via LodVsRef's caching side effect). */
    public double RefPosterior(char ref) {
        this.LodVsRef(ref);
        return this.ref_likelihood;
    }

    /**
     * Given all the data associated with a locus, accumulate every base in the
     * pileup, apply the genotype priors, and build the genotype call.
     *
     * @param tracker reference metadata for this location (currently unused here)
     * @param ref     the reference base
     * @param pileup  a pileup of the reads, containing the reads and their offsets
     * @return the SSGGenotypeCall for this locus, or null if no likelihoods are available
     */
    public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
        for (int i = 0; i < pileup.getReads().size(); i++) {
            SAMRecord read = pileup.getReads().get(i);
            int offset = pileup.getOffsets().get(i);
            char base = read.getReadString().charAt(offset);
            byte qual = read.getBaseQualities()[offset];
            add(ref, base, qual);
        }
        // NOTE(review): the original also accumulated squared mapping qualities
        // here ("rms of the mapping qualities") but never used the result;
        // that dead computation has been removed.
        if (likelihoods == null || likelihoods.length == 0) return null;
        // Fold the genotype priors into the likelihoods.
        applyPrior(ref);
        // Package each genotype with its confidence score.
        List<Genotype> lst = new ArrayList<Genotype>();
        for (int x = 0; x < this.likelihoods.length; x++) {
            lst.add(new BasicGenotype(pileup.getLocation(), genotypes[x], new BayesianConfidenceScore(this.likelihoods[x])));
        }
        return new SSGGenotypeCall(discoveryMode, ref, 2, lst, likelihoods, pileup);
    }
}

View File

@ -0,0 +1,324 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.confidence.BayesianConfidenceScore;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
// Legacy ("old and busted") genotype-likelihood implementation, deliberately kept
// byte-for-byte alongside NewHotnessGenotypeLikelihoods as a comparison baseline
// while the SSG rewrite is validated — known quirks are flagged below but NOT fixed.
// Accumulates per-genotype log10 likelihoods over the 10 diploid genotypes from
// base observations, applies genotype priors, and emits an SSGGenotypeCall.
public class OldAndBustedGenotypeLikelihoods extends GenotypeLikelihoods {
    // log10(1 - e) for a base matching a homozygous genotype (table built in the base class).
    private static double getOneMinusQual(final byte qual) {
        return oneMinusData[qual];
    }
    // Het-case log10 probability; table chosen once in initialize() by error model.
    private double getOneHalfMinusQual(final byte qual) {
        return oneHalfMinusData[qual];
    }
    //public double[] likelihoods;
    // Count of base contributions.
    // NOTE(review): add() increments this once per genotype (10x per base), so this
    // is NOT read depth — confirm before relying on it.
    public int coverage;
    // The genotype priors;
    // (linear-scale probabilities; applied as log10 in applyPrior)
    private double priorHomRef;
    private double priorHet;
    private double priorHomVar;
    // Alias to the base class's Arachne or 3-base het table, bound in initialize().
    private double[] oneHalfMinusData;
    public boolean isThreeStateErrors() {
        return threeStateErrors;
    }
    // NOTE(review): does not re-bind oneHalfMinusData, which is chosen once in
    // initialize(); only the mismatch branch of calculateAlleleLikelihood reacts.
    public void setThreeStateErrors(boolean threeStateErrors) {
        this.threeStateErrors = threeStateErrors;
    }
    private boolean threeStateErrors = false;
    private boolean discoveryMode = false;
    // Convert heterozygosity h into genotype priors:
    // { p(hom-ref) = 1 - 3h/2, p(het) = h, p(hom-var) = h/2 }.
    public static double[] computePriors(double h) {
        double[] pdbls = new double[3];
        pdbls[0] = 1.0 - (3.0 * h / 2.0);
        pdbls[1] = h;
        pdbls[2] = h / 2.0;
        return pdbls;
    }
    /**
     * set the mode to discovery (passed through to the SSGGenotypeCall)
     * @param isInDiscoveryMode true to emit calls in discovery mode
     */
    public void setDiscovery(boolean isInDiscoveryMode) {
        discoveryMode = isInDiscoveryMode;
    }
    // Store the 2nd-best base priors for on-genotype primary bases
    // NOTE(review): never read or written in this class.
    private HashMap<String, Double> onNextBestBasePriors = new HashMap<String, Double>();
    // Store the 2nd-best base priors for off-genotype primary bases
    // NOTE(review): never read or written in this class.
    private HashMap<String, Double> offNextBestBasePriors = new HashMap<String, Double>();
    // Build with priors derived from a heterozygosity value.
    public OldAndBustedGenotypeLikelihoods(double heterozygosity) {
        double[] vals = computePriors(heterozygosity);
        initialize(vals[0], vals[1], vals[2]);
    }
    // Build with explicit hom-ref / het / hom-var priors.
    public OldAndBustedGenotypeLikelihoods(double priorHomRef, double priorHet, double priorHomVar) {
        initialize(priorHomRef, priorHet, priorHomVar);
    }
    /**
     * Are we ignoring Q0 bases during calculations?
     * @return true if Q0 bases are dropped in add()
     */
    public boolean isFilteringQ0Bases() {
        return filterQ0Bases;
    }
    /**
     * Enable / disable filtering of Q0 bases. Enabled by default
     *
     * @param filterQ0Bases true to drop Q0 bases in add()
     */
    public void filterQ0Bases(boolean filterQ0Bases) {
        this.filterQ0Bases = filterQ0Bases;
    }
    // Shared constructor body: bind the het lookup table, record the priors, and
    // start every genotype at a flat log10(0.1) (cleared when the first base arrives).
    private void initialize(double priorHomRef, double priorHet, double priorHomVar) {
        this.oneHalfMinusData = threeStateErrors ? oneHalfMinusData3Base : oneHalfMinusDataArachne;
        this.priorHomRef = priorHomRef;
        this.priorHet = priorHet;
        this.priorHomVar = priorHomVar;
        likelihoods = new double[10];
        coverage = 0;
        for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
    }
    public double getHomRefPrior() {
        return priorHomRef;
    }
    public void setHomRefPrior(double priorHomRef) {
        this.priorHomRef = priorHomRef;
    }
    public double getHetPrior() {
        return priorHet;
    }
    public void setHetPrior(double priorHet) {
        this.priorHet = priorHet;
    }
    public double getHomVarPrior() {
        return priorHomVar;
    }
    public void setHomVarPrior(double priorHomVar) {
        this.priorHomVar = priorHomVar;
    }
    // Fold one base observation into the running per-genotype log10 likelihoods.
    // Returns 1 if the base contributed, 0 if it was filtered as Q0.
    public int add(char ref, char read, byte qual)
    {
        if (qual <= 0) {
            if ( isFilteringQ0Bases() ) {
                return 0;
            } else {
                qual = 1;  // keep the base but treat it as Q1 so the log-math stays finite
            }
        }
        // First contributing base: wipe the flat log10(0.1) initialization.
        if (coverage == 0)
        {
            for (int i = 0; i < likelihoods.length; i++)
            {
                likelihoods[i] = 0;
            }
        }
        for (int i = 0; i < genotypes.length; i++)
        {
            double likelihood = calculateAlleleLikelihood(ref, read, genotypes[i], qual);
            //if ( originalQual == 0 ) System.out.printf("Likelihood is %f for %c %c %d %s%n", likelihood, ref, read, qual, genotypes[i]);
            likelihoods[i] += likelihood;
            // NOTE(review): inside the genotype loop, so coverage grows by 10 per
            // base — looks like a bug; left as-is to preserve legacy behavior.
            coverage += 1;
        }
        return 1;
    }
    // log10 P(observed read base | genotype) at the given quality.
    private double calculateAlleleLikelihood(char ref, char read, String genotype, byte qual) {
        if (qual == 0) {
            // zero quals are wrong
            throw new RuntimeException(String.format("Unexpected Q0 base discovered in calculateAlleleLikelihood: %c %c %d", ref, read, qual));
        }
        char h1 = genotype.charAt(0);
        char h2 = genotype.charAt(1);
        double p_base;
        if ((h1 == h2) && (h1 == read)) {
            // hom
            p_base = getOneMinusQual(qual);
        } else if ( (h1 != h2) && ((h1 == read) || (h2 == read)) ) {
            // het
            p_base = getOneHalfMinusQual(qual);
        } else if ( threeStateErrors ) {
            // error
            //System.out.printf("%s %b %f %f%n", genotype, h1 != h2, log10Of2_3, log10Of1_3 );
            // NOTE(review): both ternary branches are log10Of1_3 — was the het
            // case meant to use log10Of2_3? Left untouched in this legacy class.
            p_base = qual / -10.0 + ( h1 != h2 ? log10Of1_3 : log10Of1_3 );
        } else {
            // error
            p_base = qual / -10.0;
        }
        return p_base;
    }
    // Genotypes and likelihoods ordered best-first; populated by sort().
    public String[] sorted_genotypes;
    public double[] sorted_likelihoods;
    // Sort genotypes by descending likelihood into the sorted_* arrays.
    public void sort() {
        Integer[] permutation = Utils.SortPermutation(likelihoods);
        Integer[] reverse_permutation = new Integer[permutation.length];
        for (int i = 0; i < reverse_permutation.length; i++) {
            reverse_permutation[i] = permutation[(permutation.length - 1) - i];
        }
        sorted_genotypes = Utils.PermuteArray(genotypes, reverse_permutation);
        sorted_likelihoods = Utils.PermuteArray(likelihoods, reverse_permutation);
    }
    // Debug rendering: best genotype, LODs, every genotype:likelihood pair, and
    // the linear-space sum of the likelihoods.
    public String toString(char ref) {
        this.sort();
        double sum = 0;
        String s = String.format("%s %f %f ", this.BestGenotype(), this.LodVsNextBest(), this.LodVsRef(ref));
        for (int i = 0; i < sorted_genotypes.length; i++) {
            if (i != 0) {
                s = s + " ";
            }
            s = s + sorted_genotypes[i] + ":" + String.format("%.2f", sorted_likelihoods[i]);
            sum += Math.pow(10,sorted_likelihoods[i]);
        }
        s = s + String.format(" %f", sum);
        return s;
    }
    // Apply per-allele priors: genotype (i,j) gains log10(p_i) + log10(p_j), plus
    // log10(2) for hets (two orderings). The k index walks the upper triangle in
    // the same order as the genotypes array. Re-sorts afterwards.
    public void applyPrior(char ref, double[] allele_likelihoods) {
        int k = 0;
        for (int i = 0; i < 4; i++) {
            for (int j = i; j < 4; j++) {
                if (i == j) {
                    this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
                } else {
                    this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
                }
                k++;
            }
        }
        this.sort();
    }
    // Apply the hom-ref / het / hom-var priors given the reference base, clamping
    // -Infinity (zero prior) to -1000, then re-sort.
    public void applyPrior(char ref) {
        for (int i = 0; i < genotypes.length; i++) {
            if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
                // hom-ref
                likelihoods[i] += Math.log10(priorHomRef);
            } else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
                // hom-nonref
                likelihoods[i] += Math.log10(priorHomVar);
            } else {
                // het
                likelihoods[i] += Math.log10(priorHet);
            }
            if (Double.isInfinite(likelihoods[i])) {
                likelihoods[i] = -1000;
            }
        }
        this.sort();
    }
    // LOD of the best genotype vs. the second best.
    public double LodVsNextBest() {
        this.sort();
        return sorted_likelihoods[0] - sorted_likelihoods[1];
    }
    // Cached hom-ref likelihood; set as a side effect of LodVsRef().
    double ref_likelihood = Double.NaN;
    // LOD of the best genotype vs. hom-ref; negated LodVsNextBest when the best
    // genotype IS hom-ref. Side effect: caches ref_likelihood for RefPosterior().
    public double LodVsRef(char ref) {
        if ((this.BestGenotype().charAt(0) == ref) && (this.BestGenotype().charAt(1) == ref)) {
            ref_likelihood = sorted_likelihoods[0];
            return (-1.0 * this.LodVsNextBest());
        } else {
            for (int i = 0; i < genotypes.length; i++) {
                if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
                    ref_likelihood = likelihoods[i];
                }
            }
        }
        return sorted_likelihoods[0] - ref_likelihood;
    }
    // Best (highest-likelihood) genotype string, e.g. "AC".
    public String BestGenotype() {
        this.sort();
        return this.sorted_genotypes[0];
    }
    // log10 likelihood of the best genotype.
    public double BestPosterior() {
        this.sort();
        return this.sorted_likelihoods[0];
    }
    // log10 likelihood of the hom-ref genotype (via LodVsRef's caching side effect).
    public double RefPosterior(char ref) {
        this.LodVsRef(ref);
        return this.ref_likelihood;
    }
    /**
     * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
     *
     * @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
     *                (NOTE(review): currently unused in this method)
     * @param ref the reference base
     * @param pileup a pileup of the reads, containing the reads and their offsets
     *
     * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
     */
    public SSGGenotypeCall callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
        //filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
        // for calculating the rms of the mapping qualities
        // NOTE(review): 'squared' is accumulated but never used below — dead code.
        double squared = 0.0;
        for (int i = 0; i < pileup.getReads().size(); i++) {
            SAMRecord read = pileup.getReads().get(i);
            squared += read.getMappingQuality() * read.getMappingQuality();
            int offset = pileup.getOffsets().get(i);
            char base = read.getReadString().charAt(offset);
            byte qual = read.getBaseQualities()[offset];
            add(ref, base, qual);
        }
        // save off the likelihoods
        if (likelihoods == null || likelihoods.length == 0) return null;
        // Apply the two calculations
        applyPrior(ref);
        // lets setup the locus
        List<Genotype> lst = new ArrayList<Genotype>();
        for (int x = 0; x < this.likelihoods.length; x++) {
            lst.add(new BasicGenotype(pileup.getLocation(),this.genotypes[x],new BayesianConfidenceScore(this.likelihoods[x])));
        }
        return new SSGGenotypeCall(discoveryMode,ref,2,lst,likelihoods,pileup);
    }
}

View File

@ -114,8 +114,8 @@ public class SSGGenotypeCall implements GenotypeCall, GenotypeOutput {
} }
public String toString() { public String toString() {
return String.format("ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s", return String.format("%s ref=%s depth=%d rmsMAPQ=%.2f bestVRef=%.2f bestVNext=%.2f likelihoods=%s",
mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods)); getLocation(), mRefBase, readDepth, rmsMapping, bestRef, bestNext, Arrays.toString(mLikelihoods));
} }
/** /**

View File

@ -26,6 +26,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
// Control output settings // Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true)
public File VARIANTS_FILE; public File VARIANTS_FILE;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) @Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false)
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI; public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
@ -37,7 +38,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
public boolean GENOTYPE = false; public boolean GENOTYPE = false;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false) @Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
public boolean THREE_BASE_ERRORS = false; public boolean THREE_BASE_ERRORS = true;
public enum Caller { public enum Caller {
OLD_AND_BUSTED, OLD_AND_BUSTED,
@ -45,30 +46,24 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
} }
@Argument(fullName = "caller", doc = "", required = false) @Argument(fullName = "caller", doc = "", required = false)
public Caller caller = Caller.OLD_AND_BUSTED; public Caller caller = Caller.NEW_HOTNESS;
// Control how the genotype hypotheses are weighed // Control how the genotype hypotheses are weighed
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false) public Double heterozygosity = GenotypeLikelihoods.HUMAN_HETEROZYGOSITY; @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5"; public Double heterozygosity = OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY;
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5"; //@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false)
public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
@Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false) @Argument(fullName = "keepQ0Bases", shortName = "keepQ0Bases", doc = "If true, then Q0 bases will be included in the genotyping calculation, and treated as Q1 -- this is really not a good idea", required = false)
public boolean keepQ0Bases = false; public boolean keepQ0Bases = false;
public double[] plocus; public double[] plocus;
public double[] phapmap; //public double[] phapmap;
public double[] pdbsnp; public double[] pdbsnp;
public long nFilteredQ0Bases = 0; public long nFilteredQ0Bases = 0;
/** private final static boolean TESTING_NEW = false;
* Specify that this walker requires reads.
*
* @return true
*/
public boolean requiresReads() {
return true;
}
/** /**
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage). * Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
@ -103,8 +98,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
/** Initialize the walker with some sensible defaults */ /** Initialize the walker with some sensible defaults */
public void initialize() { public void initialize() {
plocus = GenotypeLikelihoods.computePriors(heterozygosity); plocus = OldAndBustedGenotypeLikelihoods.computePriors(heterozygosity);
phapmap = priorsArray(PRIORS_HAPMAP); //phapmap = priorsArray(PRIORS_HAPMAP);
pdbsnp = priorsArray(PRIORS_DBSNP); pdbsnp = priorsArray(PRIORS_DBSNP);
} }
@ -116,31 +111,32 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @param context contextual information around the locus * @param context contextual information around the locus
*/ */
public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) { public SSGGenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context); if ( TESTING_NEW ) {
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context); SSGGenotypeCall oldAndBusted = mapOldAndBusted(tracker, ref, context);
SSGGenotypeCall newHotness = mapNewHotness(tracker, ref, context);
if ( ! oldAndBusted.equals(newHotness) || true ) { if ( ! oldAndBusted.equals(newHotness) ) {
System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness); System.out.printf("Calls not equal:%nold: %s%nnew: %s%n", oldAndBusted, newHotness);
}
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
} else {
switch ( caller ) {
case OLD_AND_BUSTED: return mapOldAndBusted(tracker, ref, context);
case NEW_HOTNESS: return mapNewHotness(tracker, ref, context);
default: return null;
}
} }
return caller == Caller.OLD_AND_BUSTED ? oldAndBusted : newHotness;
} }
private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) { private SSGGenotypeCall mapNewHotness(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context); ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker); NewHotnessGenotypeLikelihoods G = makeNewHotnessGenotypeLikelihoods(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = callGenotypes(G, tracker, ref, pileup); SSGGenotypeCall geno = callGenotypes(G, tracker, ref, pileup);
return geno; return geno;
} }
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
GenotypeLikelihoods G = makeGenotypeLikelihood(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = (SSGGenotypeCall)G.callGenotypes(tracker, ref, pileup);
return geno;
}
/** /**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs * given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
@ -152,8 +148,6 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values * @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/ */
public SSGGenotypeCall callGenotypes(GenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) { public SSGGenotypeCall callGenotypes(GenotypeLikelihoods G, RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
G.filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities // for calculating the rms of the mapping qualities
double squared = 0.0; double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) { for (int i = 0; i < pileup.getReads().size(); i++) {
@ -174,31 +168,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
// lets setup the locus // lets setup the locus
List<Genotype> lst = new ArrayList<Genotype>(); List<Genotype> lst = new ArrayList<Genotype>();
for (int x = 0; x < G.likelihoods.length; x++) { for (int x = 0; x < G.likelihoods.length; x++) {
lst.add(new BasicGenotype(pileup.getLocation(),GenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x]))); lst.add(new BasicGenotype(pileup.getLocation(),OldAndBustedGenotypeLikelihoods.genotypes[x],new BayesianConfidenceScore(G.likelihoods[x])));
} }
return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup); return new SSGGenotypeCall(! GENOTYPE,ref,2,lst,G.likelihoods,pileup);
} }
/** private NewHotnessGenotypeLikelihoods makeNewHotnessGenotypeLikelihoods(RefMetaDataTracker tracker) {
* Calls the underlying, single locus genotype of the sample NewHotnessGenotypeLikelihoods G = new NewHotnessGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
*
* @param tracker the meta data tracker
*
* @return the likelihoods per genotype
*/
private GenotypeLikelihoods makeGenotypeLikelihood(RefMetaDataTracker tracker) {
GenotypeLikelihoods G = null;
if (isHapmapSite(tracker)) {
G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
} else if (isDbSNPSite(tracker)) {
G = new GenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
} else {
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
}
G.filterQ0Bases(! keepQ0Bases); G.filterQ0Bases(! keepQ0Bases);
return G; return G;
} }
@ -244,9 +221,9 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
*/ */
public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) { public GenotypeWriter reduce(SSGGenotypeCall call, GenotypeWriter sum) {
if (call != null && (GENOTYPE || call.isVariation())) { if (call != null && (GENOTYPE || call.isVariation())) {
System.out.printf("Score %s%n", call.getConfidenceScore()); //System.out.printf("Score %s%n", call.getConfidenceScore());
if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) { if (call.getConfidenceScore().getScore() > LOD_THRESHOLD) {
System.out.printf("Call %s%n", call); //System.out.printf("Call %s%n", call);
sum.addGenotypeCall(call); sum.addGenotypeCall(call);
} }
} }
@ -258,6 +235,36 @@ public class SingleSampleGenotyper extends LocusWalker<SSGGenotypeCall, Genotype
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases)); logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
sum.close(); sum.close();
} }
//
// TODO -- delete the old and busted routines
//
private OldAndBustedGenotypeLikelihoods makeOldAndBustedGenotypeLikelihoods(RefMetaDataTracker tracker) {
OldAndBustedGenotypeLikelihoods G = null;
if (isHapmapSite(tracker)) {
G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
} else if (isDbSNPSite(tracker)) {
G = new OldAndBustedGenotypeLikelihoods(pdbsnp[0], pdbsnp[1], pdbsnp[2]);
} else {
G = new OldAndBustedGenotypeLikelihoods(plocus[0], plocus[1], plocus[2]);
}
G.filterQ0Bases(! keepQ0Bases);
return G;
}
//
// TODO -- delete the old and busted routines
//
private SSGGenotypeCall mapOldAndBusted(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
OldAndBustedGenotypeLikelihoods G = makeOldAndBustedGenotypeLikelihoods(tracker);
G.setDiscovery(GENOTYPE); // set it to discovery mode or variant detection mode
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);
return geno;
}
} }

View File

@ -1,124 +0,0 @@
package org.broadinstitute.sting.playground;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.cmdline.Option;
import java.io.*;
/**
 * Command-line tool that reads every record of a SAM/BAM file, reporting parse/validation
 * errors as it goes, optionally verifying that records are coordinate-sorted, and printing
 * periodic progress/throughput statistics.
 */
public class ValidateSAM extends CommandLineProgram {
    // Usage and parameters
    @Usage(programVersion="0.1") public String USAGE = "SAM Validator\n";
    @Option(shortName="I", doc="SAM or BAM file for validation") public File INPUT_FILE;
    @Option(shortName="M", doc="Maximum number of errors to detect before exiting", optional=true) public String MAX_ERRORS_ARG = "-1";
    @Option(shortName="S", doc="How strict should we be with validation", optional=true) public String STRICTNESS_ARG = "strict";
    @Option(shortName="SORTED", doc="Check that the SAM file is sorted", optional=true) public boolean CHECK_SORTED_ARG = false;

    /** Wall-clock time (ms) at the start of doWork(); used by printProgress(). */
    private long startTime = -1;

    /** Required main method implementation. */
    public static void main(String[] argv) {
        System.exit(new ValidateSAM().instanceMain(argv));
    }

    /**
     * Prints a one-line progress report: records read, errors seen, elapsed seconds,
     * and the extrapolated cost per one million reads.
     *
     * @param nRecords number of records processed so far (assumed > 0 when called)
     * @param nErrors  number of validation errors encountered so far
     */
    public void printProgress(int nRecords, int nErrors) {
        final double elapsed = (System.currentTimeMillis() - startTime) / 1000.0;
        final double secsPer1MReads = (elapsed * 1000000.0) / nRecords;
        System.out.printf("Read %d records containing %d errors in %.2f secs (%.2f secs per 1M reads)%n",
                nRecords, nErrors, elapsed, secsPer1MReads);
    }

    /**
     * Iterates over every record of INPUT_FILE, counting validation failures.
     *
     * @return 1 if the header itself fails to parse, 0 otherwise (record-level errors are
     *         reported but do not change the exit status; an unsorted file exits the JVM with -1,
     *         preserving the tool's historical behavior)
     */
    protected int doWork() {
        int maxErrors = -1; // -1 => never bail out on errors
        if (MAX_ERRORS_ARG != null) {
            maxErrors = Integer.parseInt(MAX_ERRORS_ARG);
        }

        // Start the timer
        startTime = System.currentTimeMillis();

        // Initialize the sam reader; a failure here means the header is unreadable.
        final SAMFileReader samReader;
        final CloseableIterator<SAMRecord> iter;
        try {
            samReader = getSamReader(INPUT_FILE);
            iter = samReader.iterator();
        } catch (Exception ioe) {
            System.out.println("[VALIDATION FAILURE IN HEADER]: " + ioe);
            ioe.printStackTrace();
            return 1;
        }

        int nRecords = 0;
        int nErrors = 0;
        try {
            SAMRecord lastRecord = null;
            while (iter.hasNext()) {
                nRecords++;
                try {
                    final SAMRecord ri = iter.next();
                    // BUGFIX: reference names are Strings and must be compared with equals(),
                    // not ==; the old identity comparison could silently skip the sort check.
                    if (CHECK_SORTED_ARG
                            && lastRecord != null
                            && ri.getReferenceName().equals(lastRecord.getReferenceName())
                            && ri.getAlignmentStart() < lastRecord.getAlignmentStart()) {
                        System.out.println("Not sorted!");
                        System.out.println(lastRecord.format());
                        System.out.println(ri.format() + "\n");
                        System.exit(-1); // historical behavior: hard-exit on unsorted input
                    }
                    lastRecord = ri;
                } catch (Exception ioe) {
                    nErrors++;
                    System.out.println("[VALIDATION FAILURE IN RECORD]: " + ioe);
                    ioe.printStackTrace();
                }

                if (maxErrors > -1 && nErrors >= maxErrors) {
                    System.out.println("Maximum number of errors encountered " + nErrors);
                    break;
                }

                if (nRecords % 100000 == 0) {
                    printProgress(nRecords, nErrors);
                }
            }
        } finally {
            // BUGFIX: the iterator and reader were previously never closed (resource leak).
            iter.close();
            samReader.close();
        }

        printProgress(nRecords, nErrors);
        return 0;
    }

    private static void usage() {
        System.err.println("USAGE: org.broadinstitute.sting.ValidateSAM <SAMFile|BAMFile>");
    }

    /**
     * Opens samFile with the validation stringency named by STRICTNESS_ARG
     * ("lenient" or "silent"; anything else, including null, means strict).
     */
    private SAMFileReader getSamReader(final File samFile) {
        final ValidationStringency strictness;
        // BUGFIX: use locale-independent, null-safe comparison instead of
        // STRICTNESS_ARG.toLowerCase().equals(...), which is locale-sensitive.
        if ("lenient".equalsIgnoreCase(STRICTNESS_ARG)) {
            strictness = SAMFileReader.ValidationStringency.LENIENT;
        } else if ("silent".equalsIgnoreCase(STRICTNESS_ARG)) {
            strictness = SAMFileReader.ValidationStringency.SILENT;
        } else {
            strictness = SAMFileReader.ValidationStringency.STRICT;
        }
        System.err.println("Strictness is " + strictness);
        final SAMFileReader samReader = new SAMFileReader(samFile, true);
        samReader.setValidationStringency(strictness);
        return samReader;
    }
}

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods; import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SSGGenotypeCall;
import java.io.*; import java.io.*;
@ -103,7 +103,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
} }
out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs); out.printf("%sAs\t%sCs\t%sTs\t%sGs\t",numAs,numCs,numTs,numGs);
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY); OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup); SSGGenotypeCall geno = G.callGenotypes(tracker, ref, pileup);

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoods; import org.broadinstitute.sting.gatk.walkers.genotyper.OldAndBustedGenotypeLikelihoods;
import org.broadinstitute.sting.playground.utils.*; import org.broadinstitute.sting.playground.utils.*;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.ReadBackedPileup;
@ -144,14 +144,14 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
char ref; char ref;
GenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq) OldAndBustedGenotypeLikelihoods Genotype(LocusContext context, double[] allele_likelihoods, double indel_alt_freq)
{ {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context); ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases(); String bases = pileup.getBases();
if (bases.length() == 0) if (bases.length() == 0)
{ {
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY); OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
return G; return G;
} }
@ -160,7 +160,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
ref = Character.toUpperCase(ref); ref = Character.toUpperCase(ref);
// Handle single-base polymorphisms. // Handle single-base polymorphisms.
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY); OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
for ( int i = 0; i < reads.size(); i++ ) for ( int i = 0; i < reads.size(); i++ )
{ {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
@ -195,7 +195,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return G; return G;
} }
double[] CountFreqs(GenotypeLikelihoods[] genotype_likelihoods) double[] CountFreqs(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
double[] allele_likelihoods = new double[4]; double[] allele_likelihoods = new double[4];
for (int x = 0; x < genotype_likelihoods.length; x++) for (int x = 0; x < genotype_likelihoods.length; x++)
@ -230,7 +230,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return allele_likelihoods; return allele_likelihoods;
} }
/* double CountIndelFreq(GenotypeLikelihoods[] genotype_likelihoods) /* double CountIndelFreq(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>(); HashMap<String, Double> indel_allele_likelihoods = new HashMap<String, Double>();
@ -260,7 +260,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
}*/ }*/
// Potential precision error here. // Potential precision error here.
double Compute_pD(GenotypeLikelihoods[] genotype_likelihoods) double Compute_pD(OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
double pD = 0; double pD = 0;
for (int i = 0; i < sample_names.size(); i++) for (int i = 0; i < sample_names.size(); i++)
@ -280,7 +280,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double[] allele_likelihoods = new double[4]; double[] allele_likelihoods = new double[4];
for (int i = 0; i < 4; i++) { allele_likelihoods[i] = 1e-6/3.0; } for (int i = 0; i < 4; i++) { allele_likelihoods[i] = 1e-6/3.0; }
allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(ref)] = 1.0-1e-6; allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(ref)] = 1.0-1e-6;
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()]; OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
for (int j = 0; j < sample_names.size(); j++) for (int j = 0; j < sample_names.size(); j++)
{ {
G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6); G[j] = Genotype(contexts[j], allele_likelihoods, 1e-6);
@ -296,7 +296,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double LOD(LocusContext[] contexts) double LOD(LocusContext[] contexts)
{ {
em_result = EM(contexts); em_result = EM(contexts);
GenotypeLikelihoods[] G = em_result.genotype_likelihoods; OldAndBustedGenotypeLikelihoods[] G = em_result.genotype_likelihoods;
pD = Compute_pD(G); pD = Compute_pD(G);
pNull = Compute_pNull(contexts); pNull = Compute_pNull(contexts);
lod = pD - pNull; lod = pD - pNull;
@ -306,11 +306,11 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
class EM_Result class EM_Result
{ {
String[] sample_names; String[] sample_names;
GenotypeLikelihoods[] genotype_likelihoods; OldAndBustedGenotypeLikelihoods[] genotype_likelihoods;
double[] allele_likelihoods; double[] allele_likelihoods;
int EM_N; int EM_N;
public EM_Result(List<String> sample_names, GenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods) public EM_Result(List<String> sample_names, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods, double[] allele_likelihoods)
{ {
this.sample_names = new String[1]; this.sample_names = new String[1];
this.sample_names = sample_names.toArray(this.sample_names); this.sample_names = sample_names.toArray(this.sample_names);
@ -337,7 +337,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
} }
double indel_alt_freq = 1e-4; double indel_alt_freq = 1e-4;
GenotypeLikelihoods[] G = new GenotypeLikelihoods[sample_names.size()]; OldAndBustedGenotypeLikelihoods[] G = new OldAndBustedGenotypeLikelihoods[sample_names.size()];
for (int i = 0; i < MAX_ITERATIONS; i++) for (int i = 0; i < MAX_ITERATIONS; i++)
{ {
for (int j = 0; j < sample_names.size(); j++) for (int j = 0; j < sample_names.size(); j++)
@ -387,9 +387,9 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return strand_score; return strand_score;
} }
GenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods) OldAndBustedGenotypeLikelihoods HardyWeinberg(double[] allele_likelihoods)
{ {
GenotypeLikelihoods G = new GenotypeLikelihoods(GenotypeLikelihoods.HUMAN_HETEROZYGOSITY); OldAndBustedGenotypeLikelihoods G = new OldAndBustedGenotypeLikelihoods(OldAndBustedGenotypeLikelihoods.HUMAN_HETEROZYGOSITY);
int k = 0; int k = 0;
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
@ -409,7 +409,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
else { return BaseUtils.baseIndexToSimpleBase(perm[2]); } else { return BaseUtils.baseIndexToSimpleBase(perm[2]); }
} }
double Compute_discovery_lod(char ref, GenotypeLikelihoods[] genotype_likelihoods) double Compute_discovery_lod(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
double pBest = 0; double pBest = 0;
double pRef = 0; double pRef = 0;
@ -427,7 +427,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(PickAlt(ref, allele_likelihoods))]; return allele_likelihoods[BaseUtils.simpleBaseToBaseIndex(PickAlt(ref, allele_likelihoods))];
} }
int Compute_n_ref(char ref, GenotypeLikelihoods[] genotype_likelihoods) int Compute_n_ref(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
int n = 0; int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++) for (int i = 0; i < genotype_likelihoods.length; i++)
@ -439,7 +439,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return n; return n;
} }
int Compute_n_het(char ref, GenotypeLikelihoods[] genotype_likelihoods) int Compute_n_het(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
int n = 0; int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++) for (int i = 0; i < genotype_likelihoods.length; i++)
@ -452,7 +452,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
return n; return n;
} }
int Compute_n_hom(char ref, GenotypeLikelihoods[] genotype_likelihoods) int Compute_n_hom(char ref, OldAndBustedGenotypeLikelihoods[] genotype_likelihoods)
{ {
int n = 0; int n = 0;
for (int i = 0; i < genotype_likelihoods.length; i++) for (int i = 0; i < genotype_likelihoods.length; i++)
@ -474,7 +474,7 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
double lod = LOD(contexts); double lod = LOD(contexts);
double strand_score = StrandScore(context); double strand_score = StrandScore(context);
//EM_Result em_result = EM(contexts); //EM_Result em_result = EM(contexts);
GenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods); OldAndBustedGenotypeLikelihoods population_genotype_likelihoods = HardyWeinberg(em_result.allele_likelihoods);
//double pD = Compute_pD(em_result.genotype_likelihoods); //double pD = Compute_pD(em_result.genotype_likelihoods);
//double pNull = Compute_pNull(contexts); //double pNull = Compute_pNull(contexts);

View File

@ -34,13 +34,13 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
return true; // We are keeping all the reads return true; // We are keeping all the reads
} }
protected class GenotypeLikelihoods protected class OldAndBustedGenotypeLikelihoods
{ {
public double[] likelihoods; public double[] likelihoods;
public String[] genotypes; public String[] genotypes;
GenotypeLikelihoods() OldAndBustedGenotypeLikelihoods()
{ {
likelihoods = new double[10]; likelihoods = new double[10];
genotypes = new String[10]; genotypes = new String[10];
@ -124,9 +124,9 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
int cCount = 0; int cCount = 0;
int gCount = 0; int gCount = 0;
int tCount = 0; int tCount = 0;
GenotypeLikelihoods glAll = new GenotypeLikelihoods(); OldAndBustedGenotypeLikelihoods glAll = new OldAndBustedGenotypeLikelihoods();
GenotypeLikelihoods glForward = new GenotypeLikelihoods(); OldAndBustedGenotypeLikelihoods glForward = new OldAndBustedGenotypeLikelihoods();
GenotypeLikelihoods glReverse = new GenotypeLikelihoods(); OldAndBustedGenotypeLikelihoods glReverse = new OldAndBustedGenotypeLikelihoods();
for ( int i = 0; i < reads.size(); i++ ) for ( int i = 0; i < reads.size(); i++ )
{ {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
@ -409,7 +409,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1(); return (gt.getAllele1() == refAllele)?gt.getAllele2():gt.getAllele1();
} }
protected String dumpTheories(GenotypeLikelihoods gl) { protected String dumpTheories(OldAndBustedGenotypeLikelihoods gl) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (int i = gl.genotypes.length-1; i >= 0; i--) for (int i = gl.genotypes.length-1; i >= 0; i--)
{ {