Mark's new unified calculation model is now officially implemented.
Because it doesn't actually use EM, it's no longer a subclass of the EM model. Note that you can't use it just yet because it doesn't actually emit calls (just prints to logger). I need to deal with general UG output tomorrow. Hold off until then, Mark, and then you can go wild. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1891 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
caa3187af8
commit
9b9744109c
|
|
@ -1,12 +1,13 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel {
|
public class AllMAFsGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||||
|
|
||||||
protected AllMAFsGenotypeCalculationModel() {}
|
protected AllMAFsGenotypeCalculationModel() {}
|
||||||
|
|
||||||
|
|
@ -14,39 +15,78 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
|
||||||
// we cache the results to avoid having to recompute everything
|
// we cache the results to avoid having to recompute everything
|
||||||
private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
|
private HashMap<Integer, double[]> nullAlleleFrequencyCache = new HashMap<Integer, double[]>();
|
||||||
|
|
||||||
// the allele frequencies
|
// because the Hardy-Weinberg values for a given frequency are constant,
|
||||||
private double[][] alleleFrequencies = new double[3][];
|
// we cache the results to avoid having to recompute everything
|
||||||
private double[][] oldAlleleFrequencies;
|
private HashMap<Double, double[]> hardyWeinbergValueCache = new HashMap<Double, double[]>();
|
||||||
|
|
||||||
// keep track of whether or not a given MAF is stable
|
// the allele frequency priors
|
||||||
private boolean[] frequencyStabilityArray = new boolean[3];
|
private double[] alleleFrequencyPriors;
|
||||||
|
|
||||||
|
// the allele frequency posteriors and P(f>0) mapped by alternate allele
|
||||||
|
private HashMap<Character, double[]> alleleFrequencyPosteriors = new HashMap<Character, double[]>();
|
||||||
|
private HashMap<Character, Double> PofFs = new HashMap<Character, Double>();
|
||||||
|
|
||||||
// the minimum and actual number of points in our allele frequency estimation
|
// the minimum and actual number of points in our allele frequency estimation
|
||||||
private static final int MIN_ESTIMATION_POINTS = 1000;
|
private static final int MIN_ESTIMATION_POINTS = 20; //1000;
|
||||||
private int estimationPoints;
|
private int frequencyEstimationPoints;
|
||||||
|
|
||||||
// the GenotypeLikelihoods map
|
// the GenotypeLikelihoods map
|
||||||
private HashMap<String, AlleleSpecificGenotypeLikelihoods> GLs = new HashMap<String, AlleleSpecificGenotypeLikelihoods>();
|
private HashMap<String, AlleleSpecificGenotypeLikelihoods> GLs = new HashMap<String, AlleleSpecificGenotypeLikelihoods>();
|
||||||
|
|
||||||
|
|
||||||
protected void initializeAlleleFrequencies(int numSamples, char ref) {
|
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||||
// first, initialize the stability array to "unstable"
|
|
||||||
for (int i = 0; i < 3; i++)
|
// keep track of the context for each sample, overall and separated by strand
|
||||||
frequencyStabilityArray[i] = false;
|
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
|
||||||
|
if ( contexts == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
calculate(ref, contexts, priors, StratifiedContext.OVERALL);
|
||||||
|
|
||||||
|
|
||||||
|
//double lod = overall.getPofD() - overall.getPofNull();
|
||||||
|
//logger.debug("lod=" + lod);
|
||||||
|
|
||||||
|
// calculate strand score
|
||||||
|
//EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD);
|
||||||
|
//EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE);
|
||||||
|
//double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
|
||||||
|
//double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
|
||||||
|
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||||
|
//double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
|
||||||
|
|
||||||
|
//logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
|
||||||
|
|
||||||
|
// generate the calls
|
||||||
|
//GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
|
||||||
|
//return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void calculate(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
||||||
|
|
||||||
|
initializeAlleleFrequencies(contexts.size());
|
||||||
|
|
||||||
|
initializeGenotypeLikelihoods(ref, contexts, priors, contextType);
|
||||||
|
|
||||||
|
calculateAlleleFrequencyPosteriors(ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeAlleleFrequencies(int numSamples) {
|
||||||
|
|
||||||
// calculate the number of estimation points to use:
|
// calculate the number of estimation points to use:
|
||||||
// it's either MIN_ESTIMATION_POINTS or 2N if that's larger
|
// it's either MIN_ESTIMATION_POINTS or 2N if that's larger
|
||||||
// (add 1 for allele frequency of zero)
|
// (add 1 for allele frequency of zero)
|
||||||
estimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
|
frequencyEstimationPoints = Math.max(MIN_ESTIMATION_POINTS, 2 * numSamples) + 1;
|
||||||
|
|
||||||
for (int alt = 0; alt < 3; alt++)
|
// set up the allele frequency priors
|
||||||
alleleFrequencies[alt] = getNullAlleleFrequencies(estimationPoints);
|
alleleFrequencyPriors = getNullalleleFrequencyPriors(frequencyEstimationPoints);
|
||||||
|
|
||||||
for (int i = 1; i < estimationPoints; i++)
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
logger.debug("Initial allele frequency for MAF=" + i + ": " + alleleFrequencies[0][i]);
|
logger.debug("Null allele frequency for MAF=" + i + ": " + alleleFrequencyPriors[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
private double[] getNullAlleleFrequencies(int N) {
|
private double[] getNullalleleFrequencyPriors(int N) {
|
||||||
double[] AFs = nullAlleleFrequencyCache.get(N);
|
double[] AFs = nullAlleleFrequencyCache.get(N);
|
||||||
|
|
||||||
// if it hasn't been calculated yet, do so now
|
// if it hasn't been calculated yet, do so now
|
||||||
|
|
@ -71,7 +111,7 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
|
||||||
return AFs.clone();
|
return AFs.clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
private void initializeGenotypeLikelihoods(char ref, HashMap<String, AlignmentContextBySample> contexts, DiploidGenotypePriors priors, StratifiedContext contextType) {
|
||||||
GLs.clear();
|
GLs.clear();
|
||||||
|
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
|
|
@ -87,76 +127,128 @@ public class AllMAFsGenotypeCalculationModel extends EMGenotypeCalculationModel
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void calculateAlleleFrequencyPosteriors() {
|
private void calculateAlleleFrequencyPosteriors(char ref) {
|
||||||
|
|
||||||
}
|
for ( char altAllele : BaseUtils.BASES ) {
|
||||||
|
if ( altAllele == ref )
|
||||||
protected void applyAlleleFrequencyToGenotypeLikelihoods() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
protected boolean isStable() {
|
|
||||||
// We consider the EM stable when the MAF doesn't change more than EM_STABILITY_METRIC
|
|
||||||
// We compute this separately for all of the alternate alleles
|
|
||||||
for (int i = 0; i < 3; i++) {
|
|
||||||
// if we've already determined that a MAF is stable, don't recalculate
|
|
||||||
if ( frequencyStabilityArray[i] )
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// determine change
|
double[] AFPosteriors = new double[frequencyEstimationPoints];
|
||||||
double AF_delta = 0.0;
|
|
||||||
for (int j = 1; j < estimationPoints; j++)
|
|
||||||
AF_delta += Math.abs(oldAlleleFrequencies[i][j] - alleleFrequencies[i][j]);
|
|
||||||
|
|
||||||
// if it's not stable, we're done
|
for ( AlleleSpecificGenotypeLikelihoods GL : GLs.values() ) {
|
||||||
if (AF_delta > EM_STABILITY_METRIC)
|
double[] PofDgivenG = GL.getNormalizedPosteriors(altAllele);
|
||||||
return false;
|
|
||||||
|
|
||||||
// it's stable, so record that fact in the stability array
|
// calculate the posterior weighted frequencies for this sample
|
||||||
frequencyStabilityArray[i] = true;
|
for (int i = 1; i < frequencyEstimationPoints; i++) {
|
||||||
|
double f = (double)i / (double)(frequencyEstimationPoints-1);
|
||||||
|
double[] hardyWeinberg = getHardyWeinbergValues(f);
|
||||||
|
|
||||||
|
double PofDgivenAFi = 0.0;
|
||||||
|
PofDgivenAFi += hardyWeinberg[GenotypeType.REF.ordinal()] * PofDgivenG[GenotypeType.REF.ordinal()];
|
||||||
|
PofDgivenAFi += hardyWeinberg[GenotypeType.HET.ordinal()] * PofDgivenG[GenotypeType.HET.ordinal()];
|
||||||
|
PofDgivenAFi += hardyWeinberg[GenotypeType.HOM.ordinal()] * PofDgivenG[GenotypeType.HOM.ordinal()];
|
||||||
|
|
||||||
|
AFPosteriors[i] += Math.log10(PofDgivenAFi);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
|
logger.debug("P(D|AF=" + i + ") for alt allele " + altAllele + ": " + AFPosteriors[i]);
|
||||||
|
|
||||||
|
// 1) multiply by null allele frequency priors to get AF posteriors
|
||||||
|
// 2) for precision purposes, we need to subtract the largest posterior value
|
||||||
|
// 3) convert back to normal space
|
||||||
|
double maxValue = findMaxEntry(AFPosteriors, true);
|
||||||
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
|
AFPosteriors[i] = Math.pow(10, alleleFrequencyPriors[i] + AFPosteriors[i] - maxValue);
|
||||||
|
|
||||||
|
// normalize
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
|
sum += AFPosteriors[i];
|
||||||
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
|
AFPosteriors[i] /= sum;
|
||||||
|
|
||||||
|
for (int i = 1; i < frequencyEstimationPoints; i++)
|
||||||
|
logger.debug("Allele frequency posterior estimate for alt allele " + altAllele + " MAF=" + i + ": " + AFPosteriors[i]);
|
||||||
|
logger.debug("P(f>0) for alt allele " + altAllele + ": " + sum);
|
||||||
|
|
||||||
|
alleleFrequencyPosteriors.put(altAllele, AFPosteriors);
|
||||||
|
PofFs.put(altAllele, sum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private double[] getHardyWeinbergValues(double f) {
|
||||||
|
double[] values = hardyWeinbergValueCache.get(f);
|
||||||
|
|
||||||
|
// if it hasn't been calculated yet, do so now
|
||||||
|
if ( values == null ) {
|
||||||
|
// p^2, 2pq, q^2
|
||||||
|
values = new double[] { Math.pow(1.0 - f, 2), 2.0 * (1.0 - f) * f, Math.pow(f, 2) };
|
||||||
|
hardyWeinbergValueCache.put(f, values);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we got here, then we're stable
|
return values;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected EMOutput computePofF(char ref) {
|
private static double findMaxEntry(double[] array, boolean skipZeroIndex) {
|
||||||
return null;
|
int index = skipZeroIndex ? 1 : 0;
|
||||||
|
double max = array[index++];
|
||||||
|
for (; index < array.length; index++) {
|
||||||
|
if ( array[index] > max )
|
||||||
|
max = array[index];
|
||||||
|
}
|
||||||
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private enum GenotypeType { REF, HET, HOM }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class for the allele-specific genotype likelihoods
|
* A class for the allele-specific GL posteriors
|
||||||
*/
|
*/
|
||||||
protected class AlleleSpecificGenotypeLikelihoods {
|
private class AlleleSpecificGenotypeLikelihoods {
|
||||||
private HashMap<String, Pair<double[], double[]>> GLs = new HashMap<String, Pair<double[], double[]>>();
|
// mapping from alt allele base to posteriors
|
||||||
|
private HashMap<Character, double[]> alleleGLs = new HashMap<Character, double[]>();
|
||||||
|
|
||||||
AlleleSpecificGenotypeLikelihoods(char ref, GenotypeLikelihoods GL) {
|
AlleleSpecificGenotypeLikelihoods(char ref, GenotypeLikelihoods GL) {
|
||||||
double[] likelihoods = GL.getLikelihoods();
|
|
||||||
double[] posteriors = GL.getPosteriors();
|
double[] posteriors = GL.getPosteriors();
|
||||||
|
|
||||||
|
// get the ref data
|
||||||
|
|
||||||
// get the ref likelihood/posterior
|
|
||||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||||
double refLikelihood = GL.getLikelihood(refGenotype);
|
double refPosterior = posteriors[refGenotype.ordinal()];
|
||||||
double refPosterior = GL.getPosterior(refGenotype);
|
|
||||||
String refStr = String.valueOf(ref);
|
String refStr = String.valueOf(ref);
|
||||||
|
|
||||||
for ( char base : BaseUtils.BASES ) {
|
for ( char base : BaseUtils.BASES ) {
|
||||||
if ( base == ref )
|
if ( base == ref )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// get hom var and het likelihoods
|
// get hom var and het data
|
||||||
double homLikelihood = GL.getLikelihood(DiploidGenotype.createHomGenotype(base));
|
DiploidGenotype hetGenotype = ref < base ? DiploidGenotype.valueOf(refStr + String.valueOf(base)) : DiploidGenotype.valueOf(String.valueOf(base) + refStr);
|
||||||
double hetLikelihood = GL.getLikelihood(DiploidGenotype.valueOf(refStr + String.valueOf(base)));
|
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(base);
|
||||||
|
double hetPosterior = posteriors[hetGenotype.ordinal()];
|
||||||
|
double homPosterior = posteriors[homGenotype.ordinal()];
|
||||||
|
|
||||||
|
// for precision purposes, we need to add (or really subtract, since everything is negative)
|
||||||
|
// the largest posterior value from all entries so that numbers don't get too small
|
||||||
|
double maxValue = Math.max(Math.max(refPosterior, hetPosterior), homPosterior);
|
||||||
|
double normalizedRefPosterior = Math.pow(10, refPosterior - maxValue);
|
||||||
|
double normalizedHetPosterior = Math.pow(10, hetPosterior - maxValue);
|
||||||
|
double normalizedHomPosterior = Math.pow(10, homPosterior - maxValue);
|
||||||
|
|
||||||
|
// normalize
|
||||||
|
double sum = normalizedRefPosterior + normalizedHetPosterior + normalizedHomPosterior;
|
||||||
|
normalizedRefPosterior /= sum;
|
||||||
|
normalizedHetPosterior /= sum;
|
||||||
|
normalizedHomPosterior /= sum;
|
||||||
|
|
||||||
|
double[] altPosteriors = new double[] { normalizedRefPosterior, normalizedHetPosterior, normalizedHomPosterior };
|
||||||
|
alleleGLs.put(base, altPosteriors);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public double[] getNormalizedPosteriors(char altAllele) {
|
||||||
|
return alleleGLs.get(altAllele);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
@ -108,65 +106,10 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
protected abstract boolean isStable();
|
protected abstract boolean isStable();
|
||||||
protected abstract EMOutput computePofF(char ref);
|
protected abstract EMOutput computePofF(char ref);
|
||||||
|
|
||||||
/**
|
|
||||||
* Create the mapping from sample to alignment context; also, fill in the base counts along the way.
|
|
||||||
* Returns null iff there are too many deletions at this position.
|
|
||||||
*/
|
|
||||||
protected HashMap<String, AlignmentContextBySample> splitContextBySample(AlignmentContext context) {
|
|
||||||
|
|
||||||
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
|
|
||||||
|
|
||||||
int deletionsInPile = 0;
|
|
||||||
List<SAMRecord> reads = context.getReads();
|
|
||||||
List<Integer> offsets = context.getOffsets();
|
|
||||||
|
|
||||||
for (int i = 0; i < reads.size(); i++) {
|
|
||||||
|
|
||||||
// get the read and offset
|
|
||||||
SAMRecord read = reads.get(i);
|
|
||||||
int offset = offsets.get(i);
|
|
||||||
|
|
||||||
// find the sample; special case for pools
|
|
||||||
String sample;
|
|
||||||
if ( POOLED_INPUT ) {
|
|
||||||
sample = "POOL";
|
|
||||||
} else {
|
|
||||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
|
||||||
if ( readGroup == null ) {
|
|
||||||
if ( assumedSingleSample == null )
|
|
||||||
throw new StingException("Missing read group for read " + read.getReadName());
|
|
||||||
sample = assumedSingleSample;
|
|
||||||
} else {
|
|
||||||
sample = readGroup.getSample();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// create a new context object if this is the first time we're seeing a read for this sample
|
|
||||||
AlignmentContextBySample myContext = contexts.get(sample);
|
|
||||||
if ( myContext == null ) {
|
|
||||||
myContext = new AlignmentContextBySample(context.getLocation());
|
|
||||||
contexts.put(sample, myContext);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for deletions
|
|
||||||
if ( offset == -1 ) {
|
|
||||||
// are there too many deletions in the pileup?
|
|
||||||
if ( ++deletionsInPile > maxDeletionsInPileup )
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// add the read to this sample's context
|
|
||||||
// note that bad bases are added to the context (for DoC calculations later)
|
|
||||||
myContext.add(read, offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class to keep track of the EM output
|
* A class to keep track of the EM output
|
||||||
*/
|
*/
|
||||||
protected class EMOutput {
|
protected class EMOutput {
|
||||||
private double pD, pNull, pF, MAF;
|
private double pD, pNull, pF, MAF;
|
||||||
private HashMap<String, GenotypeLikelihoods> GLs;
|
private HashMap<String, GenotypeLikelihoods> GLs;
|
||||||
|
|
@ -185,71 +128,4 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
public double getMAF() { return MAF; }
|
public double getMAF() { return MAF; }
|
||||||
public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
|
public HashMap<String, GenotypeLikelihoods> getGenotypeLikelihoods() { return GLs; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A class to keep track of the alignment context observed for a given sample.
|
|
||||||
* we currently store the overall context and strand-stratified sets,
|
|
||||||
* but any arbitrary stratification could be added.
|
|
||||||
*/
|
|
||||||
protected enum StratifiedContext { OVERALL, FORWARD, REVERSE }
|
|
||||||
protected class AlignmentContextBySample {
|
|
||||||
|
|
||||||
private AlignmentContext overall = null;
|
|
||||||
private AlignmentContext forward = null;
|
|
||||||
private AlignmentContext reverse = null;
|
|
||||||
private GenomeLoc loc;
|
|
||||||
|
|
||||||
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
|
|
||||||
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
|
|
||||||
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
|
|
||||||
|
|
||||||
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
|
|
||||||
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
|
|
||||||
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
|
|
||||||
|
|
||||||
|
|
||||||
AlignmentContextBySample(GenomeLoc loc) {
|
|
||||||
this.loc = loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public AlignmentContext getContext(StratifiedContext context) {
|
|
||||||
switch ( context ) {
|
|
||||||
case OVERALL: return getOverallContext();
|
|
||||||
case FORWARD: return getForwardContext();
|
|
||||||
case REVERSE: return getReverseContext();
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private AlignmentContext getOverallContext() {
|
|
||||||
if ( overall == null )
|
|
||||||
overall = new AlignmentContext(loc, allReads, allOffsets);
|
|
||||||
return overall;
|
|
||||||
}
|
|
||||||
|
|
||||||
private AlignmentContext getForwardContext() {
|
|
||||||
if ( forward == null )
|
|
||||||
forward = new AlignmentContext(loc, forwardReads, forwardOffsets);
|
|
||||||
return forward;
|
|
||||||
}
|
|
||||||
|
|
||||||
private AlignmentContext getReverseContext() {
|
|
||||||
if ( reverse == null )
|
|
||||||
reverse = new AlignmentContext(loc, reverseReads, reverseOffsets);
|
|
||||||
return reverse;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void add(SAMRecord read, int offset) {
|
|
||||||
if ( read.getReadNegativeStrandFlag() ) {
|
|
||||||
reverseReads.add(read);
|
|
||||||
reverseOffsets.add(offset);
|
|
||||||
} else {
|
|
||||||
forwardReads.add(read);
|
|
||||||
forwardOffsets.add(offset);
|
|
||||||
}
|
|
||||||
allReads.add(read);
|
|
||||||
allOffsets.add(offset);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
@ -4,10 +4,15 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The model representing how we calculate a genotype given the priors and a pile
|
* The model representing how we calculate a genotype given the priors and a pile
|
||||||
* of bases and quality scores
|
* of bases and quality scores
|
||||||
|
|
@ -100,4 +105,129 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
char ref,
|
char ref,
|
||||||
AlignmentContext context,
|
AlignmentContext context,
|
||||||
DiploidGenotypePriors priors);
|
DiploidGenotypePriors priors);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the mapping from sample to alignment contexts.
|
||||||
|
* @param context original alignment context
|
||||||
|
*
|
||||||
|
* @return stratified contexts, or null if there are too many deletions at this position
|
||||||
|
*/
|
||||||
|
protected HashMap<String, AlignmentContextBySample> splitContextBySample(AlignmentContext context) {
|
||||||
|
|
||||||
|
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
|
||||||
|
|
||||||
|
int deletionsInPile = 0;
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
List<Integer> offsets = context.getOffsets();
|
||||||
|
|
||||||
|
for (int i = 0; i < reads.size(); i++) {
|
||||||
|
|
||||||
|
// get the read and offset
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
int offset = offsets.get(i);
|
||||||
|
|
||||||
|
// find the sample; special case for pools
|
||||||
|
String sample;
|
||||||
|
if ( POOLED_INPUT ) {
|
||||||
|
sample = "POOL";
|
||||||
|
} else {
|
||||||
|
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||||
|
if ( readGroup == null ) {
|
||||||
|
if ( assumedSingleSample == null )
|
||||||
|
throw new StingException("Missing read group for read " + read.getReadName());
|
||||||
|
sample = assumedSingleSample;
|
||||||
|
} else {
|
||||||
|
sample = readGroup.getSample();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// create a new context object if this is the first time we're seeing a read for this sample
|
||||||
|
AlignmentContextBySample myContext = contexts.get(sample);
|
||||||
|
if ( myContext == null ) {
|
||||||
|
myContext = new AlignmentContextBySample(context.getLocation());
|
||||||
|
contexts.put(sample, myContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for deletions
|
||||||
|
if ( offset == -1 ) {
|
||||||
|
// are there too many deletions in the pileup?
|
||||||
|
if ( ++deletionsInPile > maxDeletionsInPileup )
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the read to this sample's context
|
||||||
|
// note that bad bases are added to the context (for DoC calculations later)
|
||||||
|
myContext.add(read, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return contexts;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class to keep track of the alignment context observed for a given sample.
|
||||||
|
* we currently store the overall context and strand-stratified sets,
|
||||||
|
* but any arbitrary stratification could be added.
|
||||||
|
*/
|
||||||
|
protected enum StratifiedContext { OVERALL, FORWARD, REVERSE }
|
||||||
|
protected class AlignmentContextBySample {
|
||||||
|
|
||||||
|
private AlignmentContext overall = null;
|
||||||
|
private AlignmentContext forward = null;
|
||||||
|
private AlignmentContext reverse = null;
|
||||||
|
private GenomeLoc loc;
|
||||||
|
|
||||||
|
private ArrayList<SAMRecord> allReads = new ArrayList<SAMRecord>();
|
||||||
|
private ArrayList<SAMRecord> forwardReads = new ArrayList<SAMRecord>();
|
||||||
|
private ArrayList<SAMRecord> reverseReads = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
|
private ArrayList<Integer> allOffsets = new ArrayList<Integer>();
|
||||||
|
private ArrayList<Integer> forwardOffsets = new ArrayList<Integer>();
|
||||||
|
private ArrayList<Integer> reverseOffsets = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
|
||||||
|
AlignmentContextBySample(GenomeLoc loc) {
|
||||||
|
this.loc = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AlignmentContext getContext(StratifiedContext context) {
|
||||||
|
switch ( context ) {
|
||||||
|
case OVERALL: return getOverallContext();
|
||||||
|
case FORWARD: return getForwardContext();
|
||||||
|
case REVERSE: return getReverseContext();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlignmentContext getOverallContext() {
|
||||||
|
if ( overall == null )
|
||||||
|
overall = new AlignmentContext(loc, allReads, allOffsets);
|
||||||
|
return overall;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlignmentContext getForwardContext() {
|
||||||
|
if ( forward == null )
|
||||||
|
forward = new AlignmentContext(loc, forwardReads, forwardOffsets);
|
||||||
|
return forward;
|
||||||
|
}
|
||||||
|
|
||||||
|
private AlignmentContext getReverseContext() {
|
||||||
|
if ( reverse == null )
|
||||||
|
reverse = new AlignmentContext(loc, reverseReads, reverseOffsets);
|
||||||
|
return reverse;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(SAMRecord read, int offset) {
|
||||||
|
if ( read.getReadNegativeStrandFlag() ) {
|
||||||
|
reverseReads.add(read);
|
||||||
|
reverseOffsets.add(offset);
|
||||||
|
} else {
|
||||||
|
forwardReads.add(read);
|
||||||
|
forwardOffsets.add(offset);
|
||||||
|
}
|
||||||
|
allReads.add(read);
|
||||||
|
allOffsets.add(offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -133,7 +133,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalize
|
// normalize
|
||||||
double sum = 0;
|
double sum = 0.0;
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
sum += alleleFrequencies[i];
|
sum += alleleFrequencies[i];
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue