diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 34a874313..8d9919c7c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -26,11 +26,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broad.tribble.util.variantcontext.Genotype; import java.io.PrintStream; @@ -57,17 +55,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; - protected class CalculatedAlleleFrequency { - - public double log10PNonRef; - public int altAlleles; - - public CalculatedAlleleFrequency(double log10PNonRef, int altAlleles) { - this.log10PNonRef = log10PNonRef; - this.altAlleles = altAlleles; - } - } - protected AlleleFrequencyCalculationModel(int N, Logger logger, PrintStream verboseWriter) { this.N = N; this.logger = logger; @@ -84,31 +71,19 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { */ protected abstract void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, + Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors); /** * Can be overridden by concrete subclasses - * @param contexts alignment contexts - * @param GLs genotype likelihoods + * @param vc variant context with genotype likelihoods * @param log10AlleleFrequencyPosteriors allele frequency results * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ - protected abstract Map assignGenotypes(Map contexts, - Map GLs, + protected abstract Map assignGenotypes(VariantContext vc, double[] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood); - - protected int getFilteredDepth(ReadBackedPileup pileup) { - int count = 0; - for ( PileupElement p : pileup ) { - if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) ) - count++; - } - - return count; - } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java index 448c1b04e..73d3b6ee0 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java @@ -31,8 +31,8 @@ public class BiallelicGenotypeLikelihoods { private String sample; private double[] GLs; - private double[] GPs; private Allele A, B; + private int depth; /** * Create a new object for sample with given alleles and genotype likelihoods @@ -43,32 +43,7 @@ public class BiallelicGenotypeLikelihoods { * @param log10AALikelihoods AA likelihoods * @param log10ABLikelihoods AB likelihoods * @param log10BBLikelihoods BB likelihoods - */ - public BiallelicGenotypeLikelihoods(String sample, - Allele A, - Allele B, - double log10AALikelihoods, - double log10ABLikelihoods, - double log10BBLikelihoods) { - this.sample = sample; - this.A = A; - this.B = B; - this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods}; - this.GPs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods}; - } - - /** - * Create a new object for sample with given alleles and genotype likelihoods & posteriors - * - * @param sample sample name - * @param A allele A - * @param B allele B - * @param log10AALikelihoods AA likelihoods - * @param log10ABLikelihoods AB likelihoods - * @param log10BBLikelihoods BB likelihoods - * @param log10AAPosteriors AA posteriors - * @param log10ABPosteriors AB posteriors - * @param log10BBPosteriors BB posteriors + * @param depth the read depth used in creating the likelihoods */ public BiallelicGenotypeLikelihoods(String sample, Allele A, @@ -76,14 +51,12 @@ public class BiallelicGenotypeLikelihoods { double log10AALikelihoods, double log10ABLikelihoods, double log10BBLikelihoods, - double log10AAPosteriors, - double log10ABPosteriors, - double log10BBPosteriors) { + int depth) { this.sample = sample; this.A = A; this.B = B; this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods}; - this.GPs = new double[]{log10AAPosteriors, log10ABPosteriors, log10BBPosteriors}; + this.depth = depth; } public String getSample() { @@ -106,22 +79,6 @@ public class BiallelicGenotypeLikelihoods { return GLs; } - public double getAAPosteriors() { - return GPs[0]; - } - - public double getABPosteriors() { - return GPs[1]; - } - - public double getBBPosteriors() { - return GPs[2]; - } - - public double[] getPosteriors() { - return GPs; - } - public Allele getAlleleA() { return A; } @@ -129,4 +86,9 @@ public class BiallelicGenotypeLikelihoods { public Allele getAlleleB() { return B; } + + public int getDepth() { + return depth; + } } + diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index dbaf3beab..90f38aaad 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -29,11 +29,9 @@ import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; -import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -146,11 +144,14 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if (Double.isInfinite(genotypeLikelihoods[k])) genotypeLikelihoods[k] = MINUS_LOG_INFINITY; } - GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(),vc.getReference(), + GLs.put(sample.getKey(), new BiallelicGenotypeLikelihoods(sample.getKey(), + vc.getReference(), vc.getAlternateAllele(0), - genotypeLikelihoods[0],genotypeLikelihoods[1], genotypeLikelihoods[2])); + genotypeLikelihoods[0], + genotypeLikelihoods[1], + genotypeLikelihoods[2], + getFilteredDepth(pileup))); //System.out.format("%4.2f %4.2f %4.2f\n",genotypeLikelihoods[0],genotypeLikelihoods[1], genotypeLikelihoods[2]); - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java index 20d4c3378..22e5bd78a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -29,12 +29,12 @@ import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; import java.io.PrintStream; @@ -73,7 +73,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, + Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { @@ -108,11 +108,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } */ - for ( String sample : GLs.keySet() ) { + for ( Map.Entry sample : GLs.entrySet() ) { j++; + if ( !sample.getValue().hasLikelihoods() ) + continue; + //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods()); - double[] genotypeLikelihoods = GLs.get(sample).getLikelihoods(); + double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector(); //double logDenominator = Math.log10(2.0*j*(2.0*j-1)); double logDenominator = log10Cache[2*j] + log10Cache[2*j-1]; @@ -120,8 +123,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()]; logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[GenotypeType.AA.ordinal()]; - int k = 1; - for (k=1; k <= 2*j; k++ ) { + for (int k=1; k <= 2*j; k++ ) { // if (k > 3 && isClearRefSite) // break; @@ -211,20 +213,22 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { /** * Can be overridden by concrete subclasses - * @param contexts alignment contexts - * @param GLs genotype likelihoods + * @param vc variant context with genotype likelihoods * @param log10AlleleFrequencyPosteriors allele frequency results * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ - public Map assignGenotypes(Map contexts, - Map GLs, + public Map assignGenotypes(VariantContext vc, double[] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood) { - HashMap calls = new HashMap(); + if ( !vc.isVariant() ) + throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); + Allele refAllele = vc.getReference(); + Allele altAllele = vc.getAlternateAllele(0); + Map GLs = vc.getGenotypes(); double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1]; @@ -244,10 +248,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } else { - for (String sample: GLs.keySet()) { - sampleIndices.add(sample); + for ( Map.Entry sample : GLs.entrySet() ) { + if ( !sample.getValue().hasLikelihoods() ) + continue; - double[] likelihoods = GLs.get(sample).getLikelihoods(); + double[] likelihoods = sample.getValue().getLikelihoods().getAsVector(); + sampleIndices.add(sample.getKey()); for (int k=0; k <= AFofMaxLikelihood; k++) { @@ -277,61 +283,56 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } + HashMap calls = new HashMap(); + int startIdx = AFofMaxLikelihood; for (int k = sampleIdx; k > 0; k--) { int bestGTguess; String sample = sampleIndices.get(k-1); - BiallelicGenotypeLikelihoods GL = GLs.get(sample); - Allele alleleA = GL.getAlleleA(); - Allele alleleB = GL.getAlleleB(); + Genotype g = GLs.get(sample); + if ( !g.hasLikelihoods() ) + continue; if (SIMPLE_GREEDY_GENOTYPER) - bestGTguess = Utils.findIndexOfMaxEntry(GLs.get(sample).getLikelihoods()); + bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector()); else { int newIdx = tracebackArray[k][startIdx]; bestGTguess = startIdx - newIdx; startIdx = newIdx; } - HashMap attributes = new HashMap(); ArrayList myAlleles = new ArrayList(); - AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); - - if (context.hasBasePileup()) - attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getBasePileup())); - - else if (context.hasExtendedEventPileup()) - attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(context.getExtendedEventPileup())); double qual; - double[] posteriors = GLs.get(sample).getPosteriors(); + double[] likelihoods = g.getLikelihoods().getAsVector(); if (bestGTguess == 0) { - myAlleles.add(alleleA); - myAlleles.add(alleleA); - qual = posteriors[0] - Math.max(posteriors[1],posteriors[2]); + myAlleles.add(refAllele); + myAlleles.add(refAllele); + qual = likelihoods[0] - Math.max(likelihoods[1], likelihoods[2]); } else if(bestGTguess == 1) { - myAlleles.add(alleleA); - myAlleles.add(alleleB); - qual = posteriors[1] - Math.max(posteriors[0],posteriors[2]); + myAlleles.add(refAllele); + myAlleles.add(altAllele); + qual = likelihoods[1] - Math.max(likelihoods[0], likelihoods[2]); } else { - myAlleles.add(alleleB); - myAlleles.add(alleleB); - qual = posteriors[2] - Math.max(posteriors[1],posteriors[0]); + myAlleles.add(altAllele); + myAlleles.add(altAllele); + qual = likelihoods[2] - Math.max(likelihoods[1], likelihoods[0]); } if (qual < 0) { // QUAL can be negative if the chosen genotype is not the most likely one individually. // In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen on - double[] normalized = MathUtils.normalizeFromLog10(posteriors); + double[] normalized = MathUtils.normalizeFromLog10(likelihoods); double chosenGenotype = normalized[bestGTguess]; qual = -1.0 * Math.log10(1.0 - chosenGenotype); } - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods(), UnifiedGenotyperV2.DEFAULT_GENOTYPE_LIKELIHOODS_KEY); - attributes.put(likelihoods.getKey(), likelihoods.getAsString()); + HashMap attributes = new HashMap(g.getAttributes()); + attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(likelihoods)); calls.put(sample, new Genotype(sample, myAlleles, qual, null, attributes, false)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 5da002d59..fac57a2f6 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -30,6 +30,8 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broad.tribble.util.variantcontext.Allele; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.Map; @@ -74,4 +76,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { StratifiedAlignmentContext.StratifiedContextType contextType, GenotypePriors priors, Map GLs); + + protected int getFilteredDepth(ReadBackedPileup pileup) { + int count = 0; + for ( PileupElement p : pileup ) { + if ( DiploidSNPGenotypeLikelihoods.usableBase(p, true) ) + count++; + } + + return count; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java index 23f52471d..642ca1106 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -29,13 +29,14 @@ import org.apache.log4j.Logger; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.*; import java.io.PrintStream; @@ -55,7 +56,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { protected void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, + Map GLs, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { initializeAFMatrix(GLs); @@ -86,43 +87,43 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { /** * Overrides the super class - * @param contexts alignment contexts - * @param GLs genotype likelihoods + * @param vc variant context with genotype likelihoods * @param log10AlleleFrequencyPosteriors allele frequency results + * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ - protected Map assignGenotypes(Map contexts, - Map GLs, + protected Map assignGenotypes(VariantContext vc, double[] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood) { + if ( !vc.isVariant() ) + throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); + + Allele refAllele = vc.getReference(); + Allele altAllele = vc.getAlternateAllele(0); HashMap calls = new HashMap(); // first, the potential alt calls for ( String sample : AFMatrix.getSamples() ) { - BiallelicGenotypeLikelihoods GL = GLs.get(sample); - Allele alleleA = GL.getAlleleA(); - Allele alleleB = GL.getAlleleB(); + Genotype g = vc.getGenotype(sample); // set the genotype and confidence Pair AFbasedGenotype = AFMatrix.getGenotype(AFofMaxLikelihood, sample); ArrayList myAlleles = new ArrayList(); if ( AFbasedGenotype.first == GenotypeType.AA.ordinal() ) { - myAlleles.add(alleleA); - myAlleles.add(alleleA); + myAlleles.add(refAllele); + myAlleles.add(refAllele); } else if ( AFbasedGenotype.first == GenotypeType.AB.ordinal() ) { - myAlleles.add(alleleA); - myAlleles.add(alleleB); + myAlleles.add(refAllele); + myAlleles.add(altAllele); } else { // ( AFbasedGenotype.first == GenotypeType.BB.ordinal() ) - myAlleles.add(alleleB); - myAlleles.add(alleleB); + myAlleles.add(altAllele); + myAlleles.add(altAllele); } - HashMap attributes = new HashMap(); - attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup())); - - GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods(), UnifiedGenotyperV2.DEFAULT_GENOTYPE_LIKELIHOODS_KEY); - attributes.put(likelihoods.getKey(), likelihoods.getAsString()); + HashMap attributes = new HashMap(g.getAttributes()); + attributes.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.GLsToPLs(g.getLikelihoods().getAsVector())); calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false)); } @@ -130,11 +131,13 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { return calls; } - private void initializeAFMatrix(Map GLs) { + private void initializeAFMatrix(Map GLs) { AFMatrix.clear(); - for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) - AFMatrix.setLikelihoods(GL.getPosteriors(), GL.getSample()); + for ( Genotype g : GLs.values() ) { + if ( g.hasLikelihoods() ) + AFMatrix.setLikelihoods(g.getLikelihoods().getAsVector(), g.getSampleName()); + } } protected static class AlleleFrequencyMatrix { @@ -165,14 +168,6 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { samplesToGenotypesPerAF.clear(); } - public void setLikelihoods(double AA, double AB, double BB, String sample) { - int index = samples.size(); - samples.add(sample); - matrix[index][GenotypeType.AA.ordinal()] = AA; - matrix[index][GenotypeType.AB.ordinal()] = AB; - matrix[index][GenotypeType.BB.ordinal()] = BB; - } - public void setLikelihoods(double[] GLs, String sample) { int index = samples.size(); samples.add(sample); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index e744c5ca3..aa83e1dca 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -85,12 +85,11 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // create the GenotypeLikelihoods object DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods(UAC.baseModel, (DiploidSNPGenotypePriors)priors, UAC.defaultPlatform); - int nGoodBases = GL.add(pileup, true, UAC.CAP_BASE_QUALITY); + int nGoodBases = GL.add(pileup, true, true); if ( nGoodBases == 0 ) continue; double[] likelihoods = GL.getLikelihoods(); - double[] posteriors = GL.getPosteriors(); DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase); DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele); @@ -101,9 +100,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC likelihoods[refGenotype.ordinal()], likelihoods[hetGenotype.ordinal()], likelihoods[homGenotype.ordinal()], - posteriors[refGenotype.ordinal()], - posteriors[hetGenotype.ordinal()], - posteriors[homGenotype.ordinal()])); + getFilteredDepth(pileup))); } return refAllele; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 34dcd7d54..402f5ec17 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -93,9 +93,6 @@ public class UnifiedArgumentCollection { @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; - @Argument(fullName = "cap_base_quality_by_mapping_quality", shortName = "cap_base_qual", doc = "Cap the base quality of any given base by its read's mapping quality", required = false) - public boolean CAP_BASE_QUALITY = false; - public UnifiedArgumentCollection clone() { UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); @@ -117,7 +114,6 @@ public class UnifiedArgumentCollection { uac.MAX_MISMATCHES = MAX_MISMATCHES; uac.USE_BADLY_MATED_READS = USE_BADLY_MATED_READS; uac.MAX_DELETION_FRACTION = MAX_DELETION_FRACTION; - uac.CAP_BASE_QUALITY = CAP_BASE_QUALITY; return uac; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 44ffa4f29..7d6a26e62 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.genotyper; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.Allele; @@ -133,37 +134,115 @@ public class UnifiedGenotyperEngine { } /** - * Compute at a given locus. + * Compute full calls at a given locus. * * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus * @return the VariantCallContext object */ - public VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { + public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { + Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); + VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + return vc == null ? null : calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc); + } - // initialize the GenotypeCalculationModel for this thread if that hasn't been done yet + /** + * Compute GLs at a given locus. + * + * @param tracker the meta data tracker + * @param refContext the reference base + * @param rawContext contextual information around the locus + * @return the VariantContext object + */ + public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { + Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); + return calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + } + + private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType type) { + if ( stratifiedContexts == null ) + return null; + + // initialize the data for this thread if that hasn't been done yet if ( glcm.get() == null ) { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); + } + + Map GLs = new HashMap(); + Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, type, genotypePriors, GLs); + + return createVariantContextFromLikelihoods(refContext, refAllele, GLs); + } + + private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map GLs) { + // no-call everyone for now + List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); + + Set alleles = new HashSet(); + alleles.add(refAllele); + boolean addedAltAllele = false; + + HashMap genotypes = new HashMap(); + for ( BiallelicGenotypeLikelihoods GL : GLs.values() ) { + if ( !addedAltAllele ) { + addedAltAllele = true; + alleles.add(GL.getAlleleA()); + alleles.add(GL.getAlleleB()); + } + + HashMap attributes = new HashMap(); + GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods(), VCFConstants.GENOTYPE_LIKELIHOODS_KEY); + attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth()); + attributes.put(likelihoods.getKey(), likelihoods.getAsString()); + + genotypes.put(GL.getSample(), new Genotype(GL.getSample(), noCall, Genotype.NO_NEG_LOG_10PERROR, null, attributes, false)); + } + + GenomeLoc loc = refContext.getLocus(); + int endLoc = calculateEndPos(alleles, refAllele, loc); + + return new VariantContext("UG_call", + loc.getContig(), + loc.getStart(), + endLoc, + alleles, + genotypes, + VariantContext.NO_NEG_LOG_10PERROR, + null, + null); + } + + /** + * Compute genotypes at a given locus. + * + * @param tracker the meta data tracker + * @param refContext the reference base + * @param rawContext contextual information around the locus + * @param vc the GL-annotated variant context + * @return the VariantCallContext object + */ + public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) { + Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext); + return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc); + } + + private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc) { + + // initialize the data for this thread if that hasn't been done yet + if ( afcm.get() == null ) { log10AlleleFrequencyPosteriors.set(new double[N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } - BadBaseFilter badReadPileupFilter = new BadBaseFilter(refContext, UAC); - Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, badReadPileupFilter); - if ( stratifiedContexts == null ) - return null; - - Map GLs = new HashMap(); - Allele refAllele = glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE, genotypePriors, GLs); - // estimate our confidence in a reference call and return - if ( GLs.size() == 0 ) + if ( vc.getNSamples() == 0 ) return estimateReferenceConfidence(stratifiedContexts, genotypePriors.getHeterozygosity(), false, 1.0); // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(tracker, refContext, vc.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); @@ -204,17 +283,11 @@ public class UnifiedGenotyperEngine { } // create the genotypes - Map genotypes = afcm.get().assignGenotypes(stratifiedContexts, GLs, log10AlleleFrequencyPosteriors.get(), bestAFguess); - - // next, get the variant context data (alleles, attributes, etc.) - HashSet alleles = new HashSet(); - alleles.add(refAllele); - for ( Genotype g : genotypes.values() ) - alleles.addAll(g.getAlleles()); + Map genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); // print out stats if we have a writer if ( verboseWriter != null ) - printVerboseData(refContext.getLocus().toString(), alleles, PofF, phredScaledConfidence, normalizedPosteriors); + printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors); // *** note that calculating strand bias involves overwriting data structures, so we do that last HashMap attributes = new HashMap(); @@ -237,20 +310,18 @@ public class UnifiedGenotyperEngine { if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod - GLs.clear(); - glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD, genotypePriors, GLs); + VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(tracker, refContext, vcForward.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod - GLs.clear(); - glcm.get().getLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE, genotypePriors, GLs); + VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, StratifiedAlignmentContext.StratifiedContextType.REVERSE); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(tracker, refContext, vcReverse.getGenotypes(), log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); @@ -271,26 +342,10 @@ public class UnifiedGenotyperEngine { GenomeLoc loc = refContext.getLocus(); - // todo - temp fix until we can deal with extended events properly - long endLoc; - // for indels, stop location is one more than ref allele length - boolean isSNP = true; - for (Allele a:alleles){ - if (a.getBaseString().length() != 1) { - isSNP = false; - break; - } - } - - if (isSNP) - endLoc = loc.getStart(); - else - endLoc = loc.getStart() + refAllele.length(); - - //VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); - VariantContext vc = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, - alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); + int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); + VariantContext vcCall = new VariantContext("UG_call", loc.getContig(), loc.getStart(), endLoc, + vc.getAlleles(), genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence, atTriggerTrack) ? null : filter, attributes); if ( annotationEngine != null ) { // first off, we want to use the *unfiltered* context for the annotations @@ -299,23 +354,42 @@ public class UnifiedGenotyperEngine { pileup = rawContext.getExtendedEventPileup(); else if (rawContext.hasBasePileup()) pileup = rawContext.getBasePileup(); - stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE); - Collection variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vc); - vc = variantContexts.iterator().next(); //We know the collection will always have exactly 1 element. + Collection variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); + vcCall = variantContexts.iterator().next(); // we know the collection will always have exactly 1 element. } - VariantCallContext call = new VariantCallContext(vc, passesCallThreshold(phredScaledConfidence, atTriggerTrack)); + VariantCallContext call = new VariantCallContext(vcCall, passesCallThreshold(phredScaledConfidence, atTriggerTrack)); call.setRefBase(refContext.getBase()); return call; } + private int calculateEndPos(Set alleles, Allele refAllele, GenomeLoc loc) { + // TODO - temp fix until we can deal with extended events properly + // for indels, stop location is one more than ref allele length + boolean isSNP = true; + for (Allele a : alleles){ + if (a.getBaseString().length() != 1) { + isSNP = false; + break; + } + } + + int endLoc = loc.getStart(); + if ( !isSNP ) + endLoc += refAllele.length(); + + return endLoc; + } + private static boolean isValidDeletionFraction(double d) { return ( d >= 0.0 && d <= 1.0 ); } - private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, BadBaseFilter badBaseFilter) { + private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext) { + BadBaseFilter badReadPileupFilter = new BadBaseFilter(refContext, UAC); + Map stratifiedContexts = null; if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.DINDEL && rawContext.hasExtendedEventPileup() ) { @@ -342,7 +416,7 @@ public class UnifiedGenotyperEngine { stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE); // filter the reads (and test for bad pileups) - if ( !filterPileup(stratifiedContexts, badBaseFilter) ) + if ( !filterPileup(stratifiedContexts, badReadPileupFilter) ) return null; } @@ -382,9 +456,9 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING); } - protected void printVerboseData(String pos, Set alleles, double PofF, double phredScaledConfidence, double[] normalizedPosteriors) { + protected void printVerboseData(String pos, VariantContext vc, double PofF, double phredScaledConfidence, double[] normalizedPosteriors) { Allele refAllele = null, altAllele = null; - for ( Allele allele : alleles ) { + for ( Allele allele : vc.getAlleles() ) { if ( allele.isReference() ) refAllele = allele; else diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java index 7d6e73e9d..c875865e9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/genotyper/UnifiedGenotyperV2.java @@ -49,7 +49,6 @@ import java.io.PrintStream; @By(DataSource.REFERENCE) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) public class UnifiedGenotyperV2 extends LocusWalker implements TreeReducible { - public static final String DEFAULT_GENOTYPE_LIKELIHOODS_KEY = VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY; @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -58,10 +57,6 @@ public class UnifiedGenotyperV2 extends LocusWalker